Re: rdomain, mpe, ldpd, OpenBGPD and PF

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Claudio Jeker
Date: Monday, May 24, 2010 - 6:11 am

On Mon, May 24, 2010 at 05:23:00PM +0700, Insan Praja SW wrote:

Passing traffic between VPNs is either done in pf(4) by setting the rtable
on a rule or by importing routes in BGP (import/export-target).
The first method is much more flexible but more static.

First of all you need the attached diff to play with the kernel MPLS part.
With that in you can start playing with the various parts.
1. You need to enable MPLS on the interfaces that carry MPLS traffic.
   In my test I use a vlan for this:
	# more /etc/hostname.vlan2003 
	vlan 2003 vlandev sis0
	inet 10.83.128.26 255.255.255.248 NONE
	mpls

2. Then it is best to have a loopback interface:
	# more /etc/hostname.lo1
	inet 10.83.66.23 255.255.255.255 NONE

3. LDP config:
	router-id 10.83.66.23
	distribution independent
	retention liberal
	advertisement unsolicited
	interface lo1 {
	}
	interface vlan2003 {
	}

4. I use ospfd as IGP, there is nothing special needed here.

5. create a rdomain 1:
	# more /etc/hostname.vlan2017
	rdomain 1
	vlan 2017 vlandev sis0
	inet 192.168.220.1 255.255.255.0

6. create a mpe(4) in rdomain 1:
	# more /etc/hostname.mpe0
	rdomain 1 mplslabel 543
	inet 10.83.66.129 255.255.255.255

Note: it is necessary to have an IP on mpe(4) but it does not matter which
one you pick. I normally use the loopback IP but maybe using the vlan2017
IP would be smarter.

7. BGP config:
	AS 65003
	router-id 10.83.66.23
	listen on 10.83.66.23
	rdomain 1 {
		descr "CUSTOMER1"
		rd 65003:1
		import-target rt 65003:1
		export-target rt 65003:1
		depend on mpe0
		network 192.168.220/24
	}
	group ibgp {
		announce IPv4 unicast
		announce IPv4 vpn
		remote-as 65003
		local-address 10.83.66.23
		neighbor 10.83.66.2 {
			descr c2
		}
	}

Start ospfd, bgpd, and ldpd and hope for the best (check that all sessions
come up). Set up something similar on a second system.
Use e.g. ping -V1 -I 192.168.220.1 192.168.221.1 to test the VPN.

It is possible to use gif/gre instead of LDP -- just use a gre interface
in point 1 and skip everything that needs LDP.

-- 
:wq Claudio

Index: sbin/ifconfig/ifconfig.8
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.200
diff -u -p -r1.200 ifconfig.8
--- sbin/ifconfig/ifconfig.8	7 May 2010 06:17:34 -0000	1.200
+++ sbin/ifconfig/ifconfig.8	24 May 2010 12:48:34 -0000
@@ -347,6 +347,11 @@ this directive is used to select between
 and 802.11g
 .Pq Dq 11g
 operating modes.
+.It Cm mpls
+Enable Multiprotocol Label Switching (MPLS) on the interface. It will be
+able to send and receive MPLS traffic.
+.It Fl mpls
+Disable MPLS on the interface.
 .It Cm mtu Ar value
 Set the MTU for this device to the given
 .Ar value .
Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.232
diff -u -p -r1.232 ifconfig.c
--- sbin/ifconfig/ifconfig.c	6 May 2010 12:58:40 -0000	1.232
+++ sbin/ifconfig/ifconfig.c	6 May 2010 20:34:51 -0000
@@ -191,6 +191,7 @@ void	unsetmediaopt(const char *, int);
 void	setmediainst(const char *, int);
 void	settimeslot(const char *, int);
 void	timeslot_status(void);
+void	setifmpls(const char *, int);
 void	setmpelabel(const char *, int);
 void	setvlantag(const char *, int);
 void	setvlanprio(const char *, int);
@@ -346,6 +347,8 @@ const struct	cmd {
 	{ "-rtlabel",	-1,		0,		setifrtlabel },
 	{ "range",	NEXTARG,	0,		setatrange },
 	{ "phase",	NEXTARG,	0,		setatphase },
+	{ "mpls",	IFXF_MPLS,	0,		setifxflags },
+	{ "-mpls",	-IFXF_MPLS,	0,		setifxflags },
 	{ "mplslabel",	NEXTARG,	0,		setmpelabel },
 	{ "advbase",	NEXTARG,	0,		setcarp_advbase },
 	{ "advskew",	NEXTARG,	0,		setcarp_advskew },
@@ -3252,6 +3255,7 @@ mpe_status(void)
 	printf("\tmpls label: %d\n", shim.shim_label);
 }
 
+/* ARGSUSED */
 void
 setmpelabel(const char *val, int d)
 {
Index: sys/conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/conf/GENERIC,v
retrieving revision 1.156
diff -u -p -r1.156 GENERIC
--- sys/conf/GENERIC	7 May 2010 13:16:18 -0000	1.156
+++ sys/conf/GENERIC	7 May 2010 13:38:24 -0000
@@ -68,7 +68,7 @@ option		PPP_DEFLATE
 #option		PIPEX		# Pppac IP EXtension, for npppd
 option		MROUTING	# Multicast router
 #option		PIM		# Protocol Independent Multicast
-#option		MPLS		# Multi-Protocol Label Switching
+option		MPLS		# Multi-Protocol Label Switching
 
 #mpath0		at root		# SCSI Multipathing
 #scsibus*	at mpath?
@@ -101,7 +101,7 @@ pseudo-device	carp		# CARP protocol supp
 pseudo-device	gif		# IPv[46] over IPv[46] tunnel (RFC1933)
 pseudo-device	gre		# GRE encapsulation interface
 pseudo-device	loop		# network loopback
-#pseudo-device	mpe		# MPLS PE interface
+pseudo-device	mpe		# MPLS PE interface
 pseudo-device	ppp		# PPP
 pseudo-device	pppoe		# PPP over Ethernet (RFC 2516)
 pseudo-device	sl		# CSLIP
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.215
diff -u -p -r1.215 if.c
--- sys/net/if.c	8 May 2010 11:07:20 -0000	1.215
+++ sys/net/if.c	12 May 2010 08:42:56 -0000
@@ -107,6 +107,10 @@
 #include <netinet6/ip6_var.h>
 #endif
 
+#ifdef MPLS
+#include <netmpls/mpls.h>
+#endif
+
 #if NBPFILTER > 0
 #include <net/bpf.h>
 #endif
@@ -1345,6 +1349,26 @@ ifioctl(struct socket *so, u_long cmd, c
 			}
 		}
 #endif
+
+#ifdef MPLS
+		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
+		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
+			int s = splnet();
+			ifp->if_xflags |= IFXF_MPLS;
+			ifp->if_ll_output = ifp->if_output; 
+			ifp->if_output = mpls_output;
+			splx(s);
+		}
+		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
+		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
+			int s = splnet();
+			ifp->if_xflags &= ~IFXF_MPLS;
+			ifp->if_output = ifp->if_ll_output; 
+			ifp->if_ll_output = NULL;
+			splx(s);
+		}
+#endif
+
 
 		ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
 			(ifr->ifr_flags &~ IFXF_CANTCHANGE);
Index: sys/net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.115
diff -u -p -r1.115 if.h
--- sys/net/if.h	17 Apr 2010 17:46:32 -0000	1.115
+++ sys/net/if.h	11 May 2010 11:49:48 -0000
@@ -266,6 +266,10 @@ struct ifnet {				/* and the entries */
 					/* output routine (enqueue) */
 	int	(*if_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
 		     struct rtentry *);
+
+					/* link level output function */
+	int	(*if_ll_output)(struct ifnet *, struct mbuf *,
+		    struct sockaddr *, struct rtentry *);
 					/* initiate output routine */
 	void	(*if_start)(struct ifnet *);
 					/* ioctl routine */
@@ -326,6 +330,7 @@ struct ifnet {				/* and the entries */
 #define IFXF_TXREADY		0x1		/* interface is ready to tx */
 #define	IFXF_NOINET6		0x2		/* don't do inet6 */
 #define	IFXF_INET6_PRIVACY	0x4		/* autoconf privacy extension */
+#define	IFXF_MPLS		0x8		/* supports MPLS */
 
 #define	IFXF_CANTCHANGE \
 	(IFXF_TXREADY)
Index: sys/net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.142
diff -u -p -r1.142 if_ethersubr.c
--- sys/net/if_ethersubr.c	7 May 2010 13:33:16 -0000	1.142
+++ sys/net/if_ethersubr.c	7 May 2010 13:34:21 -0000
@@ -277,12 +277,7 @@ ether_output(ifp0, m0, dst, rt0)
 			else
 				senderr(EHOSTUNREACH);
 		}
-#ifdef MPLS
-		if (rt->rt_flags & RTF_MPLS) {
-			if ((m = mpls_output(m, rt)) == NULL)
-				senderr(EHOSTUNREACH);
-		}
-#endif
+
 		if (rt->rt_flags & RTF_GATEWAY) {
 			if (rt->rt_gwroute == 0)
 				goto lookup;
@@ -299,7 +294,6 @@ ether_output(ifp0, m0, dst, rt0)
 			    time_second < rt->rt_rmx.rmx_expire)
 				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
-
 	switch (dst->sa_family) {
 
 #ifdef INET
@@ -310,12 +304,7 @@ ether_output(ifp0, m0, dst, rt0)
 		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
 		    !m->m_pkthdr.pf.routed)
 			mcopy = m_copy(m, 0, (int)M_COPYALL);
-#ifdef MPLS
-		if (rt0 != NULL && rt0->rt_flags & RTF_MPLS)
-			etype = htons(ETHERTYPE_MPLS);
-		else
-#endif
-			etype = htons(ETHERTYPE_IP);
+		etype = htons(ETHERTYPE_IP);
 		break;
 #endif
 #ifdef INET6
@@ -382,6 +371,9 @@ ether_output(ifp0, m0, dst, rt0)
 		else
 			senderr(EHOSTUNREACH);
 
+		if (!ISSET(ifp->if_xflags, IFXF_MPLS))
+			senderr(ENETUNREACH);
+
 		switch (dst->sa_family) {
 			case AF_LINK:
 				if (((struct sockaddr_dl *)dst)->sdl_alen <
@@ -490,7 +482,6 @@ ether_output(ifp0, m0, dst, rt0)
 		}
 	}
 #endif
-
 	mflags = m->m_flags;
 	len = m->m_pkthdr.len;
 	s = splnet();
Index: sys/net/if_mpe.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpe.c,v
retrieving revision 1.18
diff -u -p -r1.18 if_mpe.c
--- sys/net/if_mpe.c	9 Jan 2010 20:29:42 -0000	1.18
+++ sys/net/if_mpe.c	19 May 2010 07:46:51 -0000
@@ -64,6 +64,7 @@ int	mpeioctl(struct ifnet *, u_long, cad
 void	mpestart(struct ifnet *);
 int	mpe_clone_create(struct if_clone *, int);
 int	mpe_clone_destroy(struct ifnet *);
+int	mpe_newlabel(struct ifnet *, int, struct shim_hdr *);
 
 LIST_HEAD(, mpe_softc)	mpeif_list;
 struct if_clone	mpe_cloner =
@@ -90,7 +91,7 @@ mpe_clone_create(struct if_clone *ifc, i
 	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
 		return (ENOMEM);
 
-	mpeif->sc_shim.shim_label = MPLS_BOS_MASK | htonl(mpls_defttl);
+	mpeif->sc_shim.shim_label = 0;
 	mpeif->sc_unit = unit;
 	ifp = &mpeif->sc_if;
 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
@@ -107,7 +108,7 @@ mpe_clone_create(struct if_clone *ifc, i
 	if_attach(ifp);
 	if_alloc_sadl(ifp);
 #if NBPFILTER > 0
-	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, MPE_HDRLEN);
+	bpfattach(&ifp->if_bpf, ifp, DLT_NULL, sizeof(u_int32_t));
 #endif
 
 	s = splnet();
@@ -132,6 +133,7 @@ mpe_clone_destroy(struct ifnet *ifp)
 	return (0);
 }
 
+struct sockaddr_storage	 mpedst;
 /*
  * Start output on the mpe interface.
  */
@@ -139,9 +141,10 @@ void
 mpestart(struct ifnet *ifp)
 {
 	struct mbuf 		*m;
-	struct mpe_softc	*ifm;
-	struct shim_hdr		 shim;
+	struct sockaddr		*sa = (struct sockaddr *)&mpedst;
 	int			 s;
+	sa_family_t		 af;
+	struct rtentry		*rt;
 
 	for (;;) {
 		s = splnet();
@@ -151,30 +154,46 @@ mpestart(struct ifnet *ifp)
 		if (m == NULL)
 			return;
 
-#ifdef DIAGNOSTIC
-		if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
-			printf("%s: trying to send packet on wrong domain. "
-			    "if %d vs. mbuf %d\n", ifp->if_xname,
-			    ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain));
+		af = *mtod(m, sa_family_t *);
+		m_adj(m, sizeof(af));
+		switch (af) {
+		case AF_INET:
+			bzero(sa, sizeof(struct sockaddr_in));
+			satosin(sa)->sin_family = af;
+			satosin(sa)->sin_len = sizeof(struct sockaddr_in);
+			bcopy(mtod(m, caddr_t), &satosin(sa)->sin_addr,
+			    sizeof(in_addr_t));
+			m_adj(m, sizeof(in_addr_t));
+			break;
+		default:
+			m_freem(m);
+			continue;
 		}
-#endif
 
-#if NBPFILTER > 0
-		if (ifp->if_bpf)
-			bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_OUT);
-#endif
-		ifm = ifp->if_softc;
-		shim.shim_label = ifm->sc_shim.shim_label;
-		M_PREPEND(m, sizeof(shim), M_DONTWAIT);
-		m_copyback(m, 0, sizeof(shim), (caddr_t)&shim);
-		if (m == NULL) {
-			ifp->if_ierrors++;
+		rt = rtalloc1(sa, RT_REPORT, 0);
+		if (rt == NULL) {
+			/* no route give up */
+			m_freem(m);
 			continue;
 		}
-		m->m_pkthdr.rcvif = ifp;
-		/* XXX assumes MPLS is always in rdomain 0 */
-		m->m_pkthdr.rdomain = 0;
-		mpls_output(m, NULL);
+
+#if NBPFILTER > 0
+		if (ifp->if_bpf) {
+			/* remove MPLS label before passing packet to bpf */
+			m->m_data += sizeof(struct shim_hdr);
+			m->m_len -= sizeof(struct shim_hdr);
+			m->m_pkthdr.len -= sizeof(struct shim_hdr);
+			bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_OUT);
+			m->m_data -= sizeof(struct shim_hdr);
+			m->m_len += sizeof(struct shim_hdr);
+			m->m_pkthdr.len += sizeof(struct shim_hdr);
+		}
+#endif
+		/* XXX lie, but mpls_output will only look at sa_family */
+		sa->sa_family = AF_MPLS;
+
+		mpls_output(rt->rt_ifp, m, sa, rt);
+		RTFREE(rt);
 	}
 }
 
@@ -182,25 +201,64 @@ int
 mpeoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 	struct rtentry *rt)
 {
-	int	s;
-	int	error;
+	struct shim_hdr	shim;
+	int		s;
+	int		error;
+	int		off;
+	u_int8_t	op = 0;
+
+#ifdef DIAGNOSTIC
+	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
+		printf("%s: trying to send packet on wrong domain. "
+		    "if %d vs. mbuf %d\n", ifp->if_xname,
+		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain));
+	}
+#endif
+	m->m_pkthdr.rcvif = ifp;
+	/* XXX assumes MPLS is always in rdomain 0 */
+	m->m_pkthdr.rdomain = 0;
 
 	error = 0;
 	switch (dst->sa_family) {
+#ifdef INET
 	case AF_INET:
+		if (rt && rt->rt_flags & RTF_MPLS) {
+			shim.shim_label =
+			    ((struct rt_mpls *)rt->rt_llinfo)->mpls_label;
+			shim.shim_label |= MPLS_BOS_MASK;
+			op =  ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation;
+		}
+		if (op != MPLS_OP_PUSH) {
+			m_freem(m);
+			error = ENETUNREACH;
+			goto out;
+		}
+		if (mpls_mapttl_ip) {
+			struct ip	*ip;
+			ip = mtod(m, struct ip *);
+			shim.shim_label |= htonl(ip->ip_ttl) & MPLS_TTL_MASK;
+		} else
+			shim.shim_label |= htonl(mpls_defttl) & MPLS_TTL_MASK;
+		off = sizeof(sa_family_t) + sizeof(in_addr_t);
+		M_PREPEND(m, sizeof(shim) + off, M_DONTWAIT);
+		if (m == NULL) {
+			m_freem(m);
+			error = ENOBUFS;
+			goto out;
+		}
+		*mtod(m, sa_family_t *) = AF_INET;
+		m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t),
+		    (caddr_t)&((satosin(dst)->sin_addr)));
 		break;
-	case AF_MPLS:
-		/*
-		 * drop MPLS packets entering here. This is a hack to prevent
-		 * loops because of misconfiguration.
-		 */
-		m_freem(m);
-		error = ENETUNREACH;
-		return (error);
+#endif
 	default:
+		m_freem(m);
 		error = ENETDOWN;
 		goto out;
 	}
+
+	m_copyback(m, off, sizeof(shim), (caddr_t)&shim);
+
 	s = splnet();
 	IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
 	if (error) {
@@ -210,6 +268,7 @@ mpeoutput(struct ifnet *ifp, struct mbuf
 	}
 	if_start(ifp);
 	splx(s);
+
 out:
 	if (error)
 		ifp->if_oerrors++;
@@ -224,13 +283,13 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
 	struct mpe_softc	*ifm;
 	struct ifreq		*ifr;
 	struct shim_hdr		 shim;
-	u_int32_t		 ttl = htonl(mpls_defttl);
 
 	ifr = (struct ifreq *)data;
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
-		ifp->if_flags |= IFF_UP;
+		if (!ISSET(ifp->if_flags, IFF_UP))
+			if_up(ifp);
 		break;
 	case SIOCSIFFLAGS:
 		if (ifp->if_flags & IFF_UP)
@@ -261,8 +320,7 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
 			error = EINVAL;
 			break;
 		}
-		shim.shim_label = (htonl(shim.shim_label << MPLS_LABEL_OFFSET))
-		    | MPLS_BOS_MASK | ttl;
+		shim.shim_label = htonl(shim.shim_label << MPLS_LABEL_OFFSET);
 		if (ifm->sc_shim.shim_label == shim.shim_label)
 			break;
 		LIST_FOREACH(ifm, &mpeif_list, sc_list) {
@@ -275,6 +333,14 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
 		if (error)
 			break;
 		ifm = ifp->if_softc;
+		if (ifm->sc_shim.shim_label) {
+			/* remove old MPLS route */
+			mpe_newlabel(ifp, RTM_DELETE, &ifm->sc_shim);
+		}
+		/* add new MPLS route */
+		error = mpe_newlabel(ifp, RTM_ADD, &shim);
+		if (error)
+			break;
 		ifm->sc_shim.shim_label = shim.shim_label;
 		break;
 	default:
@@ -324,7 +390,7 @@ mpe_input(struct mbuf *m, struct ifnet *
 
 #if NBPFILTER > 0
 	if (ifp && ifp->if_bpf)
-		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+		bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_IN);
 #endif
 	s = splnet();
 	IF_ENQUEUE(&ipintrq, m);
@@ -358,10 +424,45 @@ mpe_input6(struct mbuf *m, struct ifnet 
 
 #if NBPFILTER > 0
 	if (ifp && ifp->if_bpf)
-		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+		bpf_mtap_af(ifp->if_bpf, AF_INET6, m, BPF_DIRECTION_IN);
 #endif
 	s = splnet();
 	IF_ENQUEUE(&ip6intrq, m);
 	schednetisr(NETISR_IPV6);
 	splx(s);
+}
+
+int
+mpe_newlabel(struct ifnet *ifp, int cmd, struct shim_hdr *shim)
+{
+	struct rtentry *nrt;
+	struct sockaddr_mpls dst;
+	struct rt_addrinfo info;
+	int error;
+
+	bzero(&dst, sizeof(dst));
+	dst.smpls_len = sizeof(dst);
+	dst.smpls_family = AF_MPLS;
+	dst.smpls_label = shim->shim_label;
+
+	bzero(&info, sizeof(info));
+	info.rti_flags = RTF_UP | RTF_MPLS;
+	info.rti_mpls = MPLS_OP_POP;
+	info.rti_info[RTAX_DST] = smplstosa(&dst);
+	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)ifp->if_sadl;
+
+	error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, 0);
+	rt_missmsg(cmd, &info, error ? 0 : nrt->rt_flags, ifp, error, 0);
+	if (cmd == RTM_DELETE) {
+		if (error == 0 && nrt != NULL) {
+			if (nrt->rt_refcnt <= 0) {
+				nrt->rt_refcnt++;
+				rtfree(nrt);
+			}
+		}
+	}
+	if (cmd == RTM_ADD && error == 0 && nrt != NULL) {
+		nrt->rt_refcnt--;
+	}
+	return (error);
 }
Index: sys/netmpls/mpls.h
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls.h,v
retrieving revision 1.21
diff -u -p -r1.21 mpls.h
--- sys/netmpls/mpls.h	28 Apr 2009 12:07:43 -0000	1.21
+++ sys/netmpls/mpls.h	25 Mar 2010 11:34:00 -0000
@@ -179,7 +179,8 @@ struct mbuf	*mpls_shim_push(struct mbuf 
 
 int		 mpls_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 void		 mpls_input(struct mbuf *);
-struct mbuf	*mpls_output(struct mbuf *, struct rtentry *);
+int		 mpls_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+		    struct rtentry *);
 
 void		 mpls_ip_input(struct mbuf *, u_int8_t);
 void		 mpls_ip6_input(struct mbuf *, u_int8_t);
Index: sys/netmpls/mpls_input.c
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls_input.c,v
retrieving revision 1.22
diff -u -p -r1.22 mpls_input.c
--- sys/netmpls/mpls_input.c	7 May 2010 13:33:17 -0000	1.22
+++ sys/netmpls/mpls_input.c	12 May 2010 08:41:45 -0000
@@ -95,7 +95,7 @@ mpls_input(struct mbuf *m)
 	u_int8_t ttl;
 	int i, hasbos;
 
-	if (!mpls_enable) {
+	if (!mpls_enable || !ISSET(ifp->if_xflags, IFXF_MPLS)) {
 		m_freem(m);
 		return;
 	}
@@ -132,11 +132,11 @@ mpls_input(struct mbuf *m)
 	}
 	ttl--;
 
+	bzero(&sa_mpls, sizeof(sa_mpls));
+	smpls = &sa_mpls;
+	smpls->smpls_family = AF_MPLS;
+	smpls->smpls_len = sizeof(*smpls);
 	for (i = 0; i < mpls_inkloop; i++) {
-		bzero(&sa_mpls, sizeof(sa_mpls));
-		smpls = &sa_mpls;
-		smpls->smpls_family = AF_MPLS;
-		smpls->smpls_len = sizeof(*smpls);
 		smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
 
 #ifdef MPLS_DEBUG
@@ -151,15 +151,13 @@ mpls_input(struct mbuf *m)
 			m = mpls_shim_pop(m);
 			shim = mtod(m, struct shim_hdr *);
 
-			switch (ntohl(smpls->smpls_label)) { 
-
+			switch (ntohl(smpls->smpls_label)) {
 			case MPLS_LABEL_IPV4NULL:
 				if (hasbos) {
 					mpls_ip_input(m, ttl);
 					goto done;
 				} else
 					continue;
-
 			case MPLS_LABEL_IPV6NULL:
 				if (hasbos) {
 					mpls_ip6_input(m, ttl);
@@ -184,7 +182,6 @@ mpls_input(struct mbuf *m)
 		}
 
 		rt->rt_use++;
-		smpls = satosmpls(rt_key(rt));
 		rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
 
 		if (rt_mpls == NULL || (rt->rt_flags & RTF_MPLS) == 0) {
@@ -196,17 +193,14 @@ mpls_input(struct mbuf *m)
 			goto done;
 		}
 
-		if (rt_mpls->mpls_operation == MPLS_OP_LOCAL) {
+		hasbos = MPLS_BOS_ISSET(shim->shim_label);
+		switch (rt_mpls->mpls_operation) {
+		case MPLS_OP_LOCAL:
 			/* Packet is for us */
-			hasbos = MPLS_BOS_ISSET(shim->shim_label);
-			if (!hasbos) {
-#ifdef MPLS_DEBUG
-				printf("MPLS_DEBUG: packet malformed\n");
-#endif
-				m_freem(m);
-				goto done;
-			}
 			m = mpls_shim_pop(m);
+			if (!hasbos)
+				/* redo lookup with next label */
+				break;
 
 			if (!rt->rt_gateway) {
 #ifdef MPLS_DEBUG
@@ -227,16 +221,13 @@ mpls_input(struct mbuf *m)
 			default:
 				m_freem(m);
 			}
-
 			goto done;
-		}
-
-		if (rt_mpls->mpls_operation & MPLS_OP_POP) {
-			hasbos = MPLS_BOS_ISSET(shim->shim_label);
+		case MPLS_OP_POP:
+			m = mpls_shim_pop(m);
 			if (hasbos) {
-				m = mpls_shim_pop(m);
 #if NMPE > 0
 				if (rt->rt_ifp->if_type == IFT_MPLS) {
+					smpls = satosmpls(rt_key(rt));
 					mpe_input(m, rt->rt_ifp, smpls, ttl);
 					goto done;
 				}
@@ -245,13 +236,23 @@ mpls_input(struct mbuf *m)
 				m_freem(m);
 				goto done;
 			}
+			break;
+		case MPLS_OP_PUSH:
+			m = mpls_shim_push(m, rt_mpls);
+			break;
+		case MPLS_OP_SWAP:
+			m = mpls_shim_swap(m, rt_mpls);
+			break;
 		}
 
+		if (m == NULL)
+			goto done;
+
 		/* refetch label */
 		shim = mtod(m, struct shim_hdr *);
-		ifp = rt->rt_ifp;
 
-		if (ifp != NULL)  
+		ifp = rt->rt_ifp;
+		if (ifp != NULL && rt_mpls->mpls_operation != MPLS_OP_LOCAL)
 			break;
 
 		RTFREE(rt);
@@ -273,14 +274,22 @@ mpls_input(struct mbuf *m)
 	    MPLS_LABEL_GET(rt_mpls->mpls_label));
 #endif
 
-	(*ifp->if_output)(ifp, m, smplstosa(smpls), rt);
+	/* Output iface is not MPLS-enabled */
+	if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
+#ifdef MPLS_DEBUG
+		printf("MPLS_DEBUG: interface not mpls enabled\n");
+#endif
+		goto done;
+	}
+
+	(*ifp->if_ll_output)(ifp, m, smplstosa(smpls), rt);
 done:
 	if (rt)
 		RTFREE(rt);
 }
 
 void
-mpls_ip_input(struct mbuf *m, u_int8_t ttl) 
+mpls_ip_input(struct mbuf *m, u_int8_t ttl)
 {
 	struct ip	*ip;
 	int		 s, hlen;
Index: sys/netmpls/mpls_output.c
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls_output.c,v
retrieving revision 1.8
diff -u -p -r1.8 mpls_output.c
--- sys/netmpls/mpls_output.c	7 May 2010 13:33:17 -0000	1.8
+++ sys/netmpls/mpls_output.c	12 May 2010 08:42:21 -0000
@@ -27,66 +27,67 @@
 
 #include <netmpls/mpls.h>
 
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
 extern int	mpls_inkloop;
 
 #ifdef MPLS_DEBUG
 #define MPLS_LABEL_GET(l)	((ntohl((l) & MPLS_LABEL_MASK)) >> MPLS_LABEL_OFFSET)
 #endif
 
-struct mbuf *
-mpls_output(struct mbuf *m, struct rtentry *rt0)
+void	mpls_do_cksum(struct mbuf *);
+
+int
+mpls_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt0)
 {
-	struct ifnet		*ifp = m->m_pkthdr.rcvif;
+	struct ifnet		*ifp = ifp0;
 	struct sockaddr_mpls	*smpls;
 	struct sockaddr_mpls	 sa_mpls;
 	struct shim_hdr		*shim;
 	struct rtentry		*rt = rt0;
 	struct rt_mpls		*rt_mpls;
-	int			 i;
+	int			 i, error;
 
-	if (!mpls_enable) {
-		m_freem(m);
-		goto bad;
+	if (!mpls_enable || rt0 == NULL || (dst->sa_family != AF_INET &&
+	    dst->sa_family != AF_INET6 && dst->sa_family != AF_MPLS)) {
+		if (!ISSET(ifp->if_xflags, IFXF_MPLS))
+			return (ifp->if_output(ifp, m, dst, rt));
+		else
+			return (ifp->if_ll_output(ifp, m, dst, rt));
 	}
 
-	/* reset broadcast and multicast flags, this is a P2P tunnel */
-	m->m_flags &= ~(M_BCAST | M_MCAST);
+	/* need to calculate checksums now if necessary */
+	if (m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | M_TCPV4_CSUM_OUT |
+	    M_UDPV4_CSUM_OUT))
+		mpls_do_cksum(m);
+
+	/* initialize sockaddr_mpls */
+	bzero(&sa_mpls, sizeof(sa_mpls));
+	smpls = &sa_mpls;
+	smpls->smpls_family = AF_MPLS;
+	smpls->smpls_len = sizeof(*smpls);
 
 	for (i = 0; i < mpls_inkloop; i++) {
-		if (rt == NULL) {
-			shim = mtod(m, struct shim_hdr *);
-
-			bzero(&sa_mpls, sizeof(sa_mpls));
-			smpls = &sa_mpls;
-			smpls->smpls_family = AF_MPLS;
-			smpls->smpls_len = sizeof(*smpls);
-			smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
-
-			rt = rtalloc1(smplstosa(smpls), RT_REPORT, 0);
-			if (rt == NULL) {
-				/* no entry for this label */
-#ifdef MPLS_DEBUG
-				printf("MPLS_DEBUG: label not found\n");
-#endif
-				m_freem(m);
-				goto bad;
-			}
-			rt->rt_use++;
-		}
-
 		rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
 		if (rt_mpls == NULL || (rt->rt_flags & RTF_MPLS) == 0) {
 			/* no MPLS information for this entry */
+			if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
 #ifdef MPLS_DEBUG
-			printf("MPLS_DEBUG: no MPLS information attached\n");
+				printf("MPLS_DEBUG: interface not mpls enabled\n");
 #endif
-			m_freem(m);
-			goto bad;
-		}
+				error = ENETUNREACH;
+				goto bad;
+			}
 
-		switch (rt_mpls->mpls_operation & (MPLS_OP_PUSH | MPLS_OP_POP |
-		    MPLS_OP_SWAP)) {
+			return (ifp->if_ll_output(ifp0, m, dst, rt0));
+		}
 
+		switch (rt_mpls->mpls_operation) {
 		case MPLS_OP_PUSH:
 			m = mpls_shim_push(m, rt_mpls);
 			break;
@@ -97,29 +98,45 @@ mpls_output(struct mbuf *m, struct rtent
 			m = mpls_shim_swap(m, rt_mpls);
 			break;
 		default:
-			m_freem(m);
+			error = EINVAL;
 			goto bad;
 		}
 
-		if (m == NULL)
+		if (m == NULL) {
+			error = ENOBUFS;
 			goto bad;
+		}
 
 		/* refetch label */
 		shim = mtod(m, struct shim_hdr *);
-		ifp = rt->rt_ifp;
+		/* mark first label with BOS flag */
+		if (rt0 == rt && dst->sa_family != AF_MPLS)
+			shim->shim_label |= MPLS_BOS_MASK;
 
+		ifp = rt->rt_ifp;
 		if (ifp != NULL)
 			break;
 
-		if (rt0 != rt)
-			RTFREE(rt);
+		shim = mtod(m, struct shim_hdr *);
+		smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
 
-		rt = NULL;
+		rt = rtalloc1(smplstosa(smpls), RT_REPORT, 0);
+		if (rt == NULL) {
+			/* no entry for this label */
+#ifdef MPLS_DEBUG
+			printf("MPLS_DEBUG: label %d not found\n",
+			    MPLS_LABEL_GET(shim->shim_label));
+#endif
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+		rt->rt_use++;
+		rt->rt_refcnt--;
 	}
 
 	/* write back TTL */
 	shim->shim_label &= ~MPLS_TTL_MASK;
-	shim->shim_label |= MPLS_BOS_MASK | htonl(mpls_defttl);
+	shim->shim_label |= htonl(mpls_defttl);
 
 #ifdef MPLS_DEBUG
 	printf("MPLS: sending on %s outshim %x outlabel %d\n",
@@ -127,13 +144,42 @@ mpls_output(struct mbuf *m, struct rtent
 	    MPLS_LABEL_GET(rt_mpls->mpls_label));
 #endif
 
-	if (rt != rt0)
-		RTFREE(rt);
+	/* Output iface is not MPLS-enabled */
+	if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
+#ifdef MPLS_DEBUG
+		printf("MPLS_DEBUG: interface not mpls enabled\n");
+#endif
+		error = ENETUNREACH;
+		goto bad;
+	}
+
+	/* reset broadcast and multicast flags, this is a P2P tunnel */
+	m->m_flags &= ~(M_BCAST | M_MCAST);
 
-	return (m);
+	smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
+	return (ifp->if_ll_output(ifp, m, smplstosa(smpls), rt));
 bad:
-	if (rt != rt0)
-		RTFREE(rt);
+	if (m)
+		m_freem(m);
+	return (error);
+}
 
-	return (NULL);
+void
+mpls_do_cksum(struct mbuf *m)
+{
+#ifdef INET
+	struct ip *ip;
+	u_int16_t hlen;
+
+	if (m->m_pkthdr.csum_flags & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~(M_UDPV4_CSUM_OUT|M_TCPV4_CSUM_OUT);
+	}
+	if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
+		ip = mtod(m, struct ip *);
+		hlen = ip->ip_hl << 2;
+		ip->ip_sum = in_cksum(m, hlen);
+		m->m_pkthdr.csum_flags &= ~M_IPV4_CSUM_OUT;
+	}
+#endif
 }
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
rdomain, mpe, ldpd, OpenBGPD and PF, Insan Praja SW, (Mon May 24, 3:23 am)
Re: rdomain, mpe, ldpd, OpenBGPD and PF, Claudio Jeker, (Mon May 24, 6:11 am)
Re: rdomain, mpe, ldpd, OpenBGPD and PF, Insan Praja SW, (Mon May 24, 6:25 am)