1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; //流表匹配时候用到的key值 struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats; /* One for each NUMA node. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. */ }; struct sw_flow_key { u8 tun_opts[255]; u8 tun_opts_len; struct ovs_key_ipv4_tunnel tun_key; //该流表对应的tunnel信息 ....... } struct ovs_key_ipv4_tunnel { __be64 tun_id; __be32 nsp; /* it contains (nsp - 24 bits | nsi - 8 bits) here */ __be32 nshc1; /* NSH context headers */ __be32 nshc2; __be32 nshc3; __be32 nshc4; __be32 ipv4_src; __be32 ipv4_dst; __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; } __packed __aligned(4); /* Minimize padding. */ |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | struct nsh_base { #if defined(__LITTLE_ENDIAN_BITFIELD) //小端序 __u8 res1:4; __u8 c:1; __u8 o:1; __u8 ver:2; __u8 len:6; __u8 res2:2; #elif defined(__BIG_ENDIAN_BITFIELD) //大端序 __u8 ver:2; __u8 o:1; __u8 c:1; __u8 res1:4; __u8 res2:2; __u8 len:6; #else #error "Bitfield Endianess not defined." #endif __u8 mdtype; //mdtype决定了ctx的格式 __u8 proto; //next protocol union { struct { __u8 svc_path[3]; //spi __u8 svc_idx; //si }; __be32 b2; }; }; |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options = parms->options;//vxlan struct vxlan_port *vxlan_port; struct vxlan_sock *vs; struct vport *vport; struct nlattr *a; u16 dst_port; int err; if (!options) { //option数据不能为空 err = -EINVAL; goto error; } a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); //从option中提取dst port if (a && nla_len(a) == sizeof(u16)) { dst_port = nla_get_u16(a); } else { /* Require destination port from userspace. */ err = -EINVAL; goto error; } vport = ovs_vport_alloc(sizeof(struct vxlan_port), //初始化vport,&ovs_vxlan_vport_ops是 vport相关的操作,此处为默认的几种,可以自己添加 &ovs_vxlan_vport_ops, parms); /*struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, const struct vport_parms *parms)*/ if (IS_ERR(vport)) //初始化是否成功 return vport; vxlan_port = vxlan_vport(vport); //为vxlan_vport分配私有数据区 strncpy(vxlan_port->name, parms->name, IFNAMSIZ); vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); //为vport创建 vxlan_sock if (IS_ERR(vs)) { //判断是否创建成功 ovs_vport_free(vport); return (void *)vs; } vxlan_port->vs = vs; return vport; error: return ERR_PTR(err); } |
/*
 * vxlan_socket_create - allocate a vxlan_sock backed by a kernel UDP socket.
 * @net:  network namespace the socket is moved into
 * @port: UDP port to bind (network byte order), on INADDR_ANY
 * @rcv:  receive callback stored in the vxlan_sock
 * @data: opaque pointer stored in the vxlan_sock
 *
 * Returns the new vxlan_sock, or an ERR_PTR() carrying -ENOMEM or the error
 * from sock_create_kern()/kernel_bind().
 *
 * Defined ahead of vxlan_sock_add() so that the static function is declared
 * before its first use.
 */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
					      vxlan_rcv_t *rcv, void *data)
{
	struct vxlan_sock *vs;		/* the object finally returned */
	struct sock *sk;
	struct sockaddr_in vxlan_addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = port,
	};
	int rc;

	vs = kmalloc(sizeof(*vs), GFP_KERNEL);
	if (!vs) {
		pr_debug("memory alocation failure\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Set up the work item whose handler is vxlan_del_work. */
	INIT_WORK(&vs->del_work, vxlan_del_work);

	/* Create UDP socket for encapsulation receive. */
	rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vs->sock);
	if (rc < 0) {
		pr_debug("UDP socket create failed\n");
		kfree(vs);
		return ERR_PTR(rc);
	}

	/* Put in proper namespace */
	sk = vs->sock->sk;
	sk_change_net(sk, net);

	/* Bind the freshly created socket to the requested port. */
	rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr,
			 sizeof(vxlan_addr));
	if (rc < 0) {
		pr_debug("bind for UDP socket %pI4:%u (%d)\n",
			 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
		sk_release_kernel(sk);
		kfree(vs);
		return ERR_PTR(rc);
	}
	vs->rcv = rcv;
	vs->data = data;

	/* Disable multicast loopback */
	inet_sk(sk)->mc_loop = 0;
	rcu_assign_sk_user_data(vs->sock->sk, vs);

	/* Mark socket as an encapsulation socket. */
	udp_sk(sk)->encap_type = 1;
	/* Handler invoked for UDP packets received on this socket. */
	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
	/*
	 * This must be a call: the bare identifier 'udp_encap_enable;' is an
	 * expression statement with no effect.
	 */
	udp_encap_enable();
	return vs;
}

/*
 * vxlan_sock_add - public entry point for creating a vxlan_sock.
 *
 * @no_share and @ipv6 are accepted but not used; the call is forwarded
 * to vxlan_socket_create() with the remaining arguments.
 */
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
				  bool no_share, bool ipv6)
{
	return vxlan_socket_create(net, port, rcv, data);
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct vxlan_sock *vs; struct vxlanhdr *vxh; struct udphdr *udp; bool isnsh = false; __be32 nsp = 0; __be32 c1 = 0; __be32 c2 = 0; __be32 c3 = 0; __be32 c4 = 0; udp = (struct udphdr *)udp_hdr(skb); if (udp->dest == htons(NSH_DST_PORT)) //根据udp目的端口(6633)来判断是否含有nsh isnsh = true; //htons HBO to NBO /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN)) // 检查skb长度是否能够满足把后面的头部去掉 如果不够返回-1 报错 goto error; /* Return packets with reserved bits set */ vxh = vxlan_hdr(skb); //提取vxlanheader if (vxh->vx_flags != htonl(VXLAN_FLAGS) || (vxh->vx_vni & htonl(0xff))) { pr_warn("invalid vxlan flags=%#x vni=%#x\n", ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); goto error; } if (isnsh) { struct nshhdr *nsh = nsh_hdr(skb); if (unlikely(nsh->b.svc_idx == 0 || nsh->b.ver || //如果 si == 0 sfc结束,直接drop nsh->b.len != 6 || nsh->b.mdtype != 0x01 || nsh->b.proto != NSH_P_ETHERNET)) { pr_warn("NSH service index reached zero or not supported\n"); goto drop; } nsp = nsh->b.b2; /* same as svc_path | htonl(svc_idx) */ c1 = nsh->c.c1; /* NSH Contexts */ c2 = nsh->c.c2; c3 = nsh->c.c3; c4 = nsh->c.c4; } if (iptunnel_pull_header(skb, isnsh ? NSH_HLEN : VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; vs->rcv(vs, skb, vxh->vx_vni, nsp, c1, c2, c3, c3); //调用vs->rcv,即 return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; error: /* Return non vxlan pkt */ return 1; } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp, __be32 nshc1, __be32 nshc2, __be32 nshc3, __be32 nshc4) { bool isnsh = (dst_port == htons(NSH_DST_PORT)); struct vxlanhdr *vxh; struct udphdr *uh; int min_headroom; int err; min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);//最小长度 /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); //验证skb长度是否足够 if (unlikely(err)) return err; if (vlan_tx_tag_present(skb)) { if (unlikely(!__vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)))) return -ENOMEM; vlan_set_tci(skb, 0); } skb_reset_inner_headers(skb); if (isnsh) { struct nshhdr *nsh; uint8_t nsi = ntohl(nsp) & NSH_M_NSI; nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh));//把nsh头push到skb中 nshheader封装 memset(&nsh->b, 0, sizeof nsh->b); //初始化nsh_b nsh->b.len = 6; nsh->b.mdtype = NSH_M_TYPE1; nsh->b.proto = NSH_P_ETHERNET; /* b2 should precede svc_idx, else svc_idx will be zero */ nsh->b.b2 = nsp & htonl(NSH_M_NSP); nsh->b.svc_idx = nsi ? 
nsi : 0x01; nsh->c.c1 = nshc1; nsh->c.c2 = nshc2; nsh->c.c3 = nshc3; nsh->c.c4 = nshc4; } vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); //vxlanheader封装 vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; __skb_push(skb, sizeof(*uh)); //udp header封装 skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->dest = dst_port; uh->source = src_port; uh->len = htons(skb->len); uh->check = 0; vxlan_set_owner(vs->sock->sk, skb); //把skb 绑定的到某个vxlan的sk, 实际是为vxlan设置skb err = handle_offloads(skb); if (err) return err; return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, //iptunnel封装 tos, ttl, df, false); } |
欢迎光临 51学通信论坛2017新版 (http://bbs.51xuetongxin.com/) | Powered by Discuz! X3 |