> 文档中心 > DPDK——TCP/UDP协议栈服务端实现(二)

DPDK——TCP/UDP协议栈服务端实现(二)

文章目录

    • 一、概述
    • 二、UDP数据结构
    • 三、arp表处理
    • 四、UDP套接字函数实现
      • 3.1 socket函数
      • 3.2 bind函数
      • 3.3 recvfrom函数
      • 3.4 sendto函数
      • 3.5 close函数
    • 五、UDP包处理
      • 5.1 协议栈接收
      • 5.2 协议栈发送
      • 5.3 项目地址及相关文章

一、概述

在《DPDK——TCP/UDP协议栈服务端实现(一)》中已经讲述TCP/UDP简易协议栈项目的框架和基本信息,本篇文章要完成的工作如下:

  1. udp包结构设计,包括udp控制块结构、协议栈与应用层传输包结构
  2. ARP表处理
  3. UDP套接字函数实现
  4. UDP包处理

二、UDP数据结构

如下图所示,UDP的数据结构主要分为两个部分:数据传输块与控制块。
DPDK——TCP/UDP协议栈服务端实现(二)

UDP控制块是在应用层创建socket的同时生成的,主要包含一个发送队列和接收队列,线程同步变量,以及相关参数,如下:

struct localhost {int fd;uint32_t localip; // ip --> macunsigned char localmac[RTE_ETHER_ADDR_LEN];uint16_t localport;unsigned char protocol;struct rte_ring *sndbuf;struct rte_ring *rcvbuf;struct localhost *prev; struct localhost *next;pthread_cond_t cond;pthread_mutex_t mutex;};

传输块作为协议栈向UDP应用通信的数据封装,DPDK协议栈在收到网卡发送的数据后,按照传输块的结构来封装数据,并发送至UDP控制块中的接收队列,结构如下:

/*
 * Transfer block exchanged between the protocol stack and the UDP
 * application through the rings of a control block.
 *
 * Addresses and ports are stored exactly as they appear in the packet
 * headers / sockaddr_in (no byte-order conversion is applied when they
 * are copied in).
 */
struct offload {
    uint32_t sip;        /* source IPv4 address */
    uint32_t dip;        /* destination IPv4 address */

    uint16_t sport;      /* source port */
    uint16_t dport;      /* destination port */

    int protocol;        /* transport protocol, e.g. IPPROTO_UDP */

    unsigned char *data; /* heap buffer owned by this block */
    uint16_t length;     /* byte count recorded by the producer for data */
};

三、arp表处理

在网络传输中,ARP协议承担着“护送数据包最后一公里”的任务,因为在数据包到达局域网后,ip地址作为一个不可靠的标识,显然不能确保数据包具体送达目的主机,这个时候我们就要获知主机的MAC地址了。具体做法就是向局域网内同一个路由器下所有设备广播,内容就是“你们谁是ip为xxx.xxx.xxx.xxx的主机呀?看到消息请马上告知你的MAC地址,你的快递到了哈!”

所以,本项目也必须维护一个ip和mac映射的arp表,这样才能准确将数据包发给对方主机。

arp表的维护主要发生在两个地方:协议栈收到网卡数据包时,存储对端的ip和mac地址信息;协议栈发送数据包之前,查询arp表,若无对应的mac地址,则先广播arp请求包。

主要函数如下:

int ng_arp_entry_insert(uint32_t ip, unsigned char *mac){    struct arp_table *pstTbl = arp_table_instance();    struct arp_entry *pstEntry = NULL;    unsigned char *pstHwaddr = NULL;    pstHwaddr = ng_get_dst_macaddr(ip);    if(pstHwaddr == NULL)    { pstEntry = rte_malloc("arp_entry", sizeof(struct arp_entry), 0);if (pstEntry)  {memset(pstEntry, 0, sizeof(struct arp_entry));pstEntry->ip = ip;rte_memcpy(pstEntry->hwaddr, mac, RTE_ETHER_ADDR_LEN);pstEntry->type = 0;pthread_spin_lock(&pstTbl->spinlock);LL_ADD(pstEntry, pstTbl->entries);pstTbl->count ++;pthread_spin_unlock(&pstTbl->spinlock);} return 1;    }    return 0;}

四、UDP套接字函数实现

3.1 socket函数

该函数主要实现获取fd、创建控制块。

int nsocket(__attribute__((unused)) int domain, int type, __attribute__((unused))  int protocol){    int iFd;    struct localhost *pstHost;    pthread_cond_t pctCond = PTHREAD_COND_INITIALIZER;    pthread_mutex_t pmtMutex = PTHREAD_MUTEX_INITIALIZER;    iFd = get_fd_frombitmap();    if(type == SOCK_DGRAM) // udp    { pstHost = rte_malloc("localhost", sizeof(struct localhost), 0); if(pstHost == NULL) {     printf("[%s][%d]: rte_malloc fail!\n", __FUNCTION__, __LINE__);     return -1; } memset(pstHost, 0x00, sizeof(struct localhost)); pstHost->fd = iFd; pstHost->protocol = IPPROTO_UDP; pstHost->rcvbuf = rte_ring_create("recv buffer", D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ); if (pstHost->rcvbuf == NULL)  {     printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);rte_free(pstHost);return -1;} pstHost->sndbuf = rte_ring_create("send buffer", D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ); if (pstHost->sndbuf == NULL)  {     printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);     rte_ring_free(pstHost->rcvbuf);rte_free(pstHost);return -1;}rte_memcpy(&pstHost->cond, &pctCond, sizeof(pthread_cond_t));rte_memcpy(&pstHost->mutex, &pmtMutex, sizeof(pthread_mutex_t));LL_ADD(pstHost, g_pstHost);    }return iFd;}

3.2 bind函数

bind函数的任务是将ip和端口信息绑定到socket函数创建的控制块结构当中。

int nbind(int sockfd, const struct sockaddr *addr, __attribute__((unused))  socklen_t addrlen){    void *info = NULL;    info = get_hostinfo_fromfd(sockfd);    if(info == NULL)  return -1;    struct localhost *pstHostInfo = (struct localhost *)info;    if(pstHostInfo->protocol == IPPROTO_UDP)    { const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)addr;pstHostInfo->localport = pstAddr->sin_port;rte_memcpy(&pstHostInfo->localip, &pstAddr->sin_addr.s_addr, sizeof(uint32_t));rte_memcpy(pstHostInfo->localmac, &g_stCpuMac, RTE_ETHER_ADDR_LEN);    }    return 0;}

3.3 recvfrom函数

目前实现的recvfrom函数为阻塞式的,使用条件变量+互斥量等待接收队列中数据到来。

ssize_t nrecvfrom(int sockfd, void *buf, size_t len, __attribute__((unused))  int flags,   struct sockaddr *src_addr, __attribute__((unused))  socklen_t *addrlen){    struct localhost *pstHostInfo = NULL;    struct offload *pstOffLoad = NULL;    struct sockaddr_in *pstAddr = NULL;unsigned char *pucPtr = NULL;    int iLen = 0;    int iRet = -1;    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);    if(pstHostInfo == NULL)  return -1; pthread_mutex_lock(&pstHostInfo->mutex);    while((iRet = rte_ring_mc_dequeue(pstHostInfo->rcvbuf, (void**)&pstOffLoad)) < 0)    { pthread_cond_wait(&pstHostInfo->cond, &pstHostInfo->mutex);    }    pthread_mutex_unlock(&pstHostInfo->mutex);    pstAddr = (struct sockaddr_in *)src_addr;    pstAddr->sin_port = pstOffLoad->sport;    rte_memcpy(&pstAddr->sin_addr.s_addr, &pstOffLoad->sip, sizeof(uint32_t));    if(len < pstOffLoad->length)    { rte_memcpy(buf, pstOffLoad->data, len); pucPtr = rte_malloc("unsigned char *", pstOffLoad->length - len, 0);rte_memcpy(pucPtr, pstOffLoad->data + len, pstOffLoad->length - len);pstOffLoad->length -= len;rte_free(pstOffLoad->data);pstOffLoad->data = pucPtr;rte_ring_mp_enqueue(pstHostInfo->rcvbuf, pstOffLoad);return len;    }    iLen = pstOffLoad->length;    rte_memcpy(buf, pstOffLoad->data, pstOffLoad->length); rte_free(pstOffLoad->data);    rte_free(pstOffLoad); return iLen;}   

3.4 sendto函数

sendto函数则是将待发送数据封装成传输块,放入发送队列当中,交由协议栈发送至网卡。

ssize_t nsendto(int sockfd, const void *buf, size_t len, __attribute__((unused))  int flags, const struct sockaddr *dest_addr, __attribute__((unused))  socklen_t addrlen){    struct localhost *pstHostInfo = NULL;    struct offload *pstOffLoad = NULL;    const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)dest_addr;    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);    if(pstHostInfo == NULL)  return -1;    pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);if (pstOffLoad == NULL)  return -1;    pstOffLoad->dip = pstAddr->sin_addr.s_addr;pstOffLoad->dport = pstAddr->sin_port;pstOffLoad->sip = pstHostInfo->localip;pstOffLoad->sport = pstHostInfo->localport;pstOffLoad->length = len;    /*    struct in_addr addr;addr.s_addr = pstOffLoad->dip;printf("nsendto ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport));    */ pstOffLoad->data = rte_malloc("unsigned char *", len, 0);if (pstOffLoad->data == NULL) {rte_free(pstOffLoad);return -1;}rte_memcpy(pstOffLoad->data, buf, len);rte_ring_mp_enqueue(pstHostInfo->sndbuf, pstOffLoad);return len;}

3.5 close函数

close函数则是将创建的控制块进行释放。

int nclose(int fd){    void *info = NULL;    info = (struct localhost *)get_hostinfo_fromfd(fd);    if(info == NULL)  return -1;    struct localhost *pstHostInfo = (struct localhost *)info;    if(pstHostInfo->protocol == IPPROTO_UDP)    { LL_REMOVE(pstHostInfo, g_pstHost); if (pstHostInfo->rcvbuf)rte_ring_free(pstHostInfo->rcvbuf);if (pstHostInfo->sndbuf) rte_ring_free(pstHostInfo->sndbuf);rte_free(pstHostInfo);set_fd_frombitmap(fd);    }    return 0;}

五、UDP包处理

5.1 协议栈接收

协议栈一直接收来自网卡的数据包,我们需要筛选出需要的协议数据,这主要是通过网络层中的IP数据头来分析,代码如下:

int pkt_process(void *arg){    struct rte_mempool *pstMbufPool;    int iRxNum;int i;struct rte_mbuf *pstMbuf[32];struct rte_ether_hdr *pstEthHdr;    struct rte_ipv4_hdr *pstIpHdr;    pstMbufPool = (struct rte_mempool *)arg;    while(1)    { iRxNum = rte_ring_mc_dequeue_burst(g_pstRingIns->pstInRing, (void**)pstMbuf, D_BURST_SIZE, NULL);  if(iRxNum <= 0)continue;  for(i = 0; i < iRxNum; ++i) {     pstEthHdr = rte_pktmbuf_mtod_offset(pstMbuf[i], struct rte_ether_hdr *, 0);     if (pstEthHdr->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))   //IPv4: 0800      {  pstIpHdr = (struct rte_ipv4_hdr *)(pstEthHdr + 1);  // 维护一个arp表ng_arp_entry_insert(pstIpHdr->src_addr, pstEthHdr->s_addr.addr_bytes);  if(pstIpHdr->next_proto_id == IPPROTO_UDP) // udp   {      // udp process      udp_process(pstMbuf[i]);  }  else if(pstIpHdr->next_proto_id == IPPROTO_TCP)  // tcp  {      printf("tcp_process ---\n");tcp_process(pstMbuf[i]);  }     }    } // to send udp_out(pstMbufPool); tcp_out(pstMbufPool);    }    return 0;}

其中,对于UDP数据包而言,如果应用层已经创建了控制块,那么协议栈这边主要是组装好数据,再发送到控制块中的接收队列,并通知阻塞在recvfrom函数中的应用层进行处理。

int udp_process(struct rte_mbuf *pstUdpMbuf) {    struct rte_ipv4_hdr *pstIpHdr;    struct rte_udp_hdr *pstUdpHdr;    struct localhost *pstHost;    struct offload *pstOffLoad;    pstIpHdr = rte_pktmbuf_mtod_offset(pstUdpMbuf, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));pstUdpHdr = (struct rte_udp_hdr *)(pstIpHdr + 1);    struct in_addr addr;addr.s_addr = pstIpHdr->src_addr;printf("udp_process ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstUdpHdr->src_port));    pstHost = get_hostinfo_fromip_port(pstIpHdr->dst_addr, pstUdpHdr->dst_port, pstIpHdr->next_proto_id);    if (pstHost == NULL)     {rte_pktmbuf_free(pstUdpMbuf);return -3;}     pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);if (pstOffLoad == NULL)     {rte_pktmbuf_free(pstUdpMbuf);return -1;}    pstOffLoad->dip = pstIpHdr->dst_addr;pstOffLoad->sip = pstIpHdr->src_addr;pstOffLoad->sport = pstUdpHdr->src_port;pstOffLoad->dport = pstUdpHdr->dst_port;    pstOffLoad->protocol = IPPROTO_UDP;pstOffLoad->length = ntohs(pstUdpHdr->dgram_len);    pstOffLoad->data = rte_malloc("unsigned char*", pstOffLoad->length - sizeof(struct rte_udp_hdr), 0);if (pstOffLoad->data == NULL)     {rte_pktmbuf_free(pstUdpMbuf);rte_free(pstOffLoad);return -2;}    rte_memcpy(pstOffLoad->data, (unsigned char *)(pstUdpHdr+1), pstOffLoad->length - sizeof(struct rte_udp_hdr));rte_ring_mp_enqueue(pstHost->rcvbuf, pstOffLoad);  // recv bufferpthread_mutex_lock(&pstHost->mutex);pthread_cond_signal(&pstHost->cond);pthread_mutex_unlock(&pstHost->mutex);rte_pktmbuf_free(pstUdpMbuf);    return 0;}

5.2 协议栈发送

协议栈遍历应用层控制块,如果发送队列中有数据,则协议栈的任务就是将其准确地发送给网卡,这包括广播arp包、组装标准UDP数据包,代码如下:

int udp_out(struct rte_mempool *pstMbufPool) {    struct localhost *pstHost;    for(pstHost = g_pstHost; pstHost != NULL; pstHost = pstHost->next)    { struct offload *pstOffLoad = NULL; int iSendCnt = rte_ring_mc_dequeue(pstHost->sndbuf, (void **)&pstOffLoad); if(iSendCnt < 0)      continue;  struct in_addr addr;addr.s_addr = pstOffLoad->dip;printf("udp_out ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport)); unsigned char *dstmac = ng_get_dst_macaddr(pstOffLoad->dip); // 查询对端mac地址if (dstmac == NULL)  // 先广播发个arp包确定对端mac地址 {struct rte_mbuf *pstArpbuf = ng_send_arp(pstMbufPool, RTE_ARP_OP_REQUEST, g_aucDefaultArpMac, pstOffLoad->sip, pstOffLoad->dip);rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstArpbuf, 1, NULL);rte_ring_mp_enqueue(pstHost->sndbuf, pstOffLoad); // 将取出的udp数据再次写入队列}  else  {struct rte_mbuf *pstUdpbuf = ng_udp_pkt(pstMbufPool, pstOffLoad->sip, pstOffLoad->dip,pstOffLoad->sport, pstOffLoad->dport, pstHost->localmac,dstmac, pstOffLoad->data, pstOffLoad->length);rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstUdpbuf, 1, NULL);if (pstOffLoad->data != NULL)rte_free(pstOffLoad->data);rte_free(pstOffLoad);}    }    return 0;}

5.3 项目地址及相关文章

项目地址:https://github.com/hjlogzw/DPDK-TCP-UDP_Protocol_Stack
DPDK——TCP/UDP协议栈服务端实现(一)
后台服务器