DPDK——TCP/UDP协议栈服务端实现(二)
文章目录
一、概述
在《DPDK——TCP/UDP协议栈服务端实现(一)》中已经讲述TCP/UDP简易协议栈项目的框架和基本信息,本篇文章要完成的工作如下:
- udp包结构设计,包括udp控制块结构、协议栈与应用层传输包结构
- ARP表处理
- UDP套接字函数实现
- UDP包处理
二、UDP数据结构
如下图所示,UDP的数据结构主要分为两个部分:数据传输块与控制块。
UDP控制块是在应用层创建socket的同时生成的,主要包含一个发送队列和接收队列,线程同步变量,以及相关参数,如下:
struct localhost {int fd;uint32_t localip; // ip --> macunsigned char localmac[RTE_ETHER_ADDR_LEN];uint16_t localport;unsigned char protocol;struct rte_ring *sndbuf;struct rte_ring *rcvbuf;struct localhost *prev; struct localhost *next;pthread_cond_t cond;pthread_mutex_t mutex;};
传输块作为协议栈向UDP应用通信的数据封装,DPDK协议栈在收到网卡发送的数据后,按照传输块的结构来封装数据,并发送至UDP控制块中的接收队列,结构如下:
/*
 * Transfer block: the unit passed between the protocol stack and the UDP
 * application through a control block's send / receive ring.
 *
 * Addresses and ports are stored exactly as they appear in the packet
 * headers or in the caller's sockaddr_in (no byte-order conversion is
 * applied when they are filled in).
 */
struct offload {
    /* source / destination IPv4 address */
    uint32_t sip;
    uint32_t dip;

    /* source / destination port */
    uint16_t sport;
    uint16_t dport;

    /* transport protocol of the carried data */
    int protocol;

    /* separately allocated payload buffer and its size in bytes */
    unsigned char *data;
    uint16_t length;
};
三、arp表处理
在网络传输中,ARP协议承担着“护送数据包最后一公里”的任务,因为在数据包到达局域网后,ip地址作为一个不可靠的标识,显然不能确保数据包具体送达目的主机,这个时候我们就要获知主机的MAC地址了。具体做法就是向局域网内同一个路由器下所有设备广播,内容就是“你们谁是ip为xxx.xxx.xxx.xxx的主机呀?看到消息请马上告知你的MAC地址,你的快递到了哈!”
所以,本项目也必须维护一个ip和mac映射的arp表,这样才能准确将数据包发给对方主机。
arp表的维护主要发生在两个地方:协议栈收到网卡数据包时,存储对端的ip和mac地址信息;协议栈发送数据包之前查询arp表,若没有对应的mac地址,则先广播arp请求包。
主要函数如下:
int ng_arp_entry_insert(uint32_t ip, unsigned char *mac){ struct arp_table *pstTbl = arp_table_instance(); struct arp_entry *pstEntry = NULL; unsigned char *pstHwaddr = NULL; pstHwaddr = ng_get_dst_macaddr(ip); if(pstHwaddr == NULL) { pstEntry = rte_malloc("arp_entry", sizeof(struct arp_entry), 0);if (pstEntry) {memset(pstEntry, 0, sizeof(struct arp_entry));pstEntry->ip = ip;rte_memcpy(pstEntry->hwaddr, mac, RTE_ETHER_ADDR_LEN);pstEntry->type = 0;pthread_spin_lock(&pstTbl->spinlock);LL_ADD(pstEntry, pstTbl->entries);pstTbl->count ++;pthread_spin_unlock(&pstTbl->spinlock);} return 1; } return 0;}
四、UDP套接字函数实现
4.1 socket函数
该函数主要实现获取fd、创建控制块。
int nsocket(__attribute__((unused)) int domain, int type, __attribute__((unused)) int protocol){ int iFd; struct localhost *pstHost; pthread_cond_t pctCond = PTHREAD_COND_INITIALIZER; pthread_mutex_t pmtMutex = PTHREAD_MUTEX_INITIALIZER; iFd = get_fd_frombitmap(); if(type == SOCK_DGRAM) // udp { pstHost = rte_malloc("localhost", sizeof(struct localhost), 0); if(pstHost == NULL) { printf("[%s][%d]: rte_malloc fail!\n", __FUNCTION__, __LINE__); return -1; } memset(pstHost, 0x00, sizeof(struct localhost)); pstHost->fd = iFd; pstHost->protocol = IPPROTO_UDP; pstHost->rcvbuf = rte_ring_create("recv buffer", D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ); if (pstHost->rcvbuf == NULL) { printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);rte_free(pstHost);return -1;} pstHost->sndbuf = rte_ring_create("send buffer", D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ); if (pstHost->sndbuf == NULL) { printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__); rte_ring_free(pstHost->rcvbuf);rte_free(pstHost);return -1;}rte_memcpy(&pstHost->cond, &pctCond, sizeof(pthread_cond_t));rte_memcpy(&pstHost->mutex, &pmtMutex, sizeof(pthread_mutex_t));LL_ADD(pstHost, g_pstHost); }return iFd;}
4.2 bind函数
bind函数的任务是将ip和端口信息绑定到socket函数创建的控制块结构当中。
int nbind(int sockfd, const struct sockaddr *addr, __attribute__((unused)) socklen_t addrlen){ void *info = NULL; info = get_hostinfo_fromfd(sockfd); if(info == NULL) return -1; struct localhost *pstHostInfo = (struct localhost *)info; if(pstHostInfo->protocol == IPPROTO_UDP) { const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)addr;pstHostInfo->localport = pstAddr->sin_port;rte_memcpy(&pstHostInfo->localip, &pstAddr->sin_addr.s_addr, sizeof(uint32_t));rte_memcpy(pstHostInfo->localmac, &g_stCpuMac, RTE_ETHER_ADDR_LEN); } return 0;}
4.3 recvfrom函数
目前实现的recvfrom函数为阻塞式的,使用条件变量+互斥量等待接收队列中数据到来。
ssize_t nrecvfrom(int sockfd, void *buf, size_t len, __attribute__((unused)) int flags, struct sockaddr *src_addr, __attribute__((unused)) socklen_t *addrlen){ struct localhost *pstHostInfo = NULL; struct offload *pstOffLoad = NULL; struct sockaddr_in *pstAddr = NULL;unsigned char *pucPtr = NULL; int iLen = 0; int iRet = -1; pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd); if(pstHostInfo == NULL) return -1; pthread_mutex_lock(&pstHostInfo->mutex); while((iRet = rte_ring_mc_dequeue(pstHostInfo->rcvbuf, (void**)&pstOffLoad)) < 0) { pthread_cond_wait(&pstHostInfo->cond, &pstHostInfo->mutex); } pthread_mutex_unlock(&pstHostInfo->mutex); pstAddr = (struct sockaddr_in *)src_addr; pstAddr->sin_port = pstOffLoad->sport; rte_memcpy(&pstAddr->sin_addr.s_addr, &pstOffLoad->sip, sizeof(uint32_t)); if(len < pstOffLoad->length) { rte_memcpy(buf, pstOffLoad->data, len); pucPtr = rte_malloc("unsigned char *", pstOffLoad->length - len, 0);rte_memcpy(pucPtr, pstOffLoad->data + len, pstOffLoad->length - len);pstOffLoad->length -= len;rte_free(pstOffLoad->data);pstOffLoad->data = pucPtr;rte_ring_mp_enqueue(pstHostInfo->rcvbuf, pstOffLoad);return len; } iLen = pstOffLoad->length; rte_memcpy(buf, pstOffLoad->data, pstOffLoad->length); rte_free(pstOffLoad->data); rte_free(pstOffLoad); return iLen;}
4.4 sendto函数
sendto函数则是将待发送数据封装成传输块,放入发送队列当中,交由协议栈发送至网卡。
ssize_t nsendto(int sockfd, const void *buf, size_t len, __attribute__((unused)) int flags, const struct sockaddr *dest_addr, __attribute__((unused)) socklen_t addrlen){ struct localhost *pstHostInfo = NULL; struct offload *pstOffLoad = NULL; const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)dest_addr; pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd); if(pstHostInfo == NULL) return -1; pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);if (pstOffLoad == NULL) return -1; pstOffLoad->dip = pstAddr->sin_addr.s_addr;pstOffLoad->dport = pstAddr->sin_port;pstOffLoad->sip = pstHostInfo->localip;pstOffLoad->sport = pstHostInfo->localport;pstOffLoad->length = len; /* struct in_addr addr;addr.s_addr = pstOffLoad->dip;printf("nsendto ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport)); */ pstOffLoad->data = rte_malloc("unsigned char *", len, 0);if (pstOffLoad->data == NULL) {rte_free(pstOffLoad);return -1;}rte_memcpy(pstOffLoad->data, buf, len);rte_ring_mp_enqueue(pstHostInfo->sndbuf, pstOffLoad);return len;}
4.5 close函数
close函数则是将创建的控制块进行释放。
int nclose(int fd){ void *info = NULL; info = (struct localhost *)get_hostinfo_fromfd(fd); if(info == NULL) return -1; struct localhost *pstHostInfo = (struct localhost *)info; if(pstHostInfo->protocol == IPPROTO_UDP) { LL_REMOVE(pstHostInfo, g_pstHost); if (pstHostInfo->rcvbuf)rte_ring_free(pstHostInfo->rcvbuf);if (pstHostInfo->sndbuf) rte_ring_free(pstHostInfo->sndbuf);rte_free(pstHostInfo);set_fd_frombitmap(fd); } return 0;}
五、UDP包处理
5.1 协议栈接收
协议栈一直接收来自网卡的数据包,我们需要筛选出需要的协议数据,这主要是通过网络层中的IP数据头来分析,代码如下:
int pkt_process(void *arg){ struct rte_mempool *pstMbufPool; int iRxNum;int i;struct rte_mbuf *pstMbuf[32];struct rte_ether_hdr *pstEthHdr; struct rte_ipv4_hdr *pstIpHdr; pstMbufPool = (struct rte_mempool *)arg; while(1) { iRxNum = rte_ring_mc_dequeue_burst(g_pstRingIns->pstInRing, (void**)pstMbuf, D_BURST_SIZE, NULL); if(iRxNum <= 0)continue; for(i = 0; i < iRxNum; ++i) { pstEthHdr = rte_pktmbuf_mtod_offset(pstMbuf[i], struct rte_ether_hdr *, 0); if (pstEthHdr->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) //IPv4: 0800 { pstIpHdr = (struct rte_ipv4_hdr *)(pstEthHdr + 1); // 维护一个arp表ng_arp_entry_insert(pstIpHdr->src_addr, pstEthHdr->s_addr.addr_bytes); if(pstIpHdr->next_proto_id == IPPROTO_UDP) // udp { // udp process udp_process(pstMbuf[i]); } else if(pstIpHdr->next_proto_id == IPPROTO_TCP) // tcp { printf("tcp_process ---\n");tcp_process(pstMbuf[i]); } } } // to send udp_out(pstMbufPool); tcp_out(pstMbufPool); } return 0;}
其中,对于UDP数据包而言,如果应用层已经创建了控制块,那么协议栈这边主要是组装好数据,再发送到控制块中的接收队列,并通知阻塞在recvfrom函数中的应用层进行处理。
int udp_process(struct rte_mbuf *pstUdpMbuf) { struct rte_ipv4_hdr *pstIpHdr; struct rte_udp_hdr *pstUdpHdr; struct localhost *pstHost; struct offload *pstOffLoad; pstIpHdr = rte_pktmbuf_mtod_offset(pstUdpMbuf, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));pstUdpHdr = (struct rte_udp_hdr *)(pstIpHdr + 1); struct in_addr addr;addr.s_addr = pstIpHdr->src_addr;printf("udp_process ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstUdpHdr->src_port)); pstHost = get_hostinfo_fromip_port(pstIpHdr->dst_addr, pstUdpHdr->dst_port, pstIpHdr->next_proto_id); if (pstHost == NULL) {rte_pktmbuf_free(pstUdpMbuf);return -3;} pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);if (pstOffLoad == NULL) {rte_pktmbuf_free(pstUdpMbuf);return -1;} pstOffLoad->dip = pstIpHdr->dst_addr;pstOffLoad->sip = pstIpHdr->src_addr;pstOffLoad->sport = pstUdpHdr->src_port;pstOffLoad->dport = pstUdpHdr->dst_port; pstOffLoad->protocol = IPPROTO_UDP;pstOffLoad->length = ntohs(pstUdpHdr->dgram_len); pstOffLoad->data = rte_malloc("unsigned char*", pstOffLoad->length - sizeof(struct rte_udp_hdr), 0);if (pstOffLoad->data == NULL) {rte_pktmbuf_free(pstUdpMbuf);rte_free(pstOffLoad);return -2;} rte_memcpy(pstOffLoad->data, (unsigned char *)(pstUdpHdr+1), pstOffLoad->length - sizeof(struct rte_udp_hdr));rte_ring_mp_enqueue(pstHost->rcvbuf, pstOffLoad); // recv bufferpthread_mutex_lock(&pstHost->mutex);pthread_cond_signal(&pstHost->cond);pthread_mutex_unlock(&pstHost->mutex);rte_pktmbuf_free(pstUdpMbuf); return 0;}
5.2 协议栈发送
协议栈遍历应用层控制块,如果发送队列中有数据,则协议栈的任务就是准确的发送给网卡,这包括广播arp包、组装标准UDP数据包,代码如下:
int udp_out(struct rte_mempool *pstMbufPool) { struct localhost *pstHost; for(pstHost = g_pstHost; pstHost != NULL; pstHost = pstHost->next) { struct offload *pstOffLoad = NULL; int iSendCnt = rte_ring_mc_dequeue(pstHost->sndbuf, (void **)&pstOffLoad); if(iSendCnt < 0) continue; struct in_addr addr;addr.s_addr = pstOffLoad->dip;printf("udp_out ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport)); unsigned char *dstmac = ng_get_dst_macaddr(pstOffLoad->dip); // 查询对端mac地址if (dstmac == NULL) // 先广播发个arp包确定对端mac地址 {struct rte_mbuf *pstArpbuf = ng_send_arp(pstMbufPool, RTE_ARP_OP_REQUEST, g_aucDefaultArpMac, pstOffLoad->sip, pstOffLoad->dip);rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstArpbuf, 1, NULL);rte_ring_mp_enqueue(pstHost->sndbuf, pstOffLoad); // 将取出的udp数据再次写入队列} else {struct rte_mbuf *pstUdpbuf = ng_udp_pkt(pstMbufPool, pstOffLoad->sip, pstOffLoad->dip,pstOffLoad->sport, pstOffLoad->dport, pstHost->localmac,dstmac, pstOffLoad->data, pstOffLoad->length);rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstUdpbuf, 1, NULL);if (pstOffLoad->data != NULL)rte_free(pstOffLoad->data);rte_free(pstOffLoad);} } return 0;}
5.3 项目地址及相关文章
项目地址:https://github.com/hjlogzw/DPDK-TCP-UDP_Protocol_Stack
DPDK——TCP/UDP协议栈服务端实现(一)
后台服务器