> 文档中心 > linux内核网络收包过程(一)

linux内核网络收包过程(一)

目录

网络收包过程

创建ksoftirqd内核线程

网络子系统初始化

协议栈注册

网卡驱动初始化

启动网卡


网络收包过程

1,数据帧从外部网络到达网卡

2,网卡把帧使用DMA方式搬到内存

3,硬中断通知CPU

4,CPU响应硬中断,通知软中断处理数据

5,ksoftirqd内核线程处理软中断,调用网卡驱动注册的poll函数开始收包

6,帧从Ring buffer上取出

7,协议层处理网络帧将数据data放到socket接收队列中


创建ksoftirqd内核线程

软中断在内核线程ksoftirqd中进行 [kernel/softirq.c]

/*
 * Descriptor passed to smpboot_register_percpu_thread() in
 * spawn_ksoftirqd() below. It names the callbacks used for the softirq
 * threads: ksoftirqd_should_run as the "should run" check and
 * run_ksoftirqd as the thread function.
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

/*
 * Boot-time hook, registered through early_initcall() below.
 *
 * Sets up the CPUHP_SOFTIRQ_DEAD hotplug state (callbacks: NULL and
 * takeover_tasklets) and registers the softirq threads described by
 * softirq_threads. A non-zero result from the registration hits BUG_ON().
 *
 * Always returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

ksoftirqd 线程被创建出来以后进入线程循环:通过 ksoftirqd_should_run 判断有没有软中断需要被处理,如果有,则调用 run_ksoftirqd 进行处理。


网络子系统初始化

/*
 * Network device subsystem initialisation, registered through
 * subsys_initcall() below.
 *
 * Excerpt: "..." marks code elided by the article, and the declaration of
 * the loop variable and the return statement are not shown.
 */
static int __init net_dev_init(void)
{
	/* For every possible CPU, initialise the two queues in its softnet_data. */
	for_each_possible_cpu(i) {
		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
	}
	...
	/* Install net_tx_action / net_rx_action as the TX / RX softirq handlers. */
	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
}
subsys_initcall(net_dev_init);

open_softirq 为发送软中断 NET_TX_SOFTIRQ 和接收软中断 NET_RX_SOFTIRQ 分别注册处理函数 net_tx_action 与 net_rx_action。

/*
 * Register a softirq handler.
 *
 * @nr:     index of the softirq_vec entry to fill in
 * @action: handler stored in that entry's .action field
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

softirq_vec 是软中断向量表,open_softirq 以软中断枚举值为下标,把相应的处理函数注册到表中。


协议栈注册

网络层ip协议,传输层tcp/udp协议

static struct packet_type ip_packet_type __read_mostly = {.type = cpu_to_be16(ETH_P_IP),.func = ip_rcv,.list_func = ip_list_rcv,};static struct net_protocol tcp_protocol = {.early_demux=tcp_v4_early_demux,.early_demux_handler =  tcp_v4_early_demux,.handler=tcp_v4_rcv,.err_handler=tcp_v4_err,.no_policy=1,.netns_ok=1,.icmp_strict_tag_validation = 1,};static struct net_protocol udp_protocol = {.early_demux =udp_v4_early_demux,.early_demux_handler =udp_v4_early_demux,.handler =udp_rcv,.err_handler =udp_err,.no_policy =1,.netns_ok =1,};static int __init inet_init(void){    //注册tcp_portrc = proto_register(&tcp_prot, 1);    //注册udp_portrc = proto_register(&udp_prot, 1);...    //注册udp协议if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)pr_crit("%s: Cannot add UDP protocol\n", __func__);    //注册tcp协议if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)pr_crit("%s: Cannot add TCP protocol\n", __func__);dev_add_pack(&ip_packet_type);}

 先看一下inet_add_protocol

/* Table of registered protocol handlers, indexed by protocol number. */
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;

/*
 * Register @prot as the handler for protocol number @protocol.
 *
 * The inet_protos slot is claimed with cmpxchg() only when it currently
 * holds NULL.
 *
 * Returns 0 when the slot was claimed, -1 when it was already occupied.
 * Excerpt: "..." marks code elided by the article.
 */
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
	...
	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
			NULL, prot) ? 0 : -1;
}

inet_add_protocol将tcp,udp相应的函数注册到inet_protos数组中。即inet_protos 记录着 udp,tcp 的处理函数地址

再看一下dev_add_pack

void dev_add_pack(struct packet_type *pt){struct list_head *head = ptype_head(pt);...}static inline struct list_head *ptype_head(const struct packet_type *pt){if (pt->type == htons(ETH_P_ALL))return pt->dev ? &pt->dev->ptype_all : &ptype_all;elsereturn pt->dev ? &pt->dev->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];}

ptype_base 存储着ip_rcv() 函数的处理地址。

网卡驱动初始化

驱动程序会使用 module_init 向内核注册一个初始化函数,以 igb 网卡驱动为例:

/*
 * PCI driver descriptor for igb: driver name, device ID table and the
 * probe/remove callbacks. "..." marks fields elided by the article.
 */
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = igb_remove,
	...
};

/*
 * Module entry point, registered through module_init() below.
 * Registers igb_driver with pci_register_driver() and returns its result.
 */
static int __init igb_init_module(void)
{
	int ret;

	ret = pci_register_driver(&igb_driver);
	return ret;
}
module_init(igb_init_module);

当网卡设备被识别后,内核会调用其驱动probe方法(igb_probe)

static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent){    //DMA初始化err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));    //ethools注册igb_netdev_opsnetdev->netdev_ops = &igb_netdev_ops;igb_set_ethtool_ops(netdev);err = igb_sw_init(adapter);    //注册netdeverr = register_netdev(netdev);dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);}

调用链:igb_sw_init -> igb_init_interrupt_scheme -> igb_alloc_q_vectors -> igb_alloc_q_vector

/*
 * Allocate one queue vector for @adapter.
 *
 * Excerpt: only the NAPI registration is shown; the declaration of
 * q_vector, the rest of the body ("...") and the return statement are
 * elided by the article.
 */
static int igb_alloc_q_vector(struct igb_adapter *adapter,
			      int v_count, int v_idx,
			      int txr_count, int txr_idx,
			      int rxr_count, int rxr_idx)
{
	/* Register igb_poll as the NAPI poll function of this vector (last argument 64). */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       igb_poll, 64);
	...
}

netif_napi_add 注册了 NAPI 机制所必需的 poll 函数,对于 igb 网卡驱动来说是 igb_poll。

启动网卡

当启用一个网卡时(例如通过 ifconfig eth0 up),net_device_ops 中的 igb_open 方法会被调用

  1. 启动网卡
  2. 分配RX TX队列内存
  3. 调用net_device_ops中注册的open等函数
  4. 注册中断处理函数
  5. 开硬中断,等待数据包
/*
 * Bring the interface up.
 *
 * Excerpt: local declarations (adapter, err, i) and error handling are
 * not shown; "..." marks code elided by the article.
 */
static int __igb_open(struct net_device *netdev, bool resuming)
{
	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);

	/* request the interrupts */
	err = igb_request_irq(adapter);

	/* enable NAPI on every queue vector */
	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));
	...
	return 0;
}

中断处理igb_request_irq

static int igb_request_irq(struct igb_adapter *adapter){struct net_device *netdev = adapter->netdev;struct pci_dev *pdev = adapter->pdev;int err = 0;if (adapter->flags & IGB_FLAG_HAS_MSIX) {err = igb_request_msix(adapter); ...}...}static int igb_request_msix(struct igb_adapter *adapter){for (i = 0; i num_q_vectors; i++) {struct igb_q_vector *q_vector = adapter->q_vector[i];vector++;err = request_irq(adapter->msix_entries[vector].vector,  igb_msix_ring, 0, q_vector->name,  q_vector);}igb_configure_msix(adapter);return 0;...}

igb_request_msix 中对于多队列的网卡,为每一个队列都注册了中断,其对应的中断处理函数为 igb_msix_ring。 参考 https://course.0voice.com/v1/course/intro?courseId=2&agentId=0