linux内核网络收包过程(一)
目录
网络收包过程
创建ksoftirqd内核线程
网络子系统初始化
协议栈注册
网卡驱动初始化
启动网卡
网络收包过程
1,数据帧从外部网络到达网卡
2,网卡把帧使用DMA方式搬到内存
3,硬中断通知CPU
4,CPU响应硬中断,通知软中断处理数据
5,ksoftirqd内核线程处理软中断,调用网卡驱动注册的poll函数开始收包
6,帧从Ring buffer上取出
7,协议层处理网络帧将数据data放到socket接收队列中
创建ksoftirqd内核线程
软中断在内核线程ksoftirqd中进行 [kernel/softirq.c]
static struct smp_hotplug_thread softirq_threads = {.store= &ksoftirqd,.thread_should_run= ksoftirqd_should_run,.thread_fn= run_ksoftirqd,.thread_comm= "ksoftirqd/%u",};static __init int spawn_ksoftirqd(void){cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets);BUG_ON(smpboot_register_percpu_thread(&softirq_threads));return 0;}early_initcall(spawn_ksoftirqd);
ksoftirqd线程被创建出来以后,进入自己的线程循环:先通过ksoftirqd_should_run判断有没有软中断需要被处理,如果有,则调用run_ksoftirqd进行处理。
网络子系统初始化
/*
 * Network subsystem initialization (excerpt; "..." marks code the article
 * omitted, including the declaration of i and the return statement).
 *
 * For every possible CPU it initializes the input_pkt_queue and
 * process_queue skb queues of that CPU's softnet_data, then registers the
 * handlers for the NET_TX_SOFTIRQ and NET_RX_SOFTIRQ softirqs.
 */
static int __init net_dev_init(void)
{
	for_each_possible_cpu(i) {
		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
	}
	...
	/* Bind the transmit and receive softirq numbers to their handlers. */
	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
}
subsys_initcall(net_dev_init);
open_softirq注册软中断NET_TX_SOFTIRQ发送与NET_RX_SOFTIRQ接收。
/*
 * Register a softirq handler.
 *
 * @nr:     index into softirq_vec (the softirq number)
 * @action: handler stored in softirq_vec[nr].action
 *
 * No bounds check is performed on nr here.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
softirq_vec中断向量,根据软中断枚举,注册相应的函数。
协议栈注册
网络层ip协议,传输层tcp/udp协议
static struct packet_type ip_packet_type __read_mostly = {.type = cpu_to_be16(ETH_P_IP),.func = ip_rcv,.list_func = ip_list_rcv,};static struct net_protocol tcp_protocol = {.early_demux=tcp_v4_early_demux,.early_demux_handler = tcp_v4_early_demux,.handler=tcp_v4_rcv,.err_handler=tcp_v4_err,.no_policy=1,.netns_ok=1,.icmp_strict_tag_validation = 1,};static struct net_protocol udp_protocol = {.early_demux =udp_v4_early_demux,.early_demux_handler =udp_v4_early_demux,.handler =udp_rcv,.err_handler =udp_err,.no_policy =1,.netns_ok =1,};static int __init inet_init(void){ //注册tcp_portrc = proto_register(&tcp_prot, 1); //注册udp_portrc = proto_register(&udp_prot, 1);... //注册udp协议if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)pr_crit("%s: Cannot add UDP protocol\n", __func__); //注册tcp协议if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)pr_crit("%s: Cannot add TCP protocol\n", __func__);dev_add_pack(&ip_packet_type);}
先看一下inet_add_protocol
/* Table of registered IPv4 transport protocols, indexed by protocol number. */
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;

/*
 * Register a transport protocol (excerpt; "..." marks omitted code).
 *
 * @prot:     protocol descriptor to install
 * @protocol: index into inet_protos
 *
 * cmpxchg() installs prot only if the slot currently holds NULL.
 * Returns 0 when the slot was empty and prot was installed, -1 when the
 * slot was already occupied.
 */
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
	...
	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
			NULL, prot) ? 0 : -1;
}
inet_add_protocol将tcp,udp相应的函数注册到inet_protos数组中。即inet_protos 记录着 udp,tcp 的处理函数地址
再看一下dev_add_pack
void dev_add_pack(struct packet_type *pt){struct list_head *head = ptype_head(pt);...}static inline struct list_head *ptype_head(const struct packet_type *pt){if (pt->type == htons(ETH_P_ALL))return pt->dev ? &pt->dev->ptype_all : &ptype_all;elsereturn pt->dev ? &pt->dev->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];}
ptype_base 存储着ip_rcv() 函数的处理地址。
网卡驱动初始化
驱动程序会使用 module_init 向内核注册一个初始化函数,以igb网卡驱动为例
/*
 * PCI driver descriptor for igb (excerpt; "..." marks fields the article
 * omitted). igb_probe and igb_remove are installed as the probe and remove
 * callbacks; igb_pci_tbl is the device ID table.
 */
static struct pci_driver igb_driver = {
	.name = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe = igb_probe,
	.remove = igb_remove,
	...
};

/*
 * Module init function: registers igb_driver through pci_register_driver()
 * and returns that call's result unchanged.
 */
static int __init igb_init_module(void)
{
	int ret;

	ret = pci_register_driver(&igb_driver);
	return ret;
}
module_init(igb_init_module);
当网卡设备被识别后,内核会调用其驱动probe方法(igb_probe)
static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent){ //DMA初始化err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); //ethools注册igb_netdev_opsnetdev->netdev_ops = &igb_netdev_ops;igb_set_ethtool_ops(netdev);err = igb_sw_init(adapter); //注册netdeverr = register_netdev(netdev);dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);}
igb_sw_init-> igb_init_interrupt_scheme->igb_alloc_q_vectors->igb_alloc_q_vector
/*
 * Allocate one queue vector (excerpt; "..." marks omitted code, including
 * the declaration of q_vector and the return statement).
 *
 * The visible part registers igb_poll as the poll callback of the queue
 * vector's napi context on adapter->netdev. The final argument 64 is
 * presumably the NAPI weight -- confirm against netif_napi_add().
 */
static int igb_alloc_q_vector(struct igb_adapter *adapter,
			      int v_count, int v_idx,
			      int txr_count, int txr_idx,
			      int rxr_count, int rxr_idx)
{
	netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
	...
}
netif_napi_add 注册了 NAPI 机制所必须的 poll 函数,对于 igb 网卡驱动来说是 igb_poll。
启动网卡
当启用一个网卡时(例如通过 ifconfig eth0 up),net_device_ops 中的 igb_open 方法会被调用
- 启动网卡
- 分配RX TX队列内存
- 调用net_device_ops中注册的open等函数
- 注册中断处理函数
- 开硬中断,等待数据包
/*
 * Bring the interface up (excerpt; "..." marks omitted code, including the
 * declarations of adapter, err and i and all error handling).
 *
 * Visible steps: allocate TX and RX descriptor resources, request the
 * interrupts, then enable NAPI on every queue vector.
 *
 * Block comments are used instead of "//" so that a comment can never
 * swallow the statement that follows it if line breaks are lost again.
 */
static int __igb_open(struct net_device *netdev, bool resuming)
{
	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);

	/* set up interrupt handling */
	err = igb_request_irq(adapter);

	/* enable NAPI on each queue vector */
	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));
	...
	return 0;
}
中断处理igb_request_irq
static int igb_request_irq(struct igb_adapter *adapter){struct net_device *netdev = adapter->netdev;struct pci_dev *pdev = adapter->pdev;int err = 0;if (adapter->flags & IGB_FLAG_HAS_MSIX) {err = igb_request_msix(adapter); ...}...}static int igb_request_msix(struct igb_adapter *adapter){for (i = 0; i num_q_vectors; i++) {struct igb_q_vector *q_vector = adapter->q_vector[i];vector++;err = request_irq(adapter->msix_entries[vector].vector, igb_msix_ring, 0, q_vector->name, q_vector);}igb_configure_msix(adapter);return 0;...}
在 igb_request_msix 中对于多队列的网卡,为每一个队列都注册了中断,其对应的中断处理函数为 igb_msix_ring。 参考 https://course.0voice.com/v1/course/intro?courseId=2&agentId=0