首页 > 系统服务 > 详细

Linux内核数据包处理流程-数据包接收(1)[转载]

时间:2017-02-13 11:29:24      阅读:246      评论:0      收藏:0      [点我收藏+]

数据包的接收

作者:kendo

Kernel:2.6.12

一、从网卡说起

这并非是一个网卡驱动分析的专门文档,只是对网卡处理数据包的流程进行一个重点的分析。这里以Intel的e100驱动为例进行分析。
大多数网卡都是一个PCI设备,PCI设备都包含了一组标准的配置寄存器,寄存器中包含了PCI设备的厂商ID、设备ID等信息,驱动程序使用struct pci_device_id来描述这些寄存器中的标识符,如下:

1 struct pci_device_id {
2         __u32 vendor, device;               
3         __u32 subvendor, subdevice;       
4         __u32 class, class_mask;       
5         kernel_ulong_t driver_data;
6 };

这样,在驱动程序中,常常就可以看到定义一个struct pci_device_id 类型的数组,告诉内核支持不同类型的
PCI设备的列表,以e100驱动为例:

 1 #define INTEL_8255X_ETHERNET_DEVICE(device_id, ich) { \
 2         PCI_VENDOR_ID_INTEL, device_id, PCI_ANY_ID, PCI_ANY_ID, \
 3         PCI_CLASS_NETWORK_ETHERNET << 8, 0xFFFF00, ich }
 4        
 5 static struct pci_device_id e100_id_table[] = {
 6         INTEL_8255X_ETHERNET_DEVICE(0x1029, 0),
 7         INTEL_8255X_ETHERNET_DEVICE(0x1030, 0),
 8         INTEL_8255X_ETHERNET_DEVICE(0x1031, 3),
 9 ……
10         { 0, }
11 };

在内核中,一个PCI设备驱动,使用struct pci_driver结构来描述:

 1 struct pci_driver {
 2         struct list_head node;
 3         char *name;
 4         struct module *owner;
 5         const struct pci_device_id *id_table;       
 6         int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);       
 7         void (*remove) (struct pci_dev *dev);       
 8         int  (*suspend) (struct pci_dev *dev, pm_message_t state);       
 9         int  (*resume) (struct pci_dev *dev);                        
10         int  (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable);   
11         void (*shutdown) (struct pci_dev *dev);
12 
13         struct device_driver        driver;
14         struct pci_dynids dynids;
15 };

因为在系统引导的时候,PCI设备已经被识别,当内核发现一个已经检测到的设备同驱动注册的id_table中的信息相匹配时,
它就会触发驱动的probe函数,以e100为例:

 1 static struct pci_driver e100_driver = {
 2         .name =         DRV_NAME,
 3         .id_table =     e100_id_table,
 4         .probe =        e100_probe,
 5         .remove =       __devexit_p(e100_remove),
 6 #ifdef CONFIG_PM
 7         .suspend =      e100_suspend,
 8         .resume =       e100_resume,
 9 #endif
10 
11         .driver = {
12                 .shutdown = e100_shutdown,
13         }
14 
15 };

这样,如果系统检测到有与id_table中对应的设备时,就调用驱动的probe函数。
驱动程序在init函数中,调用pci_module_init函数向内核注册PCI驱动e100_driver:

static int __init e100_init_module(void)
{
        if(((1 << debug) - 1) & NETIF_MSG_DRV) {
                printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
                printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
        }
        return pci_module_init(&e100_driver);
}

一切顺利的话,注册的e100_probe函数将被内核调用,这个函数完成两个重要的工作:
1、分配/初始化/注册网络设备;
2、完成PCI设备的I/O区域的分配和映射,以及完成硬件的其它初始化工作;

网络设备使用struct net_device结构来描述,这个结构非常之大,许多重要的参考书籍对它都有较为深入的描述,可以参考《Linux设备驱动程序》中网卡驱动设计的相关章节。我会在后面的内容中,对其重要的成员进行注释。

当probe函数被调用,证明已经发现了我们所支持的网卡,这样,就可以调用register_netdev函数向内核注册网络设备了,注册之前,一般会调用alloc_etherdev为以太网设备分配一个net_device,然后初始化它的重要成员。

除了向内核注册网络设备之外,探测函数另一项重要的工作就是需要对硬件进行初始化,比如,要访问其I/O区域,需要为I/O区域分配内存区域,然后进行映射,这一步一般的流程是:
1、request_mem_region()
2、ioremap()

对于一般的PCI设备而言,可以调用:
1、pci_request_regions()
2、ioremap()

pci_request_regions函数对PCI设备的6个基地址寄存器(BAR)所描述的区域,都会调用资源分配函数进行申请(需要判断是I/O端口还是I/O内存),例如:

1 int pci_request_regions(struct pci_dev *pdev, char *res_name)
2 {
3         int i;
4        
5         for (i = 0; i < 6; i++)
6                 if(pci_request_region(pdev, i, res_name))
7                         goto err_out;
8         return 0;
9 ……
10 }

 1 int pci_request_region(struct pci_dev *pdev, int bar, char *res_name)
 2 {
 3         if (pci_resource_len(pdev, bar) == 0)
 4                 return 0;
 5                
 6         if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
 7                 if (!request_region(pci_resource_start(pdev, bar),
 8                             pci_resource_len(pdev, bar), res_name))
 9                         goto err_out;
10         }
11         else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
12                 if (!request_mem_region(pci_resource_start(pdev, bar),
13                                         pci_resource_len(pdev, bar), res_name))
14                         goto err_out;
15         }
16        
17         return 0;
18 ……
19 }

有了这些基础,我们来看设备的探测函数

  1 static int __devinit e100_probe(struct pci_dev *pdev,
  2         const struct pci_device_id *ent)
  3 {
  4         struct net_device *netdev;
  5         struct nic *nic;
  6         int err;
  7 
  8        
  9         if(!(netdev = alloc_etherdev(sizeof(struct nic)))) {
 10                 if(((1 << debug) - 1) & NETIF_MSG_PROBE)
 11                         printk(KERN_ERR PFX "Etherdev alloc failed, abort.\n");
 12                 return -ENOMEM;
 13         }
 14 
 15        
 16         netdev->open = e100_open;
 17         netdev->stop = e100_close;
 18         netdev->hard_start_xmit = e100_xmit_frame;
 19         netdev->get_stats = e100_get_stats;
 20         netdev->set_multicast_list = e100_set_multicast_list;
 21         netdev->set_mac_address = e100_set_mac_address;
 22         netdev->change_mtu = e100_change_mtu;
 23         netdev->do_ioctl = e100_do_ioctl;
 24         SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
 25         netdev->tx_timeout = e100_tx_timeout;
 26         netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
 27         netdev->poll = e100_poll;
 28         netdev->weight = E100_NAPI_WEIGHT;
 29 #ifdef CONFIG_NET_POLL_CONTROLLER
 30         netdev->poll_controller = e100_netpoll;
 31 #endif
 32        
 33         strcpy(netdev->name, pci_name(pdev));
 34 
 35        
 36         nic = netdev_priv(netdev);
 37        
 38         nic->netdev = netdev;
 39        
 40         nic->pdev = pdev;
 41         nic->msg_enable = (1 << debug) - 1;
 42        
 43        
 44         pci_set_drvdata(pdev, netdev);
 45 
 46        
 47         if((err = pci_enable_device(pdev))) {
 48                 DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");
 49                 goto err_out_free_dev;
 50         }
 51 
 52        
 53         if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
 54                 DPRINTK(PROBE, ERR, "Cannot find proper PCI device "
 55                         "base address, aborting.\n");
 56                 err = -ENODEV;
 57                 goto err_out_disable_pdev;
 58         }
 59 
 60        
 61         if((err = pci_request_regions(pdev, DRV_NAME))) {
 62                 DPRINTK(PROBE, ERR, "Cannot obtain PCI resources, aborting.\n");
 63                 goto err_out_disable_pdev;
 64         }
 65 
 66        
 67         if((err = pci_set_dma_mask(pdev, 0xFFFFFFFFULL))) {
 68                 DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
 69                 goto err_out_free_res;
 70         }
 71 
 72         SET_MODULE_OWNER(netdev);
 73         SET_NETDEV_DEV(netdev, &pdev->dev);
 74 
 75        
 76         nic->csr = ioremap(pci_resource_start(pdev, 0), sizeof(struct csr));
 77         if(!nic->csr) {
 78                 DPRINTK(PROBE, ERR, "Cannot map device registers, aborting.\n");
 79                 err = -ENOMEM;
 80                 goto err_out_free_res;
 81         }
 82 
 83         if(ent->driver_data)
 84                 nic->flags |= ich;
 85         else
 86                 nic->flags &= ~ich;
 87 
 88        
 89         e100_get_defaults(nic);
 90 
 91        
 92         spin_lock_init(&nic->cb_lock);
 93         spin_lock_init(&nic->cmd_lock);
 94 
 95        
 96         e100_hw_reset(nic);
 97 
 98        
 99         pci_set_master(pdev);
100 
101        
102         init_timer(&nic->watchdog);
103         nic->watchdog.function = e100_watchdog;
104         nic->watchdog.data = (unsigned long)nic;
105         init_timer(&nic->blink_timer);
106         nic->blink_timer.function = e100_blink_led;
107         nic->blink_timer.data = (unsigned long)nic;
108 
109         INIT_WORK(&nic->tx_timeout_task,
110                 (void (*)(void *))e100_tx_timeout_task, netdev);
111 
112         if((err = e100_alloc(nic))) {
113                 DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
114                 goto err_out_iounmap;
115         }
116 
117        
118         e100_phy_init(nic);
119 
120         if((err = e100_eeprom_load(nic)))
121                 goto err_out_free;
122 
123         memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
124         if(!is_valid_ether_addr(netdev->dev_addr)) {
125                 DPRINTK(PROBE, ERR, "Invalid MAC address from "
126                         "EEPROM, aborting.\n");
127                 err = -EAGAIN;
128                 goto err_out_free;
129         }
130 
131        
132         if((nic->mac >= mac_82558_D101_A4) &&
133            (nic->eeprom[eeprom_id] & eeprom_id_wol))
134                 nic->flags |= wol_magic;
135 
136        
137         pci_enable_wake(pdev, 0, 0);
138 
139        
140         strcpy(netdev->name, "eth%d");
141         if((err = register_netdev(netdev))) {
142                 DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
143                 goto err_out_free;
144         }
145 
146         DPRINTK(PROBE, INFO, "addr 0x%lx, irq %d, "
147                 "MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n",
148                 pci_resource_start(pdev, 0), pdev->irq,
149                 netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
150                 netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
151 
152         return 0;
153 
154 err_out_free:
155         e100_free(nic);
156 err_out_iounmap:
157         iounmap(nic->csr);
158 err_out_free_res:
159         pci_release_regions(pdev);
160 err_out_disable_pdev:
161         pci_disable_device(pdev);
162 err_out_free_dev:
163         pci_set_drvdata(pdev, NULL);
164         free_netdev(netdev);
165         return err;
166 }

执行到这里,探测函数的使命就完成了,在对网络设备重要成员初始化时,有:
netdev->open = e100_open;
指定了设备的open函数为e100_open,这样,当第一次使用设备,比如使用ifconfig工具的时候,open函数将被调用。

 

二、打开设备
在探测函数中,设置了netdev->open = e100_open;指定了设备的open函数为e100_open:

 1 static int e100_open(struct net_device *netdev)
 2 {
 3         struct nic *nic = netdev_priv(netdev);
 4         int err = 0;
 5 
 6         netif_carrier_off(netdev);
 7         if((err = e100_up(nic)))
 8                 DPRINTK(IFUP, ERR, "Cannot open interface, aborting.\n");
 9         return err;
10 }

大多数物理网络设备可以感知信号载波(carrier)的存在,载波的存在意味着设备可以工作。
举个例子来讲:当一个用户拔掉了网线,也就意味着信号载波的消失。
netif_carrier_off:关闭载波信号;
netif_carrier_on:打开载波信号;
netif_carrier_ok:检测载波信号;

 

对于探测网卡网线是否连接,这一组函数被使用得较多;

接着,调用e100_up函数启动网卡,这个“启动”的过程,最重要的步骤有:
1、调用request_irq向内核注册中断;
2、调用netif_wake_queue函数来重新启动传输队列;

 1 static int e100_up(struct nic *nic)
 2 {
 3         int err;
 4 
 5         if((err = e100_rx_alloc_list(nic)))
 6                 return err;
 7         if((err = e100_alloc_cbs(nic)))
 8                 goto err_rx_clean_list;
 9         if((err = e100_hw_init(nic)))
10                 goto err_clean_cbs;
11         e100_set_multicast_list(nic->netdev);
12         e100_start_receiver(nic, 0);
13         mod_timer(&nic->watchdog, jiffies);
14         if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
15                 nic->netdev->name, nic->netdev)))
16                 goto err_no_irq;
17         netif_wake_queue(nic->netdev);
18         netif_poll_enable(nic->netdev);
19        
20         e100_enable_irq(nic);
21         return 0;
22 
23 err_no_irq:
24         del_timer_sync(&nic->watchdog);
25 err_clean_cbs:
26         e100_clean_cbs(nic);
27 err_rx_clean_list:
28         e100_rx_clean_list(nic);
29         return err;
30 
31 
32 }

这样,当网卡产生中断时,注册的中断处理函数e100_intr将被调用。

 

三、网卡中断

从本质上来讲,中断,是一种电信号,当设备有某种事件发生的时候,它就会产生中断,通过总线把电信号发送给中断控制器,如果中断的线是激活的,中断控制器就把电信号发送给处理器的某个特定引脚。处理器于是立即停止自己正在做的事,跳到内存中内核设置的中断处理程序的入口点,进行中断处理。
在内核中断处理中,会检测中断号是否与我们刚才注册的中断号匹配,如果匹配,注册的中断处理函数就被调用了。

当需要发/收数据,出现错误,连接状态变化等,网卡的中断信号会被触发。当接收到中断后,中断函数读取中断状态位,进行合法性判断,如判断中断信号是否是自己的等,然后,应答设备中断——OK,我已经知道了,你回去继续工作吧……
接着,它就屏蔽此中断,然后调用netif_rx_schedule函数进行接收调度,内核会在未来某一时刻调用设备的poll函数(对这里而言,注册的是e100_poll)实现设备的轮询:

 1 static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs)
 2 {
 3         struct net_device *netdev = dev_id;
 4         struct nic *nic = netdev_priv(netdev);
 5         u8 stat_ack = readb(&nic->csr->scb.stat_ack);
 6 
 7         DPRINTK(INTR, DEBUG, "stat_ack = 0x%02X\n", stat_ack);
 8 
 9         if(stat_ack == stat_ack_not_ours ||       
10            stat_ack == stat_ack_not_present)       
11                 return IRQ_NONE;
12 
13        
14         writeb(stat_ack, &nic->csr->scb.stat_ack);
15 
16        
17         if(stat_ack & stat_ack_rnr)
18                 nic->ru_running = RU_SUSPENDED;
19 
20         e100_disable_irq(nic);
21         netif_rx_schedule(netdev);
22 
23         return IRQ_HANDLED;
24 }

对于数据包的接收而言,我们关注的是poll函数中,调用e100_rx_clean进行数据的接收

 1 static int e100_poll(struct net_device *netdev, int *budget)
 2 {
 3         struct nic *nic = netdev_priv(netdev);
 4       
 5         unsigned int work_to_do = min(netdev->quota, *budget);
 6         unsigned int work_done = 0;
 7         int tx_cleaned;
 8 
 9                       
10         e100_rx_clean(nic, &work_done, work_to_do);
11         tx_cleaned = e100_tx_clean(nic);
12 
13          
14        
15         if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
16                 netif_rx_complete(netdev);
17                 e100_enable_irq(nic);
18                 return 0;
19         }
20 
21         *budget -= work_done;
22         netdev->quota -= work_done;
23 
24         return 1;
25 }
26 
27 
28 static inline void e100_rx_clean(struct nic *nic, unsigned int *work_done,
29         unsigned int work_to_do)
30 {
31         struct rx *rx;
32         int restart_required = 0;
33         struct rx *rx_to_start = NULL;
34 
35        
36         if(RU_SUSPENDED == nic->ru_running)
37                 restart_required = 1;
38 
39        
40         for(rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
41                 int err = e100_rx_indicate(nic, rx, work_done, work_to_do);
42                 if(-EAGAIN == err) {
43                        
44                         restart_required = 0;
45                         break;
46                 } else if(-ENODATA == err)
47                         break;
48         }
49 
50        
51         if(restart_required)
52                 rx_to_start = nic->rx_to_clean;
53 
54        
55         for(rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
56                 if(unlikely(e100_rx_alloc_skb(nic, rx)))
57                         break;
58         }
59 
60         if(restart_required) {
61                 // ack the rnr?
62                 writeb(stat_ack_rnr, &nic->csr->scb.stat_ack);
63                 e100_start_receiver(nic, rx_to_start);
64                 if(work_done)
65                         (*work_done)++;
66         }
67 }

 

Linux内核数据包处理流程-数据包接收(1)[转载]

原文:http://www.cnblogs.com/listenerln/p/6393009.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!