dpdk l2fwd


之前在helloworld示例中主要分析了hugepage的使用,这次通过l2fwd示例主要分析一下UIO和PMD(Poll Mode Driver,轮询模式驱动)的实现。

 

main函数中首先调用了rte_eal_init初始化eal环境,其中主要是hugepage的初始化;

ret = (ret < );

 

 

接着创建了mbuf pool

=( (l2fwd_pktmbuf_pool ==);

 

 

然后是PMD驱动的注册和PCI设备驱动加载


 (rte_pmd_init_all() <  (rte_eal_pci_probe() < );

 

首先是PMD驱动的注册。目前DPDK支持的PMD驱动有igb、igbvf、em、ixgbe、ixgbevf、virtio、vmxnet3;不过这些驱动具体对应什么设备目前还不清楚,后面以虚拟机环境中使用的em驱动为例进行分析;

 rte_pmd_init_all( ret = - ((ret = rte_igb_pmd_init()) !=  ((ret = rte_igbvf_pmd_init()) !=  /* RTE_LIBRTE_IGB_PMD */ ((ret = rte_em_pmd_init()) !=  /* RTE_LIBRTE_EM_PMD */ ((ret = rte_ixgbe_pmd_init()) !=  ((ret = rte_ixgbevf_pmd_init()) !=  /* RTE_LIBRTE_IXGBE_PMD */ ((ret = rte_virtio_pmd_init()) !=  /* RTE_LIBRTE_VIRTIO_PMD */ ((ret = rte_vmxnet3_pmd_init()) !=  /* RTE_LIBRTE_VMXNET3_PMD */

     (ret == -

 

注册EM驱动

& 
 eth_driver *->pci_drv.devinit =&eth_drv->
 rte_pci_driver *&

 

这里PMD驱动结构(eth_driver)既包含PCI驱动部分(pci_drv),也包含PMD驱动自身的部分(例如dev_private_size)


 rte_pci_driver pci_drv;     dev_private_size;    

 

接下来是PCI设备的探测:如果没有配置白名单,则对每个device尝试加载所有已注册的驱动;如果配置了白名单,则只对白名单中的device加载驱动,并且加载失败时直接退出;


 rte_pci_device *dev =& (!  (pcidev_is_whitelisted(dev) && pci_probe_all_drivers(dev) < , dev->addr.domain,dev->->addr.devid, dev-> 

对于每个device,依次尝试各个driver是否可以加载;带有RTE_PCI_DRV_MULTIPLE标记的驱动需要加载多次(第三方驱动可能需要);


  rte_pci_device * rte_pci_driver *dr =->blacklisted = !!&= (rc < 
             (rc > 
            
         ((dr->drv_flags & RTE_PCI_DRV_MULTIPLE) &&!dev-> (rte_eal_pci_probe_one_driver(dr, dev) ==   -

 

驱动加载


 rte_pci_driver *dr,  rte_pci_device * rte_pci_id *
     (id_table = dr->id_table ; id_table->vendor_id != ; id_table++
         (id_table->vendor_id != dev->id.vendor_id &&->vendor_id != (id_table->device_id != dev->id.device_id &&->device_id != (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&->subsystem_vendor_id != (id_table->subsystem_device_id != dev->id.subsystem_device_id &&->subsystem_device_id !=
         rte_pci_addr *loc = &dev->PCI_PRI_FMT->domain, loc->bus, loc->devid, loc->->, dev->->id.device_id, dr->
        
         (dev->  (dr->drv_flags &
             (pci_switch_module(dr, dev, , IGB_UIO_NAME) <  -  (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&==
             (pci_unbind_kernel_driver(dev) <  -
        
         (dr->drv_flags &
             (pci_uio_map_resource(dev) <  -

        ->driver =
        
         dr->
     

 

映射PCI地址空间到用户空间的过程


  rte_pci_device * devname[PATH_MAX]; 
     * rte_pci_addr *loc = &dev-> uio_resource * uio_map *->intr_handle.fd = -
    
     ((rte_eal_process_type() != RTE_PROC_PRIMARY) &&->id.vendor_id !=
    = pci_get_uio_dev(dev, dirname,  (uio_num <   PCI_PRI_FMT
                , loc->domain, loc->bus, loc->devid, loc-> -
    (dev->id.vendor_id == (eal_parse_sysfs_value(filename, &size) <  - (eal_parse_sysfs_value(filename, &start) <  -->mem_resource[].addr = ( *->mem_resource[].len =
         (
     ((uio_res = rte_zmalloc(,  (*uio_res), )) == (-(devname), ->path, (uio_res->path), &uio_res->pci_addr, &dev->addr, (uio_res->
    
     ((nb_maps = pci_uio_get_mappings(dirname, uio_res-> (uio_res->maps) /  (uio_res->maps[< ->nb_maps === uio_res-> (i = ; i != PCI_MAX_RESOURCE; i++
        
        
         ((phaddr = dev->mem_resource[i].phys_addr) == 
         (j = ; j != nb_maps && (phaddr != maps[j].phaddr ||->mem_resource[i].len !=++
        
         (j !== j * (maps[j].addr != NULL ||=== (-==->mem_resource[i].addr =
 
    TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);

     (

 

回到PCI驱动的初始化函数rte_eth_dev_init

  rte_pci_driver * rte_pci_device * eth_driver    * rte_eth_dev *= ( eth_driver *= (eth_dev == - (rte_eal_process_type() ==->data->dev_private = rte_zmalloc(-> (eth_dev->data->dev_private ==->pci_dev =->driver =->data->rx_mbuf_alloc_failed = &(eth_dev->->data->max_frame_size =
    = (*eth_drv-> (diag ==  (
            , pci_drv->->-> (rte_eal_process_type() ==->data->--

 

PMD驱动的初始化过程

  eth_driver * rte_eth_dev * rte_pci_device * e1000_hw *hw =->data-> e1000_vfta * shadow_vfta =->data->= eth_dev->->dev_ops = &->rx_pkt_burst = (eth_rx_burst_t)&->tx_pkt_burst = (eth_tx_burst_t)&
     (rte_eal_process_type() != (eth_dev->data->->rx_pkt_burst =& ->hw_addr = ( *)pci_dev->mem_resource[->device_id = pci_dev->

     (e1000_setup_init_funcs(hw, TRUE) != E1000_SUCCESS ||!= 
            ->data->port_id, pci_dev->-> -->data->mac_addrs = rte_zmalloc(, ETHER_ADDR_LEN *->mac.rar_entry_count,  (eth_dev->data->mac_addrs ==
            * hw-> - ether_addr *) hw->->data->, (*->data->port_id, pci_dev->->&(pci_dev-> * (

PMD驱动初始化主要是一些硬件相关的寄存器初始化以及函数的初始化,细节就不再分析了;函数的最后注册了一个中断处理函数,下面主要分析中断处理的过程;

 rte_intr_handle * * rte_intr_source * rte_intr_callback *= 
    
     (intr_handle == NULL || intr_handle->fd <  || cb == -= rte_zmalloc((*callback),  (callback == -->cb_fn =->cb_arg =&& (src->intr_handle.fd == intr_handle->
             TAILQ_EMPTY(&src->= &(src->= 
     (src == ((src = rte_zmalloc((*src), )) === -->intr_handle = *&src->&(src->&= = &
    
     (write(intr_pipe.writefd, , ) <  -

 

在rte_eal_init初始化过程中会调用rte_eal_intr_init,该函数会创建一个管道(intr_pipe)并启动一个中断处理线程

 ret = &
     (pipe(intr_pipe.pipefd) <  -
    = pthread_create(& (ret !=  -

 


 __attribute__((noreturn))  * *
    
          epoll_event pipe_event == EPOLLIN | rte_intr_source *= 
        
         pfd = epoll_create( (pfd < =
        &pipe_event) < ++&& (src->callbacks.tqh_first ==; = EPOLLIN |= src->
            ->intr_handle.fd, &ev) < ->++&
        

 

  epoll_event *events,  rte_intr_source * rte_intr_callback * (n = ; n < nfds; n++
        
         (events[n].data.fd == r = -&& (src->intr_handle.fd == (src ==&->active = &
        
         (src->= = = = read(events[n].data.fd, & (bytes_read < 
                  (bytes_read == 
                
        & (bytes_read > &src->= *&&src->&->active = & 

 

E1000驱动注册的回调函数eth_em_interrupt_handler负责处理link状态的变化:收到link down消息时关闭收发包,收到link up消息时开启收发包;

  rte_intr_handle * * rte_eth_dev *dev = ( rte_eth_dev *

 

后面还有收发包队列的初始化, 待分析;

相关内容