Controladores de red NAPI en Linux

¡Hola, Habr!





Hablemos de los controladores de dispositivos de red de Linux, el mecanismo NAPI y sus cambios en el kernel 5.12.





Linux () BSD, . unix- , TCP/IP (sys_socket) , , IP- . (sk_buff) - , . (tx_queue, rx_queue).





Estructura del búfer de socket – sk_buff:





/*
 * Abridged view of struct sk_buff as quoted by the article: list linkage,
 * the associated socket and device pointers, length fields, header position
 * fields and the pointers that delimit the data buffer.
 * NOTE(review): this is an excerpt; fields not shown here are omitted, not
 * absent from the real definition.
 */
struct sk_buff {
	union {
		struct {
		    /* next/prev links plus the net_device pointer; this group
		     * shares storage with the list_head member below. */
			struct sk_buff		*next;
			struct sk_buff		*prev;
			
			struct net_device	*dev;
		};
		struct list_head	list;
	};

	struct sock		*sk;

	unsigned int		len,
				data_len;
	__u16			mac_len,
				hdr_len;

/* napi_id and sender_cpu share one slot; the slot only exists when RX
 * busy polling or XPS is configured (see the #if below). */
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
	union {
		unsigned int	napi_id;
		unsigned int	sender_cpu;
	};
#endif

	/* inner_* fields: presumably describe an encapsulated (inner) packet —
	 * TODO confirm against the full kernel definition. */
	__u8		inner_ipproto;
	__u16			inner_transport_header;
	__u16			inner_network_header;
	__u16			inner_mac_header;

	/* 16-bit header fields (not pointers) — presumably offsets; verify. */
	__be16			protocol;
	__u16			transport_header;
	__u16			network_header;
	__u16			mac_header;

	/* Buffer bounds: tail/end use sk_buff_data_t, head/data are raw pointers. */
	sk_buff_data_t		tail;
	sk_buff_data_t		end;
	unsigned char		*head,
				*data;
	unsigned int		truesize;

};
      
      



( MAC-) . xmit rx, , stats . net_device, alloc_netdev register_netdev.





Estructura net_device:





/*
 * Abridged view of struct net_device as quoted by the article: name,
 * memory/I/O address fields, state and flags, operation tables, statistics,
 * address information, RX/TX queue arrays and the watchdog timer.
 * NOTE(review): excerpt only; the real definition has many more members.
 */
struct net_device {
	char			name[IFNAMSIZ];    // device name buffer of IFNAMSIZ bytes; the stripped original note mentioned printf — presumably a printf-style name template, TODO confirm

	unsigned long		mem_end;
	unsigned long		mem_start;
	unsigned long		base_addr;

	unsigned long		state;

	/* List linkage: dev_list and napi_list are both list_head nodes. */
	struct list_head	dev_list;
	struct list_head	napi_list;

	unsigned int		flags;
	unsigned int		priv_flags;
	/* Table of driver callbacks (const, so not modified through this pointer). */
	const struct net_device_ops *netdev_ops;
	unsigned short		hard_header_len;

	unsigned int		mtu;

	/* Counters; e1000_clean_rx_irq in this file updates stats.rx_bytes and
	 * stats.rx_packets. */
	struct net_device_stats	stats; 

	atomic_long_t		rx_dropped;
	atomic_long_t		tx_dropped;
	atomic_long_t		rx_nohandler;

	const struct ethtool_ops *ethtool_ops;

	const struct header_ops *header_ops;

	unsigned char		if_port;
	unsigned char		dma;

	/* Interface address info. */
	unsigned char		perm_addr[MAX_ADDR_LEN];

	unsigned short          dev_id;
	unsigned short          dev_port;

	spinlock_t		addr_list_lock;
	int			irq;
	
	unsigned char		*dev_addr;

	/* RX queue array and its element count. */
	struct netdev_rx_queue	*_rx;
	unsigned int		num_rx_queues;

	/* TX queue array (cacheline-aligned on SMP builds) and its element count. */
	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
	unsigned int		num_tx_queues;

	struct timer_list	watchdog_timer;
	int			watchdog_timeo;
};
      
      



: , , . Linux, “” , .





Ejemplo: recepción de paquetes (controlador Intel Ethernet e1000):





/*
 * Abridged excerpt of the e1000 receive clean-up routine: walks descriptors
 * whose status has E1000_RXD_STAT_DD set, hands packets to
 * e1000_receive_skb(), then updates adapter and netdev RX counters.
 *
 * NOTE(review): rx_desc, netdev, length, cleaned, cleaned_count and the
 * total_rx_* counters are used but not declared in this excerpt; nothing
 * visible advances rx_desc or assigns skb/status, and work_done/work_to_do
 * are never touched. The elided parts of the function presumably do all of
 * that — confirm against the full driver source before reusing this code.
 *
 * Returns the value of `cleaned` (set outside this excerpt).
 */
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,  // adapter whose counters are updated below
			       struct e1000_rx_ring *rx_ring, // ring handed to alloc_rx_buf below
			       int *work_done, int work_to_do)
{
	/* Keep going while the descriptor status has the DD bit set. */
	while (rx_desc->status & E1000_RXD_STAT_DD) {
		struct sk_buff *skb;
		u8 *data;
		u8 status;

        /* Only the NETIF_F_RXALL path is shown in this excerpt. */
        if (netdev->features & NETIF_F_RXALL) {
		    /* 4 bytes are subtracted from length — presumably the
		     * Ethernet FCS; TODO confirm. */
		    total_rx_bytes += (length - 4); 
		    total_rx_packets++;

		    e1000_receive_skb(adapter, status, rx_desc->special, skb);
		} 
    }

	if (cleaned_count)    // call the adapter's alloc_rx_buf hook only when cleaned_count is non-zero
		adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
		
    // fold the per-call totals into the adapter and netdev statistics
	adapter->total_rx_packets += total_rx_packets;
	adapter->total_rx_bytes += total_rx_bytes;
	netdev->stats.rx_bytes += total_rx_bytes;
	netdev->stats.rx_packets += total_rx_packets;
	return cleaned;
}
      
      



2.3 (top half) (bottom half) (task queue). 2.3 BH (softirq), (tasklet) (work queue). softirq , . .





NAPI

, . . 2001 New API 2.4. ( – SMP-, pktgen).





NAPI - , . NAPI . , , . .





NAPI- , . rx_schedule, , . ( – budget), dev->poll. poll , , . , . . poll .





Función de sondeo del controlador e1000 (e1000_netpoll):





/*
 * Run the e1000 interrupt handler by hand for @netdev.
 *
 * Attempts disable_hardirq() on the adapter's IRQ line; if that call
 * returns non-zero, e1000_intr() is invoked directly with the same IRQ
 * number and the net_device. enable_irq() is then called regardless of
 * whether the handler ran.
 */
static void e1000_netpoll(struct net_device *netdev)
{
	struct e1000_adapter *priv;

	priv = netdev_priv(netdev);

	/* The handler is only called when disable_hardirq() reports success. */
	if (disable_hardirq(priv->pdev->irq)) {
		e1000_intr(priv->pdev->irq, netdev);
	}

	/* Unconditional: pairs with the disable attempt above. */
	enable_irq(priv->pdev->irq);
}
      
      



NAPI- :





  • DMA









  • poll





  • poll softirq ksoftirqd, .





NAPI:





  • , dev->poll









  • IRQ-, .





¿Qué cambió en NAPI en el kernel 5.12?

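A partir del kernel 5.12, el método poll puede ejecutarse en un subproceso del kernel (kthread) en lugar del contexto softirq.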





Wei Wang , – . , softirq. CPU, , , userspace- . . Kthread CPU, , .





net/core/dev.c. __napi_poll, napi_poll. sysfs net_device / napi up/down.





napi_struct threaded , kthread napi_set_threaded ( NAPI_STATE_THREADED).





Estructura napi_struct:





/*
 * Abridged view of struct napi_struct as quoted by the article.
 * NOTE(review): excerpt only — napi_kthread_create() in this file also reads
 * n->dev, which is not shown here.
 */
struct napi_struct {
        struct list_head        dev_list;       /* list linkage node */
        struct hlist_node       napi_hash_node; /* hash-list linkage node */
        unsigned int            napi_id;        /* numeric id; used in the kthread name by napi_kthread_create() */
        struct task_struct      *thread;        /* set by napi_kthread_create(); NULL if creation failed */
 };
      
      



Creación del subproceso del kernel:





/*
 * Start the polling kthread for one NAPI instance.
 *
 * The thread runs napi_threaded_poll() with @n as its argument and is named
 * "napi/<device name>-<napi id>". kthread_run() creates the thread and wakes
 * it once; per the note carried by the original code, this puts it in
 * TASK_INTERRUPTIBLE mode, which avoids the blocked task warning and works
 * with loadavg.
 *
 * Returns 0 on success. On failure the error is logged, n->thread is reset
 * to NULL and the error code taken from kthread_run()'s result is returned.
 */
static int napi_kthread_create(struct napi_struct *n)
{
	int rc;

	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);
	if (!IS_ERR(n->thread))
		return 0;

	/* Failure path: extract the code, report it, and clear the pointer. */
	rc = PTR_ERR(n->thread);
	pr_err("kthread_run failed with err %d\n", rc);
	n->thread = NULL;

	return rc;
}
      
      



napi_thread_wait.





Wei Wang softirq, kthread :





- LDD3 :





Sondeo NAPI en subprocesos del kernel

Sondeo NAPI enhebrable, softirqs y correcciones adecuadas

Reelaboración de la migración del controlador NAPI

: controladores de red





Gracias de antemano por las aclaraciones y las indicaciones de error.








All Articles