Linux network protocol stack 7--macvlan

Posted by dewen on Fri, 04 Feb 2022 04:53:49 +0100

macvlan is a virtual network interface type in Linux. It lets you configure multiple virtual network interfaces on top of one network interface of the host. Each of these interfaces has its own independent MAC address and can also be given an IP address for communication. Virtual machines or containers attached through macvlan share the same broadcast domain as the host, in the same network segment.
macvlan is similar to a bridge, but because it does away with the bridge device itself, it is simpler to configure and debug and is also relatively more efficient. In addition, macvlan itself fully supports VLAN.

The macvlan virtual network card device includes five modes:
private mode: in this mode, a macvlan device cannot receive packets from other macvlan devices attached to the same physical network card, not even packets that those devices send out through the physical network card and that are returned by a hairpin device.
vepa mode: in this mode, a macvlan device cannot directly receive packets from other macvlan devices attached to the same physical network card; however, the other macvlan devices can send packets out through the physical network card and have them returned to the destination macvlan device by an external hairpin device.
passthru mode: in this mode, each physical device can host only one macvlan device.
bridge mode: in this mode, macvlan devices attached to the same physical device can communicate directly, without the help of an external hairpin device.
source mode: in this mode, a macvlan device attached to the physical device accepts only packets whose source MAC address is one of its specified sources; all other packets are not accepted.

macvlan has two important data structures in the protocol stack:
When a macvlan interface is created, a struct macvlan_dev is allocated as the private data of that interface's net_device (retrieved with netdev_priv(dev)); it stores the information of a single macvlan interface.
At the same time, if this is the first macvlan interface created on the host interface, a special receive hook is installed on the host interface's net_device: rx_handler = macvlan_handle_frame. The argument this function needs, rx_handler_data, is the macvlan_port, which holds the overall information about the host interface and the macvlan interfaces on top of it. Its most important job is to locate the macvlan interface that corresponds to a packet's destination MAC address (dmac).
All of the related processing is in the macvlan_common_newlink function.

/*
 * Per-host-interface macvlan state: describes the host interface together
 * with all macvlan interfaces created on top of it. The receive path uses it
 * to find the macvlan interface that matches a frame.
 */
struct macvlan_port {
	struct net_device	*dev; // presumably the host (lower) net_device -- TODO confirm
	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE]; // hash table of struct macvlan_dev, used for lookup by MAC address
	struct list_head	vlans; // list of struct macvlan_dev (linked through macvlan_dev.list), used for traversal
	struct rcu_head		rcu;
	struct sk_buff_head	bc_queue;   // queue of broadcast/multicast frames awaiting delivery

	struct work_struct	bc_work;    // work item that processes bc_queue
	bool 			passthru;    // when set, the receive path takes the first entry of vlans instead of a hash lookup
	int			count; // presumably the number of macvlan devices on this port -- TODO confirm
	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE]; // used by source mode
	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); // tested with the hash of a multicast destination before the frame is queued
};


/*
 * Private data of a single macvlan interface (obtained with
 * netdev_priv(dev) on the macvlan's net_device).
 */
struct macvlan_dev {
	struct net_device	*dev;      // back-pointer to this macvlan's own net_device
	struct list_head	list;      // node in macvlan_port.vlans
	struct hlist_node	hlist;     // presumably the node in macvlan_port.vlan_hash -- TODO confirm
	struct macvlan_port	*port;    // back-pointer to the owning struct macvlan_port
	struct net_device	*lowerdev;  // host (lower) device that frames are transmitted through
	void			*fwd_priv;           // when non-NULL, transmit goes through dev_queue_xmit_accel(); used when the physical NIC supports hardware acceleration
	struct vlan_pcpu_stats __percpu *pcpu_stats; // per-CPU counters (tx_packets, tx_bytes, tx_dropped, ...)

	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);

	netdev_features_t	set_features;
	enum macvlan_mode	mode;      // operating mode, e.g. MACVLAN_MODE_VEPA or MACVLAN_MODE_BRIDGE
	u16			flags;
	/* This array tracks active taps. */
	struct macvtap_queue	__rcu *taps[MAX_MACVTAP_QUEUES];
	/* This list tracks all taps (both enabled and disabled) */
	struct list_head	queue_list;
	int			numvtaps;
	int			numqueues;
	netdev_features_t	tap_features;
	int			minor;
	int			nest_level;
#ifdef CONFIG_NET_POLL_CONTROLLER
	struct netpoll		*netpoll;
#endif
	unsigned int		macaddr_count;
};

Host interface receiving function, macvlan_handle_frame.

/* called under rcu_read_lock() from netif_receive_skb */
static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
{
	struct macvlan_port *port;
	struct sk_buff *skb = *pskb;
	const struct ethhdr *eth = eth_hdr(skb);
	const struct macvlan_dev *vlan;
	const struct macvlan_dev *src;
	struct net_device *dev;
	unsigned int len = 0;
	int ret;
	rx_handler_result_t handle_res;

	port = macvlan_port_get_rcu(skb->dev);
	// The broadcast message received by the host interface may be sent by the external device or the internal device and returned in the hairpin mode of the external device
	if (is_multicast_ether_addr(eth->h_dest)) {
		unsigned int hash;

		skb = ip_check_defrag(dev_net(skb->dev), skb, IP_DEFRAG_MACVLAN);
		if (!skb)
			return RX_HANDLER_CONSUMED;
		*pskb = skb;
		eth = eth_hdr(skb);
		// source mode, whether broadcast or unicast, brainless receives according to smac matching
		macvlan_forward_source(skb, port, eth->h_source);
		src = macvlan_hash_lookup(port, eth->h_source);
		// private mode does not allow receiving messages sent by local interfaces (derived from the same host interface). passthru mode has only one derived interface,
		// These two types are equivalent to sending only to yourself without entering the queue.
		if (src && src->mode != MACVLAN_MODE_VEPA &&
		    src->mode != MACVLAN_MODE_BRIDGE) {
			/* forward to original port. */
			vlan = src;
			ret = macvlan_broadcast_one(skb, vlan, eth, 0) ?:
			      netif_rx(skb);
			handle_res = RX_HANDLER_CONSUMED;
			goto out;
		}

		hash = mc_hash(NULL, eth->h_dest);
		if (test_bit(hash, port->mc_filter))
			macvlan_broadcast_enqueue(port, src, skb);

		return RX_HANDLER_PASS;
	}
	/*source Mode, whether broadcast or unicast, brainless receives according to smac matching.
	  And it does not affect the normal forwarding process according to mac address matching.
	  So if smac and dmac are matched with a source type interface at the same time, won't they receive two copies?? But actually not. We need to take a closer look at the reason.
	*/ 
	macvlan_forward_source(skb, port, eth->h_source);
	if (port->passthru)
		// passthru mode has only one derived interface, which directly takes the first data in the linked list
		vlan = list_first_or_null_rcu(&port->vlans,
					      struct macvlan_dev, list);
	else
		// Others can be found in the hash table
		vlan = macvlan_hash_lookup(port, eth->h_dest);
	if (vlan == NULL)
		return RX_HANDLER_PASS;

	dev = vlan->dev;
	if (unlikely(!(dev->flags & IFF_UP))) {
		kfree_skb(skb);
		return RX_HANDLER_CONSUMED;
	}
	len = skb->len + ETH_HLEN;
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb) {
		ret = NET_RX_DROP;
		handle_res = RX_HANDLER_CONSUMED;
		goto out;
	}

	*pskb = skb;
	skb->dev = dev;
	skb->pkt_type = PACKET_HOST;

	ret = NET_RX_SUCCESS;
	// Unicast. After modification, SKB - > dev is the macvlan interface, and Rx is returned_ HANDLER_ ANOTHER,
	// __ netif_ receive_ skb_ The core will go through its own process again, which is equivalent to going through the protocol stack again on the MAC VLAN interface.
	handle_res = RX_HANDLER_ANOTHER;
out:
	macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false);
	return handle_res;
}

macvlan interface transmit (packet sending) path

/*
 * macvlan_start_xmit - transmit entry point of a macvlan interface.
 * @skb: frame to send
 * @dev: the macvlan net_device the frame is being sent on
 *
 * Hands the frame to the lower device and updates the per-CPU transmit
 * counters of the macvlan device. Returns the status reported by the
 * transmit helper that was used.
 */
static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	/* Length is sampled up front, before the skb is handed off. */
	unsigned int tx_len = skb->len;
	struct vlan_pcpu_stats *stats;
	int rc;

	if (unlikely(netpoll_tx_running(dev)))
		return macvlan_netpoll_send_skb(vlan, skb);

	if (!vlan->fwd_priv) {
		/* Regular software path. */
		rc = macvlan_queue_xmit(skb, dev);
	} else {
		/* Accelerated path: transmit directly on the lower device. */
		skb->dev = vlan->lowerdev;
		rc = dev_queue_xmit_accel(skb, vlan->fwd_priv);
	}

	/* Anything other than SUCCESS or CN is counted as a drop. */
	if (unlikely(rc != NET_XMIT_SUCCESS && rc != NET_XMIT_CN)) {
		this_cpu_inc(vlan->pcpu_stats->tx_dropped);
		return rc;
	}

	stats = this_cpu_ptr(vlan->pcpu_stats);
	u64_stats_update_begin(&stats->syncp);
	stats->tx_packets++;
	stats->tx_bytes += tx_len;
	u64_stats_update_end(&stats->syncp);

	return rc;
}


/*
 * macvlan_queue_xmit - software transmit path of a macvlan interface.
 * @skb: frame to send
 * @dev: the sending macvlan net_device
 *
 * A sender in bridge mode delivers frames locally to other bridge-mode
 * macvlan devices on the same port: multicast frames are broadcast to them
 * and still sent out, unicast frames for such a peer are forwarded without
 * leaving the host. Everything else is queued on the lower device.
 *
 * Returns NET_XMIT_SUCCESS for a locally forwarded unicast frame, otherwise
 * the result of dev_queue_xmit().
 */
static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	const struct macvlan_dev *sender = netdev_priv(dev);
	const struct macvlan_port *port = sender->port;

	if (sender->mode == MACVLAN_MODE_BRIDGE) {
		const struct ethhdr *hdr = (void *)skb->data;

		if (is_multicast_ether_addr(hdr->h_dest)) {
			/*
			 * Send to other bridge ports directly, then fall
			 * through so the frame also goes out on the wire.
			 */
			macvlan_broadcast(skb, port, dev, MACVLAN_MODE_BRIDGE);
		} else {
			const struct macvlan_dev *peer =
				macvlan_hash_lookup(port, hdr->h_dest);

			/* Only a bridge-mode peer is reachable directly. */
			if (peer && peer->mode == MACVLAN_MODE_BRIDGE) {
				/* send to lowerdev first for its network taps */
				dev_forward_skb(sender->lowerdev, skb);

				return NET_XMIT_SUCCESS;
			}
		}
	}

	/*
	 * All remaining frames are sent through the host interface. Apart
	 * from the table lookup above, this function adds no encapsulation.
	 */
	skb->dev = sender->lowerdev;
	return dev_queue_xmit(skb);
}

Topics: Linux network Network Protocol