dev.c source code [linux/net/core/dev.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* NET3 Protocol independent device support routines.
4	*
5	* Derived from the non IP parts of dev.c 1.0.19
6	* Authors: Ross Biro
7	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8	* Mark Evans, <evansmp@uhura.aston.ac.uk>
9	*
10	* Additional Authors:
11	* Florian la Roche <rzsfl@rz.uni-sb.de>
12	* Alan Cox <gw4pts@gw4pts.ampr.org>
13	* David Hinds <dahinds@users.sourceforge.net>
14	* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
15	* Adam Sulmicki <adam@cfar.umd.edu>
16	* Pekka Riikonen <priikone@poesidon.pspt.fi>
17	*
18	* Changes:
19	* D.J. Barrow : Fixed bug where dev->refcnt gets set
20	* to 2 if register_netdev gets called
21	* before net_dev_init & also removed a
22	* few lines of code in the process.
23	* Alan Cox : device private ioctl copies fields back.
24	* Alan Cox : Transmit queue code does relevant
25	* stunts to keep the queue safe.
26	* Alan Cox : Fixed double lock.
27	* Alan Cox : Fixed promisc NULL pointer trap
28	* ???????? : Support the full private ioctl range
29	* Alan Cox : Moved ioctl permission check into
30	* drivers
31	* Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
32	* Alan Cox : 100 backlog just doesn't cut it when
33	* you start doing multicast video 8)
34	* Alan Cox : Rewrote net_bh and list manager.
35	* Alan Cox : Fix ETH_P_ALL echoback lengths.
36	* Alan Cox : Took out transmit every packet pass
37	* Saved a few bytes in the ioctl handler
38	* Alan Cox : Network driver sets packet type before
39	* calling netif_rx. Saves a function
40	* call a packet.
41	* Alan Cox : Hashed net_bh()
42	* Richard Kooijman: Timestamp fixes.
43	* Alan Cox : Wrong field in SIOCGIFDSTADDR
44	* Alan Cox : Device lock protection.
45	* Alan Cox : Fixed nasty side effect of device close
46	* changes.
47	* Rudi Cilibrasi : Pass the right thing to
48	* set_mac_address()
49	* Dave Miller : 32bit quantity for the device lock to
50	* make it work out on a Sparc.
51	* Bjorn Ekwall : Added KERNELD hack.
52	* Alan Cox : Cleaned up the backlog initialise.
53	* Craig Metz : SIOCGIFCONF fix if space for under
54	* 1 device.
55	* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
56	* is no device open function.
57	* Andi Kleen : Fix error reporting for SIOCGIFCONF
58	* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
59	* Cyrus Durgin : Cleaned for KMOD
60	* Adam Sulmicki : Bug Fix : Network Device Unload
61	* A network device unload needs to purge
62	* the backlog queue.
63	* Paul Rusty Russell : SIOCSIFNAME
64	* Pekka Riikonen : Netdev boot-time settings code
65	* Andrew Morton : Make unregister_netdevice wait
66	* indefinitely on dev->refcnt
67	* J Hadi Salim : - Backlog queue sampling
68	* - netif_rx() feedback
69	*/
70
71	#include <linux/uaccess.h>
72	#include <linux/bitmap.h>
73	#include <linux/capability.h>
74	#include <linux/cpu.h>
75	#include <linux/types.h>
76	#include <linux/kernel.h>
77	#include <linux/hash.h>
78	#include <linux/slab.h>
79	#include <linux/sched.h>
80	#include <linux/sched/mm.h>
81	#include <linux/mutex.h>
82	#include <linux/rwsem.h>
83	#include <linux/string.h>
84	#include <linux/mm.h>
85	#include <linux/socket.h>
86	#include <linux/sockios.h>
87	#include <linux/errno.h>
88	#include <linux/interrupt.h>
89	#include <linux/if_ether.h>
90	#include <linux/netdevice.h>
91	#include <linux/etherdevice.h>
92	#include <linux/ethtool.h>
93	#include <linux/skbuff.h>
94	#include <linux/kthread.h>
95	#include <linux/bpf.h>
96	#include <linux/bpf_trace.h>
97	#include <net/net_namespace.h>
98	#include <net/sock.h>
99	#include <net/busy_poll.h>
100	#include <linux/rtnetlink.h>
101	#include <linux/stat.h>
102	#include <net/dsa.h>
103	#include <net/dst.h>
104	#include <net/dst_metadata.h>
105	#include <net/gro.h>
106	#include <net/pkt_sched.h>
107	#include <net/pkt_cls.h>
108	#include <net/checksum.h>
109	#include <net/xfrm.h>
110	#include <net/tcx.h>
111	#include <linux/highmem.h>
112	#include <linux/init.h>
113	#include <linux/module.h>
114	#include <linux/netpoll.h>
115	#include <linux/rcupdate.h>
116	#include <linux/delay.h>
117	#include <net/iw_handler.h>
118	#include <asm/current.h>
119	#include <linux/audit.h>
120	#include <linux/dmaengine.h>
121	#include <linux/err.h>
122	#include <linux/ctype.h>
123	#include <linux/if_arp.h>
124	#include <linux/if_vlan.h>
125	#include <linux/ip.h>
126	#include <net/ip.h>
127	#include <net/mpls.h>
128	#include <linux/ipv6.h>
129	#include <linux/in.h>
130	#include <linux/jhash.h>
131	#include <linux/random.h>
132	#include <trace/events/napi.h>
133	#include <trace/events/net.h>
134	#include <trace/events/skb.h>
135	#include <trace/events/qdisc.h>
136	#include <trace/events/xdp.h>
137	#include <linux/inetdevice.h>
138	#include <linux/cpu_rmap.h>
139	#include <linux/static_key.h>
140	#include <linux/hashtable.h>
141	#include <linux/vmalloc.h>
142	#include <linux/if_macvlan.h>
143	#include <linux/errqueue.h>
144	#include <linux/hrtimer.h>
145	#include <linux/netfilter_netdev.h>
146	#include <linux/crash_dump.h>
147	#include <linux/sctp.h>
148	#include <net/udp_tunnel.h>
149	#include <linux/net_namespace.h>
150	#include <linux/indirect_call_wrapper.h>
151	#include <net/devlink.h>
152	#include <linux/pm_runtime.h>
153	#include <linux/prandom.h>
154	#include <linux/once_lite.h>
155	#include <net/netdev_rx_queue.h>
156	#include <net/page_pool/types.h>
157	#include <net/page_pool/helpers.h>
158	#include <net/rps.h>
159
160	#include "dev.h"
161	#include "net-sysfs.h"
162
163	static DEFINE_SPINLOCK(ptype_lock);
164	struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
165
166	static int netif_rx_internal(struct sk_buff *skb);
167	static int call_netdevice_notifiers_extack(unsigned long val,
168	struct net_device *dev,
169	struct netlink_ext_ack *extack);
170
171	static DEFINE_MUTEX(ifalias_mutex);
172
173	/ protects napi_hash addition/deletion and napi_gen_id /
174	static DEFINE_SPINLOCK(napi_hash_lock);
175
176	static unsigned int napi_gen_id = NR_CPUS;
177	static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, `8`);
178
179	static DECLARE_RWSEM(devnet_rename_sem);
180
181	static inline void dev_base_seq_inc(struct net *net)
182	{
183	unsigned int val = net->dev_base_seq + `1`;
184
185	WRITE_ONCE(net->dev_base_seq, val ?: `1`);
186	}
187
188	static inline struct hlist_head dev_name_hash(struct* net net, const* char *name)
189	{
190	unsigned int hash = full_name_hash(salt: net, name, strnlen(p: name, IFNAMSIZ));
191
192	return &net->dev_name_head[hash_32(val: hash, NETDEV_HASHBITS)];
193	}
194
195	static inline struct hlist_head dev_index_hash(struct* net net, int* ifindex)
196	{
197	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - `1`)];
198	}
199
200	static inline void rps_lock_irqsave(struct softnet_data *sd,
201	unsigned long *flags)
202	{
203	if (IS_ENABLED(CONFIG_RPS))
204	spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
205	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
206	local_irq_save(*flags);
207	}
208
209	static inline void rps_lock_irq_disable(struct softnet_data *sd)
210	{
211	if (IS_ENABLED(CONFIG_RPS))
212	spin_lock_irq(lock: &sd->input_pkt_queue.lock);
213	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
214	local_irq_disable();
215	}
216
217	static inline void rps_unlock_irq_restore(struct softnet_data *sd,
218	unsigned long *flags)
219	{
220	if (IS_ENABLED(CONFIG_RPS))
221	spin_unlock_irqrestore(lock: &sd->input_pkt_queue.lock, flags: *flags);
222	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
223	local_irq_restore(*flags);
224	}
225
226	static inline void rps_unlock_irq_enable(struct softnet_data *sd)
227	{
228	if (IS_ENABLED(CONFIG_RPS))
229	spin_unlock_irq(lock: &sd->input_pkt_queue.lock);
230	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
231	local_irq_enable();
232	}
233
234	static struct netdev_name_node netdev_name_node_alloc(struct* net_device *dev,
235	const char *name)
236	{
237	struct netdev_name_node *name_node;
238
239	name_node = kmalloc(size: sizeof(*name_node), GFP_KERNEL);
240	if (!name_node)
241	return NULL;
242	INIT_HLIST_NODE(h: &name_node->hlist);
243	name_node->dev = dev;
244	name_node->name = name;
245	return name_node;
246	}
247
248	static struct netdev_name_node *
249	netdev_name_node_head_alloc(struct net_device *dev)
250	{
251	struct netdev_name_node *name_node;
252
253	name_node = netdev_name_node_alloc(dev, name: dev->name);
254	if (!name_node)
255	return NULL;
256	INIT_LIST_HEAD(list: &name_node->list);
257	return name_node;
258	}
259
/* Free a name node. The name string it points to is not freed here. */
static void netdev_name_node_free(struct netdev_name_node *name_node)
{
	kfree(name_node);
}
264
265	static void netdev_name_node_add(struct net *net,
266	struct netdev_name_node *name_node)
267	{
268	hlist_add_head_rcu(n: &name_node->hlist,
269	h: dev_name_hash(net, name: name_node->name));
270	}
271
272	static void netdev_name_node_del(struct netdev_name_node *name_node)
273	{
274	hlist_del_rcu(n: &name_node->hlist);
275	}
276
277	static struct netdev_name_node netdev_name_node_lookup(struct* net *net,
278	const char *name)
279	{
280	struct hlist_head *head = dev_name_hash(net, name);
281	struct netdev_name_node *name_node;
282
283	hlist_for_each_entry(name_node, head, hlist)
284	if (!strcmp(name_node->name, name))
285	return name_node;
286	return NULL;
287	}
288
289	static struct netdev_name_node netdev_name_node_lookup_rcu(struct* net *net,
290	const char *name)
291	{
292	struct hlist_head *head = dev_name_hash(net, name);
293	struct netdev_name_node *name_node;
294
295	hlist_for_each_entry_rcu(name_node, head, hlist)
296	if (!strcmp(name_node->name, name))
297	return name_node;
298	return NULL;
299	}
300
301	bool netdev_name_in_use(struct net net, const* char *name)
302	{
303	return netdev_name_node_lookup(net, name);
304	}
305	EXPORT_SYMBOL(netdev_name_in_use);
306
307	int netdev_name_node_alt_create(struct net_device dev, const* char *name)
308	{
309	struct netdev_name_node *name_node;
310	struct net *net = dev_net(dev);
311
312	name_node = netdev_name_node_lookup(net, name);
313	if (name_node)
314	return -EEXIST;
315	name_node = netdev_name_node_alloc(dev, name);
316	if (!name_node)
317	return -ENOMEM;
318	netdev_name_node_add(net, name_node);
319	/ The node that holds dev->name acts as a head of per-device list. /
320	list_add_tail_rcu(new: &name_node->list, head: &dev->name_node->list);
321
322	return `0`;
323	}
324
325	static void netdev_name_node_alt_free(struct rcu_head *head)
326	{
327	struct netdev_name_node *name_node =
328	container_of(head, struct netdev_name_node, rcu);
329
330	kfree(objp: name_node->name);
331	netdev_name_node_free(name_node);
332	}
333
334	static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
335	{
336	netdev_name_node_del(name_node);
337	list_del(entry: &name_node->list);
338	call_rcu(head: &name_node->rcu, func: netdev_name_node_alt_free);
339	}
340
341	int netdev_name_node_alt_destroy(struct net_device dev, const* char *name)
342	{
343	struct netdev_name_node *name_node;
344	struct net *net = dev_net(dev);
345
346	name_node = netdev_name_node_lookup(net, name);
347	if (!name_node)
348	return -ENOENT;
349	/ lookup might have found our primary name or a name belonging*
350	* to another device.
351	*/
352	if (name_node == dev->name_node \|\| name_node->dev != dev)
353	return -EINVAL;
354
355	__netdev_name_node_alt_destroy(name_node);
356	return `0`;
357	}
358
359	static void netdev_name_node_alt_flush(struct net_device *dev)
360	{
361	struct netdev_name_node name_node, tmp;
362
363	list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
364	list_del(entry: &name_node->list);
365	netdev_name_node_alt_free(head: &name_node->rcu);
366	}
367	}
368
369	/ Device list insertion /
370	static void list_netdevice(struct net_device *dev)
371	{
372	struct netdev_name_node *name_node;
373	struct net *net = dev_net(dev);
374
375	ASSERT_RTNL();
376
377	list_add_tail_rcu(new: &dev->dev_list, head: &net->dev_base_head);
378	netdev_name_node_add(net, name_node: dev->name_node);
379	hlist_add_head_rcu(n: &dev->index_hlist,
380	h: dev_index_hash(net, ifindex: dev->ifindex));
381
382	netdev_for_each_altname(dev, name_node)
383	netdev_name_node_add(net, name_node);
384
385	/ We reserved the ifindex, this can't fail /
386	WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
387
388	dev_base_seq_inc(net);
389	}
390
391	/ Device list removal*
392	* caller must respect a RCU grace period before freeing/reusing dev
393	*/
394	static void unlist_netdevice(struct net_device *dev)
395	{
396	struct netdev_name_node *name_node;
397	struct net *net = dev_net(dev);
398
399	ASSERT_RTNL();
400
401	xa_erase(&net->dev_by_index, index: dev->ifindex);
402
403	netdev_for_each_altname(dev, name_node)
404	netdev_name_node_del(name_node);
405
406	/ Unlink dev from the device chain /
407	list_del_rcu(entry: &dev->dev_list);
408	netdev_name_node_del(name_node: dev->name_node);
409	hlist_del_rcu(n: &dev->index_hlist);
410
411	dev_base_seq_inc(net: dev_net(dev));
412	}
413
/* Our notifier list: the raw notifier chain head for netdevice events. */
static RAW_NOTIFIER_HEAD(netdev_chain);
419
420	/*
421	* Device drivers call our routines to queue packets here. We empty the
422	* queue in the local softnet handler.
423	*/
424
425	DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
426	EXPORT_PER_CPU_SYMBOL(softnet_data);
427
428	/ Page_pool has a lockless array/stack to alloc/recycle pages.*
429	* PP consumers must pay attention to run APIs in the appropriate context
430	* (e.g. NAPI context).
431	*/
432	static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
433
434	#ifdef CONFIG_LOCKDEP
435	/*
436	* register_netdevice() inits txq->_xmit_lock and sets lockdep class
437	* according to dev->type
438	*/
439	static const unsigned short netdev_lock_type[] = {
440	ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
441	ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
442	ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
443	ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
444	ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
445	ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
446	ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
447	ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
448	ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
449	ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
450	ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
451	ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
452	ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
453	ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
454	ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
455
/*
 * Lockdep class names, indexed in parallel with netdev_lock_type[]:
 * entry i names the class used for device type netdev_lock_type[i].
 */
static const char *const netdev_lock_name[] = {
	"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	"_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	"_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	"_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	"_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	"_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	"_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	"_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	"_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	"_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	"_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"
};
472
473	static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
474	static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
475
476	static inline unsigned short netdev_lock_pos(unsigned short dev_type)
477	{
478	int i;
479
480	for (i = `0`; i < ARRAY_SIZE(netdev_lock_type); i++)
481	if (netdev_lock_type[i] == dev_type)
482	return i;
483	/ the last key is used by default /
484	return ARRAY_SIZE(netdev_lock_type) - `1`;
485	}
486
487	static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
488	unsigned short dev_type)
489	{
490	int i;
491
492	i = netdev_lock_pos(dev_type);
493	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
494	netdev_lock_name[i]);
495	}
496
497	static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
498	{
499	int i;
500
501	i = netdev_lock_pos(dev_type: dev->type);
502	lockdep_set_class_and_name(&dev->addr_list_lock,
503	&netdev_addr_lock_key[i],
504	netdev_lock_name[i]);
505	}
506	#else
507	static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
508	unsigned short dev_type)
509	{
510	}
511
/* !CONFIG_LOCKDEP: no lock class to set, intentionally empty. */
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
515	#endif
516
517	/*******************************************************************************
518	*
519	* Protocol management and registration routines
520	*
521	*******************************************************************************/
522
523
524	/*
525	* Add a protocol ID to the list. Now that the input handler is
526	* smarter we can dispense with all the messy stuff that used to be
527	* here.
528	*
529	* BEWARE!!! Protocol handlers, mangling input packets,
530	* MUST BE last in hash buckets and checking protocol handlers
531	* MUST start from promiscuous ptype_all chain in net_bh.
532	* It is true now, do not change it.
533	* Explanation follows: if protocol handler, mangling packet, will
534	* be the first on list, it is not able to sense, that packet
535	* is cloned and should be copied-on-write, so that it will
536	* change it and subsequent readers will get broken packet.
537	* --ANK (980803)
538	*/
539
540	static inline struct list_head ptype_head(const* struct packet_type *pt)
541	{
542	if (pt->type == htons(ETH_P_ALL))
543	return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
544	else
545	return pt->dev ? &pt->dev->ptype_specific :
546	&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
547	}
548
549	/**
550	* dev_add_pack - add packet handler
551	* @pt: packet type declaration
552	*
553	* Add a protocol handler to the networking stack. The passed &packet_type
554	* is linked into kernel lists and may not be freed until it has been
555	* removed from the kernel lists.
556	*
557	* This call does not sleep therefore it can not
558	* guarantee all CPU's that are in middle of receiving packets
559	* will see the new packet type (until the next received packet).
560	*/
561
562	void dev_add_pack(struct packet_type *pt)
563	{
564	struct list_head *head = ptype_head(pt);
565
566	spin_lock(lock: &ptype_lock);
567	list_add_rcu(new: &pt->list, head);
568	spin_unlock(lock: &ptype_lock);
569	}
570	EXPORT_SYMBOL(dev_add_pack);
571
572	/**
573	* __dev_remove_pack - remove packet handler
574	* @pt: packet type declaration
575	*
576	* Remove a protocol handler that was previously added to the kernel
577	* protocol handlers by dev_add_pack(). The passed &packet_type is removed
578	* from the kernel lists and can be freed or reused once this function
579	* returns.
580	*
581	* The packet type might still be in use by receivers
582	* and must not be freed until after all the CPU's have gone
583	* through a quiescent state.
584	*/
585	void __dev_remove_pack(struct packet_type *pt)
586	{
587	struct list_head *head = ptype_head(pt);
588	struct packet_type *pt1;
589
590	spin_lock(lock: &ptype_lock);
591
592	list_for_each_entry(pt1, head, list) {
593	if (pt == pt1) {
594	list_del_rcu(entry: &pt->list);
595	goto out;
596	}
597	}
598
599	pr_warn("dev_remove_pack: %p not found\n", pt);
600	out:
601	spin_unlock(lock: &ptype_lock);
602	}
603	EXPORT_SYMBOL(__dev_remove_pack);
604
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler previously registered with dev_add_pack()
 *	and then wait in synchronize_net(). Once this function returns, the
 *	passed &packet_type is off the kernel lists and can be freed or
 *	reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first, then wait so that no CPU is still using @pt. */
	__dev_remove_pack(pt);
	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
624
625
626	/*******************************************************************************
627	*
628	* Device Interface Subroutines
629	*
630	*******************************************************************************/
631
632	/**
633	* dev_get_iflink - get 'iflink' value of a interface
634	* @dev: targeted interface
635	*
636	* Indicates the ifindex the interface is linked to.
637	* Physical interfaces have the same 'ifindex' and 'iflink' values.
638	*/
639
640	int dev_get_iflink(const struct net_device *dev)
641	{
642	if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
643	return dev->netdev_ops->ndo_get_iflink(dev);
644
645	return READ_ONCE(dev->ifindex);
646	}
647	EXPORT_SYMBOL(dev_get_iflink);
648
649	/**
650	* dev_fill_metadata_dst - Retrieve tunnel egress information.
651	* @dev: targeted interface
652	* @skb: The packet.
653	*
654	* For better visibility of tunnel traffic OVS needs to retrieve
655	* egress tunnel information for a packet. Following API allows
656	* user to get this info.
657	*/
658	int dev_fill_metadata_dst(struct net_device dev, struct* sk_buff *skb)
659	{
660	struct ip_tunnel_info *info;
661
662	if (!dev->netdev_ops \|\| !dev->netdev_ops->ndo_fill_metadata_dst)
663	return -EINVAL;
664
665	info = skb_tunnel_info_unclone(skb);
666	if (!info)
667	return -ENOMEM;
668	if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
669	return -EINVAL;
670
671	return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
672	}
673	EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
674
675	static struct net_device_path dev_fwd_path(struct* net_device_path_stack *stack)
676	{
677	int k = stack->num_paths++;
678
679	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
680	return NULL;
681
682	return &stack->path[k];
683	}
684
685	int dev_fill_forward_path(const struct net_device dev, const* u8 *daddr,
686	struct net_device_path_stack *stack)
687	{
688	const struct net_device *last_dev;
689	struct net_device_path_ctx ctx = {
690	.dev = dev,
691	};
692	struct net_device_path *path;
693	int ret = `0`;
694
695	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
696	stack->num_paths = `0`;
697	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
698	last_dev = ctx.dev;
699	path = dev_fwd_path(stack);
700	if (!path)
701	return -`1`;
702
703	memset(path, `0`, sizeof(struct net_device_path));
704	ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
705	if (ret < `0`)
706	return -`1`;
707
708	if (WARN_ON_ONCE(last_dev == ctx.dev))
709	return -`1`;
710	}
711
712	if (!ctx.dev)
713	return ret;
714
715	path = dev_fwd_path(stack);
716	if (!path)
717	return -`1`;
718	path->type = DEV_PATH_ETHERNET;
719	path->dev = ctx.dev;
720
721	return ret;
722	}
723	EXPORT_SYMBOL_GPL(dev_fill_forward_path);
724
725	/**
726	* __dev_get_by_name - find a device by its name
727	* @net: the applicable net namespace
728	* @name: name to find
729	*
730	* Find an interface by name. Must be called under RTNL semaphore.
731	* If the name is found a pointer to the device is returned.
732	* If the name is not found then %NULL is returned. The
733	* reference counters are not incremented so the caller must be
734	* careful with locks.
735	*/
736
737	struct net_device __dev_get_by_name(struct* net net, const* char *name)
738	{
739	struct netdev_name_node *node_name;
740
741	node_name = netdev_name_node_lookup(net, name);
742	return node_name ? node_name->dev : NULL;
743	}
744	EXPORT_SYMBOL(__dev_get_by_name);
745
746	/**
747	* dev_get_by_name_rcu - find a device by its name
748	* @net: the applicable net namespace
749	* @name: name to find
750	*
751	* Find an interface by name.
752	* If the name is found a pointer to the device is returned.
753	* If the name is not found then %NULL is returned.
754	* The reference counters are not incremented so the caller must be
755	* careful with locks. The caller must hold RCU lock.
756	*/
757
758	struct net_device dev_get_by_name_rcu(struct* net net, const* char *name)
759	{
760	struct netdev_name_node *node_name;
761
762	node_name = netdev_name_node_lookup_rcu(net, name);
763	return node_name ? node_name->dev : NULL;
764	}
765	EXPORT_SYMBOL(dev_get_by_name_rcu);
766
/* Deprecated for new users, call netdev_get_by_name() instead
 *
 * Looks @name up under rcu_read_lock() and passes the result to dev_hold()
 * before leaving the RCU section. Returns the device, or NULL if not found.
 * NOTE(review): dev_hold() is called even when the lookup returned NULL, so
 * it is presumably NULL-tolerant; confirm against its definition.
 */
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
779
780	/**
781	* netdev_get_by_name() - find a device by its name
782	* @net: the applicable net namespace
783	* @name: name to find
784	* @tracker: tracking object for the acquired reference
785	* @gfp: allocation flags for the tracker
786	*
787	* Find an interface by name. This can be called from any
788	* context and does its own locking. The returned handle has
789	* the usage count incremented and the caller must use netdev_put() to
790	* release it when it is no longer needed. %NULL is returned if no
791	* matching device is found.
792	*/
793	struct net_device netdev_get_by_name(struct* net net, const* char *name,
794	netdevice_tracker *tracker, gfp_t gfp)
795	{
796	struct net_device *dev;
797
798	dev = dev_get_by_name(net, name);
799	if (dev)
800	netdev_tracker_alloc(dev, tracker, gfp);
801	return dev;
802	}
803	EXPORT_SYMBOL(netdev_get_by_name);
804
805	/**
806	* __dev_get_by_index - find a device by its ifindex
807	* @net: the applicable net namespace
808	* @ifindex: index of device
809	*
810	* Search for an interface by index. Returns %NULL if the device
811	* is not found or a pointer to the device. The device has not
812	* had its reference counter increased so the caller must be careful
813	* about locking. The caller must hold the RTNL semaphore.
814	*/
815
816	struct net_device __dev_get_by_index(struct* net net, int* ifindex)
817	{
818	struct net_device *dev;
819	struct hlist_head *head = dev_index_hash(net, ifindex);
820
821	hlist_for_each_entry(dev, head, index_hlist)
822	if (dev->ifindex == ifindex)
823	return dev;
824
825	return NULL;
826	}
827	EXPORT_SYMBOL(__dev_get_by_index);
828
829	/**
830	* dev_get_by_index_rcu - find a device by its ifindex
831	* @net: the applicable net namespace
832	* @ifindex: index of device
833	*
834	* Search for an interface by index. Returns %NULL if the device
835	* is not found or a pointer to the device. The device has not
836	* had its reference counter increased so the caller must be careful
837	* about locking. The caller must hold RCU lock.
838	*/
839
840	struct net_device dev_get_by_index_rcu(struct* net net, int* ifindex)
841	{
842	struct net_device *dev;
843	struct hlist_head *head = dev_index_hash(net, ifindex);
844
845	hlist_for_each_entry_rcu(dev, head, index_hlist)
846	if (dev->ifindex == ifindex)
847	return dev;
848
849	return NULL;
850	}
851	EXPORT_SYMBOL(dev_get_by_index_rcu);
852
/* Deprecated for new users, call netdev_get_by_index() instead
 *
 * Looks @ifindex up under rcu_read_lock() and passes the result to
 * dev_hold() before leaving the RCU section. Returns the device, or NULL if
 * not found.
 * NOTE(review): dev_hold() is called even when the lookup returned NULL, so
 * it is presumably NULL-tolerant; confirm against its definition.
 */
struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
865
866	/**
867	* netdev_get_by_index() - find a device by its ifindex
868	* @net: the applicable net namespace
869	* @ifindex: index of device
870	* @tracker: tracking object for the acquired reference
871	* @gfp: allocation flags for the tracker
872	*
873	* Search for an interface by index. Returns NULL if the device
874	* is not found or a pointer to the device. The device returned has
875	* had a reference added and the pointer is safe until the user calls
876	* netdev_put() to indicate they have finished with it.
877	*/
878	struct net_device netdev_get_by_index(struct* net net, int* ifindex,
879	netdevice_tracker *tracker, gfp_t gfp)
880	{
881	struct net_device *dev;
882
883	dev = dev_get_by_index(net, ifindex);
884	if (dev)
885	netdev_tracker_alloc(dev, tracker, gfp);
886	return dev;
887	}
888	EXPORT_SYMBOL(netdev_get_by_index);
889
890	/**
891	* dev_get_by_napi_id - find a device by napi_id
892	* @napi_id: ID of the NAPI struct
893	*
894	* Search for an interface by NAPI ID. Returns %NULL if the device
895	* is not found or a pointer to the device. The device has not had
896	* its reference counter increased so the caller must be careful
897	* about locking. The caller must hold RCU lock.
898	*/
899
900	struct net_device dev_get_by_napi_id(unsigned* int napi_id)
901	{
902	struct napi_struct *napi;
903
904	WARN_ON_ONCE(!rcu_read_lock_held());
905
906	if (napi_id < MIN_NAPI_ID)
907	return NULL;
908
909	napi = napi_by_id(napi_id);
910
911	return napi ? napi->dev : NULL;
912	}
913	EXPORT_SYMBOL(dev_get_by_napi_id);
914
915	/**
916	* netdev_get_name - get a netdevice name, knowing its ifindex.
917	* @net: network namespace
918	* @name: a pointer to the buffer where the name will be stored.
919	* @ifindex: the ifindex of the interface to get the name from.
920	*/
921	int netdev_get_name(struct net net, char* name, int* ifindex)
922	{
923	struct net_device *dev;
924	int ret;
925
926	down_read(sem: &devnet_rename_sem);
927	rcu_read_lock();
928
929	dev = dev_get_by_index_rcu(net, ifindex);
930	if (!dev) {
931	ret = -ENODEV;
932	goto out;
933	}
934
935	strcpy(p: name, q: dev->name);
936
937	ret = `0`;
938	out:
939	rcu_read_unlock();
940	up_read(sem: &devnet_rename_sem);
941	return ret;
942	}
943
944	/**
945	* dev_getbyhwaddr_rcu - find a device by its hardware address
946	* @net: the applicable net namespace
947	* @type: media type of device
948	* @ha: hardware address
949	*
950	* Search for an interface by MAC address. Returns NULL if the device
951	* is not found or a pointer to the device.
952	* The caller must hold RCU or RTNL.
953	* The returned device has not had its ref count increased
954	* and the caller must therefore be careful about locking
955	*
956	*/
957
958	struct net_device dev_getbyhwaddr_rcu(struct* net net, unsigned* short type,
959	const char *ha)
960	{
961	struct net_device *dev;
962
963	for_each_netdev_rcu(net, dev)
964	if (dev->type == type &&
965	!memcmp(p: dev->dev_addr, q: ha, size: dev->addr_len))
966	return dev;
967
968	return NULL;
969	}
970	EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
971
972	struct net_device dev_getfirstbyhwtype(struct* net net, unsigned* short type)
973	{
974	struct net_device dev, ret = NULL;
975
976	rcu_read_lock();
977	for_each_netdev_rcu(net, dev)
978	if (dev->type == type) {
979	dev_hold(dev);
980	ret = dev;
981	break;
982	}
983	rcu_read_unlock();
984	return ret;
985	}
986	EXPORT_SYMBOL(dev_getfirstbyhwtype);
987
988	/**
989	* __dev_get_by_flags - find any device with given flags
990	* @net: the applicable net namespace
991	* @if_flags: IFF_* values
992	* @mask: bitmask of bits in if_flags to check
993	*
994	* Search for any interface with the given flags. Returns NULL if a device
995	* is not found or a pointer to the device. Must be called inside
996	* rtnl_lock(), and result refcount is unchanged.
997	*/
998
999	struct net_device __dev_get_by_flags(struct* net net, unsigned* short if_flags,
1000	unsigned short mask)
1001	{
1002	struct net_device dev, ret;
1003
1004	ASSERT_RTNL();
1005
1006	ret = NULL;
1007	for_each_netdev(net, dev) {
1008	if (((dev->flags ^ if_flags) & mask) == `0`) {
1009	ret = dev;
1010	break;
1011	}
1012	}
1013	return ret;
1014	}
1015	EXPORT_SYMBOL(__dev_get_by_flags);
1016
1017	/**
1018	* dev_valid_name - check if name is okay for network device
1019	* @name: name string
1020	*
1021	* Network device names need to be valid file names to
1022	* allow sysfs to work. We also disallow any kind of
1023	* whitespace.
1024	*/
1025	bool dev_valid_name(const char *name)
1026	{
1027	if (*name == `'\0'`)
1028	return false;
1029	if (strnlen(p: name, IFNAMSIZ) == IFNAMSIZ)
1030	return false;
1031	if (!strcmp(name, ".") \|\| !strcmp(name, ".."))
1032	return false;
1033
1034	while (*name) {
1035	if (name == `'/'` \|\| name == `':'` \|\| isspace(*name))
1036	return false;
1037	name++;
1038	}
1039	return true;
1040	}
1041	EXPORT_SYMBOL(dev_valid_name);
1042
1043	/**
1044	* __dev_alloc_name - allocate a name for a device
1045	* @net: network namespace to allocate the device name in
1046	* @name: name format string
1047	* @res: result name string
1048	*
1049	* Passed a format string - eg "lt%d" it will try and find a suitable
1050	* id. It scans list of devices to build up a free map, then chooses
1051	* the first empty slot. The caller must hold the dev_base or rtnl lock
1052	* while allocating the name and adding the device in order to avoid
1053	* duplicates.
1054	* Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1055	* Returns the number of the unit assigned or a negative errno code.
1056	*/
1057
1058	static int __dev_alloc_name(struct net net, const* char name, char* *res)
1059	{
1060	int i = `0`;
1061	const char *p;
1062	const int max_netdevices = `8`*PAGE_SIZE;
1063	unsigned long *inuse;
1064	struct net_device *d;
1065	char buf[IFNAMSIZ];
1066
1067	/ Verify the string as this thing may have come from the user.*
1068	* There must be one "%d" and no other "%" characters.
1069	*/
1070	p = strchr(name, `'%'`);
1071	if (!p \|\| p[`1`] != `'d'` \|\| strchr(p + `2`, `'%'`))
1072	return -EINVAL;
1073
1074	/ Use one page as a bit array of possible slots /
1075	inuse = bitmap_zalloc(nbits: max_netdevices, GFP_ATOMIC);
1076	if (!inuse)
1077	return -ENOMEM;
1078
1079	for_each_netdev(net, d) {
1080	struct netdev_name_node *name_node;
1081
1082	netdev_for_each_altname(d, name_node) {
1083	if (!sscanf(name_node->name, name, &i))
1084	continue;
1085	if (i < `0` \|\| i >= max_netdevices)
1086	continue;
1087
1088	/ avoid cases where sscanf is not exact inverse of printf /
1089	snprintf(buf, IFNAMSIZ, fmt: name, i);
1090	if (!strncmp(buf, name_node->name, IFNAMSIZ))
1091	__set_bit(i, inuse);
1092	}
1093	if (!sscanf(d->name, name, &i))
1094	continue;
1095	if (i < `0` \|\| i >= max_netdevices)
1096	continue;
1097
1098	/ avoid cases where sscanf is not exact inverse of printf /
1099	snprintf(buf, IFNAMSIZ, fmt: name, i);
1100	if (!strncmp(buf, d->name, IFNAMSIZ))
1101	__set_bit(i, inuse);
1102	}
1103
1104	i = find_first_zero_bit(addr: inuse, size: max_netdevices);
1105	bitmap_free(bitmap: inuse);
1106	if (i == max_netdevices)
1107	return -ENFILE;
1108
1109	/ 'res' and 'name' could overlap, use 'buf' as an intermediate buffer /
1110	strscpy(buf, name, IFNAMSIZ);
1111	snprintf(buf: res, IFNAMSIZ, fmt: buf, i);
1112	return i;
1113	}
1114
1115	/ Returns negative errno or allocated unit id (see __dev_alloc_name()) /
1116	static int dev_prep_valid_name(struct net net, struct* net_device *dev,
1117	const char want_name, char* *out_name,
1118	int dup_errno)
1119	{
1120	if (!dev_valid_name(want_name))
1121	return -EINVAL;
1122
1123	if (strchr(want_name, `'%'`))
1124	return __dev_alloc_name(net, name: want_name, res: out_name);
1125
1126	if (netdev_name_in_use(net, want_name))
1127	return -dup_errno;
1128	if (out_name != want_name)
1129	strscpy(out_name, want_name, IFNAMSIZ);
1130	return `0`;
1131	}
1132
1133	/**
1134	* dev_alloc_name - allocate a name for a device
1135	* @dev: device
1136	* @name: name format string
1137	*
1138	* Passed a format string - eg "lt%d" it will try and find a suitable
1139	* id. It scans list of devices to build up a free map, then chooses
1140	* the first empty slot. The caller must hold the dev_base or rtnl lock
1141	* while allocating the name and adding the device in order to avoid
1142	* duplicates.
1143	* Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1144	* Returns the number of the unit assigned or a negative errno code.
1145	*/
1146
1147	int dev_alloc_name(struct net_device dev, const* char *name)
1148	{
1149	return dev_prep_valid_name(net: dev_net(dev), dev, want_name: name, out_name: dev->name, ENFILE);
1150	}
1151	EXPORT_SYMBOL(dev_alloc_name);
1152
1153	static int dev_get_valid_name(struct net net, struct* net_device *dev,
1154	const char *name)
1155	{
1156	int ret;
1157
1158	ret = dev_prep_valid_name(net, dev, want_name: name, out_name: dev->name, EEXIST);
1159	return ret < `0` ? ret : `0`;
1160	}
1161
1162	/**
1163	* dev_change_name - change name of a device
1164	* @dev: device
1165	* @newname: name (or format string) must be at least IFNAMSIZ
1166	*
1167	* Change name of a device, can pass format strings "eth%d".
1168	* for wildcarding.
1169	*/
1170	int dev_change_name(struct net_device dev, const* char *newname)
1171	{
1172	unsigned char old_assign_type;
1173	char oldname[IFNAMSIZ];
1174	int err = `0`;
1175	int ret;
1176	struct net *net;
1177
1178	ASSERT_RTNL();
1179	BUG_ON(!dev_net(dev));
1180
1181	net = dev_net(dev);
1182
1183	down_write(sem: &devnet_rename_sem);
1184
1185	if (strncmp(newname, dev->name, IFNAMSIZ) == `0`) {
1186	up_write(sem: &devnet_rename_sem);
1187	return `0`;
1188	}
1189
1190	memcpy(oldname, dev->name, IFNAMSIZ);
1191
1192	err = dev_get_valid_name(net, dev, name: newname);
1193	if (err < `0`) {
1194	up_write(sem: &devnet_rename_sem);
1195	return err;
1196	}
1197
1198	if (oldname[`0`] && !strchr(oldname, `'%'`))
1199	netdev_info(dev, format: "renamed from %s%s\n", oldname,
1200	dev->flags & IFF_UP ? " (while UP)" : "");
1201
1202	old_assign_type = dev->name_assign_type;
1203	WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
1204
1205	rollback:
1206	ret = device_rename(dev: &dev->dev, new_name: dev->name);
1207	if (ret) {
1208	memcpy(dev->name, oldname, IFNAMSIZ);
1209	WRITE_ONCE(dev->name_assign_type, old_assign_type);
1210	up_write(sem: &devnet_rename_sem);
1211	return ret;
1212	}
1213
1214	up_write(sem: &devnet_rename_sem);
1215
1216	netdev_adjacent_rename_links(dev, oldname);
1217
1218	netdev_name_node_del(name_node: dev->name_node);
1219
1220	synchronize_net();
1221
1222	netdev_name_node_add(net, name_node: dev->name_node);
1223
1224	ret = call_netdevice_notifiers(val: NETDEV_CHANGENAME, dev);
1225	ret = notifier_to_errno(ret);
1226
1227	if (ret) {
1228	/ err >= 0 after dev_alloc_name() or stores the first errno /
1229	if (err >= `0`) {
1230	err = ret;
1231	down_write(sem: &devnet_rename_sem);
1232	memcpy(dev->name, oldname, IFNAMSIZ);
1233	memcpy(oldname, newname, IFNAMSIZ);
1234	WRITE_ONCE(dev->name_assign_type, old_assign_type);
1235	old_assign_type = NET_NAME_RENAMED;
1236	goto rollback;
1237	} else {
1238	netdev_err(dev, format: "name change rollback failed: %d\n",
1239	ret);
1240	}
1241	}
1242
1243	return err;
1244	}
1245
1246	/**
1247	* dev_set_alias - change ifalias of a device
1248	* @dev: device
1249	* @alias: name up to IFALIASZ
1250	* @len: limit of bytes to copy from info
1251	*
1252	* Set ifalias for a device,
1253	*/
1254	int dev_set_alias(struct net_device dev, const* char *alias, size_t len)
1255	{
1256	struct dev_ifalias *new_alias = NULL;
1257
1258	if (len >= IFALIASZ)
1259	return -EINVAL;
1260
1261	if (len) {
1262	new_alias = kmalloc(size: sizeof(*new_alias) + len + `1`, GFP_KERNEL);
1263	if (!new_alias)
1264	return -ENOMEM;
1265
1266	memcpy(new_alias->ifalias, alias, len);
1267	new_alias->ifalias[len] = `0`;
1268	}
1269
1270	mutex_lock(&ifalias_mutex);
1271	new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
1272	mutex_is_locked(&ifalias_mutex));
1273	mutex_unlock(lock: &ifalias_mutex);
1274
1275	if (new_alias)
1276	kfree_rcu(new_alias, rcuhead);
1277
1278	return len;
1279	}
1280	EXPORT_SYMBOL(dev_set_alias);
1281
1282	/**
1283	* dev_get_alias - get ifalias of a device
1284	* @dev: device
1285	* @name: buffer to store name of ifalias
1286	* @len: size of buffer
1287	*
1288	* get ifalias for a device. Caller must make sure dev cannot go
1289	* away, e.g. rcu read lock or own a reference count to device.
1290	*/
1291	int dev_get_alias(const struct net_device dev, char* *name, size_t len)
1292	{
1293	const struct dev_ifalias *alias;
1294	int ret = `0`;
1295
1296	rcu_read_lock();
1297	alias = rcu_dereference(dev->ifalias);
1298	if (alias)
1299	ret = snprintf(buf: name, size: len, fmt: "%s", alias->ifalias);
1300	rcu_read_unlock();
1301
1302	return ret;
1303	}
1304
1305	/**
1306	* netdev_features_change - device changes features
1307	* @dev: device to cause notification
1308	*
1309	* Called to indicate a device has changed features.
1310	*/
1311	void netdev_features_change(struct net_device *dev)
1312	{
1313	call_netdevice_notifiers(val: NETDEV_FEAT_CHANGE, dev);
1314	}
1315	EXPORT_SYMBOL(netdev_features_change);
1316
1317	/**
1318	* netdev_state_change - device changes state
1319	* @dev: device to cause notification
1320	*
1321	* Called to indicate a device has changed state. This function calls
1322	* the notifier chains for netdev_chain and sends a NEWLINK message
1323	* to the routing socket.
1324	*/
1325	void netdev_state_change(struct net_device *dev)
1326	{
1327	if (dev->flags & IFF_UP) {
1328	struct netdev_notifier_change_info change_info = {
1329	.info.dev = dev,
1330	};
1331
1332	call_netdevice_notifiers_info(val: NETDEV_CHANGE,
1333	info: &change_info.info);
1334	rtmsg_ifinfo(RTM_NEWLINK, dev, change: `0`, GFP_KERNEL, portid: `0`, NULL);
1335	}
1336	}
1337	EXPORT_SYMBOL(netdev_state_change);
1338
1339	/**
1340	* __netdev_notify_peers - notify network peers about existence of @dev,
1341	* to be called when rtnl lock is already held.
1342	* @dev: network device
1343	*
1344	* Generate traffic such that interested network peers are aware of
1345	* @dev, such as by generating a gratuitous ARP. This may be used when
1346	* a device wants to inform the rest of the network about some sort of
1347	* reconfiguration such as a failover event or virtual machine
1348	* migration.
1349	*/
1350	void __netdev_notify_peers(struct net_device *dev)
1351	{
1352	ASSERT_RTNL();
1353	call_netdevice_notifiers(val: NETDEV_NOTIFY_PEERS, dev);
1354	call_netdevice_notifiers(val: NETDEV_RESEND_IGMP, dev);
1355	}
1356	EXPORT_SYMBOL(__netdev_notify_peers);
1357
/**
 * netdev_notify_peers - notify network peers about existence of @dev
 * @dev: network device
 *
 * Locking wrapper around __netdev_notify_peers(): the rtnl lock is taken
 * for the duration of the call and released afterwards.
 *
 * The notification is meant to make interested peers aware of @dev, for
 * example by triggering a gratuitous ARP, after a reconfiguration such
 * as a failover event or a virtual machine migration.
 */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	__netdev_notify_peers(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
1375
1376	static int napi_threaded_poll(void *data);
1377
1378	static int napi_kthread_create(struct napi_struct *n)
1379	{
1380	int err = `0`;
1381
1382	/ Create and wake up the kthread once to put it in*
1383	* TASK_INTERRUPTIBLE mode to avoid the blocked task
1384	* warning and work with loadavg.
1385	*/
1386	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
1387	n->dev->name, n->napi_id);
1388	if (IS_ERR(ptr: n->thread)) {
1389	err = PTR_ERR(ptr: n->thread);
1390	pr_err("kthread_run failed with err %d\n", err);
1391	n->thread = NULL;
1392	}
1393
1394	return err;
1395	}
1396
1397	static int __dev_open(struct net_device dev, struct* netlink_ext_ack *extack)
1398	{
1399	const struct net_device_ops *ops = dev->netdev_ops;
1400	int ret;
1401
1402	ASSERT_RTNL();
1403	dev_addr_check(dev);
1404
1405	if (!netif_device_present(dev)) {
1406	/ may be detached because parent is runtime-suspended /
1407	if (dev->dev.parent)
1408	pm_runtime_resume(dev: dev->dev.parent);
1409	if (!netif_device_present(dev))
1410	return -ENODEV;
1411	}
1412
1413	/ Block netpoll from trying to do any rx path servicing.*
1414	* If we don't do this there is a chance ndo_poll_controller
1415	* or ndo_poll may be running while we open the device
1416	*/
1417	netpoll_poll_disable(dev);
1418
1419	ret = call_netdevice_notifiers_extack(val: NETDEV_PRE_UP, dev, extack);
1420	ret = notifier_to_errno(ret);
1421	if (ret)
1422	return ret;
1423
1424	set_bit(nr: __LINK_STATE_START, addr: &dev->state);
1425
1426	if (ops->ndo_validate_addr)
1427	ret = ops->ndo_validate_addr(dev);
1428
1429	if (!ret && ops->ndo_open)
1430	ret = ops->ndo_open(dev);
1431
1432	netpoll_poll_enable(dev);
1433
1434	if (ret)
1435	clear_bit(nr: __LINK_STATE_START, addr: &dev->state);
1436	else {
1437	dev->flags \|= IFF_UP;
1438	dev_set_rx_mode(dev);
1439	dev_activate(dev);
1440	add_device_randomness(buf: dev->dev_addr, len: dev->addr_len);
1441	}
1442
1443	return ret;
1444	}
1445
1446	/**
1447	* dev_open - prepare an interface for use.
1448	* @dev: device to open
1449	* @extack: netlink extended ack
1450	*
1451	* Takes a device from down to up state. The device's private open
1452	* function is invoked and then the multicast lists are loaded. Finally
1453	* the device is moved into the up state and a %NETDEV_UP message is
1454	* sent to the netdev notifier chain.
1455	*
1456	* Calling this function on an active interface is a nop. On a failure
1457	* a negative errno code is returned.
1458	*/
1459	int dev_open(struct net_device dev, struct* netlink_ext_ack *extack)
1460	{
1461	int ret;
1462
1463	if (dev->flags & IFF_UP)
1464	return `0`;
1465
1466	ret = __dev_open(dev, extack);
1467	if (ret < `0`)
1468	return ret;
1469
1470	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP \| IFF_RUNNING, GFP_KERNEL, portid: `0`, NULL);
1471	call_netdevice_notifiers(val: NETDEV_UP, dev);
1472
1473	return ret;
1474	}
1475	EXPORT_SYMBOL(dev_open);
1476
1477	static void __dev_close_many(struct list_head *head)
1478	{
1479	struct net_device *dev;
1480
1481	ASSERT_RTNL();
1482	might_sleep();
1483
1484	list_for_each_entry(dev, head, close_list) {
1485	/ Temporarily disable netpoll until the interface is down /
1486	netpoll_poll_disable(dev);
1487
1488	call_netdevice_notifiers(val: NETDEV_GOING_DOWN, dev);
1489
1490	clear_bit(nr: __LINK_STATE_START, addr: &dev->state);
1491
1492	/ Synchronize to scheduled poll. We cannot touch poll list, it*
1493	* can be even on different cpu. So just clear netif_running().
1494	*
1495	* dev->stop() will invoke napi_disable() on all of it's
1496	* napi_struct instances on this device.
1497	*/
1498	smp_mb__after_atomic(); / Commit netif_running(). /
1499	}
1500
1501	dev_deactivate_many(head);
1502
1503	list_for_each_entry(dev, head, close_list) {
1504	const struct net_device_ops *ops = dev->netdev_ops;
1505
1506	/*
1507	* Call the device specific close. This cannot fail.
1508	* Only if device is UP
1509	*
1510	* We allow it to be called even after a DETACH hot-plug
1511	* event.
1512	*/
1513	if (ops->ndo_stop)
1514	ops->ndo_stop(dev);
1515
1516	dev->flags &= ~IFF_UP;
1517	netpoll_poll_enable(dev);
1518	}
1519	}
1520
1521	static void __dev_close(struct net_device *dev)
1522	{
1523	LIST_HEAD(single);
1524
1525	list_add(new: &dev->close_list, head: &single);
1526	__dev_close_many(head: &single);
1527	list_del(entry: &single);
1528	}
1529
1530	void dev_close_many(struct list_head *head, bool unlink)
1531	{
1532	struct net_device dev, tmp;
1533
1534	/ Remove the devices that don't need to be closed /
1535	list_for_each_entry_safe(dev, tmp, head, close_list)
1536	if (!(dev->flags & IFF_UP))
1537	list_del_init(entry: &dev->close_list);
1538
1539	__dev_close_many(head);
1540
1541	list_for_each_entry_safe(dev, tmp, head, close_list) {
1542	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP \| IFF_RUNNING, GFP_KERNEL, portid: `0`, NULL);
1543	call_netdevice_notifiers(val: NETDEV_DOWN, dev);
1544	if (unlink)
1545	list_del_init(entry: &dev->close_list);
1546	}
1547	}
1548	EXPORT_SYMBOL(dev_close_many);
1549
1550	/**
1551	* dev_close - shutdown an interface.
1552	* @dev: device to shutdown
1553	*
1554	* This function moves an active device into down state. A
1555	* %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1556	* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1557	* chain.
1558	*/
1559	void dev_close(struct net_device *dev)
1560	{
1561	if (dev->flags & IFF_UP) {
1562	LIST_HEAD(single);
1563
1564	list_add(new: &dev->close_list, head: &single);
1565	dev_close_many(&single, true);
1566	list_del(entry: &single);
1567	}
1568	}
1569	EXPORT_SYMBOL(dev_close);
1570
1571
1572	/**
1573	* dev_disable_lro - disable Large Receive Offload on a device
1574	* @dev: device
1575	*
1576	* Disable Large Receive Offload (LRO) on a net device. Must be
1577	* called under RTNL. This is needed if received packets may be
1578	* forwarded to another interface.
1579	*/
1580	void dev_disable_lro(struct net_device *dev)
1581	{
1582	struct net_device *lower_dev;
1583	struct list_head *iter;
1584
1585	dev->wanted_features &= ~NETIF_F_LRO;
1586	netdev_update_features(dev);
1587
1588	if (unlikely(dev->features & NETIF_F_LRO))
1589	netdev_WARN(dev, "failed to disable LRO!\n");
1590
1591	netdev_for_each_lower_dev(dev, lower_dev, iter)
1592	dev_disable_lro(dev: lower_dev);
1593	}
1594	EXPORT_SYMBOL(dev_disable_lro);
1595
1596	/**
1597	* dev_disable_gro_hw - disable HW Generic Receive Offload on a device
1598	* @dev: device
1599	*
1600	* Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be
1601	* called under RTNL. This is needed if Generic XDP is installed on
1602	* the device.
1603	*/
1604	static void dev_disable_gro_hw(struct net_device *dev)
1605	{
1606	dev->wanted_features &= ~NETIF_F_GRO_HW;
1607	netdev_update_features(dev);
1608
1609	if (unlikely(dev->features & NETIF_F_GRO_HW))
1610	netdev_WARN(dev, "failed to disable GRO_HW!\n");
1611	}
1612
1613	const char netdev_cmd_to_name(enum* netdev_cmd cmd)
1614	{
1615	#define N(val) \
1616	case NETDEV_##val: \
1617	return "NETDEV_" __stringify(val);
1618	switch (cmd) {
1619	N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
1620	N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
1621	N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
1622	N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN)
1623	N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA)
1624	N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE)
1625	N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
1626	N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
1627	N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
1628	N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
1629	N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
1630	N(XDP_FEAT_CHANGE)
1631	}
1632	#undef N
1633	return "UNKNOWN_NETDEV_EVENT";
1634	}
1635	EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
1636
1637	static int call_netdevice_notifier(struct notifier_block nb, unsigned* long val,
1638	struct net_device *dev)
1639	{
1640	struct netdev_notifier_info info = {
1641	.dev = dev,
1642	};
1643
1644	return nb->notifier_call(nb, val, &info);
1645	}
1646
1647	static int call_netdevice_register_notifiers(struct notifier_block *nb,
1648	struct net_device *dev)
1649	{
1650	int err;
1651
1652	err = call_netdevice_notifier(nb, val: NETDEV_REGISTER, dev);
1653	err = notifier_to_errno(ret: err);
1654	if (err)
1655	return err;
1656
1657	if (!(dev->flags & IFF_UP))
1658	return `0`;
1659
1660	call_netdevice_notifier(nb, val: NETDEV_UP, dev);
1661	return `0`;
1662	}
1663
1664	static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
1665	struct net_device *dev)
1666	{
1667	if (dev->flags & IFF_UP) {
1668	call_netdevice_notifier(nb, val: NETDEV_GOING_DOWN,
1669	dev);
1670	call_netdevice_notifier(nb, val: NETDEV_DOWN, dev);
1671	}
1672	call_netdevice_notifier(nb, val: NETDEV_UNREGISTER, dev);
1673	}
1674
1675	static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
1676	struct net *net)
1677	{
1678	struct net_device *dev;
1679	int err;
1680
1681	for_each_netdev(net, dev) {
1682	err = call_netdevice_register_notifiers(nb, dev);
1683	if (err)
1684	goto rollback;
1685	}
1686	return `0`;
1687
1688	rollback:
1689	for_each_netdev_continue_reverse(net, dev)
1690	call_netdevice_unregister_notifiers(nb, dev);
1691	return err;
1692	}
1693
1694	static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
1695	struct net *net)
1696	{
1697	struct net_device *dev;
1698
1699	for_each_netdev(net, dev)
1700	call_netdevice_unregister_notifiers(nb, dev);
1701	}
1702
/* While non-zero, notifier registration skips replaying device events. */
static int dev_boot_phase = 1;
1704
1705	/**
1706	* register_netdevice_notifier - register a network notifier block
1707	* @nb: notifier
1708	*
1709	* Register a notifier to be called when network device events occur.
1710	* The notifier passed is linked into the kernel structures and must
1711	* not be reused until it has been unregistered. A negative errno code
1712	* is returned on a failure.
1713	*
1714	* When registered all registration and up events are replayed
1715	* to the new notifier to allow device to have a race free
1716	* view of the network device list.
1717	*/
1718
1719	int register_netdevice_notifier(struct notifier_block *nb)
1720	{
1721	struct net *net;
1722	int err;
1723
1724	/ Close race with setup_net() and cleanup_net() /
1725	down_write(sem: &pernet_ops_rwsem);
1726	rtnl_lock();
1727	err = raw_notifier_chain_register(nh: &netdev_chain, nb);
1728	if (err)
1729	goto unlock;
1730	if (dev_boot_phase)
1731	goto unlock;
1732	for_each_net(net) {
1733	err = call_netdevice_register_net_notifiers(nb, net);
1734	if (err)
1735	goto rollback;
1736	}
1737
1738	unlock:
1739	rtnl_unlock();
1740	up_write(sem: &pernet_ops_rwsem);
1741	return err;
1742
1743	rollback:
1744	for_each_net_continue_reverse(net)
1745	call_netdevice_unregister_net_notifiers(nb, net);
1746
1747	raw_notifier_chain_unregister(nh: &netdev_chain, nb);
1748	goto unlock;
1749	}
1750	EXPORT_SYMBOL(register_netdevice_notifier);
1751
1752	/**
1753	* unregister_netdevice_notifier - unregister a network notifier block
1754	* @nb: notifier
1755	*
1756	* Unregister a notifier previously registered by
1757	* register_netdevice_notifier(). The notifier is unlinked into the
1758	* kernel structures and may then be reused. A negative errno code
1759	* is returned on a failure.
1760	*
1761	* After unregistering unregister and down device events are synthesized
1762	* for all devices on the device list to the removed notifier to remove
1763	* the need for special case cleanup code.
1764	*/
1765
1766	int unregister_netdevice_notifier(struct notifier_block *nb)
1767	{
1768	struct net *net;
1769	int err;
1770
1771	/ Close race with setup_net() and cleanup_net() /
1772	down_write(sem: &pernet_ops_rwsem);
1773	rtnl_lock();
1774	err = raw_notifier_chain_unregister(nh: &netdev_chain, nb);
1775	if (err)
1776	goto unlock;
1777
1778	for_each_net(net)
1779	call_netdevice_unregister_net_notifiers(nb, net);
1780
1781	unlock:
1782	rtnl_unlock();
1783	up_write(sem: &pernet_ops_rwsem);
1784	return err;
1785	}
1786	EXPORT_SYMBOL(unregister_netdevice_notifier);
1787
1788	static int __register_netdevice_notifier_net(struct net *net,
1789	struct notifier_block *nb,
1790	bool ignore_call_fail)
1791	{
1792	int err;
1793
1794	err = raw_notifier_chain_register(nh: &net->netdev_chain, nb);
1795	if (err)
1796	return err;
1797	if (dev_boot_phase)
1798	return `0`;
1799
1800	err = call_netdevice_register_net_notifiers(nb, net);
1801	if (err && !ignore_call_fail)
1802	goto chain_unregister;
1803
1804	return `0`;
1805
1806	chain_unregister:
1807	raw_notifier_chain_unregister(nh: &net->netdev_chain, nb);
1808	return err;
1809	}
1810
1811	static int __unregister_netdevice_notifier_net(struct net *net,
1812	struct notifier_block *nb)
1813	{
1814	int err;
1815
1816	err = raw_notifier_chain_unregister(nh: &net->netdev_chain, nb);
1817	if (err)
1818	return err;
1819
1820	call_netdevice_unregister_net_notifiers(nb, net);
1821	return `0`;
1822	}
1823
1824	/**
1825	* register_netdevice_notifier_net - register a per-netns network notifier block
1826	* @net: network namespace
1827	* @nb: notifier
1828	*
1829	* Register a notifier to be called when network device events occur.
1830	* The notifier passed is linked into the kernel structures and must
1831	* not be reused until it has been unregistered. A negative errno code
1832	* is returned on a failure.
1833	*
1834	* When registered all registration and up events are replayed
1835	* to the new notifier to allow device to have a race free
1836	* view of the network device list.
1837	*/
1838
1839	int register_netdevice_notifier_net(struct net net, struct* notifier_block *nb)
1840	{
1841	int err;
1842
1843	rtnl_lock();
1844	err = __register_netdevice_notifier_net(net, nb, ignore_call_fail: false);
1845	rtnl_unlock();
1846	return err;
1847	}
1848	EXPORT_SYMBOL(register_netdevice_notifier_net);
1849
/**
 * unregister_netdevice_notifier_net - unregister a per-netns
 *                                     network notifier block
 * @net: network namespace
 * @nb: notifier
 *
 * Counterpart of register_netdevice_notifier_net(): @nb is unlinked from
 * the kernel structures under the rtnl lock and may be reused afterwards.
 * A negative errno code is returned on a failure.
 *
 * Once unlinked, the removed notifier is sent synthesized down and
 * unregister events for all devices on the device list, so that it needs
 * no special case cleanup code.
 */

int unregister_netdevice_notifier_net(struct net *net,
				      struct notifier_block *nb)
{
	int rc;

	rtnl_lock();
	rc = __unregister_netdevice_notifier_net(net, nb);
	rtnl_unlock();

	return rc;
}
EXPORT_SYMBOL(unregister_netdevice_notifier_net);
1877
1878	static void __move_netdevice_notifier_net(struct net *src_net,
1879	struct net *dst_net,
1880	struct notifier_block *nb)
1881	{
1882	__unregister_netdevice_notifier_net(net: src_net, nb);
1883	__register_netdevice_notifier_net(net: dst_net, nb, ignore_call_fail: true);
1884	}
1885
1886	int register_netdevice_notifier_dev_net(struct net_device *dev,
1887	struct notifier_block *nb,
1888	struct netdev_net_notifier *nn)
1889	{
1890	int err;
1891
1892	rtnl_lock();
1893	err = __register_netdevice_notifier_net(net: dev_net(dev), nb, ignore_call_fail: false);
1894	if (!err) {
1895	nn->nb = nb;
1896	list_add(new: &nn->list, head: &dev->net_notifier_list);
1897	}
1898	rtnl_unlock();
1899	return err;
1900	}
1901	EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
1902
1903	int unregister_netdevice_notifier_dev_net(struct net_device *dev,
1904	struct notifier_block *nb,
1905	struct netdev_net_notifier *nn)
1906	{
1907	int err;
1908
1909	rtnl_lock();
1910	list_del(entry: &nn->list);
1911	err = __unregister_netdevice_notifier_net(net: dev_net(dev), nb);
1912	rtnl_unlock();
1913	return err;
1914	}
1915	EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
1916
1917	static void move_netdevice_notifiers_dev_net(struct net_device *dev,
1918	struct net *net)
1919	{
1920	struct netdev_net_notifier *nn;
1921
1922	list_for_each_entry(nn, &dev->net_notifier_list, list)
1923	__move_netdevice_notifier_net(src_net: dev_net(dev), dst_net: net, nb: nn->nb);
1924	}
1925
1926	/**
1927	* call_netdevice_notifiers_info - call all network notifier blocks
1928	* @val: value passed unmodified to notifier function
1929	* @info: notifier information data
1930	*
1931	* Call all network notifier blocks. Parameters and return value
1932	* are as for raw_notifier_call_chain().
1933	*/
1934
1935	int call_netdevice_notifiers_info(unsigned long val,
1936	struct netdev_notifier_info *info)
1937	{
1938	struct net *net = dev_net(dev: info->dev);
1939	int ret;
1940
1941	ASSERT_RTNL();
1942
1943	/ Run per-netns notifier block chain first, then run the global one.*
1944	* Hopefully, one day, the global one is going to be removed after
1945	* all notifier block registrators get converted to be per-netns.
1946	*/
1947	ret = raw_notifier_call_chain(nh: &net->netdev_chain, val, v: info);
1948	if (ret & NOTIFY_STOP_MASK)
1949	return ret;
1950	return raw_notifier_call_chain(nh: &netdev_chain, val, v: info);
1951	}
1952
1953	/**
1954	* call_netdevice_notifiers_info_robust - call per-netns notifier blocks
1955	* for and rollback on error
1956	* @val_up: value passed unmodified to notifier function
1957	* @val_down: value passed unmodified to the notifier function when
1958	* recovering from an error on @val_up
1959	* @info: notifier information data
1960	*
1961	* Call all per-netns network notifier blocks, but not notifier blocks on
1962	* the global notifier chain. Parameters and return value are as for
1963	* raw_notifier_call_chain_robust().
1964	*/
1965
1966	static int
1967	call_netdevice_notifiers_info_robust(unsigned long val_up,
1968	unsigned long val_down,
1969	struct netdev_notifier_info *info)
1970	{
1971	struct net *net = dev_net(dev: info->dev);
1972
1973	ASSERT_RTNL();
1974
1975	return raw_notifier_call_chain_robust(nh: &net->netdev_chain,
1976	val_up, val_down, v: info);
1977	}
1978
1979	static int call_netdevice_notifiers_extack(unsigned long val,
1980	struct net_device *dev,
1981	struct netlink_ext_ack *extack)
1982	{
1983	struct netdev_notifier_info info = {
1984	.dev = dev,
1985	.extack = extack,
1986	};
1987
1988	return call_netdevice_notifiers_info(val, info: &info);
1989	}
1990
1991	/**
1992	* call_netdevice_notifiers - call all network notifier blocks
1993	* @val: value passed unmodified to notifier function
1994	* @dev: net_device pointer passed unmodified to notifier function
1995	*
1996	* Call all network notifier blocks. Parameters and return value
1997	* are as for raw_notifier_call_chain().
1998	*/
1999
2000	int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
2001	{
2002	return call_netdevice_notifiers_extack(val, dev, NULL);
2003	}
2004	EXPORT_SYMBOL(call_netdevice_notifiers);
2005
2006	/**
2007	* call_netdevice_notifiers_mtu - call all network notifier blocks
2008	* @val: value passed unmodified to notifier function
2009	* @dev: net_device pointer passed unmodified to notifier function
2010	* @arg: additional u32 argument passed to the notifier function
2011	*
2012	* Call all network notifier blocks. Parameters and return value
2013	* are as for raw_notifier_call_chain().
2014	*/
2015	static int call_netdevice_notifiers_mtu(unsigned long val,
2016	struct net_device *dev, u32 arg)
2017	{
2018	struct netdev_notifier_info_ext info = {
2019	.info.dev = dev,
2020	.ext.mtu = arg,
2021	};
2022
2023	BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != `0`);
2024
2025	return call_netdevice_notifiers_info(val, info: &info.info);
2026	}
2027
#ifdef CONFIG_NET_INGRESS
/* Static key counting users of the ingress path; initially false. */
static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);

/* Add one user of the ingress static key. */
void net_inc_ingress_queue(void)
{
	static_branch_inc(&ingress_needed_key);
}
EXPORT_SYMBOL_GPL(net_inc_ingress_queue);

/* Drop one user of the ingress static key. */
void net_dec_ingress_queue(void)
{
	static_branch_dec(&ingress_needed_key);
}
EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
#endif /* CONFIG_NET_INGRESS */
2043
#ifdef CONFIG_NET_EGRESS
/* Static key counting users of the egress path; initially false. */
static DEFINE_STATIC_KEY_FALSE(egress_needed_key);

/* Add one user of the egress static key. */
void net_inc_egress_queue(void)
{
	static_branch_inc(&egress_needed_key);
}
EXPORT_SYMBOL_GPL(net_inc_egress_queue);

/* Drop one user of the egress static key. */
void net_dec_egress_queue(void)
{
	static_branch_dec(&egress_needed_key);
}
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif /* CONFIG_NET_EGRESS */
2059
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;

/* Work handler: fold the deferred enable/disable requests into
 * netstamp_wanted and switch netstamp_needed_key on when the resulting
 * count is positive, off otherwise.
 */
static void netstamp_clear(struct work_struct *work)
{
	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
	int wanted;

	wanted = atomic_add_return(deferred, &netstamp_wanted);
	if (wanted > 0)
		static_branch_enable(&netstamp_needed_key);
	else
		static_branch_disable(&netstamp_needed_key);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
2078
2079	void net_enable_timestamp(void)
2080	{
2081	#ifdef CONFIG_JUMP_LABEL
2082	int wanted = atomic_read(v: &netstamp_wanted);
2083
2084	while (wanted > `0`) {
2085	if (atomic_try_cmpxchg(v: &netstamp_wanted, old: &wanted, new: wanted + `1`))
2086	return;
2087	}
2088	atomic_inc(v: &netstamp_needed_deferred);
2089	schedule_work(work: &netstamp_work);
2090	#else
2091	static_branch_inc(&netstamp_needed_key);
2092	#endif
2093	}
2094	EXPORT_SYMBOL(net_enable_timestamp);
2095
2096	void net_disable_timestamp(void)
2097	{
2098	#ifdef CONFIG_JUMP_LABEL
2099	int wanted = atomic_read(v: &netstamp_wanted);
2100
2101	while (wanted > `1`) {
2102	if (atomic_try_cmpxchg(v: &netstamp_wanted, old: &wanted, new: wanted - `1`))
2103	return;
2104	}
2105	atomic_dec(v: &netstamp_needed_deferred);
2106	schedule_work(work: &netstamp_work);
2107	#else
2108	static_branch_dec(&netstamp_needed_key);
2109	#endif
2110	}
2111	EXPORT_SYMBOL(net_disable_timestamp);
2112
2113	static inline void net_timestamp_set(struct sk_buff *skb)
2114	{
2115	skb->tstamp = `0`;
2116	skb->mono_delivery_time = `0`;
2117	if (static_branch_unlikely(&netstamp_needed_key))
2118	skb->tstamp = ktime_get_real();
2119	}
2120
/*
 * Stamp SKB with the current real time when timestamping is enabled,
 * COND holds and the skb carries no timestamp yet.  Wrapped in
 * do { } while (0) so the macro expands to a single statement and cannot
 * capture a following 'else'.
 */
#define net_timestamp_check(COND, SKB)					\
	do {								\
		if (static_branch_unlikely(&netstamp_needed_key)) {	\
			if ((COND) && !(SKB)->tstamp)			\
				(SKB)->tstamp = ktime_get_real();	\
		}							\
	} while (0)
2126
/* Wrapper around __is_skb_forwardable() with the MTU check enabled. */
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
	return __is_skb_forwardable(dev, skb, true);
}
EXPORT_SYMBOL_GPL(is_skb_forwardable);
2132
2133	static int __dev_forward_skb2(struct net_device dev, struct* sk_buff *skb,
2134	bool check_mtu)
2135	{
2136	int ret = ____dev_forward_skb(dev, skb, check_mtu);
2137
2138	if (likely(!ret)) {
2139	skb->protocol = eth_type_trans(skb, dev);
2140	skb_postpull_rcsum(skb, start: eth_hdr(skb), ETH_HLEN);
2141	}
2142
2143	return ret;
2144	}
2145
2146	int __dev_forward_skb(struct net_device dev, struct* sk_buff *skb)
2147	{
2148	return __dev_forward_skb2(dev, skb, check_mtu: true);
2149	}
2150	EXPORT_SYMBOL_GPL(__dev_forward_skb);
2151
/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	/* only queue the skb when the forward step returned 0 */
	return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
2175
2176	int dev_forward_skb_nomtu(struct net_device dev, struct* sk_buff *skb)
2177	{
2178	return __dev_forward_skb2(dev, skb, check_mtu: false) ?: netif_rx_internal(skb);
2179	}
2180
2181	static inline int deliver_skb(struct sk_buff *skb,
2182	struct packet_type *pt_prev,
2183	struct net_device *orig_dev)
2184	{
2185	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
2186	return -ENOMEM;
2187	refcount_inc(r: &skb->users);
2188	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2189	}
2190
2191	static inline void deliver_ptype_list_skb(struct sk_buff *skb,
2192	struct packet_type **pt,
2193	struct net_device *orig_dev,
2194	__be16 type,
2195	struct list_head *ptype_list)
2196	{
2197	struct packet_type ptype, pt_prev = *pt;
2198
2199	list_for_each_entry_rcu(ptype, ptype_list, list) {
2200	if (ptype->type != type)
2201	continue;
2202	if (pt_prev)
2203	deliver_skb(skb, pt_prev, orig_dev);
2204	pt_prev = ptype;
2205	}
2206	*pt = pt_prev;
2207	}
2208
2209	static inline bool skb_loop_sk(struct packet_type ptype, struct* sk_buff *skb)
2210	{
2211	if (!ptype->af_packet_priv \|\| !skb->sk)
2212	return false;
2213
2214	if (ptype->id_match)
2215	return ptype->id_match(ptype, skb->sk);
2216	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
2217	return true;
2218
2219	return false;
2220	}
2221
2222	/**
2223	* dev_nit_active - return true if any network interface taps are in use
2224	*
2225	* @dev: network device to check for the presence of taps
2226	*/
2227	bool dev_nit_active(struct net_device *dev)
2228	{
2229	return !list_empty(head: &net_hotdata.ptype_all) \|\|
2230	!list_empty(head: &dev->ptype_all);
2231	}
2232	EXPORT_SYMBOL_GPL(dev_nit_active);
2233
2234	/*
2235	* Support routine. Sends outgoing frames to any network
2236	* taps currently in use.
2237	*/
2238
2239	void dev_queue_xmit_nit(struct sk_buff skb, struct* net_device *dev)
2240	{
2241	struct list_head *ptype_list = &net_hotdata.ptype_all;
2242	struct packet_type ptype, pt_prev = NULL;
2243	struct sk_buff *skb2 = NULL;
2244
2245	rcu_read_lock();
2246	again:
2247	list_for_each_entry_rcu(ptype, ptype_list, list) {
2248	if (READ_ONCE(ptype->ignore_outgoing))
2249	continue;
2250
2251	/ Never send packets back to the socket*
2252	* they originated from - MvS (miquels@drinkel.ow.org)
2253	*/
2254	if (skb_loop_sk(ptype, skb))
2255	continue;
2256
2257	if (pt_prev) {
2258	deliver_skb(skb: skb2, pt_prev, orig_dev: skb->dev);
2259	pt_prev = ptype;
2260	continue;
2261	}
2262
2263	/ need to clone skb, done only once /
2264	skb2 = skb_clone(skb, GFP_ATOMIC);
2265	if (!skb2)
2266	goto out_unlock;
2267
2268	net_timestamp_set(skb: skb2);
2269
2270	/ skb->nh should be correctly*
2271	* set by sender, so that the second statement is
2272	* just protection against buggy protocols.
2273	*/
2274	skb_reset_mac_header(skb: skb2);
2275
2276	if (skb_network_header(skb: skb2) < skb2->data \|\|
2277	skb_network_header(skb: skb2) > skb_tail_pointer(skb: skb2)) {
2278	net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
2279	ntohs(skb2->protocol),
2280	dev->name);
2281	skb_reset_network_header(skb: skb2);
2282	}
2283
2284	skb2->transport_header = skb2->network_header;
2285	skb2->pkt_type = PACKET_OUTGOING;
2286	pt_prev = ptype;
2287	}
2288
2289	if (ptype_list == &net_hotdata.ptype_all) {
2290	ptype_list = &dev->ptype_all;
2291	goto again;
2292	}
2293	out_unlock:
2294	if (pt_prev) {
2295	if (!skb_orphan_frags_rx(skb: skb2, GFP_ATOMIC))
2296	pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
2297	else
2298	kfree_skb(skb: skb2);
2299	}
2300	rcu_read_unlock();
2301	}
2302	EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
2303
2304	/**
2305	* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
2306	* @dev: Network device
2307	* @txq: number of queues available
2308	*
2309	* If real_num_tx_queues is changed the tc mappings may no longer be
2310	* valid. To resolve this verify the tc mapping remains valid and if
2311	* not NULL the mapping. With no priorities mapping to this
2312	* offset/count pair it will no longer be used. In the worst case TC0
2313	* is invalid nothing can be done so disable priority mappings. If is
2314	* expected that drivers will fix this mapping if they can before
2315	* calling netif_set_real_num_tx_queues.
2316	*/
2317	static void netif_setup_tc(struct net_device dev, unsigned* int txq)
2318	{
2319	int i;
2320	struct netdev_tc_txq *tc = &dev->tc_to_txq[`0`];
2321
2322	/ If TC0 is invalidated disable TC mapping /
2323	if (tc->offset + tc->count > txq) {
2324	netdev_warn(dev, format: "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
2325	dev->num_tc = `0`;
2326	return;
2327	}
2328
2329	/ Invalidated prio to tc mappings set to TC0 /
2330	for (i = `1`; i < TC_BITMASK + `1`; i++) {
2331	int q = netdev_get_prio_tc_map(dev, prio: i);
2332
2333	tc = &dev->tc_to_txq[q];
2334	if (tc->offset + tc->count > txq) {
2335	netdev_warn(dev, format: "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
2336	i, q);
2337	netdev_set_prio_tc_map(dev, prio: i, tc: `0`);
2338	}
2339	}
2340	}
2341
2342	int netdev_txq_to_tc(struct net_device dev, unsigned* int txq)
2343	{
2344	if (dev->num_tc) {
2345	struct netdev_tc_txq *tc = &dev->tc_to_txq[`0`];
2346	int i;
2347
2348	/ walk through the TCs and see if it falls into any of them /
2349	for (i = `0`; i < TC_MAX_QUEUE; i++, tc++) {
2350	if ((txq - tc->offset) < tc->count)
2351	return i;
2352	}
2353
2354	/ didn't find it, just return -1 to indicate no match /
2355	return -`1`;
2356	}
2357
2358	return `0`;
2359	}
2360	EXPORT_SYMBOL(netdev_txq_to_tc);
2361
2362	#ifdef CONFIG_XPS
2363	static struct static_key xps_needed __read_mostly;
2364	static struct static_key xps_rxqs_needed __read_mostly;
2365	static DEFINE_MUTEX(xps_map_mutex);
2366	#define xmap_dereference(P) \
2367	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
2368
2369	static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
2370	struct xps_dev_maps old_maps, int* tci, u16 index)
2371	{
2372	struct xps_map *map = NULL;
2373	int pos;
2374
2375	map = xmap_dereference(dev_maps->attr_map[tci]);
2376	if (!map)
2377	return false;
2378
2379	for (pos = map->len; pos--;) {
2380	if (map->queues[pos] != index)
2381	continue;
2382
2383	if (map->len > `1`) {
2384	map->queues[pos] = map->queues[--map->len];
2385	break;
2386	}
2387
2388	if (old_maps)
2389	RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
2390	RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2391	kfree_rcu(map, rcu);
2392	return false;
2393	}
2394
2395	return true;
2396	}
2397
2398	static bool remove_xps_queue_cpu(struct net_device *dev,
2399	struct xps_dev_maps *dev_maps,
2400	int cpu, u16 offset, u16 count)
2401	{
2402	int num_tc = dev_maps->num_tc;
2403	bool active = false;
2404	int tci;
2405
2406	for (tci = cpu * num_tc; num_tc--; tci++) {
2407	int i, j;
2408
2409	for (i = count, j = offset; i--; j++) {
2410	if (!remove_xps_queue(dev_maps, NULL, tci, index: j))
2411	break;
2412	}
2413
2414	active \|= i < `0`;
2415	}
2416
2417	return active;
2418	}
2419
2420	static void reset_xps_maps(struct net_device *dev,
2421	struct xps_dev_maps *dev_maps,
2422	enum xps_map_type type)
2423	{
2424	static_key_slow_dec_cpuslocked(key: &xps_needed);
2425	if (type == XPS_RXQS)
2426	static_key_slow_dec_cpuslocked(key: &xps_rxqs_needed);
2427
2428	RCU_INIT_POINTER(dev->xps_maps[type], NULL);
2429
2430	kfree_rcu(dev_maps, rcu);
2431	}
2432
2433	static void clean_xps_maps(struct net_device dev, enum* xps_map_type type,
2434	u16 offset, u16 count)
2435	{
2436	struct xps_dev_maps *dev_maps;
2437	bool active = false;
2438	int i, j;
2439
2440	dev_maps = xmap_dereference(dev->xps_maps[type]);
2441	if (!dev_maps)
2442	return;
2443
2444	for (j = `0`; j < dev_maps->nr_ids; j++)
2445	active \|= remove_xps_queue_cpu(dev, dev_maps, cpu: j, offset, count);
2446	if (!active)
2447	reset_xps_maps(dev, dev_maps, type);
2448
2449	if (type == XPS_CPUS) {
2450	for (i = offset + (count - `1`); count--; i--)
2451	netdev_queue_numa_node_write(
2452	q: netdev_get_tx_queue(dev, index: i), NUMA_NO_NODE);
2453	}
2454	}
2455
2456	static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
2457	u16 count)
2458	{
2459	if (!static_key_false(key: &xps_needed))
2460	return;
2461
2462	cpus_read_lock();
2463	mutex_lock(&xps_map_mutex);
2464
2465	if (static_key_false(key: &xps_rxqs_needed))
2466	clean_xps_maps(dev, type: XPS_RXQS, offset, count);
2467
2468	clean_xps_maps(dev, type: XPS_CPUS, offset, count);
2469
2470	mutex_unlock(lock: &xps_map_mutex);
2471	cpus_read_unlock();
2472	}
2473
2474	static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
2475	{
2476	netif_reset_xps_queues(dev, offset: index, count: dev->num_tx_queues - index);
2477	}
2478
2479	static struct xps_map expand_xps_map(struct* xps_map map, int* attr_index,
2480	u16 index, bool is_rxqs_map)
2481	{
2482	struct xps_map *new_map;
2483	int alloc_len = XPS_MIN_MAP_ALLOC;
2484	int i, pos;
2485
2486	for (pos = `0`; map && pos < map->len; pos++) {
2487	if (map->queues[pos] != index)
2488	continue;
2489	return map;
2490	}
2491
2492	/ Need to add tx-queue to this CPU's/rx-queue's existing map /
2493	if (map) {
2494	if (pos < map->alloc_len)
2495	return map;
2496
2497	alloc_len = map->alloc_len * `2`;
2498	}
2499
2500	/ Need to allocate new map to store tx-queue on this CPU's/rx-queue's*
2501	* map
2502	*/
2503	if (is_rxqs_map)
2504	new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
2505	else
2506	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2507	cpu_to_node(cpu: attr_index));
2508	if (!new_map)
2509	return NULL;
2510
2511	for (i = `0`; i < pos; i++)
2512	new_map->queues[i] = map->queues[i];
2513	new_map->alloc_len = alloc_len;
2514	new_map->len = pos;
2515
2516	return new_map;
2517	}
2518
2519	/ Copy xps maps at a given index /
2520	static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
2521	struct xps_dev_maps new_dev_maps, int* index,
2522	int tc, bool skip_tc)
2523	{
2524	int i, tci = index * dev_maps->num_tc;
2525	struct xps_map *map;
2526
2527	/ copy maps belonging to foreign traffic classes /
2528	for (i = `0`; i < dev_maps->num_tc; i++, tci++) {
2529	if (i == tc && skip_tc)
2530	continue;
2531
2532	/ fill in the new device map from the old device map /
2533	map = xmap_dereference(dev_maps->attr_map[tci]);
2534	RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2535	}
2536	}
2537
2538	/ Must be called under cpus_read_lock /
2539	int __netif_set_xps_queue(struct net_device dev, const* unsigned long *mask,
2540	u16 index, enum xps_map_type type)
2541	{
2542	struct xps_dev_maps dev_maps, new_dev_maps = NULL, *old_dev_maps = NULL;
2543	const unsigned long *online_mask = NULL;
2544	bool active = false, copy = false;
2545	int i, j, tci, numa_node_id = -`2`;
2546	int maps_sz, num_tc = `1`, tc = `0`;
2547	struct xps_map map, new_map;
2548	unsigned int nr_ids;
2549
2550	WARN_ON_ONCE(index >= dev->num_tx_queues);
2551
2552	if (dev->num_tc) {
2553	/ Do not allow XPS on subordinate device directly /
2554	num_tc = dev->num_tc;
2555	if (num_tc < `0`)
2556	return -EINVAL;
2557
2558	/ If queue belongs to subordinate dev use its map /
2559	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
2560
2561	tc = netdev_txq_to_tc(dev, index);
2562	if (tc < `0`)
2563	return -EINVAL;
2564	}
2565
2566	mutex_lock(&xps_map_mutex);
2567
2568	dev_maps = xmap_dereference(dev->xps_maps[type]);
2569	if (type == XPS_RXQS) {
2570	maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
2571	nr_ids = dev->num_rx_queues;
2572	} else {
2573	maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
2574	if (num_possible_cpus() > `1`)
2575	online_mask = cpumask_bits(cpu_online_mask);
2576	nr_ids = nr_cpu_ids;
2577	}
2578
2579	if (maps_sz < L1_CACHE_BYTES)
2580	maps_sz = L1_CACHE_BYTES;
2581
2582	/ The old dev_maps could be larger or smaller than the one we're*
2583	* setting up now, as dev->num_tc or nr_ids could have been updated in
2584	* between. We could try to be smart, but let's be safe instead and only
2585	* copy foreign traffic classes if the two map sizes match.
2586	*/
2587	if (dev_maps &&
2588	dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
2589	copy = true;
2590
2591	/ allocate memory for queue storage /
2592	for (j = -`1`; j = netif_attrmask_next_and(n: j, src1p: online_mask, src2p: mask, nr_bits: nr_ids),
2593	j < nr_ids;) {
2594	if (!new_dev_maps) {
2595	new_dev_maps = kzalloc(size: maps_sz, GFP_KERNEL);
2596	if (!new_dev_maps) {
2597	mutex_unlock(lock: &xps_map_mutex);
2598	return -ENOMEM;
2599	}
2600
2601	new_dev_maps->nr_ids = nr_ids;
2602	new_dev_maps->num_tc = num_tc;
2603	}
2604
2605	tci = j * num_tc + tc;
2606	map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
2607
2608	map = expand_xps_map(map, attr_index: j, index, is_rxqs_map: type == XPS_RXQS);
2609	if (!map)
2610	goto error;
2611
2612	RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2613	}
2614
2615	if (!new_dev_maps)
2616	goto out_no_new_maps;
2617
2618	if (!dev_maps) {
2619	/ Increment static keys at most once per type /
2620	static_key_slow_inc_cpuslocked(key: &xps_needed);
2621	if (type == XPS_RXQS)
2622	static_key_slow_inc_cpuslocked(key: &xps_rxqs_needed);
2623	}
2624
2625	for (j = `0`; j < nr_ids; j++) {
2626	bool skip_tc = false;
2627
2628	tci = j * num_tc + tc;
2629	if (netif_attr_test_mask(j, mask, nr_bits: nr_ids) &&
2630	netif_attr_test_online(j, online_mask, nr_bits: nr_ids)) {
2631	/ add tx-queue to CPU/rx-queue maps /
2632	int pos = `0`;
2633
2634	skip_tc = true;
2635
2636	map = xmap_dereference(new_dev_maps->attr_map[tci]);
2637	while ((pos < map->len) && (map->queues[pos] != index))
2638	pos++;
2639
2640	if (pos == map->len)
2641	map->queues[map->len++] = index;
2642	#ifdef CONFIG_NUMA
2643	if (type == XPS_CPUS) {
2644	if (numa_node_id == -`2`)
2645	numa_node_id = cpu_to_node(cpu: j);
2646	else if (numa_node_id != cpu_to_node(cpu: j))
2647	numa_node_id = -`1`;
2648	}
2649	#endif
2650	}
2651
2652	if (copy)
2653	xps_copy_dev_maps(dev_maps, new_dev_maps, index: j, tc,
2654	skip_tc);
2655	}
2656
2657	rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
2658
2659	/ Cleanup old maps /
2660	if (!dev_maps)
2661	goto out_no_old_maps;
2662
2663	for (j = `0`; j < dev_maps->nr_ids; j++) {
2664	for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
2665	map = xmap_dereference(dev_maps->attr_map[tci]);
2666	if (!map)
2667	continue;
2668
2669	if (copy) {
2670	new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2671	if (map == new_map)
2672	continue;
2673	}
2674
2675	RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2676	kfree_rcu(map, rcu);
2677	}
2678	}
2679
2680	old_dev_maps = dev_maps;
2681
2682	out_no_old_maps:
2683	dev_maps = new_dev_maps;
2684	active = true;
2685
2686	out_no_new_maps:
2687	if (type == XPS_CPUS)
2688	/ update Tx queue numa node /
2689	netdev_queue_numa_node_write(q: netdev_get_tx_queue(dev, index),
2690	node: (numa_node_id >= `0`) ?
2691	numa_node_id : NUMA_NO_NODE);
2692
2693	if (!dev_maps)
2694	goto out_no_maps;
2695
2696	/ removes tx-queue from unused CPUs/rx-queues /
2697	for (j = `0`; j < dev_maps->nr_ids; j++) {
2698	tci = j * dev_maps->num_tc;
2699
2700	for (i = `0`; i < dev_maps->num_tc; i++, tci++) {
2701	if (i == tc &&
2702	netif_attr_test_mask(j, mask, nr_bits: dev_maps->nr_ids) &&
2703	netif_attr_test_online(j, online_mask, nr_bits: dev_maps->nr_ids))
2704	continue;
2705
2706	active \|= remove_xps_queue(dev_maps,
2707	old_maps: copy ? old_dev_maps : NULL,
2708	tci, index);
2709	}
2710	}
2711
2712	if (old_dev_maps)
2713	kfree_rcu(old_dev_maps, rcu);
2714
2715	/ free map if not active /
2716	if (!active)
2717	reset_xps_maps(dev, dev_maps, type);
2718
2719	out_no_maps:
2720	mutex_unlock(lock: &xps_map_mutex);
2721
2722	return `0`;
2723	error:
2724	/ remove any maps that we added /
2725	for (j = `0`; j < nr_ids; j++) {
2726	for (i = num_tc, tci = j * num_tc; i--; tci++) {
2727	new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2728	map = copy ?
2729	xmap_dereference(dev_maps->attr_map[tci]) :
2730	NULL;
2731	if (new_map && new_map != map)
2732	kfree(objp: new_map);
2733	}
2734	}
2735
2736	mutex_unlock(lock: &xps_map_mutex);
2737
2738	kfree(objp: new_dev_maps);
2739	return -ENOMEM;
2740	}
2741	EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
2742
2743	int netif_set_xps_queue(struct net_device dev, const* struct cpumask *mask,
2744	u16 index)
2745	{
2746	int ret;
2747
2748	cpus_read_lock();
2749	ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
2750	cpus_read_unlock();
2751
2752	return ret;
2753	}
2754	EXPORT_SYMBOL(netif_set_xps_queue);
2755
2756	#endif
2757	static void netdev_unbind_all_sb_channels(struct net_device *dev)
2758	{
2759	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2760
2761	/ Unbind any subordinate channels /
2762	while (txq-- != &dev->_tx[`0`]) {
2763	if (txq->sb_dev)
2764	netdev_unbind_sb_channel(dev, sb_dev: txq->sb_dev);
2765	}
2766	}
2767
2768	void netdev_reset_tc(struct net_device *dev)
2769	{
2770	#ifdef CONFIG_XPS
2771	netif_reset_xps_queues_gt(dev, index: `0`);
2772	#endif
2773	netdev_unbind_all_sb_channels(dev);
2774
2775	/ Reset TC configuration of device /
2776	dev->num_tc = `0`;
2777	memset(dev->tc_to_txq, `0`, sizeof(dev->tc_to_txq));
2778	memset(dev->prio_tc_map, `0`, sizeof(dev->prio_tc_map));
2779	}
2780	EXPORT_SYMBOL(netdev_reset_tc);
2781
2782	int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
2783	{
2784	if (tc >= dev->num_tc)
2785	return -EINVAL;
2786
2787	#ifdef CONFIG_XPS
2788	netif_reset_xps_queues(dev, offset, count);
2789	#endif
2790	dev->tc_to_txq[tc].count = count;
2791	dev->tc_to_txq[tc].offset = offset;
2792	return `0`;
2793	}
2794	EXPORT_SYMBOL(netdev_set_tc_queue);
2795
2796	int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2797	{
2798	if (num_tc > TC_MAX_QUEUE)
2799	return -EINVAL;
2800
2801	#ifdef CONFIG_XPS
2802	netif_reset_xps_queues_gt(dev, index: `0`);
2803	#endif
2804	netdev_unbind_all_sb_channels(dev);
2805
2806	dev->num_tc = num_tc;
2807	return `0`;
2808	}
2809	EXPORT_SYMBOL(netdev_set_num_tc);
2810
2811	void netdev_unbind_sb_channel(struct net_device *dev,
2812	struct net_device *sb_dev)
2813	{
2814	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2815
2816	#ifdef CONFIG_XPS
2817	netif_reset_xps_queues_gt(dev: sb_dev, index: `0`);
2818	#endif
2819	memset(sb_dev->tc_to_txq, `0`, sizeof(sb_dev->tc_to_txq));
2820	memset(sb_dev->prio_tc_map, `0`, sizeof(sb_dev->prio_tc_map));
2821
2822	while (txq-- != &dev->_tx[`0`]) {
2823	if (txq->sb_dev == sb_dev)
2824	txq->sb_dev = NULL;
2825	}
2826	}
2827	EXPORT_SYMBOL(netdev_unbind_sb_channel);
2828
2829	int netdev_bind_sb_channel_queue(struct net_device *dev,
2830	struct net_device *sb_dev,
2831	u8 tc, u16 count, u16 offset)
2832	{
2833	/ Make certain the sb_dev and dev are already configured /
2834	if (sb_dev->num_tc >= `0` \|\| tc >= dev->num_tc)
2835	return -EINVAL;
2836
2837	/ We cannot hand out queues we don't have /
2838	if ((offset + count) > dev->real_num_tx_queues)
2839	return -EINVAL;
2840
2841	/ Record the mapping /
2842	sb_dev->tc_to_txq[tc].count = count;
2843	sb_dev->tc_to_txq[tc].offset = offset;
2844
2845	/ Provide a way for Tx queue to find the tc_to_txq map or*
2846	* XPS map for itself.
2847	*/
2848	while (count--)
2849	netdev_get_tx_queue(dev, index: count + offset)->sb_dev = sb_dev;
2850
2851	return `0`;
2852	}
2853	EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
2854
2855	int netdev_set_sb_channel(struct net_device *dev, u16 channel)
2856	{
2857	/ Do not use a multiqueue device to represent a subordinate channel /
2858	if (netif_is_multiqueue(dev))
2859	return -ENODEV;
2860
2861	/ We allow channels 1 - 32767 to be used for subordinate channels.*
2862	* Channel 0 is meant to be "native" mode and used only to represent
2863	* the main root device. We allow writing 0 to reset the device back
2864	* to normal mode after being used as a subordinate channel.
2865	*/
2866	if (channel > S16_MAX)
2867	return -EINVAL;
2868
2869	dev->num_tc = -channel;
2870
2871	return `0`;
2872	}
2873	EXPORT_SYMBOL(netdev_set_sb_channel);
2874
2875	/*
2876	* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2877	* greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
2878	*/
2879	int netif_set_real_num_tx_queues(struct net_device dev, unsigned* int txq)
2880	{
2881	bool disabling;
2882	int rc;
2883
2884	disabling = txq < dev->real_num_tx_queues;
2885
2886	if (txq < `1` \|\| txq > dev->num_tx_queues)
2887	return -EINVAL;
2888
2889	if (dev->reg_state == NETREG_REGISTERED \|\|
2890	dev->reg_state == NETREG_UNREGISTERING) {
2891	ASSERT_RTNL();
2892
2893	rc = netdev_queue_update_kobjects(net: dev, old_num: dev->real_num_tx_queues,
2894	new_num: txq);
2895	if (rc)
2896	return rc;
2897
2898	if (dev->num_tc)
2899	netif_setup_tc(dev, txq);
2900
2901	dev_qdisc_change_real_num_tx(dev, new_real_tx: txq);
2902
2903	dev->real_num_tx_queues = txq;
2904
2905	if (disabling) {
2906	synchronize_net();
2907	qdisc_reset_all_tx_gt(dev, i: txq);
2908	#ifdef CONFIG_XPS
2909	netif_reset_xps_queues_gt(dev, index: txq);
2910	#endif
2911	}
2912	} else {
2913	dev->real_num_tx_queues = txq;
2914	}
2915
2916	return `0`;
2917	}
2918	EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2919
#ifdef CONFIG_SYSFS
/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
2952
2953	/**
2954	* netif_set_real_num_queues - set actual number of RX and TX queues used
2955	* @dev: Network device
2956	* @txq: Actual number of TX queues
2957	* @rxq: Actual number of RX queues
2958	*
2959	* Set the real number of both TX and RX queues.
2960	* Does nothing if the number of queues is already correct.
2961	*/
2962	int netif_set_real_num_queues(struct net_device *dev,
2963	unsigned int txq, unsigned int rxq)
2964	{
2965	unsigned int old_rxq = dev->real_num_rx_queues;
2966	int err;
2967
2968	if (txq < `1` \|\| txq > dev->num_tx_queues \|\|
2969	rxq < `1` \|\| rxq > dev->num_rx_queues)
2970	return -EINVAL;
2971
2972	/ Start from increases, so the error path only does decreases -*
2973	* decreases can't fail.
2974	*/
2975	if (rxq > dev->real_num_rx_queues) {
2976	err = netif_set_real_num_rx_queues(dev, rxq);
2977	if (err)
2978	return err;
2979	}
2980	if (txq > dev->real_num_tx_queues) {
2981	err = netif_set_real_num_tx_queues(dev, txq);
2982	if (err)
2983	goto undo_rx;
2984	}
2985	if (rxq < dev->real_num_rx_queues)
2986	WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
2987	if (txq < dev->real_num_tx_queues)
2988	WARN_ON(netif_set_real_num_tx_queues(dev, txq));
2989
2990	return `0`;
2991	undo_rx:
2992	WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
2993	return err;
2994	}
2995	EXPORT_SYMBOL(netif_set_real_num_queues);
2996
2997	/**
2998	* netif_set_tso_max_size() - set the max size of TSO frames supported
2999	* @dev: netdev to update
3000	* @size: max skb->len of a TSO frame
3001	*
3002	* Set the limit on the size of TSO super-frames the device can handle.
3003	* Unless explicitly set the stack will assume the value of
3004	* %GSO_LEGACY_MAX_SIZE.
3005	*/
3006	void netif_set_tso_max_size(struct net_device dev, unsigned* int size)
3007	{
3008	dev->tso_max_size = min(GSO_MAX_SIZE, size);
3009	if (size < READ_ONCE(dev->gso_max_size))
3010	netif_set_gso_max_size(dev, size);
3011	if (size < READ_ONCE(dev->gso_ipv4_max_size))
3012	netif_set_gso_ipv4_max_size(dev, size);
3013	}
3014	EXPORT_SYMBOL(netif_set_tso_max_size);
3015
3016	/**
3017	* netif_set_tso_max_segs() - set the max number of segs supported for TSO
3018	* @dev: netdev to update
3019	* @segs: max number of TCP segments
3020	*
3021	* Set the limit on the number of TCP segments the device can generate from
3022	* a single TSO super-frame.
3023	* Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS.
3024	*/
3025	void netif_set_tso_max_segs(struct net_device dev, unsigned* int segs)
3026	{
3027	dev->tso_max_segs = segs;
3028	if (segs < READ_ONCE(dev->gso_max_segs))
3029	netif_set_gso_max_segs(dev, segs);
3030	}
3031	EXPORT_SYMBOL(netif_set_tso_max_segs);
3032
3033	/**
3034	* netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper
3035	* @to: netdev to update
3036	* @from: netdev from which to copy the limits
3037	*/
3038	void netif_inherit_tso_max(struct net_device to, const* struct net_device *from)
3039	{
3040	netif_set_tso_max_size(to, from->tso_max_size);
3041	netif_set_tso_max_segs(to, from->tso_max_segs);
3042	}
3043	EXPORT_SYMBOL(netif_inherit_tso_max);
3044
3045	/**
3046	* netif_get_num_default_rss_queues - default number of RSS queues
3047	*
3048	* Default value is the number of physical cores if there are only 1 or 2, or
3049	* divided by 2 if there are more.
3050	*/
3051	int netif_get_num_default_rss_queues(void)
3052	{
3053	cpumask_var_t cpus;
3054	int cpu, count = `0`;
3055
3056	if (unlikely(is_kdump_kernel() \|\| !zalloc_cpumask_var(&cpus, GFP_KERNEL)))
3057	return `1`;
3058
3059	cpumask_copy(dstp: cpus, cpu_online_mask);
3060	for_each_cpu(cpu, cpus) {
3061	++count;
3062	cpumask_andnot(dstp: cpus, src1p: cpus, topology_sibling_cpumask(cpu));
3063	}
3064	free_cpumask_var(mask: cpus);
3065
3066	return count > `2` ? DIV_ROUND_UP(count, `2`) : count;
3067	}
3068	EXPORT_SYMBOL(netif_get_num_default_rss_queues);
3069
3070	static void __netif_reschedule(struct Qdisc *q)
3071	{
3072	struct softnet_data *sd;
3073	unsigned long flags;
3074
3075	local_irq_save(flags);
3076	sd = this_cpu_ptr(&softnet_data);
3077	q->next_sched = NULL;
3078	*sd->output_queue_tailp = q;
3079	sd->output_queue_tailp = &q->next_sched;
3080	raise_softirq_irqoff(nr: NET_TX_SOFTIRQ);
3081	local_irq_restore(flags);
3082	}
3083
3084	void __netif_schedule(struct Qdisc *q)
3085	{
3086	if (!test_and_set_bit(nr: __QDISC_STATE_SCHED, addr: &q->state))
3087	__netif_reschedule(q);
3088	}
3089	EXPORT_SYMBOL(__netif_schedule);
3090
3091	struct dev_kfree_skb_cb {
3092	enum skb_drop_reason reason;
3093	};
3094
3095	static struct dev_kfree_skb_cb get_kfree_skb_cb(const* struct sk_buff *skb)
3096	{
3097	return (struct dev_kfree_skb_cb *)skb->cb;
3098	}
3099
3100	void netif_schedule_queue(struct netdev_queue *txq)
3101	{
3102	rcu_read_lock();
3103	if (!netif_xmit_stopped(dev_queue: txq)) {
3104	struct Qdisc *q = rcu_dereference(txq->qdisc);
3105
3106	__netif_schedule(q);
3107	}
3108	rcu_read_unlock();
3109	}
3110	EXPORT_SYMBOL(netif_schedule_queue);
3111
3112	void netif_tx_wake_queue(struct netdev_queue *dev_queue)
3113	{
3114	if (test_and_clear_bit(nr: __QUEUE_STATE_DRV_XOFF, addr: &dev_queue->state)) {
3115	struct Qdisc *q;
3116
3117	rcu_read_lock();
3118	q = rcu_dereference(dev_queue->qdisc);
3119	__netif_schedule(q);
3120	rcu_read_unlock();
3121	}
3122	}
3123	EXPORT_SYMBOL(netif_tx_wake_queue);
3124
3125	void dev_kfree_skb_irq_reason(struct sk_buff skb, enum* skb_drop_reason reason)
3126	{
3127	unsigned long flags;
3128
3129	if (unlikely(!skb))
3130	return;
3131
3132	if (likely(refcount_read(&skb->users) == `1`)) {
3133	smp_rmb();
3134	refcount_set(r: &skb->users, n: `0`);
3135	} else if (likely(!refcount_dec_and_test(&skb->users))) {
3136	return;
3137	}
3138	get_kfree_skb_cb(skb)->reason = reason;
3139	local_irq_save(flags);
3140	skb->next = __this_cpu_read(softnet_data.completion_queue);
3141	__this_cpu_write(softnet_data.completion_queue, skb);
3142	raise_softirq_irqoff(nr: NET_TX_SOFTIRQ);
3143	local_irq_restore(flags);
3144	}
3145	EXPORT_SYMBOL(dev_kfree_skb_irq_reason);
3146
3147	void dev_kfree_skb_any_reason(struct sk_buff skb, enum* skb_drop_reason reason)
3148	{
3149	if (in_hardirq() \|\| irqs_disabled())
3150	dev_kfree_skb_irq_reason(skb, reason);
3151	else
3152	kfree_skb_reason(skb, reason);
3153	}
3154	EXPORT_SYMBOL(dev_kfree_skb_any_reason);
3155
3156
3157	/**
3158	* netif_device_detach - mark device as removed
3159	* @dev: network device
3160	*
3161	* Mark device as removed from system and therefore no longer available.
3162	*/
3163	void netif_device_detach(struct net_device *dev)
3164	{
3165	if (test_and_clear_bit(nr: __LINK_STATE_PRESENT, addr: &dev->state) &&
3166	netif_running(dev)) {
3167	netif_tx_stop_all_queues(dev);
3168	}
3169	}
3170	EXPORT_SYMBOL(netif_device_detach);
3171
3172	/**
3173	* netif_device_attach - mark device as attached
3174	* @dev: network device
3175	*
3176	* Mark device as attached from system and restart if needed.
3177	*/
3178	void netif_device_attach(struct net_device *dev)
3179	{
3180	if (!test_and_set_bit(nr: __LINK_STATE_PRESENT, addr: &dev->state) &&
3181	netif_running(dev)) {
3182	netif_tx_wake_all_queues(dev);
3183	__netdev_watchdog_up(dev);
3184	}
3185	}
3186	EXPORT_SYMBOL(netif_device_attach);
3187
3188	/*
3189	* Returns a Tx hash based on the given packet descriptor a Tx queues' number
3190	* to be used as a distribution range.
3191	*/
3192	static u16 skb_tx_hash(const struct net_device *dev,
3193	const struct net_device *sb_dev,
3194	struct sk_buff *skb)
3195	{
3196	u32 hash;
3197	u16 qoffset = `0`;
3198	u16 qcount = dev->real_num_tx_queues;
3199
3200	if (dev->num_tc) {
3201	u8 tc = netdev_get_prio_tc_map(dev, prio: skb->priority);
3202
3203	qoffset = sb_dev->tc_to_txq[tc].offset;
3204	qcount = sb_dev->tc_to_txq[tc].count;
3205	if (unlikely(!qcount)) {
3206	net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
3207	sb_dev->name, qoffset, tc);
3208	qoffset = `0`;
3209	qcount = dev->real_num_tx_queues;
3210	}
3211	}
3212
3213	if (skb_rx_queue_recorded(skb)) {
3214	DEBUG_NET_WARN_ON_ONCE(qcount == `0`);
3215	hash = skb_get_rx_queue(skb);
3216	if (hash >= qoffset)
3217	hash -= qoffset;
3218	while (unlikely(hash >= qcount))
3219	hash -= qcount;
3220	return hash + qoffset;
3221	}
3222
3223	return (u16) reciprocal_scale(val: skb_get_hash(skb), ep_ro: qcount) + qoffset;
3224	}
3225
3226	void skb_warn_bad_offload(const struct sk_buff *skb)
3227	{
3228	static const netdev_features_t null_features;
3229	struct net_device *dev = skb->dev;
3230	const char *name = "";
3231
3232	if (!net_ratelimit())
3233	return;
3234
3235	if (dev) {
3236	if (dev->dev.parent)
3237	name = dev_driver_string(dev: dev->dev.parent);
3238	else
3239	name = netdev_name(dev);
3240	}
3241	skb_dump(KERN_WARNING, skb, full_pkt: false);
3242	WARN(`1`, "%s: caps=(%pNF, %pNF)\n",
3243	name, dev ? &dev->features : &null_features,
3244	skb->sk ? &skb->sk->sk_route_caps : &null_features);
3245	}
3246
3247	/*
3248	* Invalidate hardware checksum when packet is to be mangled, and
3249	* complete checksum manually on outgoing path.
3250	*/
3251	int skb_checksum_help(struct sk_buff *skb)
3252	{
3253	__wsum csum;
3254	int ret = `0`, offset;
3255
3256	if (skb->ip_summed == CHECKSUM_COMPLETE)
3257	goto out_set_summed;
3258
3259	if (unlikely(skb_is_gso(skb))) {
3260	skb_warn_bad_offload(skb);
3261	return -EINVAL;
3262	}
3263
3264	/ Before computing a checksum, we should make sure no frag could*
3265	* be modified by an external entity : checksum could be wrong.
3266	*/
3267	if (skb_has_shared_frag(skb)) {
3268	ret = __skb_linearize(skb);
3269	if (ret)
3270	goto out;
3271	}
3272
3273	offset = skb_checksum_start_offset(skb);
3274	ret = -EINVAL;
3275	if (unlikely(offset >= skb_headlen(skb))) {
3276	DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
3277	WARN_ONCE(true, "offset (%d) >= skb_headlen() (%u)\n",
3278	offset, skb_headlen(skb));
3279	goto out;
3280	}
3281	csum = skb_checksum(skb, offset, len: skb->len - offset, csum: `0`);
3282
3283	offset += skb->csum_offset;
3284	if (unlikely(offset + sizeof(__sum16) > skb_headlen(skb))) {
3285	DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
3286	WARN_ONCE(true, "offset+2 (%zu) > skb_headlen() (%u)\n",
3287	offset + sizeof(__sum16), skb_headlen(skb));
3288	goto out;
3289	}
3290	ret = skb_ensure_writable(skb, write_len: offset + sizeof(__sum16));
3291	if (ret)
3292	goto out;
3293
3294	(__sum16 )(skb->data + offset) = csum_fold(sum: csum) ?: CSUM_MANGLED_0;
3295	out_set_summed:
3296	skb->ip_summed = CHECKSUM_NONE;
3297	out:
3298	return ret;
3299	}
3300	EXPORT_SYMBOL(skb_checksum_help);
3301
3302	int skb_crc32c_csum_help(struct sk_buff *skb)
3303	{
3304	__le32 crc32c_csum;
3305	int ret = `0`, offset, start;
3306
3307	if (skb->ip_summed != CHECKSUM_PARTIAL)
3308	goto out;
3309
3310	if (unlikely(skb_is_gso(skb)))
3311	goto out;
3312
3313	/ Before computing a checksum, we should make sure no frag could*
3314	* be modified by an external entity : checksum could be wrong.
3315	*/
3316	if (unlikely(skb_has_shared_frag(skb))) {
3317	ret = __skb_linearize(skb);
3318	if (ret)
3319	goto out;
3320	}
3321	start = skb_checksum_start_offset(skb);
3322	offset = start + offsetof(struct sctphdr, checksum);
3323	if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
3324	ret = -EINVAL;
3325	goto out;
3326	}
3327
3328	ret = skb_ensure_writable(skb, write_len: offset + sizeof(__le32));
3329	if (ret)
3330	goto out;
3331
3332	crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
3333	skb->len - start, ~(__u32)`0`,
3334	crc32c_csum_stub));
3335	(__le32 )(skb->data + offset) = crc32c_csum;
3336	skb_reset_csum_not_inet(skb);
3337	out:
3338	return ret;
3339	}
3340
3341	__be16 skb_network_protocol(struct sk_buff skb, int* *depth)
3342	{
3343	__be16 type = skb->protocol;
3344
3345	/ Tunnel gso handlers can set protocol to ethernet. /
3346	if (type == htons(ETH_P_TEB)) {
3347	struct ethhdr *eth;
3348
3349	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
3350	return `0`;
3351
3352	eth = (struct ethhdr *)skb->data;
3353	type = eth->h_proto;
3354	}
3355
3356	return vlan_get_protocol_and_depth(skb, type, depth);
3357	}
3358
3359
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
/* Log the failure, dump the whole packet and print a stack trace. */
static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
	netdev_err(dev, "hw csum failure\n");
	skb_dump(KERN_ERR, skb, true);
	dump_stack();
}

/* Report a hardware Rx checksum fault; DO_ONCE_LITE limits the report. */
void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
	DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb);
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
3375
/* XXX: check that highmem exists at all on the given machine. */
/*
 * Returns 1 when the device lacks NETIF_F_HIGHDMA and at least one page
 * fragment of @skb is in high memory, 0 otherwise.  Always 0 without
 * CONFIG_HIGHMEM.
 */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}
#endif
	return 0;
}
3393
3394	/ If MPLS offload request, verify we are testing hardware MPLS features*
3395	* instead of standard features for the netdev.
3396	*/
3397	#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
3398	static netdev_features_t net_mpls_features(struct sk_buff *skb,
3399	netdev_features_t features,
3400	__be16 type)
3401	{
3402	if (eth_p_mpls(eth_type: type))
3403	features &= skb->dev->mpls_features;
3404
3405	return features;
3406	}
3407	#else
3408	static netdev_features_t net_mpls_features(struct sk_buff *skb,
3409	netdev_features_t features,
3410	__be16 type)
3411	{
3412	return features;
3413	}
3414	#endif
3415
3416	static netdev_features_t harmonize_features(struct sk_buff *skb,
3417	netdev_features_t features)
3418	{
3419	__be16 type;
3420
3421	type = skb_network_protocol(skb, NULL);
3422	features = net_mpls_features(skb, features, type);
3423
3424	if (skb->ip_summed != CHECKSUM_NONE &&
3425	!can_checksum_protocol(features, protocol: type)) {
3426	features &= ~(NETIF_F_CSUM_MASK \| NETIF_F_GSO_MASK);
3427	}
3428	if (illegal_highdma(dev: skb->dev, skb))
3429	features &= ~NETIF_F_SG;
3430
3431	return features;
3432	}
3433
3434	netdev_features_t passthru_features_check(struct sk_buff *skb,
3435	struct net_device *dev,
3436	netdev_features_t features)
3437	{
3438	return features;
3439	}
3440	EXPORT_SYMBOL(passthru_features_check);
3441
3442	static netdev_features_t dflt_features_check(struct sk_buff *skb,
3443	struct net_device *dev,
3444	netdev_features_t features)
3445	{
3446	return vlan_features_check(skb, features);
3447	}
3448
3449	static netdev_features_t gso_features_check(const struct sk_buff *skb,
3450	struct net_device *dev,
3451	netdev_features_t features)
3452	{
3453	u16 gso_segs = skb_shinfo(skb)->gso_segs;
3454
3455	if (gso_segs > READ_ONCE(dev->gso_max_segs))
3456	return features & ~NETIF_F_GSO_MASK;
3457
3458	if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size)))
3459	return features & ~NETIF_F_GSO_MASK;
3460
3461	if (!skb_shinfo(skb)->gso_type) {
3462	skb_warn_bad_offload(skb);
3463	return features & ~NETIF_F_GSO_MASK;
3464	}
3465
3466	/ Support for GSO partial features requires software*
3467	* intervention before we can actually process the packets
3468	* so we need to strip support for any partial features now
3469	* and we can pull them back in after we have partially
3470	* segmented the frame.
3471	*/
3472	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
3473	features &= ~dev->gso_partial_features;
3474
3475	/ Make sure to clear the IPv4 ID mangling feature if the*
3476	* IPv4 header has the potential to be fragmented.
3477	*/
3478	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
3479	struct iphdr *iph = skb->encapsulation ?
3480	inner_ip_hdr(skb) : ip_hdr(skb);
3481
3482	if (!(iph->frag_off & htons(IP_DF)))
3483	features &= ~NETIF_F_TSO_MANGLEID;
3484	}
3485
3486	return features;
3487	}
3488
3489	netdev_features_t netif_skb_features(struct sk_buff *skb)
3490	{
3491	struct net_device *dev = skb->dev;
3492	netdev_features_t features = dev->features;
3493
3494	if (skb_is_gso(skb))
3495	features = gso_features_check(skb, dev, features);
3496
3497	/ If encapsulation offload request, verify we are testing*
3498	* hardware encapsulation features instead of standard
3499	* features for the netdev
3500	*/
3501	if (skb->encapsulation)
3502	features &= dev->hw_enc_features;
3503
3504	if (skb_vlan_tagged(skb))
3505	features = netdev_intersect_features(f1: features,
3506	f2: dev->vlan_features \|
3507	NETIF_F_HW_VLAN_CTAG_TX \|
3508	NETIF_F_HW_VLAN_STAG_TX);
3509
3510	if (dev->netdev_ops->ndo_features_check)
3511	features &= dev->netdev_ops->ndo_features_check(skb, dev,
3512	features);
3513	else
3514	features &= dflt_features_check(skb, dev, features);
3515
3516	return harmonize_features(skb, features);
3517	}
3518	EXPORT_SYMBOL(netif_skb_features);
3519
3520	static int xmit_one(struct sk_buff skb, struct* net_device *dev,
3521	struct netdev_queue *txq, bool more)
3522	{
3523	unsigned int len;
3524	int rc;
3525
3526	if (dev_nit_active(dev))
3527	dev_queue_xmit_nit(skb, dev);
3528
3529	len = skb->len;
3530	trace_net_dev_start_xmit(skb, dev);
3531	rc = netdev_start_xmit(skb, dev, txq, more);
3532	trace_net_dev_xmit(skb, rc, dev, skb_len: len);
3533
3534	return rc;
3535	}
3536
3537	struct sk_buff dev_hard_start_xmit(struct* sk_buff first, struct* net_device *dev,
3538	struct netdev_queue txq, int* *ret)
3539	{
3540	struct sk_buff *skb = first;
3541	int rc = NETDEV_TX_OK;
3542
3543	while (skb) {
3544	struct sk_buff *next = skb->next;
3545
3546	skb_mark_not_on_list(skb);
3547	rc = xmit_one(skb, dev, txq, more: next != NULL);
3548	if (unlikely(!dev_xmit_complete(rc))) {
3549	skb->next = next;
3550	goto out;
3551	}
3552
3553	skb = next;
3554	if (netif_tx_queue_stopped(dev_queue: txq) && skb) {
3555	rc = NETDEV_TX_BUSY;
3556	break;
3557	}
3558	}
3559
3560	out:
3561	*ret = rc;
3562	return skb;
3563	}
3564
3565	static struct sk_buff validate_xmit_vlan(struct* sk_buff *skb,
3566	netdev_features_t features)
3567	{
3568	if (skb_vlan_tag_present(skb) &&
3569	!vlan_hw_offload_capable(features, proto: skb->vlan_proto))
3570	skb = __vlan_hwaccel_push_inside(skb);
3571	return skb;
3572	}
3573
3574	int skb_csum_hwoffload_help(struct sk_buff *skb,
3575	const netdev_features_t features)
3576	{
3577	if (unlikely(skb_csum_is_sctp(skb)))
3578	return !!(features & NETIF_F_SCTP_CRC) ? `0` :
3579	skb_crc32c_csum_help(skb);
3580
3581	if (features & NETIF_F_HW_CSUM)
3582	return `0`;
3583
3584	if (features & (NETIF_F_IP_CSUM \| NETIF_F_IPV6_CSUM)) {
3585	switch (skb->csum_offset) {
3586	case offsetof(struct tcphdr, check):
3587	case offsetof(struct udphdr, check):
3588	return `0`;
3589	}
3590	}
3591
3592	return skb_checksum_help(skb);
3593	}
3594	EXPORT_SYMBOL(skb_csum_hwoffload_help);
3595
3596	static struct sk_buff validate_xmit_skb(struct* sk_buff skb, struct* net_device dev, bool again)
3597	{
3598	netdev_features_t features;
3599
3600	features = netif_skb_features(skb);
3601	skb = validate_xmit_vlan(skb, features);
3602	if (unlikely(!skb))
3603	goto out_null;
3604
3605	skb = sk_validate_xmit_skb(skb, dev);
3606	if (unlikely(!skb))
3607	goto out_null;
3608
3609	if (netif_needs_gso(skb, features)) {
3610	struct sk_buff *segs;
3611
3612	segs = skb_gso_segment(skb, features);
3613	if (IS_ERR(ptr: segs)) {
3614	goto out_kfree_skb;
3615	} else if (segs) {
3616	consume_skb(skb);
3617	skb = segs;
3618	}
3619	} else {
3620	if (skb_needs_linearize(skb, features) &&
3621	__skb_linearize(skb))
3622	goto out_kfree_skb;
3623
3624	/ If packet is not checksummed and device does not*
3625	* support checksumming for this protocol, complete
3626	* checksumming here.
3627	*/
3628	if (skb->ip_summed == CHECKSUM_PARTIAL) {
3629	if (skb->encapsulation)
3630	skb_set_inner_transport_header(skb,
3631	offset: skb_checksum_start_offset(skb));
3632	else
3633	skb_set_transport_header(skb,
3634	offset: skb_checksum_start_offset(skb));
3635	if (skb_csum_hwoffload_help(skb, features))
3636	goto out_kfree_skb;
3637	}
3638	}
3639
3640	skb = validate_xmit_xfrm(skb, features, again);
3641
3642	return skb;
3643
3644	out_kfree_skb:
3645	kfree_skb(skb);
3646	out_null:
3647	dev_core_stats_tx_dropped_inc(dev);
3648	return NULL;
3649	}
3650
3651	struct sk_buff validate_xmit_skb_list(struct* sk_buff skb, struct* net_device dev, bool again)
3652	{
3653	struct sk_buff next, head = NULL, *tail;
3654
3655	for (; skb != NULL; skb = next) {
3656	next = skb->next;
3657	skb_mark_not_on_list(skb);
3658
3659	/ in case skb wont be segmented, point to itself /
3660	skb->prev = skb;
3661
3662	skb = validate_xmit_skb(skb, dev, again);
3663	if (!skb)
3664	continue;
3665
3666	if (!head)
3667	head = skb;
3668	else
3669	tail->next = skb;
3670	/ If skb was segmented, skb->prev points to*
3671	* the last segment. If not, it still contains skb.
3672	*/
3673	tail = skb->prev;
3674	}
3675	return head;
3676	}
3677	EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
3678
3679	static void qdisc_pkt_len_init(struct sk_buff *skb)
3680	{
3681	const struct skb_shared_info *shinfo = skb_shinfo(skb);
3682
3683	qdisc_skb_cb(skb)->pkt_len = skb->len;
3684
3685	/ To get more precise estimation of bytes sent on wire,*
3686	* we add to pkt_len the headers size of all segments
3687	*/
3688	if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
3689	u16 gso_segs = shinfo->gso_segs;
3690	unsigned int hdr_len;
3691
3692	/ mac layer + network layer /
3693	hdr_len = skb_transport_offset(skb);
3694
3695	/ + transport layer /
3696	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 \| SKB_GSO_TCPV6))) {
3697	const struct tcphdr *th;
3698	struct tcphdr _tcphdr;
3699
3700	th = skb_header_pointer(skb, offset: hdr_len,
3701	len: sizeof(_tcphdr), buffer: &_tcphdr);
3702	if (likely(th))
3703	hdr_len += __tcp_hdrlen(th);
3704	} else {
3705	struct udphdr _udphdr;
3706
3707	if (skb_header_pointer(skb, offset: hdr_len,
3708	len: sizeof(_udphdr), buffer: &_udphdr))
3709	hdr_len += sizeof(struct udphdr);
3710	}
3711
3712	if (shinfo->gso_type & SKB_GSO_DODGY)
3713	gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3714	shinfo->gso_size);
3715
3716	qdisc_skb_cb(skb)->pkt_len += (gso_segs - `1`) * hdr_len;
3717	}
3718	}
3719
3720	static int dev_qdisc_enqueue(struct sk_buff skb, struct* Qdisc *q,
3721	struct sk_buff **to_free,
3722	struct netdev_queue *txq)
3723	{
3724	int rc;
3725
3726	rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
3727	if (rc == NET_XMIT_SUCCESS)
3728	trace_qdisc_enqueue(qdisc: q, txq, skb);
3729	return rc;
3730	}
3731
3732	static inline int __dev_xmit_skb(struct sk_buff skb, struct* Qdisc *q,
3733	struct net_device *dev,
3734	struct netdev_queue *txq)
3735	{
3736	spinlock_t *root_lock = qdisc_lock(qdisc: q);
3737	struct sk_buff *to_free = NULL;
3738	bool contended;
3739	int rc;
3740
3741	qdisc_calculate_pkt_len(skb, sch: q);
3742
3743	tcf_set_drop_reason(skb, reason: SKB_DROP_REASON_QDISC_DROP);
3744
3745	if (q->flags & TCQ_F_NOLOCK) {
3746	if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(qdisc: q) &&
3747	qdisc_run_begin(qdisc: q)) {
3748	/ Retest nolock_qdisc_is_empty() within the protection*
3749	* of q->seqlock to protect from racing with requeuing.
3750	*/
3751	if (unlikely(!nolock_qdisc_is_empty(q))) {
3752	rc = dev_qdisc_enqueue(skb, q, to_free: &to_free, txq);
3753	__qdisc_run(q);
3754	qdisc_run_end(qdisc: q);
3755
3756	goto no_lock_out;
3757	}
3758
3759	qdisc_bstats_cpu_update(sch: q, skb);
3760	if (sch_direct_xmit(skb, q, dev, txq, NULL, validate: true) &&
3761	!nolock_qdisc_is_empty(qdisc: q))
3762	__qdisc_run(q);
3763
3764	qdisc_run_end(qdisc: q);
3765	return NET_XMIT_SUCCESS;
3766	}
3767
3768	rc = dev_qdisc_enqueue(skb, q, to_free: &to_free, txq);
3769	qdisc_run(q);
3770
3771	no_lock_out:
3772	if (unlikely(to_free))
3773	kfree_skb_list_reason(segs: to_free,
3774	reason: tcf_get_drop_reason(skb: to_free));
3775	return rc;
3776	}
3777
3778	if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
3779	kfree_skb_reason(skb, reason: SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
3780	return NET_XMIT_DROP;
3781	}
3782	/*
3783	* Heuristic to force contended enqueues to serialize on a
3784	* separate lock before trying to get qdisc main lock.
3785	* This permits qdisc->running owner to get the lock more
3786	* often and dequeue packets faster.
3787	* On PREEMPT_RT it is possible to preempt the qdisc owner during xmit
3788	* and then other tasks will only enqueue packets. The packets will be
3789	* sent after the qdisc owner is scheduled again. To prevent this
3790	* scenario the task always serialize on the lock.
3791	*/
3792	contended = qdisc_is_running(qdisc: q) \|\| IS_ENABLED(CONFIG_PREEMPT_RT);
3793	if (unlikely(contended))
3794	spin_lock(lock: &q->busylock);
3795
3796	spin_lock(lock: root_lock);
3797	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3798	__qdisc_drop(skb, to_free: &to_free);
3799	rc = NET_XMIT_DROP;
3800	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3801	qdisc_run_begin(qdisc: q)) {
3802	/*
3803	* This is a work-conserving queue; there are no old skbs
3804	* waiting to be sent out; and the qdisc is not running -
3805	* xmit the skb directly.
3806	*/
3807
3808	qdisc_bstats_update(sch: q, skb);
3809
3810	if (sch_direct_xmit(skb, q, dev, txq, root_lock, validate: true)) {
3811	if (unlikely(contended)) {
3812	spin_unlock(lock: &q->busylock);
3813	contended = false;
3814	}
3815	__qdisc_run(q);
3816	}
3817
3818	qdisc_run_end(qdisc: q);
3819	rc = NET_XMIT_SUCCESS;
3820	} else {
3821	WRITE_ONCE(q->owner, smp_processor_id());
3822	rc = dev_qdisc_enqueue(skb, q, to_free: &to_free, txq);
3823	WRITE_ONCE(q->owner, -`1`);
3824	if (qdisc_run_begin(qdisc: q)) {
3825	if (unlikely(contended)) {
3826	spin_unlock(lock: &q->busylock);
3827	contended = false;
3828	}
3829	__qdisc_run(q);
3830	qdisc_run_end(qdisc: q);
3831	}
3832	}
3833	spin_unlock(lock: root_lock);
3834	if (unlikely(to_free))
3835	kfree_skb_list_reason(segs: to_free,
3836	reason: tcf_get_drop_reason(skb: to_free));
3837	if (unlikely(contended))
3838	spin_unlock(lock: &q->busylock);
3839	return rc;
3840	}
3841
3842	#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3843	static void skb_update_prio(struct sk_buff *skb)
3844	{
3845	const struct netprio_map *map;
3846	const struct sock *sk;
3847	unsigned int prioidx;
3848
3849	if (skb->priority)
3850	return;
3851	map = rcu_dereference_bh(skb->dev->priomap);
3852	if (!map)
3853	return;
3854	sk = skb_to_full_sk(skb);
3855	if (!sk)
3856	return;
3857
3858	prioidx = sock_cgroup_prioidx(skcd: &sk->sk_cgrp_data);
3859
3860	if (prioidx < map->priomap_len)
3861	skb->priority = map->priomap[prioidx];
3862	}
3863	#else
3864	#define skb_update_prio(skb)
3865	#endif
3866
3867	/**
3868	* dev_loopback_xmit - loop back @skb
3869	* @net: network namespace this loopback is happening in
3870	* @sk: sk needed to be a netfilter okfn
3871	* @skb: buffer to transmit
3872	*/
3873	int dev_loopback_xmit(struct net net, struct* sock sk, struct* sk_buff *skb)
3874	{
3875	skb_reset_mac_header(skb);
3876	__skb_pull(skb, len: skb_network_offset(skb));
3877	skb->pkt_type = PACKET_LOOPBACK;
3878	if (skb->ip_summed == CHECKSUM_NONE)
3879	skb->ip_summed = CHECKSUM_UNNECESSARY;
3880	DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb));
3881	skb_dst_force(skb);
3882	netif_rx(skb);
3883	return `0`;
3884	}
3885	EXPORT_SYMBOL(dev_loopback_xmit);
3886
3887	#ifdef CONFIG_NET_EGRESS
/*
 * Return the Tx queue selected by the skb's queue mapping, after capping
 * the index with netdev_cap_txqueue().
 */
static struct netdev_queue *
netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
{
	int qm = skb_get_queue_mapping(skb);

	return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
}
3895
3896	static bool netdev_xmit_txqueue_skipped(void)
3897	{
3898	return __this_cpu_read(softnet_data.xmit.skip_txqueue);
3899	}
3900
3901	void netdev_xmit_skip_txqueue(bool skip)
3902	{
3903	__this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
3904	}
3905	EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
3906	#endif /* CONFIG_NET_EGRESS */
3907
3908	#ifdef CONFIG_NET_XGRESS
3909	static int tc_run(struct tcx_entry entry, struct* sk_buff *skb,
3910	enum skb_drop_reason *drop_reason)
3911	{
3912	int ret = TC_ACT_UNSPEC;
3913	#ifdef CONFIG_NET_CLS_ACT
3914	struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
3915	struct tcf_result res;
3916
3917	if (!miniq)
3918	return ret;
3919
3920	tc_skb_cb(skb)->mru = `0`;
3921	tc_skb_cb(skb)->post_ct = false;
3922	tcf_set_drop_reason(skb, reason: *drop_reason);
3923
3924	mini_qdisc_bstats_cpu_update(miniq, skb);
3925	ret = tcf_classify(skb, block: miniq->block, tp: miniq->filter_list, res: &res, compat_mode: false);
3926	/ Only tcf related quirks below. /
3927	switch (ret) {
3928	case TC_ACT_SHOT:
3929	*drop_reason = tcf_get_drop_reason(skb);
3930	mini_qdisc_qstats_cpu_drop(miniq);
3931	break;
3932	case TC_ACT_OK:
3933	case TC_ACT_RECLASSIFY:
3934	skb->tc_index = TC_H_MIN(res.classid);
3935	break;
3936	}
3937	#endif /* CONFIG_NET_CLS_ACT */
3938	return ret;
3939	}
3940
3941	static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
3942
3943	void tcx_inc(void)
3944	{
3945	static_branch_inc(&tcx_needed_key);
3946	}
3947
3948	void tcx_dec(void)
3949	{
3950	static_branch_dec(&tcx_needed_key);
3951	}
3952
3953	static __always_inline enum tcx_action_base
3954	tcx_run(const struct bpf_mprog_entry entry, struct* sk_buff *skb,
3955	const bool needs_mac)
3956	{
3957	const struct bpf_mprog_fp *fp;
3958	const struct bpf_prog *prog;
3959	int ret = TCX_NEXT;
3960
3961	if (needs_mac)
3962	__skb_push(skb, len: skb->mac_len);
3963	bpf_mprog_foreach_prog(entry, fp, prog) {
3964	bpf_compute_data_pointers(skb);
3965	ret = bpf_prog_run(prog, ctx: skb);
3966	if (ret != TCX_NEXT)
3967	break;
3968	}
3969	if (needs_mac)
3970	__skb_pull(skb, len: skb->mac_len);
3971	return tcx_action_code(skb, code: ret);
3972	}
3973
3974	static __always_inline struct sk_buff *
3975	sch_handle_ingress(struct sk_buff skb, struct* packet_type *pt_prev, int* *ret,
3976	struct net_device orig_dev, bool another)
3977	{
3978	struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
3979	enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
3980	int sch_ret;
3981
3982	if (!entry)
3983	return skb;
3984	if (*pt_prev) {
3985	ret = deliver_skb(skb, pt_prev: pt_prev, orig_dev);
3986	*pt_prev = NULL;
3987	}
3988
3989	qdisc_skb_cb(skb)->pkt_len = skb->len;
3990	tcx_set_ingress(skb, ingress: true);
3991
3992	if (static_branch_unlikely(&tcx_needed_key)) {
3993	sch_ret = tcx_run(entry, skb, needs_mac: true);
3994	if (sch_ret != TC_ACT_UNSPEC)
3995	goto ingress_verdict;
3996	}
3997	sch_ret = tc_run(entry: tcx_entry(entry), skb, drop_reason: &drop_reason);
3998	ingress_verdict:
3999	switch (sch_ret) {
4000	case TC_ACT_REDIRECT:
4001	/ skb_mac_header check was done by BPF, so we can safely*
4002	* push the L2 header back before redirecting to another
4003	* netdev.
4004	*/
4005	__skb_push(skb, len: skb->mac_len);
4006	if (skb_do_redirect(skb) == -EAGAIN) {
4007	__skb_pull(skb, len: skb->mac_len);
4008	*another = true;
4009	break;
4010	}
4011	*ret = NET_RX_SUCCESS;
4012	return NULL;
4013	case TC_ACT_SHOT:
4014	kfree_skb_reason(skb, reason: drop_reason);
4015	*ret = NET_RX_DROP;
4016	return NULL;
4017	/ used by tc_run /
4018	case TC_ACT_STOLEN:
4019	case TC_ACT_QUEUED:
4020	case TC_ACT_TRAP:
4021	consume_skb(skb);
4022	fallthrough;
4023	case TC_ACT_CONSUMED:
4024	*ret = NET_RX_SUCCESS;
4025	return NULL;
4026	}
4027
4028	return skb;
4029	}
4030
4031	static __always_inline struct sk_buff *
4032	sch_handle_egress(struct sk_buff skb, int* ret, struct* net_device *dev)
4033	{
4034	struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
4035	enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
4036	int sch_ret;
4037
4038	if (!entry)
4039	return skb;
4040
4041	/ qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was*
4042	* already set by the caller.
4043	*/
4044	if (static_branch_unlikely(&tcx_needed_key)) {
4045	sch_ret = tcx_run(entry, skb, needs_mac: false);
4046	if (sch_ret != TC_ACT_UNSPEC)
4047	goto egress_verdict;
4048	}
4049	sch_ret = tc_run(entry: tcx_entry(entry), skb, drop_reason: &drop_reason);
4050	egress_verdict:
4051	switch (sch_ret) {
4052	case TC_ACT_REDIRECT:
4053	/ No need to push/pop skb's mac_header here on egress! /
4054	skb_do_redirect(skb);
4055	*ret = NET_XMIT_SUCCESS;
4056	return NULL;
4057	case TC_ACT_SHOT:
4058	kfree_skb_reason(skb, reason: drop_reason);
4059	*ret = NET_XMIT_DROP;
4060	return NULL;
4061	/ used by tc_run /
4062	case TC_ACT_STOLEN:
4063	case TC_ACT_QUEUED:
4064	case TC_ACT_TRAP:
4065	consume_skb(skb);
4066	fallthrough;
4067	case TC_ACT_CONSUMED:
4068	*ret = NET_XMIT_SUCCESS;
4069	return NULL;
4070	}
4071
4072	return skb;
4073	}
4074	#else
4075	static __always_inline struct sk_buff *
4076	sch_handle_ingress(struct sk_buff skb, struct* packet_type *pt_prev, int* *ret,
4077	struct net_device orig_dev, bool another)
4078	{
4079	return skb;
4080	}
4081
4082	static __always_inline struct sk_buff *
4083	sch_handle_egress(struct sk_buff skb, int* ret, struct* net_device *dev)
4084	{
4085	return skb;
4086	}
4087	#endif /* CONFIG_NET_XGRESS */
4088
4089	#ifdef CONFIG_XPS
4090	static int __get_xps_queue_idx(struct net_device dev, struct* sk_buff *skb,
4091	struct xps_dev_maps dev_maps, unsigned* int tci)
4092	{
4093	int tc = netdev_get_prio_tc_map(dev, prio: skb->priority);
4094	struct xps_map *map;
4095	int queue_index = -`1`;
4096
4097	if (tc >= dev_maps->num_tc \|\| tci >= dev_maps->nr_ids)
4098	return queue_index;
4099
4100	tci *= dev_maps->num_tc;
4101	tci += tc;
4102
4103	map = rcu_dereference(dev_maps->attr_map[tci]);
4104	if (map) {
4105	if (map->len == `1`)
4106	queue_index = map->queues[`0`];
4107	else
4108	queue_index = map->queues[reciprocal_scale(
4109	val: skb_get_hash(skb), ep_ro: map->len)];
4110	if (unlikely(queue_index >= dev->real_num_tx_queues))
4111	queue_index = -`1`;
4112	}
4113	return queue_index;
4114	}
4115	#endif
4116
/*
 * Select a Tx queue through XPS.  The Rx-queue based maps are tried first
 * when enabled, using the socket's recorded Rx queue; if they give no
 * queue, the CPU based maps are tried using skb->sender_cpu - 1.
 *
 * Returns the queue index, or -1 when XPS is disabled, not compiled in,
 * or yields no queue.
 */
static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
			 struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct sock *sk = skb->sk;
	int queue_index = -1;

	if (!static_key_false(&xps_needed))
		return -1;

	rcu_read_lock();
	if (!static_key_false(&xps_rxqs_needed))
		goto get_cpus_map;

	dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
	if (dev_maps) {
		int tci = sk_rx_queue_get(sk);

		if (tci >= 0)
			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
							  tci);
	}

get_cpus_map:
	if (queue_index < 0) {
		dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
		if (dev_maps) {
			unsigned int tci = skb->sender_cpu - 1;

			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
							  tci);
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}
4158
4159	u16 dev_pick_tx_zero(struct net_device dev, struct* sk_buff *skb,
4160	struct net_device *sb_dev)
4161	{
4162	return `0`;
4163	}
4164	EXPORT_SYMBOL(dev_pick_tx_zero);
4165
4166	u16 dev_pick_tx_cpu_id(struct net_device dev, struct* sk_buff *skb,
4167	struct net_device *sb_dev)
4168	{
4169	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
4170	}
4171	EXPORT_SYMBOL(dev_pick_tx_cpu_id);
4172
4173	u16 netdev_pick_tx(struct net_device dev, struct* sk_buff *skb,
4174	struct net_device *sb_dev)
4175	{
4176	struct sock *sk = skb->sk;
4177	int queue_index = sk_tx_queue_get(sk);
4178
4179	sb_dev = sb_dev ? : dev;
4180
4181	if (queue_index < `0` \|\| skb->ooo_okay \|\|
4182	queue_index >= dev->real_num_tx_queues) {
4183	int new_index = get_xps_queue(dev, sb_dev, skb);
4184
4185	if (new_index < `0`)
4186	new_index = skb_tx_hash(dev, sb_dev, skb);
4187
4188	if (queue_index != new_index && sk &&
4189	sk_fullsock(sk) &&
4190	rcu_access_pointer(sk->sk_dst_cache))
4191	sk_tx_queue_set(sk, tx_queue: new_index);
4192
4193	queue_index = new_index;
4194	}
4195
4196	return queue_index;
4197	}
4198	EXPORT_SYMBOL(netdev_pick_tx);
4199
4200	struct netdev_queue netdev_core_pick_tx(struct* net_device *dev,
4201	struct sk_buff *skb,
4202	struct net_device *sb_dev)
4203	{
4204	int queue_index = `0`;
4205
4206	#ifdef CONFIG_XPS
4207	u32 sender_cpu = skb->sender_cpu - `1`;
4208
4209	if (sender_cpu >= (u32)NR_CPUS)
4210	skb->sender_cpu = raw_smp_processor_id() + `1`;
4211	#endif
4212
4213	if (dev->real_num_tx_queues != `1`) {
4214	const struct net_device_ops *ops = dev->netdev_ops;
4215
4216	if (ops->ndo_select_queue)
4217	queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
4218	else
4219	queue_index = netdev_pick_tx(dev, skb, sb_dev);
4220
4221	queue_index = netdev_cap_txqueue(dev, queue_index);
4222	}
4223
4224	skb_set_queue_mapping(skb, queue_mapping: queue_index);
4225	return netdev_get_tx_queue(dev, index: queue_index);
4226	}
4227
/**
 * __dev_queue_xmit() - transmit a buffer
 * @skb:	buffer to transmit
 * @sb_dev:	subordinate device used for L2 forwarding offload
 *
 * Queue a buffer for transmission to a network device. The caller must
 * have set the device and priority and built the buffer before calling
 * this function. The function can be called from an interrupt.
 *
 * When calling this method, interrupts MUST be enabled. This is because
 * the BH enable code must have IRQs enabled so that it will not deadlock.
 *
 * Regardless of the return value, the skb is consumed, so it is currently
 * difficult to retry a send to this method. (You can bump the ref count
 * before sending to hold a reference for retry if you are careful.)
 *
 * Return:
 * * 0				- buffer successfully transmitted
 * * positive qdisc return code	- NET_XMIT_DROP etc.
 * * negative errno		- other errors
 */
4249	int __dev_queue_xmit(struct sk_buff skb, struct* net_device *sb_dev)
4250	{
4251	struct net_device *dev = skb->dev;
4252	struct netdev_queue *txq = NULL;
4253	struct Qdisc *q;
4254	int rc = -ENOMEM;
4255	bool again = false;
4256
4257	skb_reset_mac_header(skb);
4258	skb_assert_len(skb);
4259
4260	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
4261	__skb_tstamp_tx(orig_skb: skb, NULL, NULL, sk: skb->sk, tstype: SCM_TSTAMP_SCHED);
4262
4263	/ Disable soft irqs for various locks below. Also*
4264	* stops preemption for RCU.
4265	*/
4266	rcu_read_lock_bh();
4267
4268	skb_update_prio(skb);
4269
4270	qdisc_pkt_len_init(skb);
4271	tcx_set_ingress(skb, ingress: false);
4272	#ifdef CONFIG_NET_EGRESS
4273	if (static_branch_unlikely(&egress_needed_key)) {
4274	if (nf_hook_egress_active()) {
4275	skb = nf_hook_egress(skb, rc: &rc, dev);
4276	if (!skb)
4277	goto out;
4278	}
4279
4280	netdev_xmit_skip_txqueue(false);
4281
4282	nf_skip_egress(skb, skip: true);
4283	skb = sch_handle_egress(skb, ret: &rc, dev);
4284	if (!skb)
4285	goto out;
4286	nf_skip_egress(skb, skip: false);
4287
4288	if (netdev_xmit_txqueue_skipped())
4289	txq = netdev_tx_queue_mapping(dev, skb);
4290	}
4291	#endif
4292	/ If device/qdisc don't need skb->dst, release it right now while*
4293	* its hot in this cpu cache.
4294	*/
4295	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
4296	skb_dst_drop(skb);
4297	else
4298	skb_dst_force(skb);
4299
4300	if (!txq)
4301	txq = netdev_core_pick_tx(dev, skb, sb_dev);
4302
4303	q = rcu_dereference_bh(txq->qdisc);
4304
4305	trace_net_dev_queue(skb);
4306	if (q->enqueue) {
4307	rc = __dev_xmit_skb(skb, q, dev, txq);
4308	goto out;
4309	}
4310
4311	/ The device has no queue. Common case for software devices:*
4312	* loopback, all the sorts of tunnels...
4313
4314	* Really, it is unlikely that netif_tx_lock protection is necessary
4315	* here. (f.e. loopback and IP tunnels are clean ignoring statistics
4316	* counters.)
4317	* However, it is possible, that they rely on protection
4318	* made by us here.
4319
4320	* Check this and shot the lock. It is not prone from deadlocks.
4321	*Either shot noqueue qdisc, it is even simpler 8)
4322	*/
4323	if (dev->flags & IFF_UP) {
4324	int cpu = smp_processor_id(); / ok because BHs are off /
4325
4326	/ Other cpus might concurrently change txq->xmit_lock_owner*
4327	* to -1 or to their cpu id, but not to our id.
4328	*/
4329	if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
4330	if (dev_xmit_recursion())
4331	goto recursion_alert;
4332
4333	skb = validate_xmit_skb(skb, dev, again: &again);
4334	if (!skb)
4335	goto out;
4336
4337	HARD_TX_LOCK(dev, txq, cpu);
4338
4339	if (!netif_xmit_stopped(dev_queue: txq)) {
4340	dev_xmit_recursion_inc();
4341	skb = dev_hard_start_xmit(first: skb, dev, txq, ret: &rc);
4342	dev_xmit_recursion_dec();
4343	if (dev_xmit_complete(rc)) {
4344	HARD_TX_UNLOCK(dev, txq);
4345	goto out;
4346	}
4347	}
4348	HARD_TX_UNLOCK(dev, txq);
4349	net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
4350	dev->name);
4351	} else {
4352	/ Recursion is detected! It is possible,*
4353	* unfortunately
4354	*/
4355	recursion_alert:
4356	net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
4357	dev->name);
4358	}
4359	}
4360
4361	rc = -ENETDOWN;
4362	rcu_read_unlock_bh();
4363
4364	dev_core_stats_tx_dropped_inc(dev);
4365	kfree_skb_list(segs: skb);
4366	return rc;
4367	out:
4368	rcu_read_unlock_bh();
4369	return rc;
4370	}
4371	EXPORT_SYMBOL(__dev_queue_xmit);
4372
4373	int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
4374	{
4375	struct net_device *dev = skb->dev;
4376	struct sk_buff *orig_skb = skb;
4377	struct netdev_queue *txq;
4378	int ret = NETDEV_TX_BUSY;
4379	bool again = false;
4380
4381	if (unlikely(!netif_running(dev) \|\|
4382	!netif_carrier_ok(dev)))
4383	goto drop;
4384
4385	skb = validate_xmit_skb_list(skb, dev, &again);
4386	if (skb != orig_skb)
4387	goto drop;
4388
4389	skb_set_queue_mapping(skb, queue_mapping: queue_id);
4390	txq = skb_get_tx_queue(dev, skb);
4391
4392	local_bh_disable();
4393
4394	dev_xmit_recursion_inc();
4395	HARD_TX_LOCK(dev, txq, smp_processor_id());
4396	if (!netif_xmit_frozen_or_drv_stopped(dev_queue: txq))
4397	ret = netdev_start_xmit(skb, dev, txq, more: false);
4398	HARD_TX_UNLOCK(dev, txq);
4399	dev_xmit_recursion_dec();
4400
4401	local_bh_enable();
4402	return ret;
4403	drop:
4404	dev_core_stats_tx_dropped_inc(dev);
4405	kfree_skb_list(segs: skb);
4406	return NET_XMIT_DROP;
4407	}
4408	EXPORT_SYMBOL(__dev_direct_xmit);
4409
4410	/*************************************************************************
4411	* Receiver routines
4412	*************************************************************************/
4413
4414	unsigned int sysctl_skb_defer_max __read_mostly = `64`;
4415	int weight_p __read_mostly = `64`; / old backlog weight /
4416	int dev_weight_rx_bias __read_mostly = `1`; / bias for backlog weight /
4417	int dev_weight_tx_bias __read_mostly = `1`; / bias for output_queue quota /
4418
4419	/ Called with irq disabled /
4420	static inline void ____napi_schedule(struct softnet_data *sd,
4421	struct napi_struct *napi)
4422	{
4423	struct task_struct *thread;
4424
4425	lockdep_assert_irqs_disabled();
4426
4427	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
4428	/ Paired with smp_mb__before_atomic() in*
4429	* napi_enable()/dev_set_threaded().
4430	* Use READ_ONCE() to guarantee a complete
4431	* read on napi->thread. Only call
4432	* wake_up_process() when it's not NULL.
4433	*/
4434	thread = READ_ONCE(napi->thread);
4435	if (thread) {
4436	/ Avoid doing set_bit() if the thread is in*
4437	* INTERRUPTIBLE state, cause napi_thread_wait()
4438	* makes sure to proceed with napi polling
4439	* if the thread is explicitly woken from here.
4440	*/
4441	if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
4442	set_bit(nr: NAPI_STATE_SCHED_THREADED, addr: &napi->state);
4443	wake_up_process(tsk: thread);
4444	return;
4445	}
4446	}
4447
4448	list_add_tail(new: &napi->poll_list, head: &sd->poll_list);
4449	WRITE_ONCE(napi->list_owner, smp_processor_id());
4450	/ If not called from net_rx_action()*
4451	* we have to raise NET_RX_SOFTIRQ.
4452	*/
4453	if (!sd->in_net_rx_action)
4454	__raise_softirq_irqoff(nr: NET_RX_SOFTIRQ);
4455	}
4456
4457	#ifdef CONFIG_RPS
4458
4459	struct static_key_false rps_needed __read_mostly;
4460	EXPORT_SYMBOL(rps_needed);
4461	struct static_key_false rfs_needed __read_mostly;
4462	EXPORT_SYMBOL(rfs_needed);
4463
4464	static struct rps_dev_flow *
4465	set_rps_cpu(struct net_device dev, struct* sk_buff *skb,
4466	struct rps_dev_flow *rflow, u16 next_cpu)
4467	{
4468	if (next_cpu < nr_cpu_ids) {
4469	#ifdef CONFIG_RFS_ACCEL
4470	struct netdev_rx_queue *rxqueue;
4471	struct rps_dev_flow_table *flow_table;
4472	struct rps_dev_flow *old_rflow;
4473	u32 flow_id;
4474	u16 rxq_index;
4475	int rc;
4476
4477	/ Should we steer this flow to a different hardware queue? /
4478	if (!skb_rx_queue_recorded(skb) \|\| !dev->rx_cpu_rmap \|\|
4479	!(dev->features & NETIF_F_NTUPLE))
4480	goto out;
4481	rxq_index = cpu_rmap_lookup_index(rmap: dev->rx_cpu_rmap, cpu: next_cpu);
4482	if (rxq_index == skb_get_rx_queue(skb))
4483	goto out;
4484
4485	rxqueue = dev->_rx + rxq_index;
4486	flow_table = rcu_dereference(rxqueue->rps_flow_table);
4487	if (!flow_table)
4488	goto out;
4489	flow_id = skb_get_hash(skb) & flow_table->mask;
4490	rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
4491	rxq_index, flow_id);
4492	if (rc < `0`)
4493	goto out;
4494	old_rflow = rflow;
4495	rflow = &flow_table->flows[flow_id];
4496	rflow->filter = rc;
4497	if (old_rflow->filter == rflow->filter)
4498	old_rflow->filter = RPS_NO_FILTER;
4499	out:
4500	#endif
4501	rflow->last_qtail =
4502	per_cpu(softnet_data, next_cpu).input_queue_head;
4503	}
4504
4505	rflow->cpu = next_cpu;
4506	return rflow;
4507	}
4508
4509	/*
4510	* get_rps_cpu is called from netif_receive_skb and returns the target
4511	* CPU from the RPS map of the receiving queue for a given skb.
4512	* rcu_read_lock must be held on entry.
4513	*/
4514	static int get_rps_cpu(struct net_device dev, struct* sk_buff *skb,
4515	struct rps_dev_flow **rflowp)
4516	{
4517	const struct rps_sock_flow_table *sock_flow_table;
4518	struct netdev_rx_queue *rxqueue = dev->_rx;
4519	struct rps_dev_flow_table *flow_table;
4520	struct rps_map *map;
4521	int cpu = -`1`;
4522	u32 tcpu;
4523	u32 hash;
4524
4525	if (skb_rx_queue_recorded(skb)) {
4526	u16 index = skb_get_rx_queue(skb);
4527
4528	if (unlikely(index >= dev->real_num_rx_queues)) {
4529	WARN_ONCE(dev->real_num_rx_queues > `1`,
4530	"%s received packet on queue %u, but number "
4531	"of RX queues is %u\n",
4532	dev->name, index, dev->real_num_rx_queues);
4533	goto done;
4534	}
4535	rxqueue += index;
4536	}
4537
4538	/ Avoid computing hash if RFS/RPS is not active for this rxqueue /
4539
4540	flow_table = rcu_dereference(rxqueue->rps_flow_table);
4541	map = rcu_dereference(rxqueue->rps_map);
4542	if (!flow_table && !map)
4543	goto done;
4544
4545	skb_reset_network_header(skb);
4546	hash = skb_get_hash(skb);
4547	if (!hash)
4548	goto done;
4549
4550	sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
4551	if (flow_table && sock_flow_table) {
4552	struct rps_dev_flow *rflow;
4553	u32 next_cpu;
4554	u32 ident;
4555
4556	/ First check into global flow table if there is a match.*
4557	* This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
4558	*/
4559	ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
4560	if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask)
4561	goto try_rps;
4562
4563	next_cpu = ident & net_hotdata.rps_cpu_mask;
4564
4565	/ OK, now we know there is a match,*
4566	* we can look at the local (per receive queue) flow table
4567	*/
4568	rflow = &flow_table->flows[hash & flow_table->mask];
4569	tcpu = rflow->cpu;
4570
4571	/*
4572	* If the desired CPU (where last recvmsg was done) is
4573	* different from current CPU (one in the rx-queue flow
4574	* table entry), switch if one of the following holds:
4575	* - Current CPU is unset (>= nr_cpu_ids).
4576	* - Current CPU is offline.
4577	* - The current CPU's queue tail has advanced beyond the
4578	* last packet that was enqueued using this table entry.
4579	* This guarantees that all previous packets for the flow
4580	* have been dequeued, thus preserving in order delivery.
4581	*/
4582	if (unlikely(tcpu != next_cpu) &&
4583	(tcpu >= nr_cpu_ids \|\| !cpu_online(cpu: tcpu) \|\|
4584	((int)(per_cpu(softnet_data, tcpu).input_queue_head -
4585	rflow->last_qtail)) >= `0`)) {
4586	tcpu = next_cpu;
4587	rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
4588	}
4589
4590	if (tcpu < nr_cpu_ids && cpu_online(cpu: tcpu)) {
4591	*rflowp = rflow;
4592	cpu = tcpu;
4593	goto done;
4594	}
4595	}
4596
4597	try_rps:
4598
4599	if (map) {
4600	tcpu = map->cpus[reciprocal_scale(val: hash, ep_ro: map->len)];
4601	if (cpu_online(cpu: tcpu)) {
4602	cpu = tcpu;
4603	goto done;
4604	}
4605	}
4606
4607	done:
4608	return cpu;
4609	}
4610
4611	#ifdef CONFIG_RFS_ACCEL
4612
4613	/**
4614	* rps_may_expire_flow - check whether an RFS hardware filter may be removed
4615	* @dev: Device on which the filter was set
4616	* @rxq_index: RX queue index
4617	* @flow_id: Flow ID passed to ndo_rx_flow_steer()
4618	* @filter_id: Filter ID returned by ndo_rx_flow_steer()
4619	*
4620	* Drivers that implement ndo_rx_flow_steer() should periodically call
4621	* this function for each installed filter and remove the filters for
4622	* which it returns %true.
4623	*/
4624	bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
4625	u32 flow_id, u16 filter_id)
4626	{
4627	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
4628	struct rps_dev_flow_table *flow_table;
4629	struct rps_dev_flow *rflow;
4630	bool expire = true;
4631	unsigned int cpu;
4632
4633	rcu_read_lock();
4634	flow_table = rcu_dereference(rxqueue->rps_flow_table);
4635	if (flow_table && flow_id <= flow_table->mask) {
4636	rflow = &flow_table->flows[flow_id];
4637	cpu = READ_ONCE(rflow->cpu);
4638	if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
4639	((int)(per_cpu(softnet_data, cpu).input_queue_head -
4640	rflow->last_qtail) <
4641	(int)(`10` * flow_table->mask)))
4642	expire = false;
4643	}
4644	rcu_read_unlock();
4645	return expire;
4646	}
4647	EXPORT_SYMBOL(rps_may_expire_flow);
4648
4649	#endif /* CONFIG_RFS_ACCEL */
4650
4651	/ Called from hardirq (IPI) context /
4652	static void rps_trigger_softirq(void *data)
4653	{
4654	struct softnet_data *sd = data;
4655
4656	____napi_schedule(sd, napi: &sd->backlog);
4657	sd->received_rps++;
4658	}
4659
4660	#endif /* CONFIG_RPS */
4661
4662	/ Called from hardirq (IPI) context /
4663	static void trigger_rx_softirq(void *data)
4664	{
4665	struct softnet_data *sd = data;
4666
4667	__raise_softirq_irqoff(nr: NET_RX_SOFTIRQ);
4668	smp_store_release(&sd->defer_ipi_scheduled, `0`);
4669	}
4670
4671	/*
4672	* After we queued a packet into sd->input_pkt_queue,
4673	* we need to make sure this queue is serviced soon.
4674	*
4675	* - If this is another cpu queue, link it to our rps_ipi_list,
4676	* and make sure we will process rps_ipi_list from net_rx_action().
4677	*
4678	* - If this is our own queue, NAPI schedule our backlog.
4679	* Note that this also raises NET_RX_SOFTIRQ.
4680	*/
4681	static void napi_schedule_rps(struct softnet_data *sd)
4682	{
4683	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
4684
4685	#ifdef CONFIG_RPS
4686	if (sd != mysd) {
4687	sd->rps_ipi_next = mysd->rps_ipi_list;
4688	mysd->rps_ipi_list = sd;
4689
4690	/ If not called from net_rx_action() or napi_threaded_poll()*
4691	* we have to raise NET_RX_SOFTIRQ.
4692	*/
4693	if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
4694	__raise_softirq_irqoff(nr: NET_RX_SOFTIRQ);
4695	return;
4696	}
4697	#endif /* CONFIG_RPS */
4698	__napi_schedule_irqoff(n: &mysd->backlog);
4699	}
4700
#ifdef CONFIG_NET_FLOW_LIMIT
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
#endif
4704
4705	static bool skb_flow_limit(struct sk_buff skb, unsigned* int qlen)
4706	{
4707	#ifdef CONFIG_NET_FLOW_LIMIT
4708	struct sd_flow_limit *fl;
4709	struct softnet_data *sd;
4710	unsigned int old_flow, new_flow;
4711
4712	if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> `1`))
4713	return false;
4714
4715	sd = this_cpu_ptr(&softnet_data);
4716
4717	rcu_read_lock();
4718	fl = rcu_dereference(sd->flow_limit);
4719	if (fl) {
4720	new_flow = skb_get_hash(skb) & (fl->num_buckets - `1`);
4721	old_flow = fl->history[fl->history_head];
4722	fl->history[fl->history_head] = new_flow;
4723
4724	fl->history_head++;
4725	fl->history_head &= FLOW_LIMIT_HISTORY - `1`;
4726
4727	if (likely(fl->buckets[old_flow]))
4728	fl->buckets[old_flow]--;
4729
4730	if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> `1`)) {
4731	fl->count++;
4732	rcu_read_unlock();
4733	return true;
4734	}
4735	}
4736	rcu_read_unlock();
4737	#endif
4738	return false;
4739	}
4740
4741	/*
4742	* enqueue_to_backlog is called to queue an skb to a per CPU backlog
4743	* queue (may be a remote CPU queue).
4744	*/
4745	static int enqueue_to_backlog(struct sk_buff skb, int* cpu,
4746	unsigned int *qtail)
4747	{
4748	enum skb_drop_reason reason;
4749	struct softnet_data *sd;
4750	unsigned long flags;
4751	unsigned int qlen;
4752
4753	reason = SKB_DROP_REASON_NOT_SPECIFIED;
4754	sd = &per_cpu(softnet_data, cpu);
4755
4756	rps_lock_irqsave(sd, flags: &flags);
4757	if (!netif_running(dev: skb->dev))
4758	goto drop;
4759	qlen = skb_queue_len(list_: &sd->input_pkt_queue);
4760	if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
4761	!skb_flow_limit(skb, qlen)) {
4762	if (qlen) {
4763	enqueue:
4764	__skb_queue_tail(list: &sd->input_pkt_queue, newsk: skb);
4765	input_queue_tail_incr_save(sd, qtail);
4766	rps_unlock_irq_restore(sd, flags: &flags);
4767	return NET_RX_SUCCESS;
4768	}
4769
4770	/ Schedule NAPI for backlog device*
4771	* We can use non atomic operation since we own the queue lock
4772	*/
4773	if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
4774	napi_schedule_rps(sd);
4775	goto enqueue;
4776	}
4777	reason = SKB_DROP_REASON_CPU_BACKLOG;
4778
4779	drop:
4780	sd->dropped++;
4781	rps_unlock_irq_restore(sd, flags: &flags);
4782
4783	dev_core_stats_rx_dropped_inc(dev: skb->dev);
4784	kfree_skb_reason(skb, reason);
4785	return NET_RX_DROP;
4786	}
4787
4788	static struct netdev_rx_queue netif_get_rxqueue(struct* sk_buff *skb)
4789	{
4790	struct net_device *dev = skb->dev;
4791	struct netdev_rx_queue *rxqueue;
4792
4793	rxqueue = dev->_rx;
4794
4795	if (skb_rx_queue_recorded(skb)) {
4796	u16 index = skb_get_rx_queue(skb);
4797
4798	if (unlikely(index >= dev->real_num_rx_queues)) {
4799	WARN_ONCE(dev->real_num_rx_queues > `1`,
4800	"%s received packet on queue %u, but number "
4801	"of RX queues is %u\n",
4802	dev->name, index, dev->real_num_rx_queues);
4803
4804	return rxqueue; / Return first rxqueue /
4805	}
4806	rxqueue += index;
4807	}
4808	return rxqueue;
4809	}
4810
4811	u32 bpf_prog_run_generic_xdp(struct sk_buff skb, struct* xdp_buff *xdp,
4812	struct bpf_prog *xdp_prog)
4813	{
4814	void orig_data, orig_data_end, *hard_start;
4815	struct netdev_rx_queue *rxqueue;
4816	bool orig_bcast, orig_host;
4817	u32 mac_len, frame_sz;
4818	__be16 orig_eth_type;
4819	struct ethhdr *eth;
4820	u32 metalen, act;
4821	int off;
4822
4823	/ The XDP program wants to see the packet starting at the MAC*
4824	* header.
4825	*/
4826	mac_len = skb->data - skb_mac_header(skb);
4827	hard_start = skb->data - skb_headroom(skb);
4828
4829	/ SKB "head" area always have tailroom for skb_shared_info /
4830	frame_sz = (void *)skb_end_pointer(skb) - hard_start;
4831	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
4832
4833	rxqueue = netif_get_rxqueue(skb);
4834	xdp_init_buff(xdp, frame_sz, rxq: &rxqueue->xdp_rxq);
4835	xdp_prepare_buff(xdp, hard_start, headroom: skb_headroom(skb) - mac_len,
4836	data_len: skb_headlen(skb) + mac_len, meta_valid: true);
4837	if (skb_is_nonlinear(skb)) {
4838	skb_shinfo(skb)->xdp_frags_size = skb->data_len;
4839	xdp_buff_set_frags_flag(xdp);
4840	} else {
4841	xdp_buff_clear_frags_flag(xdp);
4842	}
4843
4844	orig_data_end = xdp->data_end;
4845	orig_data = xdp->data;
4846	eth = (struct ethhdr *)xdp->data;
4847	orig_host = ether_addr_equal_64bits(addr1: eth->h_dest, addr2: skb->dev->dev_addr);
4848	orig_bcast = is_multicast_ether_addr_64bits(addr: eth->h_dest);
4849	orig_eth_type = eth->h_proto;
4850
4851	act = bpf_prog_run_xdp(prog: xdp_prog, xdp);
4852
4853	/ check if bpf_xdp_adjust_head was used /
4854	off = xdp->data - orig_data;
4855	if (off) {
4856	if (off > `0`)
4857	__skb_pull(skb, len: off);
4858	else if (off < `0`)
4859	__skb_push(skb, len: -off);
4860
4861	skb->mac_header += off;
4862	skb_reset_network_header(skb);
4863	}
4864
4865	/ check if bpf_xdp_adjust_tail was used /
4866	off = xdp->data_end - orig_data_end;
4867	if (off != `0`) {
4868	skb_set_tail_pointer(skb, offset: xdp->data_end - xdp->data);
4869	skb->len += off; / positive on grow, negative on shrink /
4870	}
4871
4872	/ XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers*
4873	* (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
4874	*/
4875	if (xdp_buff_has_frags(xdp))
4876	skb->data_len = skb_shinfo(skb)->xdp_frags_size;
4877	else
4878	skb->data_len = `0`;
4879
4880	/ check if XDP changed eth hdr such SKB needs update /
4881	eth = (struct ethhdr *)xdp->data;
4882	if ((orig_eth_type != eth->h_proto) \|\|
4883	(orig_host != ether_addr_equal_64bits(addr1: eth->h_dest,
4884	addr2: skb->dev->dev_addr)) \|\|
4885	(orig_bcast != is_multicast_ether_addr_64bits(addr: eth->h_dest))) {
4886	__skb_push(skb, ETH_HLEN);
4887	skb->pkt_type = PACKET_HOST;
4888	skb->protocol = eth_type_trans(skb, dev: skb->dev);
4889	}
4890
4891	/ Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull*
4892	* before calling us again on redirect path. We do not call do_redirect
4893	* as we leave that up to the caller.
4894	*
4895	* Caller is responsible for managing lifetime of skb (i.e. calling
4896	* kfree_skb in response to actions it cannot handle/XDP_DROP).
4897	*/
4898	switch (act) {
4899	case XDP_REDIRECT:
4900	case XDP_TX:
4901	__skb_push(skb, len: mac_len);
4902	break;
4903	case XDP_PASS:
4904	metalen = xdp->data - xdp->data_meta;
4905	if (metalen)
4906	skb_metadata_set(skb, meta_len: metalen);
4907	break;
4908	}
4909
4910	return act;
4911	}
4912
4913	static int
4914	netif_skb_check_for_xdp(struct sk_buff pskb, struct** bpf_prog *prog)
4915	{
4916	struct sk_buff skb = pskb;
4917	int err, hroom, troom;
4918
4919	if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
4920	return `0`;
4921
4922	/ In case we have to go down the path and also linearize,*
4923	* then lets do the pskb_expand_head() work just once here.
4924	*/
4925	hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
4926	troom = skb->tail + skb->data_len - skb->end;
4927	err = pskb_expand_head(skb,
4928	nhead: hroom > `0` ? ALIGN(hroom, NET_SKB_PAD) : `0`,
4929	ntail: troom > `0` ? troom + `128` : `0`, GFP_ATOMIC);
4930	if (err)
4931	return err;
4932
4933	return skb_linearize(skb);
4934	}
4935
4936	static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
4937	struct xdp_buff *xdp,
4938	struct bpf_prog *xdp_prog)
4939	{
4940	struct sk_buff skb = pskb;
4941	u32 mac_len, act = XDP_DROP;
4942
4943	/ Reinjected packets coming from act_mirred or similar should*
4944	* not get XDP generic processing.
4945	*/
4946	if (skb_is_redirected(skb))
4947	return XDP_PASS;
4948
4949	/ XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM*
4950	* bytes. This is the guarantee that also native XDP provides,
4951	* thus we need to do it here as well.
4952	*/
4953	mac_len = skb->data - skb_mac_header(skb);
4954	__skb_push(skb, len: mac_len);
4955
4956	if (skb_cloned(skb) \|\| skb_is_nonlinear(skb) \|\|
4957	skb_headroom(skb) < XDP_PACKET_HEADROOM) {
4958	if (netif_skb_check_for_xdp(pskb, prog: xdp_prog))
4959	goto do_drop;
4960	}
4961
4962	__skb_pull(skb: *pskb, len: mac_len);
4963
4964	act = bpf_prog_run_generic_xdp(skb: *pskb, xdp, xdp_prog);
4965	switch (act) {
4966	case XDP_REDIRECT:
4967	case XDP_TX:
4968	case XDP_PASS:
4969	break;
4970	default:
4971	bpf_warn_invalid_xdp_action(dev: (*pskb)->dev, prog: xdp_prog, act);
4972	fallthrough;
4973	case XDP_ABORTED:
4974	trace_xdp_exception(dev: (*pskb)->dev, xdp: xdp_prog, act);
4975	fallthrough;
4976	case XDP_DROP:
4977	do_drop:
4978	kfree_skb(skb: *pskb);
4979	break;
4980	}
4981
4982	return act;
4983	}
4984
4985	/ When doing generic XDP we have to bypass the qdisc layer and the*
4986	* network taps in order to match in-driver-XDP behavior. This also means
4987	* that XDP packets are able to starve other packets going through a qdisc,
4988	* and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
4989	* queues, so they do not have this starvation issue.
4990	*/
4991	void generic_xdp_tx(struct sk_buff skb, struct* bpf_prog *xdp_prog)
4992	{
4993	struct net_device *dev = skb->dev;
4994	struct netdev_queue *txq;
4995	bool free_skb = true;
4996	int cpu, rc;
4997
4998	txq = netdev_core_pick_tx(dev, skb, NULL);
4999	cpu = smp_processor_id();
5000	HARD_TX_LOCK(dev, txq, cpu);
5001	if (!netif_xmit_frozen_or_drv_stopped(dev_queue: txq)) {
5002	rc = netdev_start_xmit(skb, dev, txq, more: `0`);
5003	if (dev_xmit_complete(rc))
5004	free_skb = false;
5005	}
5006	HARD_TX_UNLOCK(dev, txq);
5007	if (free_skb) {
5008	trace_xdp_exception(dev, xdp: xdp_prog, act: XDP_TX);
5009	dev_core_stats_tx_dropped_inc(dev);
5010	kfree_skb(skb);
5011	}
5012	}
5013
/* Tested in the receive path before do_xdp_generic() is invoked. */
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
5015
5016	int do_xdp_generic(struct bpf_prog xdp_prog, struct* sk_buff **pskb)
5017	{
5018	if (xdp_prog) {
5019	struct xdp_buff xdp;
5020	u32 act;
5021	int err;
5022
5023	act = netif_receive_generic_xdp(pskb, xdp: &xdp, xdp_prog);
5024	if (act != XDP_PASS) {
5025	switch (act) {
5026	case XDP_REDIRECT:
5027	err = xdp_do_generic_redirect(dev: (pskb)->dev, skb: pskb,
5028	xdp: &xdp, prog: xdp_prog);
5029	if (err)
5030	goto out_redir;
5031	break;
5032	case XDP_TX:
5033	generic_xdp_tx(skb: *pskb, xdp_prog);
5034	break;
5035	}
5036	return XDP_DROP;
5037	}
5038	}
5039	return XDP_PASS;
5040	out_redir:
5041	kfree_skb_reason(skb: *pskb, reason: SKB_DROP_REASON_XDP);
5042	return XDP_DROP;
5043	}
5044	EXPORT_SYMBOL_GPL(do_xdp_generic);
5045
5046	static int netif_rx_internal(struct sk_buff *skb)
5047	{
5048	int ret;
5049
5050	net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
5051
5052	trace_netif_rx(skb);
5053
5054	#ifdef CONFIG_RPS
5055	if (static_branch_unlikely(&rps_needed)) {
5056	struct rps_dev_flow voidflow, *rflow = &voidflow;
5057	int cpu;
5058
5059	rcu_read_lock();
5060
5061	cpu = get_rps_cpu(dev: skb->dev, skb, rflowp: &rflow);
5062	if (cpu < `0`)
5063	cpu = smp_processor_id();
5064
5065	ret = enqueue_to_backlog(skb, cpu, qtail: &rflow->last_qtail);
5066
5067	rcu_read_unlock();
5068	} else
5069	#endif
5070	{
5071	unsigned int qtail;
5072
5073	ret = enqueue_to_backlog(skb, smp_processor_id(), qtail: &qtail);
5074	}
5075	return ret;
5076	}
5077
/**
 *	__netif_rx	-	Slightly optimized version of netif_rx
 *	@skb: buffer to post
 *
 *	This behaves as netif_rx except that it does not disable bottom halves.
 *	As a result this function may only be invoked from the interrupt context
 *	(either hard or soft interrupt).
 */
int __netif_rx(struct sk_buff *skb)
{
	int ret;

	lockdep_assert_once(hardirq_count() | softirq_count());

	trace_netif_rx_entry(skb);
	ret = netif_rx_internal(skb);
	trace_netif_rx_exit(ret);
	return ret;
}
EXPORT_SYMBOL(__netif_rx);
5098
5099	/**
5100	* netif_rx - post buffer to the network code
5101	* @skb: buffer to post
5102	*
5103	* This function receives a packet from a device driver and queues it for
5104	* the upper (protocol) levels to process via the backlog NAPI device. It
5105	* always succeeds. The buffer may be dropped during processing for
5106	* congestion control or by the protocol layers.
5107	* The network buffer is passed via the backlog NAPI device. Modern NIC
5108	* driver should use NAPI and GRO.
5109	* This function can used from interrupt and from process context. The
5110	* caller from process context must not disable interrupts before invoking
5111	* this function.
5112	*
5113	* return values:
5114	* NET_RX_SUCCESS (no congestion)
5115	* NET_RX_DROP (packet was dropped)
5116	*
5117	*/
5118	int netif_rx(struct sk_buff *skb)
5119	{
5120	bool need_bh_off = !(hardirq_count() \| softirq_count());
5121	int ret;
5122
5123	if (need_bh_off)
5124	local_bh_disable();
5125	trace_netif_rx_entry(skb);
5126	ret = netif_rx_internal(skb);
5127	trace_netif_rx_exit(ret);
5128	if (need_bh_off)
5129	local_bh_enable();
5130	return ret;
5131	}
5132	EXPORT_SYMBOL(netif_rx);
5133
5134	static __latent_entropy void net_tx_action(struct softirq_action *h)
5135	{
5136	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
5137
5138	if (sd->completion_queue) {
5139	struct sk_buff *clist;
5140
5141	local_irq_disable();
5142	clist = sd->completion_queue;
5143	sd->completion_queue = NULL;
5144	local_irq_enable();
5145
5146	while (clist) {
5147	struct sk_buff *skb = clist;
5148
5149	clist = clist->next;
5150
5151	WARN_ON(refcount_read(&skb->users));
5152	if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
5153	trace_consume_skb(skb, location: net_tx_action);
5154	else
5155	trace_kfree_skb(skb, location: net_tx_action,
5156	reason: get_kfree_skb_cb(skb)->reason);
5157
5158	if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
5159	__kfree_skb(skb);
5160	else
5161	__napi_kfree_skb(skb,
5162	reason: get_kfree_skb_cb(skb)->reason);
5163	}
5164	}
5165
5166	if (sd->output_queue) {
5167	struct Qdisc *head;
5168
5169	local_irq_disable();
5170	head = sd->output_queue;
5171	sd->output_queue = NULL;
5172	sd->output_queue_tailp = &sd->output_queue;
5173	local_irq_enable();
5174
5175	rcu_read_lock();
5176
5177	while (head) {
5178	struct Qdisc *q = head;
5179	spinlock_t *root_lock = NULL;
5180
5181	head = head->next_sched;
5182
5183	/ We need to make sure head->next_sched is read*
5184	* before clearing __QDISC_STATE_SCHED
5185	*/
5186	smp_mb__before_atomic();
5187
5188	if (!(q->flags & TCQ_F_NOLOCK)) {
5189	root_lock = qdisc_lock(qdisc: q);
5190	spin_lock(lock: root_lock);
5191	} else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
5192	&q->state))) {
5193	/ There is a synchronize_net() between*
5194	* STATE_DEACTIVATED flag being set and
5195	* qdisc_reset()/some_qdisc_is_busy() in
5196	* dev_deactivate(), so we can safely bail out
5197	* early here to avoid data race between
5198	* qdisc_deactivate() and some_qdisc_is_busy()
5199	* for lockless qdisc.
5200	*/
5201	clear_bit(nr: __QDISC_STATE_SCHED, addr: &q->state);
5202	continue;
5203	}
5204
5205	clear_bit(nr: __QDISC_STATE_SCHED, addr: &q->state);
5206	qdisc_run(q);
5207	if (root_lock)
5208	spin_unlock(lock: root_lock);
5209	}
5210
5211	rcu_read_unlock();
5212	}
5213
5214	xfrm_dev_backlog(sd);
5215	}
5216
5217	#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
5218	/ This hook is defined here for ATM LANE /
5219	int (br_fdb_test_addr_hook)(struct* net_device *dev,
5220	unsigned char *addr) __read_mostly;
5221	EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
5222	#endif
5223
5224	/**
5225	* netdev_is_rx_handler_busy - check if receive handler is registered
5226	* @dev: device to check
5227	*
5228	* Check if a receive handler is already registered for a given device.
5229	* Return true if there one.
5230	*
5231	* The caller must hold the rtnl_mutex.
5232	*/
5233	bool netdev_is_rx_handler_busy(struct net_device *dev)
5234	{
5235	ASSERT_RTNL();
5236	return dev && rtnl_dereference(dev->rx_handler);
5237	}
5238	EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
5239
5240	/**
5241	* netdev_rx_handler_register - register receive handler
5242	* @dev: device to register a handler for
5243	* @rx_handler: receive handler to register
5244	* @rx_handler_data: data pointer that is used by rx handler
5245	*
5246	* Register a receive handler for a device. This handler will then be
5247	* called from __netif_receive_skb. A negative errno code is returned
5248	* on a failure.
5249	*
5250	* The caller must hold the rtnl_mutex.
5251	*
5252	* For a general description of rx_handler, see enum rx_handler_result.
5253	*/
5254	int netdev_rx_handler_register(struct net_device *dev,
5255	rx_handler_func_t *rx_handler,
5256	void *rx_handler_data)
5257	{
5258	if (netdev_is_rx_handler_busy(dev))
5259	return -EBUSY;
5260
5261	if (dev->priv_flags & IFF_NO_RX_HANDLER)
5262	return -EINVAL;
5263
5264	/ Note: rx_handler_data must be set before rx_handler /
5265	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
5266	rcu_assign_pointer(dev->rx_handler, rx_handler);
5267
5268	return `0`;
5269	}
5270	EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
5271
5272	/**
5273	* netdev_rx_handler_unregister - unregister receive handler
5274	* @dev: device to unregister a handler from
5275	*
5276	* Unregister a receive handler from a device.
5277	*
5278	* The caller must hold the rtnl_mutex.
5279	*/
5280	void netdev_rx_handler_unregister(struct net_device *dev)
5281	{
5282
5283	ASSERT_RTNL();
5284	RCU_INIT_POINTER(dev->rx_handler, NULL);
5285	/ a reader seeing a non NULL rx_handler in a rcu_read_lock()*
5286	* section has a guarantee to see a non NULL rx_handler_data
5287	* as well.
5288	*/
5289	synchronize_net();
5290	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
5291	}
5292	EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
5293
5294	/*
5295	* Limit the use of PFMEMALLOC reserves to those protocols that implement
5296	* the special handling of PFMEMALLOC skbs.
5297	*/
5298	static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
5299	{
5300	switch (skb->protocol) {
5301	case htons(ETH_P_ARP):
5302	case htons(ETH_P_IP):
5303	case htons(ETH_P_IPV6):
5304	case htons(ETH_P_8021Q):
5305	case htons(ETH_P_8021AD):
5306	return true;
5307	default:
5308	return false;
5309	}
5310	}
5311
5312	static inline int nf_ingress(struct sk_buff skb, struct* packet_type **pt_prev,
5313	int ret, struct* net_device *orig_dev)
5314	{
5315	if (nf_hook_ingress_active(skb)) {
5316	int ingress_retval;
5317
5318	if (*pt_prev) {
5319	ret = deliver_skb(skb, pt_prev: pt_prev, orig_dev);
5320	*pt_prev = NULL;
5321	}
5322
5323	rcu_read_lock();
5324	ingress_retval = nf_hook_ingress(skb);
5325	rcu_read_unlock();
5326	return ingress_retval;
5327	}
5328	return `0`;
5329	}
5330
5331	static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
5332	struct packet_type **ppt_prev)
5333	{
5334	struct packet_type ptype, pt_prev;
5335	rx_handler_func_t *rx_handler;
5336	struct sk_buff skb = pskb;
5337	struct net_device *orig_dev;
5338	bool deliver_exact = false;
5339	int ret = NET_RX_DROP;
5340	__be16 type;
5341
5342	net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
5343
5344	trace_netif_receive_skb(skb);
5345
5346	orig_dev = skb->dev;
5347
5348	skb_reset_network_header(skb);
5349	if (!skb_transport_header_was_set(skb))
5350	skb_reset_transport_header(skb);
5351	skb_reset_mac_len(skb);
5352
5353	pt_prev = NULL;
5354
5355	another_round:
5356	skb->skb_iif = skb->dev->ifindex;
5357
5358	__this_cpu_inc(softnet_data.processed);
5359
5360	if (static_branch_unlikely(&generic_xdp_needed_key)) {
5361	int ret2;
5362
5363	migrate_disable();
5364	ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
5365	&skb);
5366	migrate_enable();
5367
5368	if (ret2 != XDP_PASS) {
5369	ret = NET_RX_DROP;
5370	goto out;
5371	}
5372	}
5373
5374	if (eth_type_vlan(ethertype: skb->protocol)) {
5375	skb = skb_vlan_untag(skb);
5376	if (unlikely(!skb))
5377	goto out;
5378	}
5379
5380	if (skb_skip_tc_classify(skb))
5381	goto skip_classify;
5382
5383	if (pfmemalloc)
5384	goto skip_taps;
5385
5386	list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
5387	if (pt_prev)
5388	ret = deliver_skb(skb, pt_prev, orig_dev);
5389	pt_prev = ptype;
5390	}
5391
5392	list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
5393	if (pt_prev)
5394	ret = deliver_skb(skb, pt_prev, orig_dev);
5395	pt_prev = ptype;
5396	}
5397
5398	skip_taps:
5399	#ifdef CONFIG_NET_INGRESS
5400	if (static_branch_unlikely(&ingress_needed_key)) {
5401	bool another = false;
5402
5403	nf_skip_egress(skb, skip: true);
5404	skb = sch_handle_ingress(skb, pt_prev: &pt_prev, ret: &ret, orig_dev,
5405	another: &another);
5406	if (another)
5407	goto another_round;
5408	if (!skb)
5409	goto out;
5410
5411	nf_skip_egress(skb, skip: false);
5412	if (nf_ingress(skb, pt_prev: &pt_prev, ret: &ret, orig_dev) < `0`)
5413	goto out;
5414	}
5415	#endif
5416	skb_reset_redirect(skb);
5417	skip_classify:
5418	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
5419	goto drop;
5420
5421	if (skb_vlan_tag_present(skb)) {
5422	if (pt_prev) {
5423	ret = deliver_skb(skb, pt_prev, orig_dev);
5424	pt_prev = NULL;
5425	}
5426	if (vlan_do_receive(skb: &skb))
5427	goto another_round;
5428	else if (unlikely(!skb))
5429	goto out;
5430	}
5431
5432	rx_handler = rcu_dereference(skb->dev->rx_handler);
5433	if (rx_handler) {
5434	if (pt_prev) {
5435	ret = deliver_skb(skb, pt_prev, orig_dev);
5436	pt_prev = NULL;
5437	}
5438	switch (rx_handler(&skb)) {
5439	case RX_HANDLER_CONSUMED:
5440	ret = NET_RX_SUCCESS;
5441	goto out;
5442	case RX_HANDLER_ANOTHER:
5443	goto another_round;
5444	case RX_HANDLER_EXACT:
5445	deliver_exact = true;
5446	break;
5447	case RX_HANDLER_PASS:
5448	break;
5449	default:
5450	BUG();
5451	}
5452	}
5453
5454	if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(dev: skb->dev)) {
5455	check_vlan_id:
5456	if (skb_vlan_tag_get_id(skb)) {
5457	/ Vlan id is non 0 and vlan_do_receive() above couldn't*
5458	* find vlan device.
5459	*/
5460	skb->pkt_type = PACKET_OTHERHOST;
5461	} else if (eth_type_vlan(ethertype: skb->protocol)) {
5462	/ Outer header is 802.1P with vlan 0, inner header is*
5463	* 802.1Q or 802.1AD and vlan_do_receive() above could
5464	* not find vlan dev for vlan id 0.
5465	*/
5466	__vlan_hwaccel_clear_tag(skb);
5467	skb = skb_vlan_untag(skb);
5468	if (unlikely(!skb))
5469	goto out;
5470	if (vlan_do_receive(skb: &skb))
5471	/ After stripping off 802.1P header with vlan 0*
5472	* vlan dev is found for inner header.
5473	*/
5474	goto another_round;
5475	else if (unlikely(!skb))
5476	goto out;
5477	else
5478	/ We have stripped outer 802.1P vlan 0 header.*
5479	* But could not find vlan dev.
5480	* check again for vlan id to set OTHERHOST.
5481	*/
5482	goto check_vlan_id;
5483	}
5484	/ Note: we might in the future use prio bits*
5485	* and set skb->priority like in vlan_do_receive()
5486	* For the time being, just ignore Priority Code Point
5487	*/
5488	__vlan_hwaccel_clear_tag(skb);
5489	}
5490
5491	type = skb->protocol;
5492
5493	/ deliver only exact match when indicated /
5494	if (likely(!deliver_exact)) {
5495	deliver_ptype_list_skb(skb, pt: &pt_prev, orig_dev, type,
5496	ptype_list: &ptype_base[ntohs(type) &
5497	PTYPE_HASH_MASK]);
5498	}
5499
5500	deliver_ptype_list_skb(skb, pt: &pt_prev, orig_dev, type,
5501	ptype_list: &orig_dev->ptype_specific);
5502
5503	if (unlikely(skb->dev != orig_dev)) {
5504	deliver_ptype_list_skb(skb, pt: &pt_prev, orig_dev, type,
5505	ptype_list: &skb->dev->ptype_specific);
5506	}
5507
5508	if (pt_prev) {
5509	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
5510	goto drop;
5511	*ppt_prev = pt_prev;
5512	} else {
5513	drop:
5514	if (!deliver_exact)
5515	dev_core_stats_rx_dropped_inc(dev: skb->dev);
5516	else
5517	dev_core_stats_rx_nohandler_inc(dev: skb->dev);
5518	kfree_skb_reason(skb, reason: SKB_DROP_REASON_UNHANDLED_PROTO);
5519	/ Jamal, now you will not able to escape explaining*
5520	* me how you were going to use this. :-)
5521	*/
5522	ret = NET_RX_DROP;
5523	}
5524
5525	out:
5526	/ The invariant here is that if ppt_prev is not NULL
5527	* then skb should also be non-NULL.
5528	*
5529	* Apparently *ppt_prev assignment above holds this invariant due to
5530	* skb dereferencing near it.
5531	*/
5532	*pskb = skb;
5533	return ret;
5534	}
5535
5536	static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
5537	{
5538	struct net_device *orig_dev = skb->dev;
5539	struct packet_type *pt_prev = NULL;
5540	int ret;
5541
5542	ret = __netif_receive_skb_core(pskb: &skb, pfmemalloc, ppt_prev: &pt_prev);
5543	if (pt_prev)
5544	ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
5545	skb->dev, pt_prev, orig_dev);
5546	return ret;
5547	}
5548
5549	/**
5550	* netif_receive_skb_core - special purpose version of netif_receive_skb
5551	* @skb: buffer to process
5552	*
5553	* More direct receive version of netif_receive_skb(). It should
5554	* only be used by callers that have a need to skip RPS and Generic XDP.
5555	* Caller must also take care of handling if ``(page_is_)pfmemalloc``.
5556	*
5557	* This function may only be called from softirq context and interrupts
5558	* should be enabled.
5559	*
5560	* Return values (usually ignored):
5561	* NET_RX_SUCCESS: no congestion
5562	* NET_RX_DROP: packet was dropped
5563	*/
5564	int netif_receive_skb_core(struct sk_buff *skb)
5565	{
5566	int ret;
5567
5568	rcu_read_lock();
5569	ret = __netif_receive_skb_one_core(skb, pfmemalloc: false);
5570	rcu_read_unlock();
5571
5572	return ret;
5573	}
5574	EXPORT_SYMBOL(netif_receive_skb_core);
5575
5576	static inline void __netif_receive_skb_list_ptype(struct list_head *head,
5577	struct packet_type *pt_prev,
5578	struct net_device *orig_dev)
5579	{
5580	struct sk_buff skb, next;
5581
5582	if (!pt_prev)
5583	return;
5584	if (list_empty(head))
5585	return;
5586	if (pt_prev->list_func != NULL)
5587	INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
5588	ip_list_rcv, head, pt_prev, orig_dev);
5589	else
5590	list_for_each_entry_safe(skb, next, head, list) {
5591	skb_list_del_init(skb);
5592	pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
5593	}
5594	}
5595
5596	static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
5597	{
5598	/ Fast-path assumptions:*
5599	* - There is no RX handler.
5600	* - Only one packet_type matches.
5601	* If either of these fails, we will end up doing some per-packet
5602	* processing in-line, then handling the 'last ptype' for the whole
5603	* sublist. This can't cause out-of-order delivery to any single ptype,
5604	* because the 'last ptype' must be constant across the sublist, and all
5605	* other ptypes are handled per-packet.
5606	*/
5607	/ Current (common) ptype of sublist /
5608	struct packet_type *pt_curr = NULL;
5609	/ Current (common) orig_dev of sublist /
5610	struct net_device *od_curr = NULL;
5611	struct list_head sublist;
5612	struct sk_buff skb, next;
5613
5614	INIT_LIST_HEAD(list: &sublist);
5615	list_for_each_entry_safe(skb, next, head, list) {
5616	struct net_device *orig_dev = skb->dev;
5617	struct packet_type *pt_prev = NULL;
5618
5619	skb_list_del_init(skb);
5620	__netif_receive_skb_core(pskb: &skb, pfmemalloc, ppt_prev: &pt_prev);
5621	if (!pt_prev)
5622	continue;
5623	if (pt_curr != pt_prev \|\| od_curr != orig_dev) {
5624	/ dispatch old sublist /
5625	__netif_receive_skb_list_ptype(head: &sublist, pt_prev: pt_curr, orig_dev: od_curr);
5626	/ start new sublist /
5627	INIT_LIST_HEAD(list: &sublist);
5628	pt_curr = pt_prev;
5629	od_curr = orig_dev;
5630	}
5631	list_add_tail(new: &skb->list, head: &sublist);
5632	}
5633
5634	/ dispatch final sublist /
5635	__netif_receive_skb_list_ptype(head: &sublist, pt_prev: pt_curr, orig_dev: od_curr);
5636	}
5637
5638	static int __netif_receive_skb(struct sk_buff *skb)
5639	{
5640	int ret;
5641
5642	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
5643	unsigned int noreclaim_flag;
5644
5645	/*
5646	* PFMEMALLOC skbs are special, they should
5647	* - be delivered to SOCK_MEMALLOC sockets only
5648	* - stay away from userspace
5649	* - have bounded memory usage
5650	*
5651	* Use PF_MEMALLOC as this saves us from propagating the allocation
5652	* context down to all allocation sites.
5653	*/
5654	noreclaim_flag = memalloc_noreclaim_save();
5655	ret = __netif_receive_skb_one_core(skb, pfmemalloc: true);
5656	memalloc_noreclaim_restore(flags: noreclaim_flag);
5657	} else
5658	ret = __netif_receive_skb_one_core(skb, pfmemalloc: false);
5659
5660	return ret;
5661	}
5662
5663	static void __netif_receive_skb_list(struct list_head *head)
5664	{
5665	unsigned long noreclaim_flag = `0`;
5666	struct sk_buff skb, next;
5667	bool pfmemalloc = false; / Is current sublist PF_MEMALLOC? /
5668
5669	list_for_each_entry_safe(skb, next, head, list) {
5670	if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
5671	struct list_head sublist;
5672
5673	/ Handle the previous sublist /
5674	list_cut_before(list: &sublist, head, entry: &skb->list);
5675	if (!list_empty(head: &sublist))
5676	__netif_receive_skb_list_core(head: &sublist, pfmemalloc);
5677	pfmemalloc = !pfmemalloc;
5678	/ See comments in __netif_receive_skb /
5679	if (pfmemalloc)
5680	noreclaim_flag = memalloc_noreclaim_save();
5681	else
5682	memalloc_noreclaim_restore(flags: noreclaim_flag);
5683	}
5684	}
5685	/ Handle the remaining sublist /
5686	if (!list_empty(head))
5687	__netif_receive_skb_list_core(head, pfmemalloc);
5688	/ Restore pflags /
5689	if (pfmemalloc)
5690	memalloc_noreclaim_restore(flags: noreclaim_flag);
5691	}
5692
5693	static int generic_xdp_install(struct net_device dev, struct* netdev_bpf *xdp)
5694	{
5695	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
5696	struct bpf_prog *new = xdp->prog;
5697	int ret = `0`;
5698
5699	switch (xdp->command) {
5700	case XDP_SETUP_PROG:
5701	rcu_assign_pointer(dev->xdp_prog, new);
5702	if (old)
5703	bpf_prog_put(prog: old);
5704
5705	if (old && !new) {
5706	static_branch_dec(&generic_xdp_needed_key);
5707	} else if (new && !old) {
5708	static_branch_inc(&generic_xdp_needed_key);
5709	dev_disable_lro(dev);
5710	dev_disable_gro_hw(dev);
5711	}
5712	break;
5713
5714	default:
5715	ret = -EINVAL;
5716	break;
5717	}
5718
5719	return ret;
5720	}
5721
5722	static int netif_receive_skb_internal(struct sk_buff *skb)
5723	{
5724	int ret;
5725
5726	net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
5727
5728	if (skb_defer_rx_timestamp(skb))
5729	return NET_RX_SUCCESS;
5730
5731	rcu_read_lock();
5732	#ifdef CONFIG_RPS
5733	if (static_branch_unlikely(&rps_needed)) {
5734	struct rps_dev_flow voidflow, *rflow = &voidflow;
5735	int cpu = get_rps_cpu(dev: skb->dev, skb, rflowp: &rflow);
5736
5737	if (cpu >= `0`) {
5738	ret = enqueue_to_backlog(skb, cpu, qtail: &rflow->last_qtail);
5739	rcu_read_unlock();
5740	return ret;
5741	}
5742	}
5743	#endif
5744	ret = __netif_receive_skb(skb);
5745	rcu_read_unlock();
5746	return ret;
5747	}
5748
5749	void netif_receive_skb_list_internal(struct list_head *head)
5750	{
5751	struct sk_buff skb, next;
5752	struct list_head sublist;
5753
5754	INIT_LIST_HEAD(list: &sublist);
5755	list_for_each_entry_safe(skb, next, head, list) {
5756	net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
5757	skb);
5758	skb_list_del_init(skb);
5759	if (!skb_defer_rx_timestamp(skb))
5760	list_add_tail(new: &skb->list, head: &sublist);
5761	}
5762	list_splice_init(list: &sublist, head);
5763
5764	rcu_read_lock();
5765	#ifdef CONFIG_RPS
5766	if (static_branch_unlikely(&rps_needed)) {
5767	list_for_each_entry_safe(skb, next, head, list) {
5768	struct rps_dev_flow voidflow, *rflow = &voidflow;
5769	int cpu = get_rps_cpu(dev: skb->dev, skb, rflowp: &rflow);
5770
5771	if (cpu >= `0`) {
5772	/ Will be handled, remove from list /
5773	skb_list_del_init(skb);
5774	enqueue_to_backlog(skb, cpu, qtail: &rflow->last_qtail);
5775	}
5776	}
5777	}
5778	#endif
5779	__netif_receive_skb_list(head);
5780	rcu_read_unlock();
5781	}
5782
/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds. The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
	int ret;

	trace_netif_receive_skb_entry(skb);

	ret = netif_receive_skb_internal(skb);
	trace_netif_receive_skb_exit(ret);

	return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
5810
5811	/**
5812	* netif_receive_skb_list - process many receive buffers from network
5813	* @head: list of skbs to process.
5814	*
5815	* Since return value of netif_receive_skb() is normally ignored, and
5816	* wouldn't be meaningful for a list, this function returns void.
5817	*
5818	* This function may only be called from softirq context and interrupts
5819	* should be enabled.
5820	*/
5821	void netif_receive_skb_list(struct list_head *head)
5822	{
5823	struct sk_buff *skb;
5824
5825	if (list_empty(head))
5826	return;
5827	if (trace_netif_receive_skb_list_entry_enabled()) {
5828	list_for_each_entry(skb, head, list)
5829	trace_netif_receive_skb_list_entry(skb);
5830	}
5831	netif_receive_skb_list_internal(head);
5832	trace_netif_receive_skb_list_exit(ret: `0`);
5833	}
5834	EXPORT_SYMBOL(netif_receive_skb_list);
5835
5836	static DEFINE_PER_CPU(struct work_struct, flush_works);
5837
5838	/ Network device is going away, flush any packets still pending /
5839	static void flush_backlog(struct work_struct *work)
5840	{
5841	struct sk_buff skb, tmp;
5842	struct softnet_data *sd;
5843
5844	local_bh_disable();
5845	sd = this_cpu_ptr(&softnet_data);
5846
5847	rps_lock_irq_disable(sd);
5848	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5849	if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5850	__skb_unlink(skb, list: &sd->input_pkt_queue);
5851	dev_kfree_skb_irq(skb);
5852	input_queue_head_incr(sd);
5853	}
5854	}
5855	rps_unlock_irq_enable(sd);
5856
5857	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5858	if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5859	__skb_unlink(skb, list: &sd->process_queue);
5860	kfree_skb(skb);
5861	input_queue_head_incr(sd);
5862	}
5863	}
5864	local_bh_enable();
5865	}
5866
5867	static bool flush_required(int cpu)
5868	{
5869	#if IS_ENABLED(CONFIG_RPS)
5870	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
5871	bool do_flush;
5872
5873	rps_lock_irq_disable(sd);
5874
5875	/ as insertion into process_queue happens with the rps lock held,*
5876	* process_queue access may race only with dequeue
5877	*/
5878	do_flush = !skb_queue_empty(list: &sd->input_pkt_queue) \|\|
5879	!skb_queue_empty_lockless(list: &sd->process_queue);
5880	rps_unlock_irq_enable(sd);
5881
5882	return do_flush;
5883	#endif
5884	/ without RPS we can't safely check input_pkt_queue: during a*
5885	* concurrent remote skb_queue_splice() we can detect as empty both
5886	* input_pkt_queue and process_queue even if the latter could end-up
5887	* containing a lot of packets.
5888	*/
5889	return true;
5890	}
5891
5892	static void flush_all_backlogs(void)
5893	{
5894	static cpumask_t flush_cpus;
5895	unsigned int cpu;
5896
5897	/ since we are under rtnl lock protection we can use static data*
5898	* for the cpumask and avoid allocating on stack the possibly
5899	* large mask
5900	*/
5901	ASSERT_RTNL();
5902
5903	cpus_read_lock();
5904
5905	cpumask_clear(dstp: &flush_cpus);
5906	for_each_online_cpu(cpu) {
5907	if (flush_required(cpu)) {
5908	queue_work_on(cpu, wq: system_highpri_wq,
5909	per_cpu_ptr(&flush_works, cpu));
5910	cpumask_set_cpu(cpu, dstp: &flush_cpus);
5911	}
5912	}
5913
5914	/ we can have in flight packet[s] on the cpus we are not flushing,*
5915	* synchronize_net() in unregister_netdevice_many() will take care of
5916	* them
5917	*/
5918	for_each_cpu(cpu, &flush_cpus)
5919	flush_work(per_cpu_ptr(&flush_works, cpu));
5920
5921	cpus_read_unlock();
5922	}
5923
/*
 * Walk the rps_ipi_next chain starting at @remsd and send an async
 * single-CPU function call to each entry whose CPU is online.
 * Compiles to an empty function without CONFIG_RPS.
 */
static void net_rps_send_ipi(struct softnet_data *remsd)
{
#ifdef CONFIG_RPS
	while (remsd) {
		/* Read the link before kicking the remote CPU. */
		struct softnet_data *next = remsd->rps_ipi_next;

		if (cpu_online(remsd->cpu))
			smp_call_function_single_async(remsd->cpu, &remsd->csd);
		remsd = next;
	}
#endif
}
5936
/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	struct softnet_data *remsd = sd->rps_ipi_list;

	if (remsd) {
		/* Detach the pending list before irqs are re-enabled. */
		sd->rps_ipi_list = NULL;

		local_irq_enable();

		/* Send pending IPI's to kick RPS processing on remote cpus. */
		net_rps_send_ipi(remsd);
	} else
#endif
		local_irq_enable();
}
5957
5958	static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
5959	{
5960	#ifdef CONFIG_RPS
5961	return sd->rps_ipi_list != NULL;
5962	#else
5963	return false;
5964	#endif
5965	}
5966
5967	static int process_backlog(struct napi_struct napi, int* quota)
5968	{
5969	struct softnet_data sd = container_of(napi, struct* softnet_data, backlog);
5970	bool again = true;
5971	int work = `0`;
5972
5973	/ Check if we have pending ipi, its better to send them now,*
5974	* not waiting net_rx_action() end.
5975	*/
5976	if (sd_has_rps_ipi_waiting(sd)) {
5977	local_irq_disable();
5978	net_rps_action_and_irq_enable(sd);
5979	}
5980
5981	napi->weight = READ_ONCE(net_hotdata.dev_rx_weight);
5982	while (again) {
5983	struct sk_buff *skb;
5984
5985	while ((skb = __skb_dequeue(list: &sd->process_queue))) {
5986	rcu_read_lock();
5987	__netif_receive_skb(skb);
5988	rcu_read_unlock();
5989	input_queue_head_incr(sd);
5990	if (++work >= quota)
5991	return work;
5992
5993	}
5994
5995	rps_lock_irq_disable(sd);
5996	if (skb_queue_empty(list: &sd->input_pkt_queue)) {
5997	/*
5998	* Inline a custom version of __napi_complete().
5999	* only current cpu owns and manipulates this napi,
6000	* and NAPI_STATE_SCHED is the only possible flag set
6001	* on backlog.
6002	* We can use a plain write instead of clear_bit(),
6003	* and we dont need an smp_mb() memory barrier.
6004	*/
6005	napi->state = `0`;
6006	again = false;
6007	} else {
6008	skb_queue_splice_tail_init(list: &sd->input_pkt_queue,
6009	head: &sd->process_queue);
6010	}
6011	rps_unlock_irq_enable(sd);
6012	}
6013
6014	return work;
6015	}
6016
6017	/**
6018	* __napi_schedule - schedule for receive
6019	* @n: entry to schedule
6020	*
6021	* The entry's receive function will be scheduled to run.
6022	* Consider using __napi_schedule_irqoff() if hard irqs are masked.
6023	*/
6024	void __napi_schedule(struct napi_struct *n)
6025	{
6026	unsigned long flags;
6027
6028	local_irq_save(flags);
6029	____napi_schedule(this_cpu_ptr(&softnet_data), napi: n);
6030	local_irq_restore(flags);
6031	}
6032	EXPORT_SYMBOL(__napi_schedule);
6033
6034	/**
6035	* napi_schedule_prep - check if napi can be scheduled
6036	* @n: napi context
6037	*
6038	* Test if NAPI routine is already running, and if not mark
6039	* it as running. This is used as a condition variable to
6040	* insure only one NAPI poll instance runs. We also make
6041	* sure there is no pending NAPI disable.
6042	*/
6043	bool napi_schedule_prep(struct napi_struct *n)
6044	{
6045	unsigned long new, val = READ_ONCE(n->state);
6046
6047	do {
6048	if (unlikely(val & NAPIF_STATE_DISABLE))
6049	return false;
6050	new = val \| NAPIF_STATE_SCHED;
6051
6052	/ Sets STATE_MISSED bit if STATE_SCHED was already set*
6053	* This was suggested by Alexander Duyck, as compiler
6054	* emits better code than :
6055	* if (val & NAPIF_STATE_SCHED)
6056	* new \|= NAPIF_STATE_MISSED;
6057	*/
6058	new \|= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
6059	NAPIF_STATE_MISSED;
6060	} while (!try_cmpxchg(&n->state, &val, new));
6061
6062	return !(val & NAPIF_STATE_SCHED);
6063	}
6064	EXPORT_SYMBOL(napi_schedule_prep);
6065
6066	/**
6067	* __napi_schedule_irqoff - schedule for receive
6068	* @n: entry to schedule
6069	*
6070	* Variant of __napi_schedule() assuming hard irqs are masked.
6071	*
6072	* On PREEMPT_RT enabled kernels this maps to __napi_schedule()
6073	* because the interrupt disabled assumption might not be true
6074	* due to force-threaded interrupts and spinlock substitution.
6075	*/
6076	void __napi_schedule_irqoff(struct napi_struct *n)
6077	{
6078	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
6079	____napi_schedule(this_cpu_ptr(&softnet_data), napi: n);
6080	else
6081	__napi_schedule(n);
6082	}
6083	EXPORT_SYMBOL(__napi_schedule_irqoff);
6084
6085	bool napi_complete_done(struct napi_struct n, int* work_done)
6086	{
6087	unsigned long flags, val, new, timeout = `0`;
6088	bool ret = true;
6089
6090	/*
6091	* 1) Don't let napi dequeue from the cpu poll list
6092	* just in case its running on a different cpu.
6093	* 2) If we are busy polling, do nothing here, we have
6094	* the guarantee we will be called later.
6095	*/
6096	if (unlikely(n->state & (NAPIF_STATE_NPSVC \|
6097	NAPIF_STATE_IN_BUSY_POLL)))
6098	return false;
6099
6100	if (work_done) {
6101	if (n->gro_bitmask)
6102	timeout = READ_ONCE(n->dev->gro_flush_timeout);
6103	n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
6104	}
6105	if (n->defer_hard_irqs_count > `0`) {
6106	n->defer_hard_irqs_count--;
6107	timeout = READ_ONCE(n->dev->gro_flush_timeout);
6108	if (timeout)
6109	ret = false;
6110	}
6111	if (n->gro_bitmask) {
6112	/ When the NAPI instance uses a timeout and keeps postponing*
6113	* it, we need to bound somehow the time packets are kept in
6114	* the GRO layer
6115	*/
6116	napi_gro_flush(napi: n, flush_old: !!timeout);
6117	}
6118
6119	gro_normal_list(napi: n);
6120
6121	if (unlikely(!list_empty(&n->poll_list))) {
6122	/ If n->poll_list is not empty, we need to mask irqs /
6123	local_irq_save(flags);
6124	list_del_init(entry: &n->poll_list);
6125	local_irq_restore(flags);
6126	}
6127	WRITE_ONCE(n->list_owner, -`1`);
6128
6129	val = READ_ONCE(n->state);
6130	do {
6131	WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
6132
6133	new = val & ~(NAPIF_STATE_MISSED \| NAPIF_STATE_SCHED \|
6134	NAPIF_STATE_SCHED_THREADED \|
6135	NAPIF_STATE_PREFER_BUSY_POLL);
6136
6137	/ If STATE_MISSED was set, leave STATE_SCHED set,*
6138	* because we will call napi->poll() one more time.
6139	* This C code was suggested by Alexander Duyck to help gcc.
6140	*/
6141	new \|= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
6142	NAPIF_STATE_SCHED;
6143	} while (!try_cmpxchg(&n->state, &val, new));
6144
6145	if (unlikely(val & NAPIF_STATE_MISSED)) {
6146	__napi_schedule(n);
6147	return false;
6148	}
6149
6150	if (timeout)
6151	hrtimer_start(timer: &n->timer, tim: ns_to_ktime(ns: timeout),
6152	mode: HRTIMER_MODE_REL_PINNED);
6153	return ret;
6154	}
6155	EXPORT_SYMBOL(napi_complete_done);
6156
6157	/ must be called under rcu_read_lock(), as we dont take a reference /
6158	struct napi_struct napi_by_id(unsigned* int napi_id)
6159	{
6160	unsigned int hash = napi_id % HASH_SIZE(napi_hash);
6161	struct napi_struct *napi;
6162
6163	hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
6164	if (napi->napi_id == napi_id)
6165	return napi;
6166
6167	return NULL;
6168	}
6169
6170	static void skb_defer_free_flush(struct softnet_data *sd)
6171	{
6172	struct sk_buff skb, next;
6173
6174	/ Paired with WRITE_ONCE() in skb_attempt_defer_free() /
6175	if (!READ_ONCE(sd->defer_list))
6176	return;
6177
6178	spin_lock(lock: &sd->defer_lock);
6179	skb = sd->defer_list;
6180	sd->defer_list = NULL;
6181	sd->defer_count = `0`;
6182	spin_unlock(lock: &sd->defer_lock);
6183
6184	while (skb != NULL) {
6185	next = skb->next;
6186	napi_consume_skb(skb, budget: `1`);
6187	skb = next;
6188	}
6189	}
6190
6191	#if defined(CONFIG_NET_RX_BUSY_POLL)
6192
6193	static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
6194	{
6195	if (!skip_schedule) {
6196	gro_normal_list(napi);
6197	__napi_schedule(napi);
6198	return;
6199	}
6200
6201	if (napi->gro_bitmask) {
6202	/ flush too old packets*
6203	* If HZ < 1000, flush all packets.
6204	*/
6205	napi_gro_flush(napi, HZ >= `1000`);
6206	}
6207
6208	gro_normal_list(napi);
6209	clear_bit(nr: NAPI_STATE_SCHED, addr: &napi->state);
6210	}
6211
/* Flag bits for the "flags" argument of the busy-poll helpers. */
enum {
	NAPI_F_PREFER_BUSY_POLL	= 1,
	NAPI_F_END_ON_RESCHED	= 2,
};
6216
6217	static void busy_poll_stop(struct napi_struct napi, void* *have_poll_lock,
6218	unsigned flags, u16 budget)
6219	{
6220	bool skip_schedule = false;
6221	unsigned long timeout;
6222	int rc;
6223
6224	/ Busy polling means there is a high chance device driver hard irq*
6225	* could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
6226	* set in napi_schedule_prep().
6227	* Since we are about to call napi->poll() once more, we can safely
6228	* clear NAPI_STATE_MISSED.
6229	*
6230	* Note: x86 could use a single "lock and ..." instruction
6231	* to perform these two clear_bit()
6232	*/
6233	clear_bit(nr: NAPI_STATE_MISSED, addr: &napi->state);
6234	clear_bit(nr: NAPI_STATE_IN_BUSY_POLL, addr: &napi->state);
6235
6236	local_bh_disable();
6237
6238	if (flags & NAPI_F_PREFER_BUSY_POLL) {
6239	napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
6240	timeout = READ_ONCE(napi->dev->gro_flush_timeout);
6241	if (napi->defer_hard_irqs_count && timeout) {
6242	hrtimer_start(timer: &napi->timer, tim: ns_to_ktime(ns: timeout), mode: HRTIMER_MODE_REL_PINNED);
6243	skip_schedule = true;
6244	}
6245	}
6246
6247	/ All we really want here is to re-enable device interrupts.*
6248	* Ideally, a new ndo_busy_poll_stop() could avoid another round.
6249	*/
6250	rc = napi->poll(napi, budget);
6251	/ We can't gro_normal_list() here, because napi->poll() might have*
6252	* rearmed the napi (napi_complete_done()) in which case it could
6253	* already be running on another CPU.
6254	*/
6255	trace_napi_poll(napi, work: rc, budget);
6256	netpoll_poll_unlock(have: have_poll_lock);
6257	if (rc == budget)
6258	__busy_poll_stop(napi, skip_schedule);
6259	local_bh_enable();
6260	}
6261
6262	static void __napi_busy_loop(unsigned int napi_id,
6263	bool (loop_end)(void* , unsigned* long),
6264	void loop_end_arg, unsigned* flags, u16 budget)
6265	{
6266	unsigned long start_time = loop_end ? busy_loop_current_time() : `0`;
6267	int (napi_poll)(struct* napi_struct napi, int* budget);
6268	void *have_poll_lock = NULL;
6269	struct napi_struct *napi;
6270
6271	WARN_ON_ONCE(!rcu_read_lock_held());
6272
6273	restart:
6274	napi_poll = NULL;
6275
6276	napi = napi_by_id(napi_id);
6277	if (!napi)
6278	return;
6279
6280	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
6281	preempt_disable();
6282	for (;;) {
6283	int work = `0`;
6284
6285	local_bh_disable();
6286	if (!napi_poll) {
6287	unsigned long val = READ_ONCE(napi->state);
6288
6289	/ If multiple threads are competing for this napi,*
6290	* we avoid dirtying napi->state as much as we can.
6291	*/
6292	if (val & (NAPIF_STATE_DISABLE \| NAPIF_STATE_SCHED \|
6293	NAPIF_STATE_IN_BUSY_POLL)) {
6294	if (flags & NAPI_F_PREFER_BUSY_POLL)
6295	set_bit(nr: NAPI_STATE_PREFER_BUSY_POLL, addr: &napi->state);
6296	goto count;
6297	}
6298	if (cmpxchg(&napi->state, val,
6299	val \| NAPIF_STATE_IN_BUSY_POLL \|
6300	NAPIF_STATE_SCHED) != val) {
6301	if (flags & NAPI_F_PREFER_BUSY_POLL)
6302	set_bit(nr: NAPI_STATE_PREFER_BUSY_POLL, addr: &napi->state);
6303	goto count;
6304	}
6305	have_poll_lock = netpoll_poll_lock(napi);
6306	napi_poll = napi->poll;
6307	}
6308	work = napi_poll(napi, budget);
6309	trace_napi_poll(napi, work, budget);
6310	gro_normal_list(napi);
6311	count:
6312	if (work > `0`)
6313	__NET_ADD_STATS(dev_net(napi->dev),
6314	LINUX_MIB_BUSYPOLLRXPACKETS, work);
6315	skb_defer_free_flush(this_cpu_ptr(&softnet_data));
6316	local_bh_enable();
6317
6318	if (!loop_end \|\| loop_end(loop_end_arg, start_time))
6319	break;
6320
6321	if (unlikely(need_resched())) {
6322	if (flags & NAPI_F_END_ON_RESCHED)
6323	break;
6324	if (napi_poll)
6325	busy_poll_stop(napi, have_poll_lock, flags, budget);
6326	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
6327	preempt_enable();
6328	rcu_read_unlock();
6329	cond_resched();
6330	rcu_read_lock();
6331	if (loop_end(loop_end_arg, start_time))
6332	return;
6333	goto restart;
6334	}
6335	cpu_relax();
6336	}
6337	if (napi_poll)
6338	busy_poll_stop(napi, have_poll_lock, flags, budget);
6339	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
6340	preempt_enable();
6341	}
6342
6343	void napi_busy_loop_rcu(unsigned int napi_id,
6344	bool (loop_end)(void* , unsigned* long),
6345	void *loop_end_arg, bool prefer_busy_poll, u16 budget)
6346	{
6347	unsigned flags = NAPI_F_END_ON_RESCHED;
6348
6349	if (prefer_busy_poll)
6350	flags \|= NAPI_F_PREFER_BUSY_POLL;
6351
6352	__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
6353	}
6354
6355	void napi_busy_loop(unsigned int napi_id,
6356	bool (loop_end)(void* , unsigned* long),
6357	void *loop_end_arg, bool prefer_busy_poll, u16 budget)
6358	{
6359	unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : `0`;
6360
6361	rcu_read_lock();
6362	__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
6363	rcu_read_unlock();
6364	}
6365	EXPORT_SYMBOL(napi_busy_loop);
6366
6367	#endif /* CONFIG_NET_RX_BUSY_POLL */
6368
6369	static void napi_hash_add(struct napi_struct *napi)
6370	{
6371	if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
6372	return;
6373
6374	spin_lock(lock: &napi_hash_lock);
6375
6376	/ 0..NR_CPUS range is reserved for sender_cpu use /
6377	do {
6378	if (unlikely(++napi_gen_id < MIN_NAPI_ID))
6379	napi_gen_id = MIN_NAPI_ID;
6380	} while (napi_by_id(napi_id: napi_gen_id));
6381	napi->napi_id = napi_gen_id;
6382
6383	hlist_add_head_rcu(n: &napi->napi_hash_node,
6384	h: &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6385
6386	spin_unlock(lock: &napi_hash_lock);
6387	}
6388
6389	/ Warning : caller is responsible to make sure rcu grace period*
6390	* is respected before freeing memory containing @napi
6391	*/
6392	static void napi_hash_del(struct napi_struct *napi)
6393	{
6394	spin_lock(lock: &napi_hash_lock);
6395
6396	hlist_del_init_rcu(n: &napi->napi_hash_node);
6397
6398	spin_unlock(lock: &napi_hash_lock);
6399	}
6400
6401	static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
6402	{
6403	struct napi_struct *napi;
6404
6405	napi = container_of(timer, struct napi_struct, timer);
6406
6407	/ Note : we use a relaxed variant of napi_schedule_prep() not setting*
6408	* NAPI_STATE_MISSED, since we do not react to a device IRQ.
6409	*/
6410	if (!napi_disable_pending(n: napi) &&
6411	!test_and_set_bit(nr: NAPI_STATE_SCHED, addr: &napi->state)) {
6412	clear_bit(nr: NAPI_STATE_PREFER_BUSY_POLL, addr: &napi->state);
6413	__napi_schedule_irqoff(napi);
6414	}
6415
6416	return HRTIMER_NORESTART;
6417	}
6418
6419	static void init_gro_hash(struct napi_struct *napi)
6420	{
6421	int i;
6422
6423	for (i = `0`; i < GRO_HASH_BUCKETS; i++) {
6424	INIT_LIST_HEAD(list: &napi->gro_hash[i].list);
6425	napi->gro_hash[i].count = `0`;
6426	}
6427	napi->gro_bitmask = `0`;
6428	}
6429
6430	int dev_set_threaded(struct net_device *dev, bool threaded)
6431	{
6432	struct napi_struct *napi;
6433	int err = `0`;
6434
6435	if (dev->threaded == threaded)
6436	return `0`;
6437
6438	if (threaded) {
6439	list_for_each_entry(napi, &dev->napi_list, dev_list) {
6440	if (!napi->thread) {
6441	err = napi_kthread_create(n: napi);
6442	if (err) {
6443	threaded = false;
6444	break;
6445	}
6446	}
6447	}
6448	}
6449
6450	dev->threaded = threaded;
6451
6452	/ Make sure kthread is created before THREADED bit*
6453	* is set.
6454	*/
6455	smp_mb__before_atomic();
6456
6457	/ Setting/unsetting threaded mode on a napi might not immediately*
6458	* take effect, if the current napi instance is actively being
6459	* polled. In this case, the switch between threaded mode and
6460	* softirq mode will happen in the next round of napi_schedule().
6461	* This should not cause hiccups/stalls to the live traffic.
6462	*/
6463	list_for_each_entry(napi, &dev->napi_list, dev_list)
6464	assign_bit(nr: NAPI_STATE_THREADED, addr: &napi->state, value: threaded);
6465
6466	return err;
6467	}
6468	EXPORT_SYMBOL(dev_set_threaded);
6469
6470	/**
6471	* netif_queue_set_napi - Associate queue with the napi
6472	* @dev: device to which NAPI and queue belong
6473	* @queue_index: Index of queue
6474	* @type: queue type as RX or TX
6475	* @napi: NAPI context, pass NULL to clear previously set NAPI
6476	*
6477	* Set queue with its corresponding napi context. This should be done after
6478	* registering the NAPI handler for the queue-vector and the queues have been
6479	* mapped to the corresponding interrupt vector.
6480	*/
6481	void netif_queue_set_napi(struct net_device dev, unsigned* int queue_index,
6482	enum netdev_queue_type type, struct napi_struct *napi)
6483	{
6484	struct netdev_rx_queue *rxq;
6485	struct netdev_queue *txq;
6486
6487	if (WARN_ON_ONCE(napi && !napi->dev))
6488	return;
6489	if (dev->reg_state >= NETREG_REGISTERED)
6490	ASSERT_RTNL();
6491
6492	switch (type) {
6493	case NETDEV_QUEUE_TYPE_RX:
6494	rxq = __netif_get_rx_queue(dev, rxq: queue_index);
6495	rxq->napi = napi;
6496	return;
6497	case NETDEV_QUEUE_TYPE_TX:
6498	txq = netdev_get_tx_queue(dev, index: queue_index);
6499	txq->napi = napi;
6500	return;
6501	default:
6502	return;
6503	}
6504	}
6505	EXPORT_SYMBOL(netif_queue_set_napi);
6506
6507	void netif_napi_add_weight(struct net_device dev, struct* napi_struct *napi,
6508	int (poll)(struct* napi_struct , int), int* weight)
6509	{
6510	if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
6511	return;
6512
6513	INIT_LIST_HEAD(list: &napi->poll_list);
6514	INIT_HLIST_NODE(h: &napi->napi_hash_node);
6515	hrtimer_init(timer: &napi->timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL_PINNED);
6516	napi->timer.function = napi_watchdog;
6517	init_gro_hash(napi);
6518	napi->skb = NULL;
6519	INIT_LIST_HEAD(list: &napi->rx_list);
6520	napi->rx_count = `0`;
6521	napi->poll = poll;
6522	if (weight > NAPI_POLL_WEIGHT)
6523	netdev_err_once(dev, "%s() called with weight %d\n", __func__,
6524	weight);
6525	napi->weight = weight;
6526	napi->dev = dev;
6527	#ifdef CONFIG_NETPOLL
6528	napi->poll_owner = -`1`;
6529	#endif
6530	napi->list_owner = -`1`;
6531	set_bit(nr: NAPI_STATE_SCHED, addr: &napi->state);
6532	set_bit(nr: NAPI_STATE_NPSVC, addr: &napi->state);
6533	list_add_rcu(new: &napi->dev_list, head: &dev->napi_list);
6534	napi_hash_add(napi);
6535	napi_get_frags_check(napi);
6536	/ Create kthread for this napi if dev->threaded is set.*
6537	* Clear dev->threaded if kthread creation failed so that
6538	* threaded mode will not be enabled in napi_enable().
6539	*/
6540	if (dev->threaded && napi_kthread_create(n: napi))
6541	dev->threaded = `0`;
6542	netif_napi_set_irq(napi, irq: -`1`);
6543	}
6544	EXPORT_SYMBOL(netif_napi_add_weight);
6545
6546	void napi_disable(struct napi_struct *n)
6547	{
6548	unsigned long val, new;
6549
6550	might_sleep();
6551	set_bit(nr: NAPI_STATE_DISABLE, addr: &n->state);
6552
6553	val = READ_ONCE(n->state);
6554	do {
6555	while (val & (NAPIF_STATE_SCHED \| NAPIF_STATE_NPSVC)) {
6556	usleep_range(min: `20`, max: `200`);
6557	val = READ_ONCE(n->state);
6558	}
6559
6560	new = val \| NAPIF_STATE_SCHED \| NAPIF_STATE_NPSVC;
6561	new &= ~(NAPIF_STATE_THREADED \| NAPIF_STATE_PREFER_BUSY_POLL);
6562	} while (!try_cmpxchg(&n->state, &val, new));
6563
6564	hrtimer_cancel(timer: &n->timer);
6565
6566	clear_bit(nr: NAPI_STATE_DISABLE, addr: &n->state);
6567	}
6568	EXPORT_SYMBOL(napi_disable);
6569
6570	/**
6571	* napi_enable - enable NAPI scheduling
6572	* @n: NAPI context
6573	*
6574	* Resume NAPI from being scheduled on this context.
6575	* Must be paired with napi_disable.
6576	*/
6577	void napi_enable(struct napi_struct *n)
6578	{
6579	unsigned long new, val = READ_ONCE(n->state);
6580
6581	do {
6582	BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
6583
6584	new = val & ~(NAPIF_STATE_SCHED \| NAPIF_STATE_NPSVC);
6585	if (n->dev->threaded && n->thread)
6586	new \|= NAPIF_STATE_THREADED;
6587	} while (!try_cmpxchg(&n->state, &val, new));
6588	}
6589	EXPORT_SYMBOL(napi_enable);
6590
6591	static void flush_gro_hash(struct napi_struct *napi)
6592	{
6593	int i;
6594
6595	for (i = `0`; i < GRO_HASH_BUCKETS; i++) {
6596	struct sk_buff skb, n;
6597
6598	list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
6599	kfree_skb(skb);
6600	napi->gro_hash[i].count = `0`;
6601	}
6602	}
6603
6604	/ Must be called in process context /
6605	void __netif_napi_del(struct napi_struct *napi)
6606	{
6607	if (!test_and_clear_bit(nr: NAPI_STATE_LISTED, addr: &napi->state))
6608	return;
6609
6610	napi_hash_del(napi);
6611	list_del_rcu(entry: &napi->dev_list);
6612	napi_free_frags(napi);
6613
6614	flush_gro_hash(napi);
6615	napi->gro_bitmask = `0`;
6616
6617	if (napi->thread) {
6618	kthread_stop(k: napi->thread);
6619	napi->thread = NULL;
6620	}
6621	}
6622	EXPORT_SYMBOL(__netif_napi_del);
6623
6624	static int __napi_poll(struct napi_struct n, bool repoll)
6625	{
6626	int work, weight;
6627
6628	weight = n->weight;
6629
6630	/ This NAPI_STATE_SCHED test is for avoiding a race*
6631	* with netpoll's poll_napi(). Only the entity which
6632	* obtains the lock and sees NAPI_STATE_SCHED set will
6633	* actually make the ->poll() call. Therefore we avoid
6634	* accidentally calling ->poll() when NAPI is not scheduled.
6635	*/
6636	work = `0`;
6637	if (napi_is_scheduled(n)) {
6638	work = n->poll(n, weight);
6639	trace_napi_poll(napi: n, work, budget: weight);
6640
6641	xdp_do_check_flushed(napi: n);
6642	}
6643
6644	if (unlikely(work > weight))
6645	netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
6646	n->poll, work, weight);
6647
6648	if (likely(work < weight))
6649	return work;
6650
6651	/ Drivers must not modify the NAPI state if they*
6652	* consume the entire weight. In such cases this code
6653	* still "owns" the NAPI instance and therefore can
6654	* move the instance around on the list at-will.
6655	*/
6656	if (unlikely(napi_disable_pending(n))) {
6657	napi_complete(n);
6658	return work;
6659	}
6660
6661	/ The NAPI context has more processing work, but busy-polling*
6662	* is preferred. Exit early.
6663	*/
6664	if (napi_prefer_busy_poll(n)) {
6665	if (napi_complete_done(n, work)) {
6666	/ If timeout is not set, we need to make sure*
6667	* that the NAPI is re-scheduled.
6668	*/
6669	napi_schedule(n);
6670	}
6671	return work;
6672	}
6673
6674	if (n->gro_bitmask) {
6675	/ flush too old packets*
6676	* If HZ < 1000, flush all packets.
6677	*/
6678	napi_gro_flush(napi: n, HZ >= `1000`);
6679	}
6680
6681	gro_normal_list(napi: n);
6682
6683	/ Some drivers may have called napi_schedule*
6684	* prior to exhausting their budget.
6685	*/
6686	if (unlikely(!list_empty(&n->poll_list))) {
6687	pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
6688	n->dev ? n->dev->name : "backlog");
6689	return work;
6690	}
6691
6692	*repoll = true;
6693
6694	return work;
6695	}
6696
6697	static int napi_poll(struct napi_struct n, struct* list_head *repoll)
6698	{
6699	bool do_repoll = false;
6700	void *have;
6701	int work;
6702
6703	list_del_init(entry: &n->poll_list);
6704
6705	have = netpoll_poll_lock(napi: n);
6706
6707	work = __napi_poll(n, repoll: &do_repoll);
6708
6709	if (do_repoll)
6710	list_add_tail(new: &n->poll_list, head: repoll);
6711
6712	netpoll_poll_unlock(have);
6713
6714	return work;
6715	}
6716
6717	static int napi_thread_wait(struct napi_struct *napi)
6718	{
6719	bool woken = false;
6720
6721	set_current_state(TASK_INTERRUPTIBLE);
6722
6723	while (!kthread_should_stop()) {
6724	/ Testing SCHED_THREADED bit here to make sure the current*
6725	* kthread owns this napi and could poll on this napi.
6726	* Testing SCHED bit is not enough because SCHED bit might be
6727	* set by some other busy poll thread or by napi_disable().
6728	*/
6729	if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) \|\| woken) {
6730	WARN_ON(!list_empty(&napi->poll_list));
6731	__set_current_state(TASK_RUNNING);
6732	return `0`;
6733	}
6734
6735	schedule();
6736	/ woken being true indicates this thread owns this napi. /
6737	woken = true;
6738	set_current_state(TASK_INTERRUPTIBLE);
6739	}
6740	__set_current_state(TASK_RUNNING);
6741
6742	return -`1`;
6743	}
6744
6745	static int napi_threaded_poll(void *data)
6746	{
6747	struct napi_struct *napi = data;
6748	struct softnet_data *sd;
6749	void *have;
6750
6751	while (!napi_thread_wait(napi)) {
6752	unsigned long last_qs = jiffies;
6753
6754	for (;;) {
6755	bool repoll = false;
6756
6757	local_bh_disable();
6758	sd = this_cpu_ptr(&softnet_data);
6759	sd->in_napi_threaded_poll = true;
6760
6761	have = netpoll_poll_lock(napi);
6762	__napi_poll(n: napi, repoll: &repoll);
6763	netpoll_poll_unlock(have);
6764
6765	sd->in_napi_threaded_poll = false;
6766	barrier();
6767
6768	if (sd_has_rps_ipi_waiting(sd)) {
6769	local_irq_disable();
6770	net_rps_action_and_irq_enable(sd);
6771	}
6772	skb_defer_free_flush(sd);
6773	local_bh_enable();
6774
6775	if (!repoll)
6776	break;
6777
6778	rcu_softirq_qs_periodic(last_qs);
6779	cond_resched();
6780	}
6781	}
6782	return `0`;
6783	}
6784
6785	static __latent_entropy void net_rx_action(struct softirq_action *h)
6786	{
6787	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
6788	unsigned long time_limit = jiffies +
6789	usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs));
6790	int budget = READ_ONCE(net_hotdata.netdev_budget);
6791	LIST_HEAD(list);
6792	LIST_HEAD(repoll);
6793
6794	start:
6795	sd->in_net_rx_action = true;
6796	local_irq_disable();
6797	list_splice_init(list: &sd->poll_list, head: &list);
6798	local_irq_enable();
6799
6800	for (;;) {
6801	struct napi_struct *n;
6802
6803	skb_defer_free_flush(sd);
6804
6805	if (list_empty(head: &list)) {
6806	if (list_empty(head: &repoll)) {
6807	sd->in_net_rx_action = false;
6808	barrier();
6809	/ We need to check if ____napi_schedule()*
6810	* had refilled poll_list while
6811	* sd->in_net_rx_action was true.
6812	*/
6813	if (!list_empty(head: &sd->poll_list))
6814	goto start;
6815	if (!sd_has_rps_ipi_waiting(sd))
6816	goto end;
6817	}
6818	break;
6819	}
6820
6821	n = list_first_entry(&list, struct napi_struct, poll_list);
6822	budget -= napi_poll(n, repoll: &repoll);
6823
6824	/ If softirq window is exhausted then punt.*
6825	* Allow this to run for 2 jiffies since which will allow
6826	* an average latency of 1.5/HZ.
6827	*/
6828	if (unlikely(budget <= `0` \|\|
6829	time_after_eq(jiffies, time_limit))) {
6830	sd->time_squeeze++;
6831	break;
6832	}
6833	}
6834
6835	local_irq_disable();
6836
6837	list_splice_tail_init(list: &sd->poll_list, head: &list);
6838	list_splice_tail(list: &repoll, head: &list);
6839	list_splice(list: &list, head: &sd->poll_list);
6840	if (!list_empty(head: &sd->poll_list))
6841	__raise_softirq_irqoff(nr: NET_RX_SOFTIRQ);
6842	else
6843	sd->in_net_rx_action = false;
6844
6845	net_rps_action_and_irq_enable(sd);
6846	end:;
6847	}
6848
6849	struct netdev_adjacent {
6850	struct net_device *dev;
6851	netdevice_tracker dev_tracker;
6852
6853	/ upper master flag, there can only be one master device per list /
6854	bool master;
6855
6856	/ lookup ignore flag /
6857	bool ignore;
6858
6859	/ counter for the number of times this device was added to us /
6860	u16 ref_nr;
6861
6862	/ private field for the users /
6863	void *private;
6864
6865	struct list_head list;
6866	struct rcu_head rcu;
6867	};
6868
6869	static struct netdev_adjacent __netdev_find_adj(struct* net_device *adj_dev,
6870	struct list_head *adj_list)
6871	{
6872	struct netdev_adjacent *adj;
6873
6874	list_for_each_entry(adj, adj_list, list) {
6875	if (adj->dev == adj_dev)
6876	return adj;
6877	}
6878	return NULL;
6879	}
6880
6881	static int ____netdev_has_upper_dev(struct net_device *upper_dev,
6882	struct netdev_nested_priv *priv)
6883	{
6884	struct net_device dev = (struct* net_device *)priv->data;
6885
6886	return upper_dev == dev;
6887	}
6888
6889	/**
6890	* netdev_has_upper_dev - Check if device is linked to an upper device
6891	* @dev: device
6892	* @upper_dev: upper device to check
6893	*
6894	* Find out if a device is linked to specified upper device and return true
6895	* in case it is. Note that this checks only immediate upper device,
6896	* not through a complete stack of devices. The caller must hold the RTNL lock.
6897	*/
6898	bool netdev_has_upper_dev(struct net_device *dev,
6899	struct net_device *upper_dev)
6900	{
6901	struct netdev_nested_priv priv = {
6902	.data = (void *)upper_dev,
6903	};
6904
6905	ASSERT_RTNL();
6906
6907	return netdev_walk_all_upper_dev_rcu(dev, fn: ____netdev_has_upper_dev,
6908	priv: &priv);
6909	}
6910	EXPORT_SYMBOL(netdev_has_upper_dev);
6911
6912	/**
6913	* netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
6914	* @dev: device
6915	* @upper_dev: upper device to check
6916	*
6917	* Find out if a device is linked to specified upper device and return true
6918	* in case it is. Note that this checks the entire upper device chain.
6919	* The caller must hold rcu lock.
6920	*/
6921
6922	bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
6923	struct net_device *upper_dev)
6924	{
6925	struct netdev_nested_priv priv = {
6926	.data = (void *)upper_dev,
6927	};
6928
6929	return !!netdev_walk_all_upper_dev_rcu(dev, fn: ____netdev_has_upper_dev,
6930	priv: &priv);
6931	}
6932	EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
6933
6934	/**
6935	* netdev_has_any_upper_dev - Check if device is linked to some device
6936	* @dev: device
6937	*
6938	* Find out if a device is linked to an upper device and return true in case
6939	* it is. The caller must hold the RTNL lock.
6940	*/
6941	bool netdev_has_any_upper_dev(struct net_device *dev)
6942	{
6943	ASSERT_RTNL();
6944
6945	return !list_empty(head: &dev->adj_list.upper);
6946	}
6947	EXPORT_SYMBOL(netdev_has_any_upper_dev);
6948
6949	/**
6950	* netdev_master_upper_dev_get - Get master upper device
6951	* @dev: device
6952	*
6953	* Find a master upper device and return pointer to it or NULL in case
6954	* it's not there. The caller must hold the RTNL lock.
6955	*/
6956	struct net_device netdev_master_upper_dev_get(struct* net_device *dev)
6957	{
6958	struct netdev_adjacent *upper;
6959
6960	ASSERT_RTNL();
6961
6962	if (list_empty(head: &dev->adj_list.upper))
6963	return NULL;
6964
6965	upper = list_first_entry(&dev->adj_list.upper,
6966	struct netdev_adjacent, list);
6967	if (likely(upper->master))
6968	return upper->dev;
6969	return NULL;
6970	}
6971	EXPORT_SYMBOL(netdev_master_upper_dev_get);
6972
6973	static struct net_device __netdev_master_upper_dev_get(struct* net_device *dev)
6974	{
6975	struct netdev_adjacent *upper;
6976
6977	ASSERT_RTNL();
6978
6979	if (list_empty(head: &dev->adj_list.upper))
6980	return NULL;
6981
6982	upper = list_first_entry(&dev->adj_list.upper,
6983	struct netdev_adjacent, list);
6984	if (likely(upper->master) && !upper->ignore)
6985	return upper->dev;
6986	return NULL;
6987	}
6988
6989	/**
6990	* netdev_has_any_lower_dev - Check if device is linked to some device
6991	* @dev: device
6992	*
6993	* Find out if a device is linked to a lower device and return true in case
6994	* it is. The caller must hold the RTNL lock.
6995	*/
6996	static bool netdev_has_any_lower_dev(struct net_device *dev)
6997	{
6998	ASSERT_RTNL();
6999
7000	return !list_empty(head: &dev->adj_list.lower);
7001	}
7002
7003	void netdev_adjacent_get_private(struct* list_head *adj_list)
7004	{
7005	struct netdev_adjacent *adj;
7006
7007	adj = list_entry(adj_list, struct netdev_adjacent, list);
7008
7009	return adj->private;
7010	}
7011	EXPORT_SYMBOL(netdev_adjacent_get_private);
7012
7013	/**
7014	* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
7015	* @dev: device
7016	* @iter: list_head ** of the current position
7017	*
7018	* Gets the next device from the dev's upper list, starting from iter
7019	* position. The caller must hold RCU read lock.
7020	*/
7021	struct net_device netdev_upper_get_next_dev_rcu(struct* net_device *dev,
7022	struct list_head **iter)
7023	{
7024	struct netdev_adjacent *upper;
7025
7026	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
7027
7028	upper = list_entry_rcu((iter)->next, struct* netdev_adjacent, list);
7029
7030	if (&upper->list == &dev->adj_list.upper)
7031	return NULL;
7032
7033	*iter = &upper->list;
7034
7035	return upper->dev;
7036	}
7037	EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
7038
7039	static struct net_device __netdev_next_upper_dev(struct* net_device *dev,
7040	struct list_head **iter,
7041	bool *ignore)
7042	{
7043	struct netdev_adjacent *upper;
7044
7045	upper = list_entry((iter)->next, struct* netdev_adjacent, list);
7046
7047	if (&upper->list == &dev->adj_list.upper)
7048	return NULL;
7049
7050	*iter = &upper->list;
7051	*ignore = upper->ignore;
7052
7053	return upper->dev;
7054	}
7055
7056	static struct net_device netdev_next_upper_dev_rcu(struct* net_device *dev,
7057	struct list_head **iter)
7058	{
7059	struct netdev_adjacent *upper;
7060
7061	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
7062
7063	upper = list_entry_rcu((iter)->next, struct* netdev_adjacent, list);
7064
7065	if (&upper->list == &dev->adj_list.upper)
7066	return NULL;
7067
7068	*iter = &upper->list;
7069
7070	return upper->dev;
7071	}
7072
7073	static int __netdev_walk_all_upper_dev(struct net_device *dev,
7074	int (fn)(struct* net_device *dev,
7075	struct netdev_nested_priv *priv),
7076	struct netdev_nested_priv *priv)
7077	{
7078	struct net_device udev, next, now, dev_stack[MAX_NEST_DEV + `1`];
7079	struct list_head niter, iter, *iter_stack[MAX_NEST_DEV + `1`];
7080	int ret, cur = `0`;
7081	bool ignore;
7082
7083	now = dev;
7084	iter = &dev->adj_list.upper;
7085
7086	while (`1`) {
7087	if (now != dev) {
7088	ret = fn(now, priv);
7089	if (ret)
7090	return ret;
7091	}
7092
7093	next = NULL;
7094	while (`1`) {
7095	udev = __netdev_next_upper_dev(dev: now, iter: &iter, ignore: &ignore);
7096	if (!udev)
7097	break;
7098	if (ignore)
7099	continue;
7100
7101	next = udev;
7102	niter = &udev->adj_list.upper;
7103	dev_stack[cur] = now;
7104	iter_stack[cur++] = iter;
7105	break;
7106	}
7107
7108	if (!next) {
7109	if (!cur)
7110	return `0`;
7111	next = dev_stack[--cur];
7112	niter = iter_stack[cur];
7113	}
7114
7115	now = next;
7116	iter = niter;
7117	}
7118
7119	return `0`;
7120	}
7121
7122	int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
7123	int (fn)(struct* net_device *dev,
7124	struct netdev_nested_priv *priv),
7125	struct netdev_nested_priv *priv)
7126	{
7127	struct net_device udev, next, now, dev_stack[MAX_NEST_DEV + `1`];
7128	struct list_head niter, iter, *iter_stack[MAX_NEST_DEV + `1`];
7129	int ret, cur = `0`;
7130
7131	now = dev;
7132	iter = &dev->adj_list.upper;
7133
7134	while (`1`) {
7135	if (now != dev) {
7136	ret = fn(now, priv);
7137	if (ret)
7138	return ret;
7139	}
7140
7141	next = NULL;
7142	while (`1`) {
7143	udev = netdev_next_upper_dev_rcu(dev: now, iter: &iter);
7144	if (!udev)
7145	break;
7146
7147	next = udev;
7148	niter = &udev->adj_list.upper;
7149	dev_stack[cur] = now;
7150	iter_stack[cur++] = iter;
7151	break;
7152	}
7153
7154	if (!next) {
7155	if (!cur)
7156	return `0`;
7157	next = dev_stack[--cur];
7158	niter = iter_stack[cur];
7159	}
7160
7161	now = next;
7162	iter = niter;
7163	}
7164
7165	return `0`;
7166	}
7167	EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
7168
7169	static bool __netdev_has_upper_dev(struct net_device *dev,
7170	struct net_device *upper_dev)
7171	{
7172	struct netdev_nested_priv priv = {
7173	.flags = `0`,
7174	.data = (void *)upper_dev,
7175	};
7176
7177	ASSERT_RTNL();
7178
7179	return __netdev_walk_all_upper_dev(dev, fn: ____netdev_has_upper_dev,
7180	priv: &priv);
7181	}
7182
7183	/**
7184	* netdev_lower_get_next_private - Get the next ->private from the
7185	* lower neighbour list
7186	* @dev: device
7187	* @iter: list_head ** of the current position
7188	*
7189	* Gets the next netdev_adjacent->private from the dev's lower neighbour
7190	* list, starting from iter position. The caller must hold either hold the
7191	* RTNL lock or its own locking that guarantees that the neighbour lower
7192	* list will remain unchanged.
7193	*/
7194	void netdev_lower_get_next_private(struct* net_device *dev,
7195	struct list_head **iter)
7196	{
7197	struct netdev_adjacent *lower;
7198
7199	lower = list_entry(iter, struct* netdev_adjacent, list);
7200
7201	if (&lower->list == &dev->adj_list.lower)
7202	return NULL;
7203
7204	*iter = lower->list.next;
7205
7206	return lower->private;
7207	}
7208	EXPORT_SYMBOL(netdev_lower_get_next_private);
7209
7210	/**
7211	* netdev_lower_get_next_private_rcu - Get the next ->private from the
7212	* lower neighbour list, RCU
7213	* variant
7214	* @dev: device
7215	* @iter: list_head ** of the current position
7216	*
7217	* Gets the next netdev_adjacent->private from the dev's lower neighbour
7218	* list, starting from iter position. The caller must hold RCU read lock.
7219	*/
7220	void netdev_lower_get_next_private_rcu(struct* net_device *dev,
7221	struct list_head **iter)
7222	{
7223	struct netdev_adjacent *lower;
7224
7225	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
7226
7227	lower = list_entry_rcu((iter)->next, struct* netdev_adjacent, list);
7228
7229	if (&lower->list == &dev->adj_list.lower)
7230	return NULL;
7231
7232	*iter = &lower->list;
7233
7234	return lower->private;
7235	}
7236	EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
7237
7238	/**
7239	* netdev_lower_get_next - Get the next device from the lower neighbour
7240	* list
7241	* @dev: device
7242	* @iter: list_head ** of the current position
7243	*
7244	* Gets the next netdev_adjacent from the dev's lower neighbour
7245	* list, starting from iter position. The caller must hold RTNL lock or
7246	* its own locking that guarantees that the neighbour lower
7247	* list will remain unchanged.
7248	*/
7249	void netdev_lower_get_next(struct* net_device dev, struct* list_head **iter)
7250	{
7251	struct netdev_adjacent *lower;
7252
7253	lower = list_entry(iter, struct* netdev_adjacent, list);
7254
7255	if (&lower->list == &dev->adj_list.lower)
7256	return NULL;
7257
7258	*iter = lower->list.next;
7259
7260	return lower->dev;
7261	}
7262	EXPORT_SYMBOL(netdev_lower_get_next);
7263
7264	static struct net_device netdev_next_lower_dev(struct* net_device *dev,
7265	struct list_head **iter)
7266	{
7267	struct netdev_adjacent *lower;
7268
7269	lower = list_entry((iter)->next, struct* netdev_adjacent, list);
7270
7271	if (&lower->list == &dev->adj_list.lower)
7272	return NULL;
7273
7274	*iter = &lower->list;
7275
7276	return lower->dev;
7277	}
7278
7279	static struct net_device __netdev_next_lower_dev(struct* net_device *dev,
7280	struct list_head **iter,
7281	bool *ignore)
7282	{
7283	struct netdev_adjacent *lower;
7284
7285	lower = list_entry((iter)->next, struct* netdev_adjacent, list);
7286
7287	if (&lower->list == &dev->adj_list.lower)
7288	return NULL;
7289
7290	*iter = &lower->list;
7291	*ignore = lower->ignore;
7292
7293	return lower->dev;
7294	}
7295
7296	int netdev_walk_all_lower_dev(struct net_device *dev,
7297	int (fn)(struct* net_device *dev,
7298	struct netdev_nested_priv *priv),
7299	struct netdev_nested_priv *priv)
7300	{
7301	struct net_device ldev, next, now, dev_stack[MAX_NEST_DEV + `1`];
7302	struct list_head niter, iter, *iter_stack[MAX_NEST_DEV + `1`];
7303	int ret, cur = `0`;
7304
7305	now = dev;
7306	iter = &dev->adj_list.lower;
7307
7308	while (`1`) {
7309	if (now != dev) {
7310	ret = fn(now, priv);
7311	if (ret)
7312	return ret;
7313	}
7314
7315	next = NULL;
7316	while (`1`) {
7317	ldev = netdev_next_lower_dev(dev: now, iter: &iter);
7318	if (!ldev)
7319	break;
7320
7321	next = ldev;
7322	niter = &ldev->adj_list.lower;
7323	dev_stack[cur] = now;
7324	iter_stack[cur++] = iter;
7325	break;
7326	}
7327
7328	if (!next) {
7329	if (!cur)
7330	return `0`;
7331	next = dev_stack[--cur];
7332	niter = iter_stack[cur];
7333	}
7334
7335	now = next;
7336	iter = niter;
7337	}
7338
7339	return `0`;
7340	}
7341	EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
7342
7343	static int __netdev_walk_all_lower_dev(struct net_device *dev,
7344	int (fn)(struct* net_device *dev,
7345	struct netdev_nested_priv *priv),
7346	struct netdev_nested_priv *priv)
7347	{
7348	struct net_device ldev, next, now, dev_stack[MAX_NEST_DEV + `1`];
7349	struct list_head niter, iter, *iter_stack[MAX_NEST_DEV + `1`];
7350	int ret, cur = `0`;
7351	bool ignore;
7352
7353	now = dev;
7354	iter = &dev->adj_list.lower;
7355
7356	while (`1`) {
7357	if (now != dev) {
7358	ret = fn(now, priv);
7359	if (ret)
7360	return ret;
7361	}
7362
7363	next = NULL;
7364	while (`1`) {
7365	ldev = __netdev_next_lower_dev(dev: now, iter: &iter, ignore: &ignore);
7366	if (!ldev)
7367	break;
7368	if (ignore)
7369	continue;
7370
7371	next = ldev;
7372	niter = &ldev->adj_list.lower;
7373	dev_stack[cur] = now;
7374	iter_stack[cur++] = iter;
7375	break;
7376	}
7377
7378	if (!next) {
7379	if (!cur)
7380	return `0`;
7381	next = dev_stack[--cur];
7382	niter = iter_stack[cur];
7383	}
7384
7385	now = next;
7386	iter = niter;
7387	}
7388
7389	return `0`;
7390	}
7391
7392	struct net_device netdev_next_lower_dev_rcu(struct* net_device *dev,
7393	struct list_head **iter)
7394	{
7395	struct netdev_adjacent *lower;
7396
7397	lower = list_entry_rcu((iter)->next, struct* netdev_adjacent, list);
7398	if (&lower->list == &dev->adj_list.lower)
7399	return NULL;
7400
7401	*iter = &lower->list;
7402
7403	return lower->dev;
7404	}
7405	EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
7406
7407	static u8 __netdev_upper_depth(struct net_device *dev)
7408	{
7409	struct net_device *udev;
7410	struct list_head *iter;
7411	u8 max_depth = `0`;
7412	bool ignore;
7413
7414	for (iter = &dev->adj_list.upper,
7415	udev = __netdev_next_upper_dev(dev, iter: &iter, ignore: &ignore);
7416	udev;
7417	udev = __netdev_next_upper_dev(dev, iter: &iter, ignore: &ignore)) {
7418	if (ignore)
7419	continue;
7420	if (max_depth < udev->upper_level)
7421	max_depth = udev->upper_level;
7422	}
7423
7424	return max_depth;
7425	}
7426
7427	static u8 __netdev_lower_depth(struct net_device *dev)
7428	{
7429	struct net_device *ldev;
7430	struct list_head *iter;
7431	u8 max_depth = `0`;
7432	bool ignore;
7433
7434	for (iter = &dev->adj_list.lower,
7435	ldev = __netdev_next_lower_dev(dev, iter: &iter, ignore: &ignore);
7436	ldev;
7437	ldev = __netdev_next_lower_dev(dev, iter: &iter, ignore: &ignore)) {
7438	if (ignore)
7439	continue;
7440	if (max_depth < ldev->lower_level)
7441	max_depth = ldev->lower_level;
7442	}
7443
7444	return max_depth;
7445	}
7446
7447	static int __netdev_update_upper_level(struct net_device *dev,
7448	struct netdev_nested_priv *__unused)
7449	{
7450	dev->upper_level = __netdev_upper_depth(dev) + `1`;
7451	return `0`;
7452	}
7453
#ifdef CONFIG_LOCKDEP
static LIST_HEAD(net_unlink_list);

/* Queue @dev on net_unlink_list unless its unlink_list node is already
 * linked somewhere.
 */
static void net_unlink_todo(struct net_device *dev)
{
	if (list_empty(&dev->unlink_list))
		list_add_tail(&dev->unlink_list, &net_unlink_list);
}
#endif
7463
7464	static int __netdev_update_lower_level(struct net_device *dev,
7465	struct netdev_nested_priv *priv)
7466	{
7467	dev->lower_level = __netdev_lower_depth(dev) + `1`;
7468
7469	#ifdef CONFIG_LOCKDEP
7470	if (!priv)
7471	return `0`;
7472
7473	if (priv->flags & NESTED_SYNC_IMM)
7474	dev->nested_level = dev->lower_level - `1`;
7475	if (priv->flags & NESTED_SYNC_TODO)
7476	net_unlink_todo(dev);
7477	#endif
7478	return `0`;
7479	}
7480
7481	int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
7482	int (fn)(struct* net_device *dev,
7483	struct netdev_nested_priv *priv),
7484	struct netdev_nested_priv *priv)
7485	{
7486	struct net_device ldev, next, now, dev_stack[MAX_NEST_DEV + `1`];
7487	struct list_head niter, iter, *iter_stack[MAX_NEST_DEV + `1`];
7488	int ret, cur = `0`;
7489
7490	now = dev;
7491	iter = &dev->adj_list.lower;
7492
7493	while (`1`) {
7494	if (now != dev) {
7495	ret = fn(now, priv);
7496	if (ret)
7497	return ret;
7498	}
7499
7500	next = NULL;
7501	while (`1`) {
7502	ldev = netdev_next_lower_dev_rcu(now, &iter);
7503	if (!ldev)
7504	break;
7505
7506	next = ldev;
7507	niter = &ldev->adj_list.lower;
7508	dev_stack[cur] = now;
7509	iter_stack[cur++] = iter;
7510	break;
7511	}
7512
7513	if (!next) {
7514	if (!cur)
7515	return `0`;
7516	next = dev_stack[--cur];
7517	niter = iter_stack[cur];
7518	}
7519
7520	now = next;
7521	iter = niter;
7522	}
7523
7524	return `0`;
7525	}
7526	EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
7527
7528	/**
7529	* netdev_lower_get_first_private_rcu - Get the first ->private from the
7530	* lower neighbour list, RCU
7531	* variant
7532	* @dev: device
7533	*
7534	* Gets the first netdev_adjacent->private from the dev's lower neighbour
7535	* list. The caller must hold RCU read lock.
7536	*/
7537	void netdev_lower_get_first_private_rcu(struct* net_device *dev)
7538	{
7539	struct netdev_adjacent *lower;
7540
7541	lower = list_first_or_null_rcu(&dev->adj_list.lower,
7542	struct netdev_adjacent, list);
7543	if (lower)
7544	return lower->private;
7545	return NULL;
7546	}
7547	EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
7548
7549	/**
7550	* netdev_master_upper_dev_get_rcu - Get master upper device
7551	* @dev: device
7552	*
7553	* Find a master upper device and return pointer to it or NULL in case
7554	* it's not there. The caller must hold the RCU read lock.
7555	*/
7556	struct net_device netdev_master_upper_dev_get_rcu(struct* net_device *dev)
7557	{
7558	struct netdev_adjacent *upper;
7559
7560	upper = list_first_or_null_rcu(&dev->adj_list.upper,
7561	struct netdev_adjacent, list);
7562	if (upper && likely(upper->master))
7563	return upper->dev;
7564	return NULL;
7565	}
7566	EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
7567
7568	static int netdev_adjacent_sysfs_add(struct net_device *dev,
7569	struct net_device *adj_dev,
7570	struct list_head *dev_list)
7571	{
7572	char linkname[IFNAMSIZ+`7`];
7573
7574	sprintf(buf: linkname, fmt: dev_list == &dev->adj_list.upper ?
7575	"upper_%s" : "lower_%s", adj_dev->name);
7576	return sysfs_create_link(kobj: &(dev->dev.kobj), target: &(adj_dev->dev.kobj),
7577	name: linkname);
7578	}
7579	static void netdev_adjacent_sysfs_del(struct net_device *dev,
7580	char *name,
7581	struct list_head *dev_list)
7582	{
7583	char linkname[IFNAMSIZ+`7`];
7584
7585	sprintf(buf: linkname, fmt: dev_list == &dev->adj_list.upper ?
7586	"upper_%s" : "lower_%s", name);
7587	sysfs_remove_link(kobj: &(dev->dev.kobj), name: linkname);
7588	}
7589
7590	static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
7591	struct net_device *adj_dev,
7592	struct list_head *dev_list)
7593	{
7594	return (dev_list == &dev->adj_list.upper \|\|
7595	dev_list == &dev->adj_list.lower) &&
7596	net_eq(net1: dev_net(dev), net2: dev_net(dev: adj_dev));
7597	}
7598
7599	static int __netdev_adjacent_dev_insert(struct net_device *dev,
7600	struct net_device *adj_dev,
7601	struct list_head *dev_list,
7602	void *private, bool master)
7603	{
7604	struct netdev_adjacent *adj;
7605	int ret;
7606
7607	adj = __netdev_find_adj(adj_dev, adj_list: dev_list);
7608
7609	if (adj) {
7610	adj->ref_nr += `1`;
7611	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
7612	dev->name, adj_dev->name, adj->ref_nr);
7613
7614	return `0`;
7615	}
7616
7617	adj = kmalloc(size: sizeof(*adj), GFP_KERNEL);
7618	if (!adj)
7619	return -ENOMEM;
7620
7621	adj->dev = adj_dev;
7622	adj->master = master;
7623	adj->ref_nr = `1`;
7624	adj->private = private;
7625	adj->ignore = false;
7626	netdev_hold(dev: adj_dev, tracker: &adj->dev_tracker, GFP_KERNEL);
7627
7628	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
7629	dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
7630
7631	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
7632	ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
7633	if (ret)
7634	goto free_adj;
7635	}
7636
7637	/ Ensure that master link is always the first item in list. /
7638	if (master) {
7639	ret = sysfs_create_link(kobj: &(dev->dev.kobj),
7640	target: &(adj_dev->dev.kobj), name: "master");
7641	if (ret)
7642	goto remove_symlinks;
7643
7644	list_add_rcu(new: &adj->list, head: dev_list);
7645	} else {
7646	list_add_tail_rcu(new: &adj->list, head: dev_list);
7647	}
7648
7649	return `0`;
7650
7651	remove_symlinks:
7652	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7653	netdev_adjacent_sysfs_del(dev, name: adj_dev->name, dev_list);
7654	free_adj:
7655	netdev_put(dev: adj_dev, tracker: &adj->dev_tracker);
7656	kfree(objp: adj);
7657
7658	return ret;
7659	}
7660
7661	static void __netdev_adjacent_dev_remove(struct net_device *dev,
7662	struct net_device *adj_dev,
7663	u16 ref_nr,
7664	struct list_head *dev_list)
7665	{
7666	struct netdev_adjacent *adj;
7667
7668	pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
7669	dev->name, adj_dev->name, ref_nr);
7670
7671	adj = __netdev_find_adj(adj_dev, adj_list: dev_list);
7672
7673	if (!adj) {
7674	pr_err("Adjacency does not exist for device %s from %s\n",
7675	dev->name, adj_dev->name);
7676	WARN_ON(`1`);
7677	return;
7678	}
7679
7680	if (adj->ref_nr > ref_nr) {
7681	pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
7682	dev->name, adj_dev->name, ref_nr,
7683	adj->ref_nr - ref_nr);
7684	adj->ref_nr -= ref_nr;
7685	return;
7686	}
7687
7688	if (adj->master)
7689	sysfs_remove_link(kobj: &(dev->dev.kobj), name: "master");
7690
7691	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7692	netdev_adjacent_sysfs_del(dev, name: adj_dev->name, dev_list);
7693
7694	list_del_rcu(entry: &adj->list);
7695	pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
7696	adj_dev->name, dev->name, adj_dev->name);
7697	netdev_put(dev: adj_dev, tracker: &adj->dev_tracker);
7698	kfree_rcu(adj, rcu);
7699	}
7700
/*
 * Insert the adjacency in both directions: @upper_dev into @up_list (as
 * seen from @dev) and @dev into @down_list (as seen from @upper_dev).
 * Only the first insertion carries the @master flag. If the second
 * insertion fails the first one is removed again.
 *
 * Returns 0 on success or the error of the failing insertion.
 */
static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
					    struct net_device *upper_dev,
					    struct list_head *up_list,
					    struct list_head *down_list,
					    void *private, bool master)
{
	int ret;

	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
					   private, master);
	if (ret)
		return ret;

	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
					   private, false);
	if (ret) {
		__netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
		return ret;
	}

	return 0;
}
7723
7724	static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
7725	struct net_device *upper_dev,
7726	u16 ref_nr,
7727	struct list_head *up_list,
7728	struct list_head *down_list)
7729	{
7730	__netdev_adjacent_dev_remove(dev, adj_dev: upper_dev, ref_nr, dev_list: up_list);
7731	__netdev_adjacent_dev_remove(dev: upper_dev, adj_dev: dev, ref_nr, dev_list: down_list);
7732	}
7733
7734	static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
7735	struct net_device *upper_dev,
7736	void *private, bool master)
7737	{
7738	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
7739	up_list: &dev->adj_list.upper,
7740	down_list: &upper_dev->adj_list.lower,
7741	private, master);
7742	}
7743
7744	static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
7745	struct net_device *upper_dev)
7746	{
7747	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr: `1`,
7748	up_list: &dev->adj_list.upper,
7749	down_list: &upper_dev->adj_list.lower);
7750	}
7751
7752	static int __netdev_upper_dev_link(struct net_device *dev,
7753	struct net_device *upper_dev, bool master,
7754	void upper_priv, void* *upper_info,
7755	struct netdev_nested_priv *priv,
7756	struct netlink_ext_ack *extack)
7757	{
7758	struct netdev_notifier_changeupper_info changeupper_info = {
7759	.info = {
7760	.dev = dev,
7761	.extack = extack,
7762	},
7763	.upper_dev = upper_dev,
7764	.master = master,
7765	.linking = true,
7766	.upper_info = upper_info,
7767	};
7768	struct net_device *master_dev;
7769	int ret = `0`;
7770
7771	ASSERT_RTNL();
7772
7773	if (dev == upper_dev)
7774	return -EBUSY;
7775
7776	/ To prevent loops, check if dev is not upper device to upper_dev. /
7777	if (__netdev_has_upper_dev(dev: upper_dev, upper_dev: dev))
7778	return -EBUSY;
7779
7780	if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
7781	return -EMLINK;
7782
7783	if (!master) {
7784	if (__netdev_has_upper_dev(dev, upper_dev))
7785	return -EEXIST;
7786	} else {
7787	master_dev = __netdev_master_upper_dev_get(dev);
7788	if (master_dev)
7789	return master_dev == upper_dev ? -EEXIST : -EBUSY;
7790	}
7791
7792	ret = call_netdevice_notifiers_info(val: NETDEV_PRECHANGEUPPER,
7793	info: &changeupper_info.info);
7794	ret = notifier_to_errno(ret);
7795	if (ret)
7796	return ret;
7797
7798	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private: upper_priv,
7799	master);
7800	if (ret)
7801	return ret;
7802
7803	ret = call_netdevice_notifiers_info(val: NETDEV_CHANGEUPPER,
7804	info: &changeupper_info.info);
7805	ret = notifier_to_errno(ret);
7806	if (ret)
7807	goto rollback;
7808
7809	__netdev_update_upper_level(dev, NULL);
7810	__netdev_walk_all_lower_dev(dev, fn: __netdev_update_upper_level, NULL);
7811
7812	__netdev_update_lower_level(dev: upper_dev, priv);
7813	__netdev_walk_all_upper_dev(dev: upper_dev, fn: __netdev_update_lower_level,
7814	priv);
7815
7816	return `0`;
7817
7818	rollback:
7819	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7820
7821	return ret;
7822	}
7823
7824	/**
7825	* netdev_upper_dev_link - Add a link to the upper device
7826	* @dev: device
7827	* @upper_dev: new upper device
7828	* @extack: netlink extended ack
7829	*
7830	* Adds a link to device which is upper to this one. The caller must hold
7831	* the RTNL lock. On a failure a negative errno code is returned.
7832	* On success the reference counts are adjusted and the function
7833	* returns zero.
7834	*/
7835	int netdev_upper_dev_link(struct net_device *dev,
7836	struct net_device *upper_dev,
7837	struct netlink_ext_ack *extack)
7838	{
7839	struct netdev_nested_priv priv = {
7840	.flags = NESTED_SYNC_IMM \| NESTED_SYNC_TODO,
7841	.data = NULL,
7842	};
7843
7844	return __netdev_upper_dev_link(dev, upper_dev, master: false,
7845	NULL, NULL, priv: &priv, extack);
7846	}
7847	EXPORT_SYMBOL(netdev_upper_dev_link);
7848
7849	/**
7850	* netdev_master_upper_dev_link - Add a master link to the upper device
7851	* @dev: device
7852	* @upper_dev: new upper device
7853	* @upper_priv: upper device private
7854	* @upper_info: upper info to be passed down via notifier
7855	* @extack: netlink extended ack
7856	*
7857	* Adds a link to device which is upper to this one. In this case, only
7858	* one master upper device can be linked, although other non-master devices
7859	* might be linked as well. The caller must hold the RTNL lock.
7860	* On a failure a negative errno code is returned. On success the reference
7861	* counts are adjusted and the function returns zero.
7862	*/
7863	int netdev_master_upper_dev_link(struct net_device *dev,
7864	struct net_device *upper_dev,
7865	void upper_priv, void* *upper_info,
7866	struct netlink_ext_ack *extack)
7867	{
7868	struct netdev_nested_priv priv = {
7869	.flags = NESTED_SYNC_IMM \| NESTED_SYNC_TODO,
7870	.data = NULL,
7871	};
7872
7873	return __netdev_upper_dev_link(dev, upper_dev, master: true,
7874	upper_priv, upper_info, priv: &priv, extack);
7875	}
7876	EXPORT_SYMBOL(netdev_master_upper_dev_link);
7877
7878	static void __netdev_upper_dev_unlink(struct net_device *dev,
7879	struct net_device *upper_dev,
7880	struct netdev_nested_priv *priv)
7881	{
7882	struct netdev_notifier_changeupper_info changeupper_info = {
7883	.info = {
7884	.dev = dev,
7885	},
7886	.upper_dev = upper_dev,
7887	.linking = false,
7888	};
7889
7890	ASSERT_RTNL();
7891
7892	changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
7893
7894	call_netdevice_notifiers_info(val: NETDEV_PRECHANGEUPPER,
7895	info: &changeupper_info.info);
7896
7897	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7898
7899	call_netdevice_notifiers_info(val: NETDEV_CHANGEUPPER,
7900	info: &changeupper_info.info);
7901
7902	__netdev_update_upper_level(dev, NULL);
7903	__netdev_walk_all_lower_dev(dev, fn: __netdev_update_upper_level, NULL);
7904
7905	__netdev_update_lower_level(dev: upper_dev, priv);
7906	__netdev_walk_all_upper_dev(dev: upper_dev, fn: __netdev_update_lower_level,
7907	priv);
7908	}
7909
7910	/**
7911	* netdev_upper_dev_unlink - Removes a link to upper device
7912	* @dev: device
7913	* @upper_dev: new upper device
7914	*
7915	* Removes a link to device which is upper to this one. The caller must hold
7916	* the RTNL lock.
7917	*/
7918	void netdev_upper_dev_unlink(struct net_device *dev,
7919	struct net_device *upper_dev)
7920	{
7921	struct netdev_nested_priv priv = {
7922	.flags = NESTED_SYNC_TODO,
7923	.data = NULL,
7924	};
7925
7926	__netdev_upper_dev_unlink(dev, upper_dev, priv: &priv);
7927	}
7928	EXPORT_SYMBOL(netdev_upper_dev_unlink);
7929
7930	static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
7931	struct net_device *lower_dev,
7932	bool val)
7933	{
7934	struct netdev_adjacent *adj;
7935
7936	adj = __netdev_find_adj(adj_dev: lower_dev, adj_list: &upper_dev->adj_list.lower);
7937	if (adj)
7938	adj->ignore = val;
7939
7940	adj = __netdev_find_adj(adj_dev: upper_dev, adj_list: &lower_dev->adj_list.upper);
7941	if (adj)
7942	adj->ignore = val;
7943	}
7944
/* Mark the adjacency between @upper_dev and @lower_dev as ignored. */
static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
					struct net_device *lower_dev)
{
	__netdev_adjacent_dev_set(upper_dev, lower_dev, true);
}
7950
/* Clear the ignored mark on the adjacency between @upper_dev and @lower_dev. */
static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
				       struct net_device *lower_dev)
{
	__netdev_adjacent_dev_set(upper_dev, lower_dev, false);
}
7956
7957	int netdev_adjacent_change_prepare(struct net_device *old_dev,
7958	struct net_device *new_dev,
7959	struct net_device *dev,
7960	struct netlink_ext_ack *extack)
7961	{
7962	struct netdev_nested_priv priv = {
7963	.flags = `0`,
7964	.data = NULL,
7965	};
7966	int err;
7967
7968	if (!new_dev)
7969	return `0`;
7970
7971	if (old_dev && new_dev != old_dev)
7972	netdev_adjacent_dev_disable(upper_dev: dev, lower_dev: old_dev);
7973	err = __netdev_upper_dev_link(dev: new_dev, upper_dev: dev, master: false, NULL, NULL, priv: &priv,
7974	extack);
7975	if (err) {
7976	if (old_dev && new_dev != old_dev)
7977	netdev_adjacent_dev_enable(upper_dev: dev, lower_dev: old_dev);
7978	return err;
7979	}
7980
7981	return `0`;
7982	}
7983	EXPORT_SYMBOL(netdev_adjacent_change_prepare);
7984
7985	void netdev_adjacent_change_commit(struct net_device *old_dev,
7986	struct net_device *new_dev,
7987	struct net_device *dev)
7988	{
7989	struct netdev_nested_priv priv = {
7990	.flags = NESTED_SYNC_IMM \| NESTED_SYNC_TODO,
7991	.data = NULL,
7992	};
7993
7994	if (!new_dev \|\| !old_dev)
7995	return;
7996
7997	if (new_dev == old_dev)
7998	return;
7999
8000	netdev_adjacent_dev_enable(upper_dev: dev, lower_dev: old_dev);
8001	__netdev_upper_dev_unlink(dev: old_dev, upper_dev: dev, priv: &priv);
8002	}
8003	EXPORT_SYMBOL(netdev_adjacent_change_commit);
8004
8005	void netdev_adjacent_change_abort(struct net_device *old_dev,
8006	struct net_device *new_dev,
8007	struct net_device *dev)
8008	{
8009	struct netdev_nested_priv priv = {
8010	.flags = `0`,
8011	.data = NULL,
8012	};
8013
8014	if (!new_dev)
8015	return;
8016
8017	if (old_dev && new_dev != old_dev)
8018	netdev_adjacent_dev_enable(upper_dev: dev, lower_dev: old_dev);
8019
8020	__netdev_upper_dev_unlink(dev: new_dev, upper_dev: dev, priv: &priv);
8021	}
8022	EXPORT_SYMBOL(netdev_adjacent_change_abort);
8023
8024	/**
8025	* netdev_bonding_info_change - Dispatch event about slave change
8026	* @dev: device
8027	* @bonding_info: info to dispatch
8028	*
8029	* Send NETDEV_BONDING_INFO to netdev notifiers with info.
8030	* The caller must hold the RTNL lock.
8031	*/
8032	void netdev_bonding_info_change(struct net_device *dev,
8033	struct netdev_bonding_info *bonding_info)
8034	{
8035	struct netdev_notifier_bonding_info info = {
8036	.info.dev = dev,
8037	};
8038
8039	memcpy(&info.bonding_info, bonding_info,
8040	sizeof(struct netdev_bonding_info));
8041	call_netdevice_notifiers_info(val: NETDEV_BONDING_INFO,
8042	info: &info.info);
8043	}
8044	EXPORT_SYMBOL(netdev_bonding_info_change);
8045
8046	static int netdev_offload_xstats_enable_l3(struct net_device *dev,
8047	struct netlink_ext_ack *extack)
8048	{
8049	struct netdev_notifier_offload_xstats_info info = {
8050	.info.dev = dev,
8051	.info.extack = extack,
8052	.type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8053	};
8054	int err;
8055	int rc;
8056
8057	dev->offload_xstats_l3 = kzalloc(size: sizeof(*dev->offload_xstats_l3),
8058	GFP_KERNEL);
8059	if (!dev->offload_xstats_l3)
8060	return -ENOMEM;
8061
8062	rc = call_netdevice_notifiers_info_robust(val_up: NETDEV_OFFLOAD_XSTATS_ENABLE,
8063	val_down: NETDEV_OFFLOAD_XSTATS_DISABLE,
8064	info: &info.info);
8065	err = notifier_to_errno(ret: rc);
8066	if (err)
8067	goto free_stats;
8068
8069	return `0`;
8070
8071	free_stats:
8072	kfree(objp: dev->offload_xstats_l3);
8073	dev->offload_xstats_l3 = NULL;
8074	return err;
8075	}
8076
8077	int netdev_offload_xstats_enable(struct net_device *dev,
8078	enum netdev_offload_xstats_type type,
8079	struct netlink_ext_ack *extack)
8080	{
8081	ASSERT_RTNL();
8082
8083	if (netdev_offload_xstats_enabled(dev, type))
8084	return -EALREADY;
8085
8086	switch (type) {
8087	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
8088	return netdev_offload_xstats_enable_l3(dev, extack);
8089	}
8090
8091	WARN_ON(`1`);
8092	return -EINVAL;
8093	}
8094	EXPORT_SYMBOL(netdev_offload_xstats_enable);
8095
8096	static void netdev_offload_xstats_disable_l3(struct net_device *dev)
8097	{
8098	struct netdev_notifier_offload_xstats_info info = {
8099	.info.dev = dev,
8100	.type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8101	};
8102
8103	call_netdevice_notifiers_info(val: NETDEV_OFFLOAD_XSTATS_DISABLE,
8104	info: &info.info);
8105	kfree(objp: dev->offload_xstats_l3);
8106	dev->offload_xstats_l3 = NULL;
8107	}
8108
8109	int netdev_offload_xstats_disable(struct net_device *dev,
8110	enum netdev_offload_xstats_type type)
8111	{
8112	ASSERT_RTNL();
8113
8114	if (!netdev_offload_xstats_enabled(dev, type))
8115	return -EALREADY;
8116
8117	switch (type) {
8118	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
8119	netdev_offload_xstats_disable_l3(dev);
8120	return `0`;
8121	}
8122
8123	WARN_ON(`1`);
8124	return -EINVAL;
8125	}
8126	EXPORT_SYMBOL(netdev_offload_xstats_disable);
8127
8128	static void netdev_offload_xstats_disable_all(struct net_device *dev)
8129	{
8130	netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
8131	}
8132
8133	static struct rtnl_hw_stats64 *
8134	netdev_offload_xstats_get_ptr(const struct net_device *dev,
8135	enum netdev_offload_xstats_type type)
8136	{
8137	switch (type) {
8138	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
8139	return dev->offload_xstats_l3;
8140	}
8141
8142	WARN_ON(`1`);
8143	return NULL;
8144	}
8145
8146	bool netdev_offload_xstats_enabled(const struct net_device *dev,
8147	enum netdev_offload_xstats_type type)
8148	{
8149	ASSERT_RTNL();
8150
8151	return netdev_offload_xstats_get_ptr(dev, type);
8152	}
8153	EXPORT_SYMBOL(netdev_offload_xstats_enabled);
8154
/* Payload of a "report used" query; filled in through the notifier chain. */
struct netdev_notifier_offload_xstats_ru {
	/* Set to true by netdev_offload_xstats_report_used(). */
	bool used;
};
8158
8159	struct netdev_notifier_offload_xstats_rd {
8160	struct rtnl_hw_stats64 stats;
8161	bool used;
8162	};
8163
8164	static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
8165	const struct rtnl_hw_stats64 *src)
8166	{
8167	dest->rx_packets += src->rx_packets;
8168	dest->tx_packets += src->tx_packets;
8169	dest->rx_bytes += src->rx_bytes;
8170	dest->tx_bytes += src->tx_bytes;
8171	dest->rx_errors += src->rx_errors;
8172	dest->tx_errors += src->tx_errors;
8173	dest->rx_dropped += src->rx_dropped;
8174	dest->tx_dropped += src->tx_dropped;
8175	dest->multicast += src->multicast;
8176	}
8177
8178	static int netdev_offload_xstats_get_used(struct net_device *dev,
8179	enum netdev_offload_xstats_type type,
8180	bool *p_used,
8181	struct netlink_ext_ack *extack)
8182	{
8183	struct netdev_notifier_offload_xstats_ru report_used = {};
8184	struct netdev_notifier_offload_xstats_info info = {
8185	.info.dev = dev,
8186	.info.extack = extack,
8187	.type = type,
8188	.report_used = &report_used,
8189	};
8190	int rc;
8191
8192	WARN_ON(!netdev_offload_xstats_enabled(dev, type));
8193	rc = call_netdevice_notifiers_info(val: NETDEV_OFFLOAD_XSTATS_REPORT_USED,
8194	info: &info.info);
8195	*p_used = report_used.used;
8196	return notifier_to_errno(ret: rc);
8197	}
8198
8199	static int netdev_offload_xstats_get_stats(struct net_device *dev,
8200	enum netdev_offload_xstats_type type,
8201	struct rtnl_hw_stats64 *p_stats,
8202	bool *p_used,
8203	struct netlink_ext_ack *extack)
8204	{
8205	struct netdev_notifier_offload_xstats_rd report_delta = {};
8206	struct netdev_notifier_offload_xstats_info info = {
8207	.info.dev = dev,
8208	.info.extack = extack,
8209	.type = type,
8210	.report_delta = &report_delta,
8211	};
8212	struct rtnl_hw_stats64 *stats;
8213	int rc;
8214
8215	stats = netdev_offload_xstats_get_ptr(dev, type);
8216	if (WARN_ON(!stats))
8217	return -EINVAL;
8218
8219	rc = call_netdevice_notifiers_info(val: NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
8220	info: &info.info);
8221
8222	/ Cache whatever we got, even if there was an error, otherwise the*
8223	* successful stats retrievals would get lost.
8224	*/
8225	netdev_hw_stats64_add(dest: stats, src: &report_delta.stats);
8226
8227	if (p_stats)
8228	p_stats = stats;
8229	*p_used = report_delta.used;
8230
8231	return notifier_to_errno(ret: rc);
8232	}
8233
8234	int netdev_offload_xstats_get(struct net_device *dev,
8235	enum netdev_offload_xstats_type type,
8236	struct rtnl_hw_stats64 p_stats, bool p_used,
8237	struct netlink_ext_ack *extack)
8238	{
8239	ASSERT_RTNL();
8240
8241	if (p_stats)
8242	return netdev_offload_xstats_get_stats(dev, type, p_stats,
8243	p_used, extack);
8244	else
8245	return netdev_offload_xstats_get_used(dev, type, p_used,
8246	extack);
8247	}
8248	EXPORT_SYMBOL(netdev_offload_xstats_get);
8249
8250	void
8251	netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
8252	const struct rtnl_hw_stats64 *stats)
8253	{
8254	report_delta->used = true;
8255	netdev_hw_stats64_add(dest: &report_delta->stats, src: stats);
8256	}
8257	EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
8258
8259	void
8260	netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
8261	{
8262	report_used->used = true;
8263	}
8264	EXPORT_SYMBOL(netdev_offload_xstats_report_used);
8265
8266	void netdev_offload_xstats_push_delta(struct net_device *dev,
8267	enum netdev_offload_xstats_type type,
8268	const struct rtnl_hw_stats64 *p_stats)
8269	{
8270	struct rtnl_hw_stats64 *stats;
8271
8272	ASSERT_RTNL();
8273
8274	stats = netdev_offload_xstats_get_ptr(dev, type);
8275	if (WARN_ON(!stats))
8276	return;
8277
8278	netdev_hw_stats64_add(dest: stats, src: p_stats);
8279	}
8280	EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
8281
8282	/**
8283	* netdev_get_xmit_slave - Get the xmit slave of master device
8284	* @dev: device
8285	* @skb: The packet
8286	* @all_slaves: assume all the slaves are active
8287	*
8288	* The reference counters are not incremented so the caller must be
8289	* careful with locks. The caller must hold RCU lock.
8290	* %NULL is returned if no slave is found.
8291	*/
8292
8293	struct net_device netdev_get_xmit_slave(struct* net_device *dev,
8294	struct sk_buff *skb,
8295	bool all_slaves)
8296	{
8297	const struct net_device_ops *ops = dev->netdev_ops;
8298
8299	if (!ops->ndo_get_xmit_slave)
8300	return NULL;
8301	return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
8302	}
8303	EXPORT_SYMBOL(netdev_get_xmit_slave);
8304
8305	static struct net_device netdev_sk_get_lower_dev(struct* net_device *dev,
8306	struct sock *sk)
8307	{
8308	const struct net_device_ops *ops = dev->netdev_ops;
8309
8310	if (!ops->ndo_sk_get_lower_dev)
8311	return NULL;
8312	return ops->ndo_sk_get_lower_dev(dev, sk);
8313	}
8314
/**
 * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
 * @dev: device
 * @sk: the socket
 *
 * Follows the chain of lower devices for @sk until no further lower device
 * is reported and returns the last device reached. @dev itself is returned
 * if it has no lower device for @sk.
 */

struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
					    struct sock *sk)
{
	struct net_device *lower;

	lower = netdev_sk_get_lower_dev(dev, sk);
	while (lower) {
		dev = lower;
		lower = netdev_sk_get_lower_dev(dev, sk);
	}

	return dev;
}
EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
8337
8338	static void netdev_adjacent_add_links(struct net_device *dev)
8339	{
8340	struct netdev_adjacent *iter;
8341
8342	struct net *net = dev_net(dev);
8343
8344	list_for_each_entry(iter, &dev->adj_list.upper, list) {
8345	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8346	continue;
8347	netdev_adjacent_sysfs_add(dev: iter->dev, adj_dev: dev,
8348	dev_list: &iter->dev->adj_list.lower);
8349	netdev_adjacent_sysfs_add(dev, adj_dev: iter->dev,
8350	dev_list: &dev->adj_list.upper);
8351	}
8352
8353	list_for_each_entry(iter, &dev->adj_list.lower, list) {
8354	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8355	continue;
8356	netdev_adjacent_sysfs_add(dev: iter->dev, adj_dev: dev,
8357	dev_list: &iter->dev->adj_list.upper);
8358	netdev_adjacent_sysfs_add(dev, adj_dev: iter->dev,
8359	dev_list: &dev->adj_list.lower);
8360	}
8361	}
8362
8363	static void netdev_adjacent_del_links(struct net_device *dev)
8364	{
8365	struct netdev_adjacent *iter;
8366
8367	struct net *net = dev_net(dev);
8368
8369	list_for_each_entry(iter, &dev->adj_list.upper, list) {
8370	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8371	continue;
8372	netdev_adjacent_sysfs_del(dev: iter->dev, name: dev->name,
8373	dev_list: &iter->dev->adj_list.lower);
8374	netdev_adjacent_sysfs_del(dev, name: iter->dev->name,
8375	dev_list: &dev->adj_list.upper);
8376	}
8377
8378	list_for_each_entry(iter, &dev->adj_list.lower, list) {
8379	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8380	continue;
8381	netdev_adjacent_sysfs_del(dev: iter->dev, name: dev->name,
8382	dev_list: &iter->dev->adj_list.upper);
8383	netdev_adjacent_sysfs_del(dev, name: iter->dev->name,
8384	dev_list: &dev->adj_list.lower);
8385	}
8386	}
8387
8388	void netdev_adjacent_rename_links(struct net_device dev, char* *oldname)
8389	{
8390	struct netdev_adjacent *iter;
8391
8392	struct net *net = dev_net(dev);
8393
8394	list_for_each_entry(iter, &dev->adj_list.upper, list) {
8395	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8396	continue;
8397	netdev_adjacent_sysfs_del(dev: iter->dev, name: oldname,
8398	dev_list: &iter->dev->adj_list.lower);
8399	netdev_adjacent_sysfs_add(dev: iter->dev, adj_dev: dev,
8400	dev_list: &iter->dev->adj_list.lower);
8401	}
8402
8403	list_for_each_entry(iter, &dev->adj_list.lower, list) {
8404	if (!net_eq(net1: net, net2: dev_net(dev: iter->dev)))
8405	continue;
8406	netdev_adjacent_sysfs_del(dev: iter->dev, name: oldname,
8407	dev_list: &iter->dev->adj_list.upper);
8408	netdev_adjacent_sysfs_add(dev: iter->dev, adj_dev: dev,
8409	dev_list: &iter->dev->adj_list.upper);
8410	}
8411	}
8412
8413	void netdev_lower_dev_get_private(struct* net_device *dev,
8414	struct net_device *lower_dev)
8415	{
8416	struct netdev_adjacent *lower;
8417
8418	if (!lower_dev)
8419	return NULL;
8420	lower = __netdev_find_adj(adj_dev: lower_dev, adj_list: &dev->adj_list.lower);
8421	if (!lower)
8422	return NULL;
8423
8424	return lower->private;
8425	}
8426	EXPORT_SYMBOL(netdev_lower_dev_get_private);
8427
8428
8429	/**
8430	* netdev_lower_state_changed - Dispatch event about lower device state change
8431	* @lower_dev: device
8432	* @lower_state_info: state to dispatch
8433	*
8434	* Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
8435	* The caller must hold the RTNL lock.
8436	*/
8437	void netdev_lower_state_changed(struct net_device *lower_dev,
8438	void *lower_state_info)
8439	{
8440	struct netdev_notifier_changelowerstate_info changelowerstate_info = {
8441	.info.dev = lower_dev,
8442	};
8443
8444	ASSERT_RTNL();
8445	changelowerstate_info.lower_state_info = lower_state_info;
8446	call_netdevice_notifiers_info(val: NETDEV_CHANGELOWERSTATE,
8447	info: &changelowerstate_info.info);
8448	}
8449	EXPORT_SYMBOL(netdev_lower_state_changed);
8450
8451	static void dev_change_rx_flags(struct net_device dev, int* flags)
8452	{
8453	const struct net_device_ops *ops = dev->netdev_ops;
8454
8455	if (ops->ndo_change_rx_flags)
8456	ops->ndo_change_rx_flags(dev, flags);
8457	}
8458
8459	static int __dev_set_promiscuity(struct net_device dev, int* inc, bool notify)
8460	{
8461	unsigned int old_flags = dev->flags;
8462	kuid_t uid;
8463	kgid_t gid;
8464
8465	ASSERT_RTNL();
8466
8467	dev->flags \|= IFF_PROMISC;
8468	dev->promiscuity += inc;
8469	if (dev->promiscuity == `0`) {
8470	/*
8471	* Avoid overflow.
8472	* If inc causes overflow, untouch promisc and return error.
8473	*/
8474	if (inc < `0`)
8475	dev->flags &= ~IFF_PROMISC;
8476	else {
8477	dev->promiscuity -= inc;
8478	netdev_warn(dev, format: "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
8479	return -EOVERFLOW;
8480	}
8481	}
8482	if (dev->flags != old_flags) {
8483	netdev_info(dev, format: "%s promiscuous mode\n",
8484	dev->flags & IFF_PROMISC ? "entered" : "left");
8485	if (audit_enabled) {
8486	current_uid_gid(&uid, &gid);
8487	audit_log(ctx: audit_context(), GFP_ATOMIC,
8488	AUDIT_ANOM_PROMISCUOUS,
8489	fmt: "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
8490	dev->name, (dev->flags & IFF_PROMISC),
8491	(old_flags & IFF_PROMISC),
8492	from_kuid(to: &init_user_ns, uid: audit_get_loginuid(current)),
8493	from_kuid(to: &init_user_ns, uid),
8494	from_kgid(to: &init_user_ns, gid),
8495	audit_get_sessionid(current));
8496	}
8497
8498	dev_change_rx_flags(dev, IFF_PROMISC);
8499	}
8500	if (notify)
8501	__dev_notify_flags(dev, old_flags, IFF_PROMISC, portid: `0`, NULL);
8502	return `0`;
8503	}
8504
8505	/**
8506	* dev_set_promiscuity - update promiscuity count on a device
8507	* @dev: device
8508	* @inc: modifier
8509	*
8510	* Add or remove promiscuity from a device. While the count in the device
8511	* remains above zero the interface remains promiscuous. Once it hits zero
8512	* the device reverts back to normal filtering operation. A negative inc
8513	* value is used to drop promiscuity on the device.
8514	* Return 0 if successful or a negative errno code on error.
8515	*/
8516	int dev_set_promiscuity(struct net_device dev, int* inc)
8517	{
8518	unsigned int old_flags = dev->flags;
8519	int err;
8520
8521	err = __dev_set_promiscuity(dev, inc, notify: true);
8522	if (err < `0`)
8523	return err;
8524	if (dev->flags != old_flags)
8525	dev_set_rx_mode(dev);
8526	return err;
8527	}
8528	EXPORT_SYMBOL(dev_set_promiscuity);
8529
8530	static int __dev_set_allmulti(struct net_device dev, int* inc, bool notify)
8531	{
8532	unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
8533
8534	ASSERT_RTNL();
8535
8536	dev->flags \|= IFF_ALLMULTI;
8537	dev->allmulti += inc;
8538	if (dev->allmulti == `0`) {
8539	/*
8540	* Avoid overflow.
8541	* If inc causes overflow, untouch allmulti and return error.
8542	*/
8543	if (inc < `0`)
8544	dev->flags &= ~IFF_ALLMULTI;
8545	else {
8546	dev->allmulti -= inc;
8547	netdev_warn(dev, format: "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
8548	return -EOVERFLOW;
8549	}
8550	}
8551	if (dev->flags ^ old_flags) {
8552	netdev_info(dev, format: "%s allmulticast mode\n",
8553	dev->flags & IFF_ALLMULTI ? "entered" : "left");
8554	dev_change_rx_flags(dev, IFF_ALLMULTI);
8555	dev_set_rx_mode(dev);
8556	if (notify)
8557	__dev_notify_flags(dev, old_flags,
8558	gchanges: dev->gflags ^ old_gflags, portid: `0`, NULL);
8559	}
8560	return `0`;
8561	}
8562
/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface keeps receiving
 * all multicast frames. Once it hits zero the device reverts back to normal
 * filtering operation. A negative @inc value is used to drop the counter
 * when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	return __dev_set_allmulti(dev, inc, true);
}
EXPORT_SYMBOL(dev_set_allmulti);
8581
8582	/*
8583	* Upload unicast and multicast address lists to device and
8584	* configure RX filtering. When the device doesn't support unicast
8585	* filtering it is put in promiscuous mode while unicast addresses
8586	* are present.
8587	*/
8588	void __dev_set_rx_mode(struct net_device *dev)
8589	{
8590	const struct net_device_ops *ops = dev->netdev_ops;
8591
8592	/ dev_open will call this function so the list will stay sane. /
8593	if (!(dev->flags&IFF_UP))
8594	return;
8595
8596	if (!netif_device_present(dev))
8597	return;
8598
8599	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
8600	/ Unicast addresses changes may only happen under the rtnl,*
8601	* therefore calling __dev_set_promiscuity here is safe.
8602	*/
8603	if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
8604	__dev_set_promiscuity(dev, inc: `1`, notify: false);
8605	dev->uc_promisc = true;
8606	} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
8607	__dev_set_promiscuity(dev, inc: -`1`, notify: false);
8608	dev->uc_promisc = false;
8609	}
8610	}
8611
8612	if (ops->ndo_set_rx_mode)
8613	ops->ndo_set_rx_mode(dev);
8614	}
8615
/*
 * Reprogram the RX filtering of @dev. Wraps __dev_set_rx_mode() in the
 * device's address-list lock (taken and released via the _bh variants).
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
8622
8623	/**
8624	* dev_get_flags - get flags reported to userspace
8625	* @dev: device
8626	*
8627	* Get the combination of flag bits exported through APIs to userspace.
8628	*/
8629	unsigned int dev_get_flags(const struct net_device *dev)
8630	{
8631	unsigned int flags;
8632
8633	flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC \|
8634	IFF_ALLMULTI \|
8635	IFF_RUNNING \|
8636	IFF_LOWER_UP \|
8637	IFF_DORMANT)) \|
8638	(READ_ONCE(dev->gflags) & (IFF_PROMISC \|
8639	IFF_ALLMULTI));
8640
8641	if (netif_running(dev)) {
8642	if (netif_oper_up(dev))
8643	flags \|= IFF_RUNNING;
8644	if (netif_carrier_ok(dev))
8645	flags \|= IFF_LOWER_UP;
8646	if (netif_dormant(dev))
8647	flags \|= IFF_DORMANT;
8648	}
8649
8650	return flags;
8651	}
8652	EXPORT_SYMBOL(dev_get_flags);
8653
8654	int __dev_change_flags(struct net_device dev, unsigned* int flags,
8655	struct netlink_ext_ack *extack)
8656	{
8657	unsigned int old_flags = dev->flags;
8658	int ret;
8659
8660	ASSERT_RTNL();
8661
8662	/*
8663	* Set the flags on our device.
8664	*/
8665
8666	dev->flags = (flags & (IFF_DEBUG \| IFF_NOTRAILERS \| IFF_NOARP \|
8667	IFF_DYNAMIC \| IFF_MULTICAST \| IFF_PORTSEL \|
8668	IFF_AUTOMEDIA)) \|
8669	(dev->flags & (IFF_UP \| IFF_VOLATILE \| IFF_PROMISC \|
8670	IFF_ALLMULTI));
8671
8672	/*
8673	* Load in the correct multicast list now the flags have changed.
8674	*/
8675
8676	if ((old_flags ^ flags) & IFF_MULTICAST)
8677	dev_change_rx_flags(dev, IFF_MULTICAST);
8678
8679	dev_set_rx_mode(dev);
8680
8681	/*
8682	* Have we downed the interface. We handle IFF_UP ourselves
8683	* according to user attempts to set it, rather than blindly
8684	* setting it.
8685	*/
8686
8687	ret = `0`;
8688	if ((old_flags ^ flags) & IFF_UP) {
8689	if (old_flags & IFF_UP)
8690	__dev_close(dev);
8691	else
8692	ret = __dev_open(dev, extack);
8693	}
8694
8695	if ((flags ^ dev->gflags) & IFF_PROMISC) {
8696	int inc = (flags & IFF_PROMISC) ? `1` : -`1`;
8697	unsigned int old_flags = dev->flags;
8698
8699	dev->gflags ^= IFF_PROMISC;
8700
8701	if (__dev_set_promiscuity(dev, inc, notify: false) >= `0`)
8702	if (dev->flags != old_flags)
8703	dev_set_rx_mode(dev);
8704	}
8705
8706	/ NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI*
8707	* is important. Some (broken) drivers set IFF_PROMISC, when
8708	* IFF_ALLMULTI is requested not asking us and not reporting.
8709	*/
8710	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
8711	int inc = (flags & IFF_ALLMULTI) ? `1` : -`1`;
8712
8713	dev->gflags ^= IFF_ALLMULTI;
8714	__dev_set_allmulti(dev, inc, notify: false);
8715	}
8716
8717	return ret;
8718	}
8719
8720	void __dev_notify_flags(struct net_device dev, unsigned* int old_flags,
8721	unsigned int gchanges, u32 portid,
8722	const struct nlmsghdr *nlh)
8723	{
8724	unsigned int changes = dev->flags ^ old_flags;
8725
8726	if (gchanges)
8727	rtmsg_ifinfo(RTM_NEWLINK, dev, change: gchanges, GFP_ATOMIC, portid, nlh);
8728
8729	if (changes & IFF_UP) {
8730	if (dev->flags & IFF_UP)
8731	call_netdevice_notifiers(NETDEV_UP, dev);
8732	else
8733	call_netdevice_notifiers(NETDEV_DOWN, dev);
8734	}
8735
8736	if (dev->flags & IFF_UP &&
8737	(changes & ~(IFF_UP \| IFF_PROMISC \| IFF_ALLMULTI \| IFF_VOLATILE))) {
8738	struct netdev_notifier_change_info change_info = {
8739	.info = {
8740	.dev = dev,
8741	},
8742	.flags_changed = changes,
8743	};
8744
8745	call_netdevice_notifiers_info(val: NETDEV_CHANGE, info: &change_info.info);
8746	}
8747	}
8748
8749	/**
8750	* dev_change_flags - change device settings
8751	* @dev: device
8752	* @flags: device state flags
8753	* @extack: netlink extended ack
8754	*
8755	* Change settings on device based state flags. The flags are
8756	* in the userspace exported format.
8757	*/
8758	int dev_change_flags(struct net_device dev, unsigned* int flags,
8759	struct netlink_ext_ack *extack)
8760	{
8761	int ret;
8762	unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
8763
8764	ret = __dev_change_flags(dev, flags, extack);
8765	if (ret < `0`)
8766	return ret;
8767
8768	changes = (old_flags ^ dev->flags) \| (old_gflags ^ dev->gflags);
8769	__dev_notify_flags(dev, old_flags, gchanges: changes, portid: `0`, NULL);
8770	return ret;
8771	}
8772	EXPORT_SYMBOL(dev_change_flags);
8773
8774	int __dev_set_mtu(struct net_device dev, int* new_mtu)
8775	{
8776	const struct net_device_ops *ops = dev->netdev_ops;
8777
8778	if (ops->ndo_change_mtu)
8779	return ops->ndo_change_mtu(dev, new_mtu);
8780
8781	/ Pairs with all the lockless reads of dev->mtu in the stack /
8782	WRITE_ONCE(dev->mtu, new_mtu);
8783	return `0`;
8784	}
8785	EXPORT_SYMBOL(__dev_set_mtu);
8786
8787	int dev_validate_mtu(struct net_device dev, int* new_mtu,
8788	struct netlink_ext_ack *extack)
8789	{
8790	/ MTU must be positive, and in range /
8791	if (new_mtu < `0` \|\| new_mtu < dev->min_mtu) {
8792	NL_SET_ERR_MSG(extack, "mtu less than device minimum");
8793	return -EINVAL;
8794	}
8795
8796	if (dev->max_mtu > `0` && new_mtu > dev->max_mtu) {
8797	NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
8798	return -EINVAL;
8799	}
8800	return `0`;
8801	}
8802
8803	/**
8804	* dev_set_mtu_ext - Change maximum transfer unit
8805	* @dev: device
8806	* @new_mtu: new transfer unit
8807	* @extack: netlink extended ack
8808	*
8809	* Change the maximum transfer size of the network device.
8810	*/
8811	int dev_set_mtu_ext(struct net_device dev, int* new_mtu,
8812	struct netlink_ext_ack *extack)
8813	{
8814	int err, orig_mtu;
8815
8816	if (new_mtu == dev->mtu)
8817	return `0`;
8818
8819	err = dev_validate_mtu(dev, new_mtu, extack);
8820	if (err)
8821	return err;
8822
8823	if (!netif_device_present(dev))
8824	return -ENODEV;
8825
8826	err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
8827	err = notifier_to_errno(ret: err);
8828	if (err)
8829	return err;
8830
8831	orig_mtu = dev->mtu;
8832	err = __dev_set_mtu(dev, new_mtu);
8833
8834	if (!err) {
8835	err = call_netdevice_notifiers_mtu(val: NETDEV_CHANGEMTU, dev,
8836	arg: orig_mtu);
8837	err = notifier_to_errno(ret: err);
8838	if (err) {
8839	/ setting mtu back and notifying everyone again,*
8840	* so that they have a chance to revert changes.
8841	*/
8842	__dev_set_mtu(dev, orig_mtu);
8843	call_netdevice_notifiers_mtu(val: NETDEV_CHANGEMTU, dev,
8844	arg: new_mtu);
8845	}
8846	}
8847	return err;
8848	}
8849
8850	int dev_set_mtu(struct net_device dev, int* new_mtu)
8851	{
8852	struct netlink_ext_ack extack;
8853	int err;
8854
8855	memset(&extack, `0`, sizeof(extack));
8856	err = dev_set_mtu_ext(dev, new_mtu, extack: &extack);
8857	if (err && extack._msg)
8858	net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
8859	return err;
8860	}
8861	EXPORT_SYMBOL(dev_set_mtu);
8862
8863	/**
8864	* dev_change_tx_queue_len - Change TX queue length of a netdevice
8865	* @dev: device
8866	* @new_len: new tx queue length
8867	*/
8868	int dev_change_tx_queue_len(struct net_device dev, unsigned* long new_len)
8869	{
8870	unsigned int orig_len = dev->tx_queue_len;
8871	int res;
8872
8873	if (new_len != (unsigned int)new_len)
8874	return -ERANGE;
8875
8876	if (new_len != orig_len) {
8877	dev->tx_queue_len = new_len;
8878	res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
8879	res = notifier_to_errno(ret: res);
8880	if (res)
8881	goto err_rollback;
8882	res = dev_qdisc_change_tx_queue_len(dev);
8883	if (res)
8884	goto err_rollback;
8885	}
8886
8887	return `0`;
8888
8889	err_rollback:
8890	netdev_err(dev, format: "refused to change device tx_queue_len\n");
8891	dev->tx_queue_len = orig_len;
8892	return res;
8893	}
8894
8895	/**
8896	* dev_set_group - Change group this device belongs to
8897	* @dev: device
8898	* @new_group: group this device should belong to
8899	*/
8900	void dev_set_group(struct net_device dev, int* new_group)
8901	{
8902	dev->group = new_group;
8903	}
8904
8905	/**
8906	* dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
8907	* @dev: device
8908	* @addr: new address
8909	* @extack: netlink extended ack
8910	*/
8911	int dev_pre_changeaddr_notify(struct net_device dev, const* char *addr,
8912	struct netlink_ext_ack *extack)
8913	{
8914	struct netdev_notifier_pre_changeaddr_info info = {
8915	.info.dev = dev,
8916	.info.extack = extack,
8917	.dev_addr = addr,
8918	};
8919	int rc;
8920
8921	rc = call_netdevice_notifiers_info(val: NETDEV_PRE_CHANGEADDR, info: &info.info);
8922	return notifier_to_errno(ret: rc);
8923	}
8924	EXPORT_SYMBOL(dev_pre_changeaddr_notify);
8925
8926	/**
8927	* dev_set_mac_address - Change Media Access Control Address
8928	* @dev: device
8929	* @sa: new address
8930	* @extack: netlink extended ack
8931	*
8932	* Change the hardware (MAC) address of the device
8933	*/
8934	int dev_set_mac_address(struct net_device dev, struct* sockaddr *sa,
8935	struct netlink_ext_ack *extack)
8936	{
8937	const struct net_device_ops *ops = dev->netdev_ops;
8938	int err;
8939
8940	if (!ops->ndo_set_mac_address)
8941	return -EOPNOTSUPP;
8942	if (sa->sa_family != dev->type)
8943	return -EINVAL;
8944	if (!netif_device_present(dev))
8945	return -ENODEV;
8946	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
8947	if (err)
8948	return err;
8949	if (memcmp(p: dev->dev_addr, q: sa->sa_data, size: dev->addr_len)) {
8950	err = ops->ndo_set_mac_address(dev, sa);
8951	if (err)
8952	return err;
8953	}
8954	dev->addr_assign_type = NET_ADDR_SET;
8955	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
8956	add_device_randomness(buf: dev->dev_addr, len: dev->addr_len);
8957	return `0`;
8958	}
8959	EXPORT_SYMBOL(dev_set_mac_address);
8960
8961	DECLARE_RWSEM(dev_addr_sem);
8962
8963	int dev_set_mac_address_user(struct net_device dev, struct* sockaddr *sa,
8964	struct netlink_ext_ack *extack)
8965	{
8966	int ret;
8967
8968	down_write(sem: &dev_addr_sem);
8969	ret = dev_set_mac_address(dev, sa, extack);
8970	up_write(sem: &dev_addr_sem);
8971	return ret;
8972	}
8973	EXPORT_SYMBOL(dev_set_mac_address_user);
8974
8975	int dev_get_mac_address(struct sockaddr sa, struct* net net, char* *dev_name)
8976	{
8977	size_t size = sizeof(sa->sa_data_min);
8978	struct net_device *dev;
8979	int ret = `0`;
8980
8981	down_read(sem: &dev_addr_sem);
8982	rcu_read_lock();
8983
8984	dev = dev_get_by_name_rcu(net, dev_name);
8985	if (!dev) {
8986	ret = -ENODEV;
8987	goto unlock;
8988	}
8989	if (!dev->addr_len)
8990	memset(sa->sa_data, `0`, size);
8991	else
8992	memcpy(sa->sa_data, dev->dev_addr,
8993	min_t(size_t, size, dev->addr_len));
8994	sa->sa_family = dev->type;
8995
8996	unlock:
8997	rcu_read_unlock();
8998	up_read(sem: &dev_addr_sem);
8999	return ret;
9000	}
9001	EXPORT_SYMBOL(dev_get_mac_address);
9002
9003	/**
9004	* dev_change_carrier - Change device carrier
9005	* @dev: device
9006	* @new_carrier: new value
9007	*
9008	* Change device carrier
9009	*/
9010	int dev_change_carrier(struct net_device *dev, bool new_carrier)
9011	{
9012	const struct net_device_ops *ops = dev->netdev_ops;
9013
9014	if (!ops->ndo_change_carrier)
9015	return -EOPNOTSUPP;
9016	if (!netif_device_present(dev))
9017	return -ENODEV;
9018	return ops->ndo_change_carrier(dev, new_carrier);
9019	}
9020
9021	/**
9022	* dev_get_phys_port_id - Get device physical port ID
9023	* @dev: device
9024	* @ppid: port ID
9025	*
9026	* Get device physical port ID
9027	*/
9028	int dev_get_phys_port_id(struct net_device *dev,
9029	struct netdev_phys_item_id *ppid)
9030	{
9031	const struct net_device_ops *ops = dev->netdev_ops;
9032
9033	if (!ops->ndo_get_phys_port_id)
9034	return -EOPNOTSUPP;
9035	return ops->ndo_get_phys_port_id(dev, ppid);
9036	}
9037
9038	/**
9039	* dev_get_phys_port_name - Get device physical port name
9040	* @dev: device
9041	* @name: port name
9042	* @len: limit of bytes to copy to name
9043	*
9044	* Get device physical port name
9045	*/
9046	int dev_get_phys_port_name(struct net_device *dev,
9047	char *name, size_t len)
9048	{
9049	const struct net_device_ops *ops = dev->netdev_ops;
9050	int err;
9051
9052	if (ops->ndo_get_phys_port_name) {
9053	err = ops->ndo_get_phys_port_name(dev, name, len);
9054	if (err != -EOPNOTSUPP)
9055	return err;
9056	}
9057	return devlink_compat_phys_port_name_get(dev, name, len);
9058	}
9059
9060	/**
9061	* dev_get_port_parent_id - Get the device's port parent identifier
9062	* @dev: network device
9063	* @ppid: pointer to a storage for the port's parent identifier
9064	* @recurse: allow/disallow recursion to lower devices
9065	*
9066	* Get the devices's port parent identifier
9067	*/
9068	int dev_get_port_parent_id(struct net_device *dev,
9069	struct netdev_phys_item_id *ppid,
9070	bool recurse)
9071	{
9072	const struct net_device_ops *ops = dev->netdev_ops;
9073	struct netdev_phys_item_id first = { };
9074	struct net_device *lower_dev;
9075	struct list_head *iter;
9076	int err;
9077
9078	if (ops->ndo_get_port_parent_id) {
9079	err = ops->ndo_get_port_parent_id(dev, ppid);
9080	if (err != -EOPNOTSUPP)
9081	return err;
9082	}
9083
9084	err = devlink_compat_switch_id_get(dev, ppid);
9085	if (!recurse \|\| err != -EOPNOTSUPP)
9086	return err;
9087
9088	netdev_for_each_lower_dev(dev, lower_dev, iter) {
9089	err = dev_get_port_parent_id(dev: lower_dev, ppid, recurse: true);
9090	if (err)
9091	break;
9092	if (!first.id_len)
9093	first = *ppid;
9094	else if (memcmp(p: &first, q: ppid, size: sizeof(*ppid)))
9095	return -EOPNOTSUPP;
9096	}
9097
9098	return err;
9099	}
9100	EXPORT_SYMBOL(dev_get_port_parent_id);
9101
9102	/**
9103	* netdev_port_same_parent_id - Indicate if two network devices have
9104	* the same port parent identifier
9105	* @a: first network device
9106	* @b: second network device
9107	*/
9108	bool netdev_port_same_parent_id(struct net_device a, struct* net_device *b)
9109	{
9110	struct netdev_phys_item_id a_id = { };
9111	struct netdev_phys_item_id b_id = { };
9112
9113	if (dev_get_port_parent_id(a, &a_id, true) \|\|
9114	dev_get_port_parent_id(b, &b_id, true))
9115	return false;
9116
9117	return netdev_phys_item_id_same(a: &a_id, b: &b_id);
9118	}
9119	EXPORT_SYMBOL(netdev_port_same_parent_id);
9120
9121	/**
9122	* dev_change_proto_down - set carrier according to proto_down.
9123	*
9124	* @dev: device
9125	* @proto_down: new value
9126	*/
9127	int dev_change_proto_down(struct net_device *dev, bool proto_down)
9128	{
9129	if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
9130	return -EOPNOTSUPP;
9131	if (!netif_device_present(dev))
9132	return -ENODEV;
9133	if (proto_down)
9134	netif_carrier_off(dev);
9135	else
9136	netif_carrier_on(dev);
9137	dev->proto_down = proto_down;
9138	return `0`;
9139	}
9140
9141	/**
9142	* dev_change_proto_down_reason - proto down reason
9143	*
9144	* @dev: device
9145	* @mask: proto down mask
9146	* @value: proto down value
9147	*/
9148	void dev_change_proto_down_reason(struct net_device dev, unsigned* long mask,
9149	u32 value)
9150	{
9151	int b;
9152
9153	if (!mask) {
9154	dev->proto_down_reason = value;
9155	} else {
9156	for_each_set_bit(b, &mask, `32`) {
9157	if (value & (`1` << b))
9158	dev->proto_down_reason \|= BIT(b);
9159	else
9160	dev->proto_down_reason &= ~BIT(b);
9161	}
9162	}
9163	}
9164
9165	struct bpf_xdp_link {
9166	struct bpf_link link;
9167	struct net_device dev; /* protected by rtnl_lock, no refcnt held /
9168	int flags;
9169	};
9170
9171	static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags)
9172	{
9173	if (flags & XDP_FLAGS_HW_MODE)
9174	return XDP_MODE_HW;
9175	if (flags & XDP_FLAGS_DRV_MODE)
9176	return XDP_MODE_DRV;
9177	if (flags & XDP_FLAGS_SKB_MODE)
9178	return XDP_MODE_SKB;
9179	return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB;
9180	}
9181
9182	static bpf_op_t dev_xdp_bpf_op(struct net_device dev, enum* bpf_xdp_mode mode)
9183	{
9184	switch (mode) {
9185	case XDP_MODE_SKB:
9186	return generic_xdp_install;
9187	case XDP_MODE_DRV:
9188	case XDP_MODE_HW:
9189	return dev->netdev_ops->ndo_bpf;
9190	default:
9191	return NULL;
9192	}
9193	}
9194
9195	static struct bpf_xdp_link dev_xdp_link(struct* net_device *dev,
9196	enum bpf_xdp_mode mode)
9197	{
9198	return dev->xdp_state[mode].link;
9199	}
9200
9201	static struct bpf_prog dev_xdp_prog(struct* net_device *dev,
9202	enum bpf_xdp_mode mode)
9203	{
9204	struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
9205
9206	if (link)
9207	return link->link.prog;
9208	return dev->xdp_state[mode].prog;
9209	}
9210
9211	u8 dev_xdp_prog_count(struct net_device *dev)
9212	{
9213	u8 count = `0`;
9214	int i;
9215
9216	for (i = `0`; i < __MAX_XDP_MODE; i++)
9217	if (dev->xdp_state[i].prog \|\| dev->xdp_state[i].link)
9218	count++;
9219	return count;
9220	}
9221	EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
9222
9223	u32 dev_xdp_prog_id(struct net_device dev, enum* bpf_xdp_mode mode)
9224	{
9225	struct bpf_prog *prog = dev_xdp_prog(dev, mode);
9226
9227	return prog ? prog->aux->id : `0`;
9228	}
9229
9230	static void dev_xdp_set_link(struct net_device dev, enum* bpf_xdp_mode mode,
9231	struct bpf_xdp_link *link)
9232	{
9233	dev->xdp_state[mode].link = link;
9234	dev->xdp_state[mode].prog = NULL;
9235	}
9236
9237	static void dev_xdp_set_prog(struct net_device dev, enum* bpf_xdp_mode mode,
9238	struct bpf_prog *prog)
9239	{
9240	dev->xdp_state[mode].link = NULL;
9241	dev->xdp_state[mode].prog = prog;
9242	}
9243
9244	static int dev_xdp_install(struct net_device dev, enum* bpf_xdp_mode mode,
9245	bpf_op_t bpf_op, struct netlink_ext_ack *extack,
9246	u32 flags, struct bpf_prog *prog)
9247	{
9248	struct netdev_bpf xdp;
9249	int err;
9250
9251	memset(&xdp, `0`, sizeof(xdp));
9252	xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
9253	xdp.extack = extack;
9254	xdp.flags = flags;
9255	xdp.prog = prog;
9256
9257	/ Drivers assume refcnt is already incremented (i.e, prog pointer is*
9258	* "moved" into driver), so they don't increment it on their own, but
9259	* they do decrement refcnt when program is detached or replaced.
9260	* Given net_device also owns link/prog, we need to bump refcnt here
9261	* to prevent drivers from underflowing it.
9262	*/
9263	if (prog)
9264	bpf_prog_inc(prog);
9265	err = bpf_op(dev, &xdp);
9266	if (err) {
9267	if (prog)
9268	bpf_prog_put(prog);
9269	return err;
9270	}
9271
9272	if (mode != XDP_MODE_HW)
9273	bpf_prog_change_xdp(prev_prog: dev_xdp_prog(dev, mode), prog);
9274
9275	return `0`;
9276	}
9277
9278	static void dev_xdp_uninstall(struct net_device *dev)
9279	{
9280	struct bpf_xdp_link *link;
9281	struct bpf_prog *prog;
9282	enum bpf_xdp_mode mode;
9283	bpf_op_t bpf_op;
9284
9285	ASSERT_RTNL();
9286
9287	for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
9288	prog = dev_xdp_prog(dev, mode);
9289	if (!prog)
9290	continue;
9291
9292	bpf_op = dev_xdp_bpf_op(dev, mode);
9293	if (!bpf_op)
9294	continue;
9295
9296	WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, `0`, NULL));
9297
9298	/ auto-detach link from net device /
9299	link = dev_xdp_link(dev, mode);
9300	if (link)
9301	link->dev = NULL;
9302	else
9303	bpf_prog_put(prog);
9304
9305	dev_xdp_set_link(dev, mode, NULL);
9306	}
9307	}
9308
9309	static int dev_xdp_attach(struct net_device dev, struct* netlink_ext_ack *extack,
9310	struct bpf_xdp_link link, struct* bpf_prog *new_prog,
9311	struct bpf_prog *old_prog, u32 flags)
9312	{
9313	unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
9314	struct bpf_prog *cur_prog;
9315	struct net_device *upper;
9316	struct list_head *iter;
9317	enum bpf_xdp_mode mode;
9318	bpf_op_t bpf_op;
9319	int err;
9320
9321	ASSERT_RTNL();
9322
9323	/ either link or prog attachment, never both /
9324	if (link && (new_prog \|\| old_prog))
9325	return -EINVAL;
9326	/ link supports only XDP mode flags /
9327	if (link && (flags & ~XDP_FLAGS_MODES)) {
9328	NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
9329	return -EINVAL;
9330	}
9331	/ just one XDP mode bit should be set, zero defaults to drv/skb mode /
9332	if (num_modes > `1`) {
9333	NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
9334	return -EINVAL;
9335	}
9336	/ avoid ambiguity if offload + drv/skb mode progs are both loaded /
9337	if (!num_modes && dev_xdp_prog_count(dev) > `1`) {
9338	NL_SET_ERR_MSG(extack,
9339	"More than one program loaded, unset mode is ambiguous");
9340	return -EINVAL;
9341	}
9342	/ old_prog != NULL implies XDP_FLAGS_REPLACE is set /
9343	if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
9344	NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
9345	return -EINVAL;
9346	}
9347
9348	mode = dev_xdp_mode(dev, flags);
9349	/ can't replace attached link /
9350	if (dev_xdp_link(dev, mode)) {
9351	NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
9352	return -EBUSY;
9353	}
9354
9355	/ don't allow if an upper device already has a program /
9356	netdev_for_each_upper_dev_rcu(dev, upper, iter) {
9357	if (dev_xdp_prog_count(upper) > `0`) {
9358	NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
9359	return -EEXIST;
9360	}
9361	}
9362
9363	cur_prog = dev_xdp_prog(dev, mode);
9364	/ can't replace attached prog with link /
9365	if (link && cur_prog) {
9366	NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
9367	return -EBUSY;
9368	}
9369	if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
9370	NL_SET_ERR_MSG(extack, "Active program does not match expected");
9371	return -EEXIST;
9372	}
9373
9374	/ put effective new program into new_prog /
9375	if (link)
9376	new_prog = link->link.prog;
9377
9378	if (new_prog) {
9379	bool offload = mode == XDP_MODE_HW;
9380	enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
9381	? XDP_MODE_DRV : XDP_MODE_SKB;
9382
9383	if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
9384	NL_SET_ERR_MSG(extack, "XDP program already attached");
9385	return -EBUSY;
9386	}
9387	if (!offload && dev_xdp_prog(dev, mode: other_mode)) {
9388	NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
9389	return -EEXIST;
9390	}
9391	if (!offload && bpf_prog_is_offloaded(aux: new_prog->aux)) {
9392	NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
9393	return -EINVAL;
9394	}
9395	if (bpf_prog_is_dev_bound(aux: new_prog->aux) && !bpf_offload_dev_match(prog: new_prog, netdev: dev)) {
9396	NL_SET_ERR_MSG(extack, "Program bound to different device");
9397	return -EINVAL;
9398	}
9399	if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
9400	NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
9401	return -EINVAL;
9402	}
9403	if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
9404	NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
9405	return -EINVAL;
9406	}
9407	}
9408
9409	/ don't call drivers if the effective program didn't change /
9410	if (new_prog != cur_prog) {
9411	bpf_op = dev_xdp_bpf_op(dev, mode);
9412	if (!bpf_op) {
9413	NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
9414	return -EOPNOTSUPP;
9415	}
9416
9417	err = dev_xdp_install(dev, mode, bpf_op, extack, flags, prog: new_prog);
9418	if (err)
9419	return err;
9420	}
9421
9422	if (link)
9423	dev_xdp_set_link(dev, mode, link);
9424	else
9425	dev_xdp_set_prog(dev, mode, prog: new_prog);
9426	if (cur_prog)
9427	bpf_prog_put(prog: cur_prog);
9428
9429	return `0`;
9430	}
9431
9432	static int dev_xdp_attach_link(struct net_device *dev,
9433	struct netlink_ext_ack *extack,
9434	struct bpf_xdp_link *link)
9435	{
9436	return dev_xdp_attach(dev, extack, link, NULL, NULL, flags: link->flags);
9437	}
9438
9439	static int dev_xdp_detach_link(struct net_device *dev,
9440	struct netlink_ext_ack *extack,
9441	struct bpf_xdp_link *link)
9442	{
9443	enum bpf_xdp_mode mode;
9444	bpf_op_t bpf_op;
9445
9446	ASSERT_RTNL();
9447
9448	mode = dev_xdp_mode(dev, flags: link->flags);
9449	if (dev_xdp_link(dev, mode) != link)
9450	return -EINVAL;
9451
9452	bpf_op = dev_xdp_bpf_op(dev, mode);
9453	WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, `0`, NULL));
9454	dev_xdp_set_link(dev, mode, NULL);
9455	return `0`;
9456	}
9457
9458	static void bpf_xdp_link_release(struct bpf_link *link)
9459	{
9460	struct bpf_xdp_link xdp_link = container_of(link, struct* bpf_xdp_link, link);
9461
9462	rtnl_lock();
9463
9464	/ if racing with net_device's tear down, xdp_link->dev might be*
9465	* already NULL, in which case link was already auto-detached
9466	*/
9467	if (xdp_link->dev) {
9468	WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
9469	xdp_link->dev = NULL;
9470	}
9471
9472	rtnl_unlock();
9473	}
9474
/* bpf_link_ops.detach: same work as release; always reports success. */
static int bpf_xdp_link_detach(struct bpf_link *link)
{
	bpf_xdp_link_release(link);
	return 0;
}
9480
9481	static void bpf_xdp_link_dealloc(struct bpf_link *link)
9482	{
9483	struct bpf_xdp_link xdp_link = container_of(link, struct* bpf_xdp_link, link);
9484
9485	kfree(objp: xdp_link);
9486	}
9487
9488	static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
9489	struct seq_file *seq)
9490	{
9491	struct bpf_xdp_link xdp_link = container_of(link, struct* bpf_xdp_link, link);
9492	u32 ifindex = `0`;
9493
9494	rtnl_lock();
9495	if (xdp_link->dev)
9496	ifindex = xdp_link->dev->ifindex;
9497	rtnl_unlock();
9498
9499	seq_printf(m: seq, fmt: "ifindex:\t%u\n", ifindex);
9500	}
9501
9502	static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
9503	struct bpf_link_info *info)
9504	{
9505	struct bpf_xdp_link xdp_link = container_of(link, struct* bpf_xdp_link, link);
9506	u32 ifindex = `0`;
9507
9508	rtnl_lock();
9509	if (xdp_link->dev)
9510	ifindex = xdp_link->dev->ifindex;
9511	rtnl_unlock();
9512
9513	info->xdp.ifindex = ifindex;
9514	return `0`;
9515	}
9516
9517	static int bpf_xdp_link_update(struct bpf_link link, struct* bpf_prog *new_prog,
9518	struct bpf_prog *old_prog)
9519	{
9520	struct bpf_xdp_link xdp_link = container_of(link, struct* bpf_xdp_link, link);
9521	enum bpf_xdp_mode mode;
9522	bpf_op_t bpf_op;
9523	int err = `0`;
9524
9525	rtnl_lock();
9526
9527	/ link might have been auto-released already, so fail /
9528	if (!xdp_link->dev) {
9529	err = -ENOLINK;
9530	goto out_unlock;
9531	}
9532
9533	if (old_prog && link->prog != old_prog) {
9534	err = -EPERM;
9535	goto out_unlock;
9536	}
9537	old_prog = link->prog;
9538	if (old_prog->type != new_prog->type \|\|
9539	old_prog->expected_attach_type != new_prog->expected_attach_type) {
9540	err = -EINVAL;
9541	goto out_unlock;
9542	}
9543
9544	if (old_prog == new_prog) {
9545	/ no-op, don't disturb drivers /
9546	bpf_prog_put(prog: new_prog);
9547	goto out_unlock;
9548	}
9549
9550	mode = dev_xdp_mode(dev: xdp_link->dev, flags: xdp_link->flags);
9551	bpf_op = dev_xdp_bpf_op(dev: xdp_link->dev, mode);
9552	err = dev_xdp_install(dev: xdp_link->dev, mode, bpf_op, NULL,
9553	flags: xdp_link->flags, prog: new_prog);
9554	if (err)
9555	goto out_unlock;
9556
9557	old_prog = xchg(&link->prog, new_prog);
9558	bpf_prog_put(prog: old_prog);
9559
9560	out_unlock:
9561	rtnl_unlock();
9562	return err;
9563	}
9564
9565	static const struct bpf_link_ops bpf_xdp_link_lops = {
9566	.release = bpf_xdp_link_release,
9567	.dealloc = bpf_xdp_link_dealloc,
9568	.detach = bpf_xdp_link_detach,
9569	.show_fdinfo = bpf_xdp_link_show_fdinfo,
9570	.fill_link_info = bpf_xdp_link_fill_link_info,
9571	.update_prog = bpf_xdp_link_update,
9572	};
9573
9574	int bpf_xdp_link_attach(const union bpf_attr attr, struct* bpf_prog *prog)
9575	{
9576	struct net *net = current->nsproxy->net_ns;
9577	struct bpf_link_primer link_primer;
9578	struct netlink_ext_ack extack = {};
9579	struct bpf_xdp_link *link;
9580	struct net_device *dev;
9581	int err, fd;
9582
9583	rtnl_lock();
9584	dev = dev_get_by_index(net, attr->link_create.target_ifindex);
9585	if (!dev) {
9586	rtnl_unlock();
9587	return -EINVAL;
9588	}
9589
9590	link = kzalloc(size: sizeof(*link), GFP_USER);
9591	if (!link) {
9592	err = -ENOMEM;
9593	goto unlock;
9594	}
9595
9596	bpf_link_init(link: &link->link, type: BPF_LINK_TYPE_XDP, ops: &bpf_xdp_link_lops, prog);
9597	link->dev = dev;
9598	link->flags = attr->link_create.flags;
9599
9600	err = bpf_link_prime(link: &link->link, primer: &link_primer);
9601	if (err) {
9602	kfree(objp: link);
9603	goto unlock;
9604	}
9605
9606	err = dev_xdp_attach_link(dev, extack: &extack, link);
9607	rtnl_unlock();
9608
9609	if (err) {
9610	link->dev = NULL;
9611	bpf_link_cleanup(primer: &link_primer);
9612	trace_bpf_xdp_link_attach_failed(msg: extack._msg);
9613	goto out_put_dev;
9614	}
9615
9616	fd = bpf_link_settle(primer: &link_primer);
9617	/ link itself doesn't hold dev's refcnt to not complicate shutdown /
9618	dev_put(dev);
9619	return fd;
9620
9621	unlock:
9622	rtnl_unlock();
9623
9624	out_put_dev:
9625	dev_put(dev);
9626	return err;
9627	}
9628
9629	/**
9630	* dev_change_xdp_fd - set or clear a bpf program for a device rx path
9631	* @dev: device
9632	* @extack: netlink extended ack
9633	* @fd: new program fd or negative value to clear
9634	* @expected_fd: old program fd that userspace expects to replace or clear
9635	* @flags: xdp-related flags
9636	*
9637	* Set or clear a bpf program for a device
9638	*/
9639	int dev_change_xdp_fd(struct net_device dev, struct* netlink_ext_ack *extack,
9640	int fd, int expected_fd, u32 flags)
9641	{
9642	enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags);
9643	struct bpf_prog new_prog = NULL, old_prog = NULL;
9644	int err;
9645
9646	ASSERT_RTNL();
9647
9648	if (fd >= `0`) {
9649	new_prog = bpf_prog_get_type_dev(ufd: fd, type: BPF_PROG_TYPE_XDP,
9650	attach_drv: mode != XDP_MODE_SKB);
9651	if (IS_ERR(ptr: new_prog))
9652	return PTR_ERR(ptr: new_prog);
9653	}
9654
9655	if (expected_fd >= `0`) {
9656	old_prog = bpf_prog_get_type_dev(ufd: expected_fd, type: BPF_PROG_TYPE_XDP,
9657	attach_drv: mode != XDP_MODE_SKB);
9658	if (IS_ERR(ptr: old_prog)) {
9659	err = PTR_ERR(ptr: old_prog);
9660	old_prog = NULL;
9661	goto err_out;
9662	}
9663	}
9664
9665	err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
9666
9667	err_out:
9668	if (err && new_prog)
9669	bpf_prog_put(prog: new_prog);
9670	if (old_prog)
9671	bpf_prog_put(prog: old_prog);
9672	return err;
9673	}
9674
9675	/**
9676	* dev_index_reserve() - allocate an ifindex in a namespace
9677	* @net: the applicable net namespace
9678	* @ifindex: requested ifindex, pass %0 to get one allocated
9679	*
9680	* Allocate a ifindex for a new device. Caller must either use the ifindex
9681	* to store the device (via list_netdevice()) or call dev_index_release()
9682	* to give the index up.
9683	*
9684	* Return: a suitable unique value for a new device interface number or -errno.
9685	*/
9686	static int dev_index_reserve(struct net *net, u32 ifindex)
9687	{
9688	int err;
9689
9690	if (ifindex > INT_MAX) {
9691	DEBUG_NET_WARN_ON_ONCE(`1`);
9692	return -EINVAL;
9693	}
9694
9695	if (!ifindex)
9696	err = xa_alloc_cyclic(xa: &net->dev_by_index, id: &ifindex, NULL,
9697	xa_limit_31b, next: &net->ifindex, GFP_KERNEL);
9698	else
9699	err = xa_insert(xa: &net->dev_by_index, index: ifindex, NULL, GFP_KERNEL);
9700	if (err < `0`)
9701	return err;
9702
9703	return ifindex;
9704	}
9705
9706	static void dev_index_release(struct net net, int* ifindex)
9707	{
9708	/ Expect only unused indexes, unlist_netdevice() removes the used /
9709	WARN_ON(xa_erase(&net->dev_by_index, ifindex));
9710	}
9711
9712	/ Delayed registration/unregisteration /
9713	LIST_HEAD(net_todo_list);
9714	DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
9715	atomic_t dev_unreg_count = ATOMIC_INIT(`0`);
9716
9717	static void net_set_todo(struct net_device *dev)
9718	{
9719	list_add_tail(new: &dev->todo_list, head: &net_todo_list);
9720	}
9721
9722	static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
9723	struct net_device *upper, netdev_features_t features)
9724	{
9725	netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9726	netdev_features_t feature;
9727	int feature_bit;
9728
9729	for_each_netdev_feature(upper_disables, feature_bit) {
9730	feature = __NETIF_F_BIT(feature_bit);
9731	if (!(upper->wanted_features & feature)
9732	&& (features & feature)) {
9733	netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
9734	&feature, upper->name);
9735	features &= ~feature;
9736	}
9737	}
9738
9739	return features;
9740	}
9741
9742	static void netdev_sync_lower_features(struct net_device *upper,
9743	struct net_device *lower, netdev_features_t features)
9744	{
9745	netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9746	netdev_features_t feature;
9747	int feature_bit;
9748
9749	for_each_netdev_feature(upper_disables, feature_bit) {
9750	feature = __NETIF_F_BIT(feature_bit);
9751	if (!(features & feature) && (lower->features & feature)) {
9752	netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
9753	&feature, lower->name);
9754	lower->wanted_features &= ~feature;
9755	__netdev_update_features(dev: lower);
9756
9757	if (unlikely(lower->features & feature))
9758	netdev_WARN(upper, "failed to disable %pNF on %s!\n",
9759	&feature, lower->name);
9760	else
9761	netdev_features_change(lower);
9762	}
9763	}
9764	}
9765
9766	static netdev_features_t netdev_fix_features(struct net_device *dev,
9767	netdev_features_t features)
9768	{
9769	/ Fix illegal checksum combinations /
9770	if ((features & NETIF_F_HW_CSUM) &&
9771	(features & (NETIF_F_IP_CSUM\|NETIF_F_IPV6_CSUM))) {
9772	netdev_warn(dev, format: "mixed HW and IP checksum settings.\n");
9773	features &= ~(NETIF_F_IP_CSUM\|NETIF_F_IPV6_CSUM);
9774	}
9775
9776	/ TSO requires that SG is present as well. /
9777	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
9778	netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
9779	features &= ~NETIF_F_ALL_TSO;
9780	}
9781
9782	if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
9783	!(features & NETIF_F_IP_CSUM)) {
9784	netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
9785	features &= ~NETIF_F_TSO;
9786	features &= ~NETIF_F_TSO_ECN;
9787	}
9788
9789	if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
9790	!(features & NETIF_F_IPV6_CSUM)) {
9791	netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
9792	features &= ~NETIF_F_TSO6;
9793	}
9794
9795	/ TSO with IPv4 ID mangling requires IPv4 TSO be enabled /
9796	if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
9797	features &= ~NETIF_F_TSO_MANGLEID;
9798
9799	/ TSO ECN requires that TSO is present as well. /
9800	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
9801	features &= ~NETIF_F_TSO_ECN;
9802
9803	/ Software GSO depends on SG. /
9804	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
9805	netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
9806	features &= ~NETIF_F_GSO;
9807	}
9808
9809	/ GSO partial features require GSO partial be set /
9810	if ((features & dev->gso_partial_features) &&
9811	!(features & NETIF_F_GSO_PARTIAL)) {
9812	netdev_dbg(dev,
9813	"Dropping partially supported GSO features since no GSO partial.\n");
9814	features &= ~dev->gso_partial_features;
9815	}
9816
9817	if (!(features & NETIF_F_RXCSUM)) {
9818	/ NETIF_F_GRO_HW implies doing RXCSUM since every packet*
9819	* successfully merged by hardware must also have the
9820	* checksum verified by hardware. If the user does not
9821	* want to enable RXCSUM, logically, we should disable GRO_HW.
9822	*/
9823	if (features & NETIF_F_GRO_HW) {
9824	netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
9825	features &= ~NETIF_F_GRO_HW;
9826	}
9827	}
9828
9829	/ LRO/HW-GRO features cannot be combined with RX-FCS /
9830	if (features & NETIF_F_RXFCS) {
9831	if (features & NETIF_F_LRO) {
9832	netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
9833	features &= ~NETIF_F_LRO;
9834	}
9835
9836	if (features & NETIF_F_GRO_HW) {
9837	netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
9838	features &= ~NETIF_F_GRO_HW;
9839	}
9840	}
9841
9842	if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
9843	netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
9844	features &= ~NETIF_F_LRO;
9845	}
9846
9847	if (features & NETIF_F_HW_TLS_TX) {
9848	bool ip_csum = (features & (NETIF_F_IP_CSUM \| NETIF_F_IPV6_CSUM)) ==
9849	(NETIF_F_IP_CSUM \| NETIF_F_IPV6_CSUM);
9850	bool hw_csum = features & NETIF_F_HW_CSUM;
9851
9852	if (!ip_csum && !hw_csum) {
9853	netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
9854	features &= ~NETIF_F_HW_TLS_TX;
9855	}
9856	}
9857
9858	if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
9859	netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
9860	features &= ~NETIF_F_HW_TLS_RX;
9861	}
9862
9863	return features;
9864	}
9865
9866	int __netdev_update_features(struct net_device *dev)
9867	{
9868	struct net_device upper, lower;
9869	netdev_features_t features;
9870	struct list_head *iter;
9871	int err = -`1`;
9872
9873	ASSERT_RTNL();
9874
9875	features = netdev_get_wanted_features(dev);
9876
9877	if (dev->netdev_ops->ndo_fix_features)
9878	features = dev->netdev_ops->ndo_fix_features(dev, features);
9879
9880	/ driver might be less strict about feature dependencies /
9881	features = netdev_fix_features(dev, features);
9882
9883	/ some features can't be enabled if they're off on an upper device /
9884	netdev_for_each_upper_dev_rcu(dev, upper, iter)
9885	features = netdev_sync_upper_features(lower: dev, upper, features);
9886
9887	if (dev->features == features)
9888	goto sync_lower;
9889
9890	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
9891	&dev->features, &features);
9892
9893	if (dev->netdev_ops->ndo_set_features)
9894	err = dev->netdev_ops->ndo_set_features(dev, features);
9895	else
9896	err = `0`;
9897
9898	if (unlikely(err < `0`)) {
9899	netdev_err(dev,
9900	format: "set_features() failed (%d); wanted %pNF, left %pNF\n",
9901	err, &features, &dev->features);
9902	/ return non-0 since some features might have changed and*
9903	* it's better to fire a spurious notification than miss it
9904	*/
9905	return -`1`;
9906	}
9907
9908	sync_lower:
9909	/ some features must be disabled on lower devices when disabled*
9910	* on an upper device (think: bonding master or bridge)
9911	*/
9912	netdev_for_each_lower_dev(dev, lower, iter)
9913	netdev_sync_lower_features(upper: dev, lower, features);
9914
9915	if (!err) {
9916	netdev_features_t diff = features ^ dev->features;
9917
9918	if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
9919	/ udp_tunnel_{get,drop}_rx_info both need*
9920	* NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
9921	* device, or they won't do anything.
9922	* Thus we need to update dev->features
9923	* before calling udp_tunnel_get_rx_info,
9924	* but after calling udp_tunnel_drop_rx_info.
9925	*/
9926	if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
9927	dev->features = features;
9928	udp_tunnel_get_rx_info(dev);
9929	} else {
9930	udp_tunnel_drop_rx_info(dev);
9931	}
9932	}
9933
9934	if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
9935	if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
9936	dev->features = features;
9937	err \|= vlan_get_rx_ctag_filter_info(dev);
9938	} else {
9939	vlan_drop_rx_ctag_filter_info(dev);
9940	}
9941	}
9942
9943	if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
9944	if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
9945	dev->features = features;
9946	err \|= vlan_get_rx_stag_filter_info(dev);
9947	} else {
9948	vlan_drop_rx_stag_filter_info(dev);
9949	}
9950	}
9951
9952	dev->features = features;
9953	}
9954
9955	return err < `0` ? `0` : `1`;
9956	}
9957
/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Re-evaluates the dev->features set through
 *	__netdev_update_features() and, only when that reports a change
 *	(non-zero return), sends the features-changed notification.
 *	Intended to be called after driver or hardware dependent conditions
 *	that influence the features might have changed.
 */
void netdev_update_features(struct net_device *dev)
{
	int changed = __netdev_update_features(dev);

	if (!changed)
		return;

	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_update_features);
9972
/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Re-evaluates the dev->features set and always sends the
 *	features-changed notification, whether or not anything changed.
 *	Use this rather than netdev_update_features() when
 *	dev->vlan_features might also have changed, so that the change
 *	can be propagated to stacked VLAN devices.
 */
void netdev_change_features(struct net_device *dev)
{
	/* result deliberately unused: the notification is unconditional */
	(void)__netdev_update_features(dev);

	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_change_features);
9989
9990	/**
9991	* netif_stacked_transfer_operstate - transfer operstate
9992	* @rootdev: the root or lower level device to transfer state from
9993	* @dev: the device to transfer operstate to
9994	*
9995	* Transfer operational state from root to device. This is normally
9996	* called when a stacking relationship exists between the root
9997	* device and the device(a leaf device).
9998	*/
9999	void netif_stacked_transfer_operstate(const struct net_device *rootdev,
10000	struct net_device *dev)
10001	{
10002	if (rootdev->operstate == IF_OPER_DORMANT)
10003	netif_dormant_on(dev);
10004	else
10005	netif_dormant_off(dev);
10006
10007	if (rootdev->operstate == IF_OPER_TESTING)
10008	netif_testing_on(dev);
10009	else
10010	netif_testing_off(dev);
10011
10012	if (netif_carrier_ok(dev: rootdev))
10013	netif_carrier_on(dev);
10014	else
10015	netif_carrier_off(dev);
10016	}
10017	EXPORT_SYMBOL(netif_stacked_transfer_operstate);
10018
10019	static int netif_alloc_rx_queues(struct net_device *dev)
10020	{
10021	unsigned int i, count = dev->num_rx_queues;
10022	struct netdev_rx_queue *rx;
10023	size_t sz = count * sizeof(*rx);
10024	int err = `0`;
10025
10026	BUG_ON(count < `1`);
10027
10028	rx = kvzalloc(size: sz, GFP_KERNEL_ACCOUNT \| __GFP_RETRY_MAYFAIL);
10029	if (!rx)
10030	return -ENOMEM;
10031
10032	dev->_rx = rx;
10033
10034	for (i = `0`; i < count; i++) {
10035	rx[i].dev = dev;
10036
10037	/ XDP RX-queue setup /
10038	err = xdp_rxq_info_reg(xdp_rxq: &rx[i].xdp_rxq, dev, queue_index: i, napi_id: `0`);
10039	if (err < `0`)
10040	goto err_rxq_info;
10041	}
10042	return `0`;
10043
10044	err_rxq_info:
10045	/ Rollback successful reg's and free other resources /
10046	while (i--)
10047	xdp_rxq_info_unreg(xdp_rxq: &rx[i].xdp_rxq);
10048	kvfree(addr: dev->_rx);
10049	dev->_rx = NULL;
10050	return err;
10051	}
10052
10053	static void netif_free_rx_queues(struct net_device *dev)
10054	{
10055	unsigned int i, count = dev->num_rx_queues;
10056
10057	/ netif_alloc_rx_queues alloc failed, resources have been unreg'ed /
10058	if (!dev->_rx)
10059	return;
10060
10061	for (i = `0`; i < count; i++)
10062	xdp_rxq_info_unreg(xdp_rxq: &dev->_rx[i].xdp_rxq);
10063
10064	kvfree(addr: dev->_rx);
10065	}
10066
10067	static void netdev_init_one_queue(struct net_device *dev,
10068	struct netdev_queue queue, void* *_unused)
10069	{
10070	/ Initialize queue lock /
10071	spin_lock_init(&queue->_xmit_lock);
10072	netdev_set_xmit_lockdep_class(lock: &queue->_xmit_lock, dev_type: dev->type);
10073	queue->xmit_lock_owner = -`1`;
10074	netdev_queue_numa_node_write(q: queue, NUMA_NO_NODE);
10075	queue->dev = dev;
10076	#ifdef CONFIG_BQL
10077	dql_init(dql: &queue->dql, HZ);
10078	#endif
10079	}
10080
10081	static void netif_free_tx_queues(struct net_device *dev)
10082	{
10083	kvfree(addr: dev->_tx);
10084	}
10085
10086	static int netif_alloc_netdev_queues(struct net_device *dev)
10087	{
10088	unsigned int count = dev->num_tx_queues;
10089	struct netdev_queue *tx;
10090	size_t sz = count * sizeof(*tx);
10091
10092	if (count < `1` \|\| count > `0xffff`)
10093	return -EINVAL;
10094
10095	tx = kvzalloc(size: sz, GFP_KERNEL_ACCOUNT \| __GFP_RETRY_MAYFAIL);
10096	if (!tx)
10097	return -ENOMEM;
10098
10099	dev->_tx = tx;
10100
10101	netdev_for_each_tx_queue(dev, f: netdev_init_one_queue, NULL);
10102	spin_lock_init(&dev->tx_global_lock);
10103
10104	return `0`;
10105	}
10106
10107	void netif_tx_stop_all_queues(struct net_device *dev)
10108	{
10109	unsigned int i;
10110
10111	for (i = `0`; i < dev->num_tx_queues; i++) {
10112	struct netdev_queue *txq = netdev_get_tx_queue(dev, index: i);
10113
10114	netif_tx_stop_queue(dev_queue: txq);
10115	}
10116	}
10117	EXPORT_SYMBOL(netif_tx_stop_all_queues);
10118
10119	static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
10120	{
10121	void __percpu *v;
10122
10123	/ Drivers implementing ndo_get_peer_dev must support tstat*
10124	* accounting, so that skb_do_redirect() can bump the dev's
10125	* RX stats upon network namespace switch.
10126	*/
10127	if (dev->netdev_ops->ndo_get_peer_dev &&
10128	dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
10129	return -EOPNOTSUPP;
10130
10131	switch (dev->pcpu_stat_type) {
10132	case NETDEV_PCPU_STAT_NONE:
10133	return `0`;
10134	case NETDEV_PCPU_STAT_LSTATS:
10135	v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
10136	break;
10137	case NETDEV_PCPU_STAT_TSTATS:
10138	v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
10139	break;
10140	case NETDEV_PCPU_STAT_DSTATS:
10141	v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
10142	break;
10143	default:
10144	return -EINVAL;
10145	}
10146
10147	return v ? `0` : -ENOMEM;
10148	}
10149
10150	static void netdev_do_free_pcpu_stats(struct net_device *dev)
10151	{
10152	switch (dev->pcpu_stat_type) {
10153	case NETDEV_PCPU_STAT_NONE:
10154	return;
10155	case NETDEV_PCPU_STAT_LSTATS:
10156	free_percpu(pdata: dev->lstats);
10157	break;
10158	case NETDEV_PCPU_STAT_TSTATS:
10159	free_percpu(pdata: dev->tstats);
10160	break;
10161	case NETDEV_PCPU_STAT_DSTATS:
10162	free_percpu(pdata: dev->dstats);
10163	break;
10164	}
10165	}
10166
10167	/**
10168	* register_netdevice() - register a network device
10169	* @dev: device to register
10170	*
10171	* Take a prepared network device structure and make it externally accessible.
10172	* A %NETDEV_REGISTER message is sent to the netdev notifier chain.
10173	* Callers must hold the rtnl lock - you may want register_netdev()
10174	* instead of this.
10175	*/
10176	int register_netdevice(struct net_device *dev)
10177	{
10178	int ret;
10179	struct net *net = dev_net(dev);
10180
10181	BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
10182	NETDEV_FEATURE_COUNT);
10183	BUG_ON(dev_boot_phase);
10184	ASSERT_RTNL();
10185
10186	might_sleep();
10187
10188	/ When net_device's are persistent, this will be fatal. /
10189	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
10190	BUG_ON(!net);
10191
10192	ret = ethtool_check_ops(ops: dev->ethtool_ops);
10193	if (ret)
10194	return ret;
10195
10196	spin_lock_init(&dev->addr_list_lock);
10197	netdev_set_addr_lockdep_class(dev);
10198
10199	ret = dev_get_valid_name(net, dev, name: dev->name);
10200	if (ret < `0`)
10201	goto out;
10202
10203	ret = -ENOMEM;
10204	dev->name_node = netdev_name_node_head_alloc(dev);
10205	if (!dev->name_node)
10206	goto out;
10207
10208	/ Init, if this function is available /
10209	if (dev->netdev_ops->ndo_init) {
10210	ret = dev->netdev_ops->ndo_init(dev);
10211	if (ret) {
10212	if (ret > `0`)
10213	ret = -EIO;
10214	goto err_free_name;
10215	}
10216	}
10217
10218	if (((dev->hw_features \| dev->features) &
10219	NETIF_F_HW_VLAN_CTAG_FILTER) &&
10220	(!dev->netdev_ops->ndo_vlan_rx_add_vid \|\|
10221	!dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
10222	netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
10223	ret = -EINVAL;
10224	goto err_uninit;
10225	}
10226
10227	ret = netdev_do_alloc_pcpu_stats(dev);
10228	if (ret)
10229	goto err_uninit;
10230
10231	ret = dev_index_reserve(net, ifindex: dev->ifindex);
10232	if (ret < `0`)
10233	goto err_free_pcpu;
10234	dev->ifindex = ret;
10235
10236	/ Transfer changeable features to wanted_features and enable*
10237	* software offloads (GSO and GRO).
10238	*/
10239	dev->hw_features \|= (NETIF_F_SOFT_FEATURES \| NETIF_F_SOFT_FEATURES_OFF);
10240	dev->features \|= NETIF_F_SOFT_FEATURES;
10241
10242	if (dev->udp_tunnel_nic_info) {
10243	dev->features \|= NETIF_F_RX_UDP_TUNNEL_PORT;
10244	dev->hw_features \|= NETIF_F_RX_UDP_TUNNEL_PORT;
10245	}
10246
10247	dev->wanted_features = dev->features & dev->hw_features;
10248
10249	if (!(dev->flags & IFF_LOOPBACK))
10250	dev->hw_features \|= NETIF_F_NOCACHE_COPY;
10251
10252	/ If IPv4 TCP segmentation offload is supported we should also*
10253	* allow the device to enable segmenting the frame with the option
10254	* of ignoring a static IP ID value. This doesn't enable the
10255	* feature itself but allows the user to enable it later.
10256	*/
10257	if (dev->hw_features & NETIF_F_TSO)
10258	dev->hw_features \|= NETIF_F_TSO_MANGLEID;
10259	if (dev->vlan_features & NETIF_F_TSO)
10260	dev->vlan_features \|= NETIF_F_TSO_MANGLEID;
10261	if (dev->mpls_features & NETIF_F_TSO)
10262	dev->mpls_features \|= NETIF_F_TSO_MANGLEID;
10263	if (dev->hw_enc_features & NETIF_F_TSO)
10264	dev->hw_enc_features \|= NETIF_F_TSO_MANGLEID;
10265
10266	/ Make NETIF_F_HIGHDMA inheritable to VLAN devices.*
10267	*/
10268	dev->vlan_features \|= NETIF_F_HIGHDMA;
10269
10270	/ Make NETIF_F_SG inheritable to tunnel devices.*
10271	*/
10272	dev->hw_enc_features \|= NETIF_F_SG \| NETIF_F_GSO_PARTIAL;
10273
10274	/ Make NETIF_F_SG inheritable to MPLS.*
10275	*/
10276	dev->mpls_features \|= NETIF_F_SG;
10277
10278	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
10279	ret = notifier_to_errno(ret);
10280	if (ret)
10281	goto err_ifindex_release;
10282
10283	ret = netdev_register_kobject(dev);
10284
10285	WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
10286
10287	if (ret)
10288	goto err_uninit_notify;
10289
10290	__netdev_update_features(dev);
10291
10292	/*
10293	* Default initial state at registry is that the
10294	* device is present.
10295	*/
10296
10297	set_bit(nr: __LINK_STATE_PRESENT, addr: &dev->state);
10298
10299	linkwatch_init_dev(dev);
10300
10301	dev_init_scheduler(dev);
10302
10303	netdev_hold(dev, tracker: &dev->dev_registered_tracker, GFP_KERNEL);
10304	list_netdevice(dev);
10305
10306	add_device_randomness(buf: dev->dev_addr, len: dev->addr_len);
10307
10308	/ If the device has permanent device address, driver should*
10309	* set dev_addr and also addr_assign_type should be set to
10310	* NET_ADDR_PERM (default value).
10311	*/
10312	if (dev->addr_assign_type == NET_ADDR_PERM)
10313	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
10314
10315	/ Notify protocols, that a new device appeared. /
10316	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
10317	ret = notifier_to_errno(ret);
10318	if (ret) {
10319	/ Expect explicit free_netdev() on failure /
10320	dev->needs_free_netdev = false;
10321	unregister_netdevice_queue(dev, NULL);
10322	goto out;
10323	}
10324	/*
10325	* Prevent userspace races by waiting until the network
10326	* device is fully setup before sending notifications.
10327	*/
10328	if (!dev->rtnl_link_ops \|\|
10329	dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
10330	rtmsg_ifinfo(RTM_NEWLINK, dev, change: ~`0U`, GFP_KERNEL, portid: `0`, NULL);
10331
10332	out:
10333	return ret;
10334
10335	err_uninit_notify:
10336	call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
10337	err_ifindex_release:
10338	dev_index_release(net, ifindex: dev->ifindex);
10339	err_free_pcpu:
10340	netdev_do_free_pcpu_stats(dev);
10341	err_uninit:
10342	if (dev->netdev_ops->ndo_uninit)
10343	dev->netdev_ops->ndo_uninit(dev);
10344	if (dev->priv_destructor)
10345	dev->priv_destructor(dev);
10346	err_free_name:
10347	netdev_name_node_free(name_node: dev->name_node);
10348	goto out;
10349	}
10350	EXPORT_SYMBOL(register_netdevice);
10351
10352	/**
10353	* init_dummy_netdev - init a dummy network device for NAPI
10354	* @dev: device to init
10355	*
10356	* This takes a network device structure and initialize the minimum
10357	* amount of fields so it can be used to schedule NAPI polls without
10358	* registering a full blown interface. This is to be used by drivers
10359	* that need to tie several hardware interfaces to a single NAPI
10360	* poll scheduler due to HW limitations.
10361	*/
10362	void init_dummy_netdev(struct net_device *dev)
10363	{
10364	/ Clear everything. Note we don't initialize spinlocks*
10365	* are they aren't supposed to be taken by any of the
10366	* NAPI code and this dummy netdev is supposed to be
10367	* only ever used for NAPI polls
10368	*/
10369	memset(dev, `0`, sizeof(struct net_device));
10370
10371	/ make sure we BUG if trying to hit standard*
10372	* register/unregister code path
10373	*/
10374	dev->reg_state = NETREG_DUMMY;
10375
10376	/ NAPI wants this /
10377	INIT_LIST_HEAD(list: &dev->napi_list);
10378
10379	/ a dummy interface is started by default /
10380	set_bit(nr: __LINK_STATE_PRESENT, addr: &dev->state);
10381	set_bit(nr: __LINK_STATE_START, addr: &dev->state);
10382
10383	/ napi_busy_loop stats accounting wants this /
10384	dev_net_set(dev, net: &init_net);
10385
10386	/ Note : We dont allocate pcpu_refcnt for dummy devices,*
10387	* because users of this 'device' dont need to change
10388	* its refcount.
10389	*/
10390	}
10391	EXPORT_SYMBOL_GPL(init_dummy_netdev);
10392
10393
10394	/**
10395	* register_netdev - register a network device
10396	* @dev: device to register
10397	*
10398	* Take a completed network device structure and add it to the kernel
10399	* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
10400	* chain. 0 is returned on success. A negative errno code is returned
10401	* on a failure to set up the device, or if the name is a duplicate.
10402	*
10403	* This is a wrapper around register_netdevice that takes the rtnl semaphore
10404	* and expands the device name if you passed a format string to
10405	* alloc_netdev.
10406	*/
10407	int register_netdev(struct net_device *dev)
10408	{
10409	int err;
10410
10411	if (rtnl_lock_killable())
10412	return -EINTR;
10413	err = register_netdevice(dev);
10414	rtnl_unlock();
10415	return err;
10416	}
10417	EXPORT_SYMBOL(register_netdev);
10418
10419	int netdev_refcnt_read(const struct net_device *dev)
10420	{
10421	#ifdef CONFIG_PCPU_DEV_REFCNT
10422	int i, refcnt = `0`;
10423
10424	for_each_possible_cpu(i)
10425	refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
10426	return refcnt;
10427	#else
10428	return refcount_read(&dev->dev_refcnt);
10429	#endif
10430	}
10431	EXPORT_SYMBOL(netdev_refcnt_read);
10432
10433	int netdev_unregister_timeout_secs __read_mostly = `10`;
10434
10435	#define WAIT_REFS_MIN_MSECS 1
10436	#define WAIT_REFS_MAX_MSECS 250
10437	/**
10438	* netdev_wait_allrefs_any - wait until all references are gone.
10439	* @list: list of net_devices to wait on
10440	*
10441	* This is called when unregistering network devices.
10442	*
10443	* Any protocol or device that holds a reference should register
10444	* for netdevice notification, and cleanup and put back the
10445	* reference if they receive an UNREGISTER event.
10446	* We can get stuck here if buggy protocols don't correctly
10447	* call dev_put.
10448	*/
10449	static struct net_device netdev_wait_allrefs_any(struct* list_head *list)
10450	{
10451	unsigned long rebroadcast_time, warning_time;
10452	struct net_device *dev;
10453	int wait = `0`;
10454
10455	rebroadcast_time = warning_time = jiffies;
10456
10457	list_for_each_entry(dev, list, todo_list)
10458	if (netdev_refcnt_read(dev) == `1`)
10459	return dev;
10460
10461	while (true) {
10462	if (time_after(jiffies, rebroadcast_time + `1` * HZ)) {
10463	rtnl_lock();
10464
10465	/ Rebroadcast unregister notification /
10466	list_for_each_entry(dev, list, todo_list)
10467	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
10468
10469	__rtnl_unlock();
10470	rcu_barrier();
10471	rtnl_lock();
10472
10473	list_for_each_entry(dev, list, todo_list)
10474	if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
10475	&dev->state)) {
10476	/ We must not have linkwatch events*
10477	* pending on unregister. If this
10478	* happens, we simply run the queue
10479	* unscheduled, resulting in a noop
10480	* for this device.
10481	*/
10482	linkwatch_run_queue();
10483	break;
10484	}
10485
10486	__rtnl_unlock();
10487
10488	rebroadcast_time = jiffies;
10489	}
10490
10491	if (!wait) {
10492	rcu_barrier();
10493	wait = WAIT_REFS_MIN_MSECS;
10494	} else {
10495	msleep(msecs: wait);
10496	wait = min(wait << `1`, WAIT_REFS_MAX_MSECS);
10497	}
10498
10499	list_for_each_entry(dev, list, todo_list)
10500	if (netdev_refcnt_read(dev) == `1`)
10501	return dev;
10502
10503	if (time_after(jiffies, warning_time +
10504	READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
10505	list_for_each_entry(dev, list, todo_list) {
10506	pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
10507	dev->name, netdev_refcnt_read(dev));
10508	ref_tracker_dir_print(dir: &dev->refcnt_tracker, display_limit: `10`);
10509	}
10510
10511	warning_time = jiffies;
10512	}
10513	}
10514	}
10515
10516	/ The sequence is:*
10517	*
10518	* rtnl_lock();
10519	* ...
10520	* register_netdevice(x1);
10521	* register_netdevice(x2);
10522	* ...
10523	* unregister_netdevice(y1);
10524	* unregister_netdevice(y2);
10525	* ...
10526	* rtnl_unlock();
10527	* free_netdev(y1);
10528	* free_netdev(y2);
10529	*
10530	* We are invoked by rtnl_unlock().
10531	* This allows us to deal with problems:
10532	* 1) We can delete sysfs objects which invoke hotplug
10533	* without deadlocking with linkwatch via keventd.
10534	* 2) Since we run with the RTNL semaphore not held, we can sleep
10535	* safely in order to wait for the netdev refcnt to drop to zero.
10536	*
10537	* We must not return until all unregister events added during
10538	* the interval the lock was held have been completed.
10539	*/
10540	void netdev_run_todo(void)
10541	{
10542	struct net_device dev, tmp;
10543	struct list_head list;
10544	int cnt;
10545	#ifdef CONFIG_LOCKDEP
10546	struct list_head unlink_list;
10547
10548	list_replace_init(old: &net_unlink_list, new: &unlink_list);
10549
10550	while (!list_empty(head: &unlink_list)) {
10551	struct net_device *dev = list_first_entry(&unlink_list,
10552	struct net_device,
10553	unlink_list);
10554	list_del_init(entry: &dev->unlink_list);
10555	dev->nested_level = dev->lower_level - `1`;
10556	}
10557	#endif
10558
10559	/ Snapshot list, allow later requests /
10560	list_replace_init(old: &net_todo_list, new: &list);
10561
10562	__rtnl_unlock();
10563
10564	/ Wait for rcu callbacks to finish before next phase /
10565	if (!list_empty(head: &list))
10566	rcu_barrier();
10567
10568	list_for_each_entry_safe(dev, tmp, &list, todo_list) {
10569	if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
10570	netdev_WARN(dev, "run_todo but not unregistering\n");
10571	list_del(entry: &dev->todo_list);
10572	continue;
10573	}
10574
10575	WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
10576	linkwatch_sync_dev(dev);
10577	}
10578
10579	cnt = `0`;
10580	while (!list_empty(head: &list)) {
10581	dev = netdev_wait_allrefs_any(list: &list);
10582	list_del(entry: &dev->todo_list);
10583
10584	/ paranoia /
10585	BUG_ON(netdev_refcnt_read(dev) != `1`);
10586	BUG_ON(!list_empty(&dev->ptype_all));
10587	BUG_ON(!list_empty(&dev->ptype_specific));
10588	WARN_ON(rcu_access_pointer(dev->ip_ptr));
10589	WARN_ON(rcu_access_pointer(dev->ip6_ptr));
10590
10591	netdev_do_free_pcpu_stats(dev);
10592	if (dev->priv_destructor)
10593	dev->priv_destructor(dev);
10594	if (dev->needs_free_netdev)
10595	free_netdev(dev);
10596
10597	cnt++;
10598
10599	/ Free network device /
10600	kobject_put(kobj: &dev->dev.kobj);
10601	}
10602	if (cnt && atomic_sub_and_test(i: cnt, v: &dev_unreg_count))
10603	wake_up(&netdev_unregistering_wq);
10604	}
10605
10606	/ Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has*
10607	* all the same fields in the same order as net_device_stats, with only
10608	* the type differing, but rtnl_link_stats64 may have additional fields
10609	* at the end for newer counters.
10610	*/
10611	void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
10612	const struct net_device_stats *netdev_stats)
10613	{
10614	size_t i, n = sizeof(netdev_stats) / sizeof*(atomic_long_t);
10615	const atomic_long_t src = (atomic_long_t )netdev_stats;
10616	u64 dst = (u64 )stats64;
10617
10618	BUILD_BUG_ON(n > sizeof(stats64) / sizeof*(u64));
10619	for (i = `0`; i < n; i++)
10620	dst[i] = (unsigned long)atomic_long_read(v: &src[i]);
10621	/ zero out counters that only exist in rtnl_link_stats64 /
10622	memset((char )stats64 + n sizeof(u64), `0`,
10623	sizeof(stats64) - n sizeof(u64));
10624	}
10625	EXPORT_SYMBOL(netdev_stats_to_stats64);
10626
10627	static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc(
10628	struct net_device *dev)
10629	{
10630	struct net_device_core_stats __percpu *p;
10631
10632	p = alloc_percpu_gfp(struct net_device_core_stats,
10633	GFP_ATOMIC \| __GFP_NOWARN);
10634
10635	if (p && cmpxchg(&dev->core_stats, NULL, p))
10636	free_percpu(pdata: p);
10637
10638	/ This READ_ONCE() pairs with the cmpxchg() above /
10639	return READ_ONCE(dev->core_stats);
10640	}
10641
10642	noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset)
10643	{
10644	/ This READ_ONCE() pairs with the write in netdev_core_stats_alloc() /
10645	struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
10646	unsigned long __percpu *field;
10647
10648	if (unlikely(!p)) {
10649	p = netdev_core_stats_alloc(dev);
10650	if (!p)
10651	return;
10652	}
10653
10654	field = (__force unsigned long __percpu )((__force void* *)p + offset);
10655	this_cpu_inc(*field);
10656	}
10657	EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
10658
10659	/**
10660	* dev_get_stats - get network device statistics
10661	* @dev: device to get statistics from
10662	* @storage: place to store stats
10663	*
10664	* Get network statistics from device. Return @storage.
10665	* The device driver may provide its own method by setting
10666	* dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
10667	* otherwise the internal statistics structure is used.
10668	*/
10669	struct rtnl_link_stats64 dev_get_stats(struct* net_device *dev,
10670	struct rtnl_link_stats64 *storage)
10671	{
10672	const struct net_device_ops *ops = dev->netdev_ops;
10673	const struct net_device_core_stats __percpu *p;
10674
10675	if (ops->ndo_get_stats64) {
10676	memset(storage, `0`, sizeof(*storage));
10677	ops->ndo_get_stats64(dev, storage);
10678	} else if (ops->ndo_get_stats) {
10679	netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
10680	} else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
10681	dev_get_tstats64(dev, s: storage);
10682	} else {
10683	netdev_stats_to_stats64(storage, &dev->stats);
10684	}
10685
10686	/ This READ_ONCE() pairs with the write in netdev_core_stats_alloc() /
10687	p = READ_ONCE(dev->core_stats);
10688	if (p) {
10689	const struct net_device_core_stats *core_stats;
10690	int i;
10691
10692	for_each_possible_cpu(i) {
10693	core_stats = per_cpu_ptr(p, i);
10694	storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
10695	storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
10696	storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
10697	storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
10698	}
10699	}
10700	return storage;
10701	}
10702	EXPORT_SYMBOL(dev_get_stats);
10703
10704	/**
10705	* dev_fetch_sw_netstats - get per-cpu network device statistics
10706	* @s: place to store stats
10707	* @netstats: per-cpu network stats to read from
10708	*
10709	* Read per-cpu network statistics and populate the related fields in @s.
10710	*/
10711	void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
10712	const struct pcpu_sw_netstats __percpu *netstats)
10713	{
10714	int cpu;
10715
10716	for_each_possible_cpu(cpu) {
10717	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
10718	const struct pcpu_sw_netstats *stats;
10719	unsigned int start;
10720
10721	stats = per_cpu_ptr(netstats, cpu);
10722	do {
10723	start = u64_stats_fetch_begin(syncp: &stats->syncp);
10724	rx_packets = u64_stats_read(p: &stats->rx_packets);
10725	rx_bytes = u64_stats_read(p: &stats->rx_bytes);
10726	tx_packets = u64_stats_read(p: &stats->tx_packets);
10727	tx_bytes = u64_stats_read(p: &stats->tx_bytes);
10728	} while (u64_stats_fetch_retry(syncp: &stats->syncp, start));
10729
10730	s->rx_packets += rx_packets;
10731	s->rx_bytes += rx_bytes;
10732	s->tx_packets += tx_packets;
10733	s->tx_bytes += tx_bytes;
10734	}
10735	}
10736	EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
10737
10738	/**
10739	* dev_get_tstats64 - ndo_get_stats64 implementation
10740	* @dev: device to get statistics from
10741	* @s: place to store stats
10742	*
10743	* Populate @s from dev->stats and dev->tstats. Can be used as
10744	* ndo_get_stats64() callback.
10745	*/
10746	void dev_get_tstats64(struct net_device dev, struct* rtnl_link_stats64 *s)
10747	{
10748	netdev_stats_to_stats64(s, &dev->stats);
10749	dev_fetch_sw_netstats(s, dev->tstats);
10750	}
10751	EXPORT_SYMBOL_GPL(dev_get_tstats64);
10752
/**
 *	dev_ingress_queue_create - get or create the ingress queue of a device
 *	@dev: device that owns the queue
 *
 *	Returns the queue reported by dev_ingress_queue() when there is one.
 *	Otherwise, if CONFIG_NET_CLS_ACT is enabled, a zeroed queue is
 *	allocated, initialised, pointed at &noop_qdisc and published in
 *	dev->ingress_queue. Returns NULL if that allocation fails. Without
 *	CONFIG_NET_CLS_ACT the result of dev_ingress_queue() is returned
 *	unchanged.
 */
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
	struct netdev_queue *queue = dev_ingress_queue(dev);

#ifdef CONFIG_NET_CLS_ACT
	if (queue)
		return queue;
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;
	netdev_init_one_queue(dev, queue, NULL);
	RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
	RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc);
	/* Publish only after the queue is fully set up. */
	rcu_assign_pointer(dev->ingress_queue, queue);
#endif
	return queue;
}
10770
10771	static const struct ethtool_ops default_ethtool_ops;
10772
10773	void netdev_set_default_ethtool_ops(struct net_device *dev,
10774	const struct ethtool_ops *ops)
10775	{
10776	if (dev->ethtool_ops == &default_ethtool_ops)
10777	dev->ethtool_ops = ops;
10778	}
10779	EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
10780
10781	/**
10782	* netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
10783	* @dev: netdev to enable the IRQ coalescing on
10784	*
10785	* Sets a conservative default for SW IRQ coalescing. Users can use
10786	* sysfs attributes to override the default values.
10787	*/
10788	void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
10789	{
10790	WARN_ON(dev->reg_state == NETREG_REGISTERED);
10791
10792	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
10793	dev->gro_flush_timeout = `20000`;
10794	dev->napi_defer_hard_irqs = `1`;
10795	}
10796	}
10797	EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
10798
10799	void netdev_freemem(struct net_device *dev)
10800	{
10801	char addr = (char* *)dev - dev->padded;
10802
10803	kvfree(addr);
10804	}
10805
10806	/**
10807	* alloc_netdev_mqs - allocate network device
10808	* @sizeof_priv: size of private data to allocate space for
10809	* @name: device name format string
10810	* @name_assign_type: origin of device name
10811	* @setup: callback to initialize device
10812	* @txqs: the number of TX subqueues to allocate
10813	* @rxqs: the number of RX subqueues to allocate
10814	*
10815	* Allocates a struct net_device with private data area for driver use
10816	* and performs basic initialization. Also allocates subqueue structs
10817	* for each queue on the device.
10818	*/
10819	struct net_device alloc_netdev_mqs(int* sizeof_priv, const char *name,
10820	unsigned char name_assign_type,
10821	void (setup)(struct* net_device *),
10822	unsigned int txqs, unsigned int rxqs)
10823	{
10824	struct net_device *dev;
10825	unsigned int alloc_size;
10826	struct net_device *p;
10827
10828	BUG_ON(strlen(name) >= sizeof(dev->name));
10829
10830	if (txqs < `1`) {
10831	pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
10832	return NULL;
10833	}
10834
10835	if (rxqs < `1`) {
10836	pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
10837	return NULL;
10838	}
10839
10840	alloc_size = sizeof(struct net_device);
10841	if (sizeof_priv) {
10842	/ ensure 32-byte alignment of private area /
10843	alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
10844	alloc_size += sizeof_priv;
10845	}
10846	/ ensure 32-byte alignment of whole construct /
10847	alloc_size += NETDEV_ALIGN - `1`;
10848
10849	p = kvzalloc(size: alloc_size, GFP_KERNEL_ACCOUNT \| __GFP_RETRY_MAYFAIL);
10850	if (!p)
10851	return NULL;
10852
10853	dev = PTR_ALIGN(p, NETDEV_ALIGN);
10854	dev->padded = (char )dev - (char* *)p;
10855
10856	ref_tracker_dir_init(dir: &dev->refcnt_tracker, quarantine_count: `128`, name);
10857	#ifdef CONFIG_PCPU_DEV_REFCNT
10858	dev->pcpu_refcnt = alloc_percpu(int);
10859	if (!dev->pcpu_refcnt)
10860	goto free_dev;
10861	__dev_hold(dev);
10862	#else
10863	refcount_set(&dev->dev_refcnt, `1`);
10864	#endif
10865
10866	if (dev_addr_init(dev))
10867	goto free_pcpu;
10868
10869	dev_mc_init(dev);
10870	dev_uc_init(dev);
10871
10872	dev_net_set(dev, net: &init_net);
10873
10874	dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
10875	dev->xdp_zc_max_segs = `1`;
10876	dev->gso_max_segs = GSO_MAX_SEGS;
10877	dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
10878	dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
10879	dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
10880	dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
10881	dev->tso_max_segs = TSO_MAX_SEGS;
10882	dev->upper_level = `1`;
10883	dev->lower_level = `1`;
10884	#ifdef CONFIG_LOCKDEP
10885	dev->nested_level = `0`;
10886	INIT_LIST_HEAD(list: &dev->unlink_list);
10887	#endif
10888
10889	INIT_LIST_HEAD(list: &dev->napi_list);
10890	INIT_LIST_HEAD(list: &dev->unreg_list);
10891	INIT_LIST_HEAD(list: &dev->close_list);
10892	INIT_LIST_HEAD(list: &dev->link_watch_list);
10893	INIT_LIST_HEAD(list: &dev->adj_list.upper);
10894	INIT_LIST_HEAD(list: &dev->adj_list.lower);
10895	INIT_LIST_HEAD(list: &dev->ptype_all);
10896	INIT_LIST_HEAD(list: &dev->ptype_specific);
10897	INIT_LIST_HEAD(list: &dev->net_notifier_list);
10898	#ifdef CONFIG_NET_SCHED
10899	hash_init(dev->qdisc_hash);
10900	#endif
10901	dev->priv_flags = IFF_XMIT_DST_RELEASE \| IFF_XMIT_DST_RELEASE_PERM;
10902	setup(dev);
10903
10904	if (!dev->tx_queue_len) {
10905	dev->priv_flags \|= IFF_NO_QUEUE;
10906	dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
10907	}
10908
10909	dev->num_tx_queues = txqs;
10910	dev->real_num_tx_queues = txqs;
10911	if (netif_alloc_netdev_queues(dev))
10912	goto free_all;
10913
10914	dev->num_rx_queues = rxqs;
10915	dev->real_num_rx_queues = rxqs;
10916	if (netif_alloc_rx_queues(dev))
10917	goto free_all;
10918
10919	strcpy(p: dev->name, q: name);
10920	dev->name_assign_type = name_assign_type;
10921	dev->group = INIT_NETDEV_GROUP;
10922	if (!dev->ethtool_ops)
10923	dev->ethtool_ops = &default_ethtool_ops;
10924
10925	nf_hook_netdev_init(dev);
10926
10927	return dev;
10928
10929	free_all:
10930	free_netdev(dev);
10931	return NULL;
10932
10933	free_pcpu:
10934	#ifdef CONFIG_PCPU_DEV_REFCNT
10935	free_percpu(pdata: dev->pcpu_refcnt);
10936	free_dev:
10937	#endif
10938	netdev_freemem(dev);
10939	return NULL;
10940	}
10941	EXPORT_SYMBOL(alloc_netdev_mqs);
10942
10943	/**
10944	* free_netdev - free network device
10945	* @dev: device
10946	*
10947	* This function does the last stage of destroying an allocated device
10948	* interface. The reference to the device object is released. If this
10949	* is the last reference then it will be freed.Must be called in process
10950	* context.
10951	*/
10952	void free_netdev(struct net_device *dev)
10953	{
10954	struct napi_struct p, n;
10955
10956	might_sleep();
10957
10958	/ When called immediately after register_netdevice() failed the unwind*
10959	* handling may still be dismantling the device. Handle that case by
10960	* deferring the free.
10961	*/
10962	if (dev->reg_state == NETREG_UNREGISTERING) {
10963	ASSERT_RTNL();
10964	dev->needs_free_netdev = true;
10965	return;
10966	}
10967
10968	netif_free_tx_queues(dev);
10969	netif_free_rx_queues(dev);
10970
10971	kfree(rcu_dereference_protected(dev->ingress_queue, `1`));
10972
10973	/ Flush device addresses /
10974	dev_addr_flush(dev);
10975
10976	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
10977	netif_napi_del(napi: p);
10978
10979	ref_tracker_dir_exit(dir: &dev->refcnt_tracker);
10980	#ifdef CONFIG_PCPU_DEV_REFCNT
10981	free_percpu(pdata: dev->pcpu_refcnt);
10982	dev->pcpu_refcnt = NULL;
10983	#endif
10984	free_percpu(pdata: dev->core_stats);
10985	dev->core_stats = NULL;
10986	free_percpu(pdata: dev->xdp_bulkq);
10987	dev->xdp_bulkq = NULL;
10988
10989	/ Compatibility with error handling in drivers /
10990	if (dev->reg_state == NETREG_UNINITIALIZED) {
10991	netdev_freemem(dev);
10992	return;
10993	}
10994
10995	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
10996	WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
10997
10998	/ will free via device release /
10999	put_device(dev: &dev->dev);
11000	}
11001	EXPORT_SYMBOL(free_netdev);
11002
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 *
 *	May sleep. Uses the expedited RCU grace period when the rtnl lock
 *	is held (rtnl_is_locked()), the regular one otherwise.
 */
void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}
EXPORT_SYMBOL(synchronize_net);
11018
11019	/**
11020	* unregister_netdevice_queue - remove device from the kernel
11021	* @dev: device
11022	* @head: list
11023	*
11024	* This function shuts down a device interface and removes it
11025	* from the kernel tables.
11026	* If head not NULL, device is queued to be unregistered later.
11027	*
11028	* Callers must hold the rtnl semaphore. You may want
11029	* unregister_netdev() instead of this.
11030	*/
11031
11032	void unregister_netdevice_queue(struct net_device dev, struct* list_head *head)
11033	{
11034	ASSERT_RTNL();
11035
11036	if (head) {
11037	list_move_tail(list: &dev->unreg_list, head);
11038	} else {
11039	LIST_HEAD(single);
11040
11041	list_add(new: &dev->unreg_list, head: &single);
11042	unregister_netdevice_many(head: &single);
11043	}
11044	}
11045	EXPORT_SYMBOL(unregister_netdevice_queue);
11046
11047	void unregister_netdevice_many_notify(struct list_head *head,
11048	u32 portid, const struct nlmsghdr *nlh)
11049	{
11050	struct net_device dev, tmp;
11051	LIST_HEAD(close_head);
11052	int cnt = `0`;
11053
11054	BUG_ON(dev_boot_phase);
11055	ASSERT_RTNL();
11056
11057	if (list_empty(head))
11058	return;
11059
11060	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
11061	/ Some devices call without registering*
11062	* for initialization unwind. Remove those
11063	* devices and proceed with the remaining.
11064	*/
11065	if (dev->reg_state == NETREG_UNINITIALIZED) {
11066	pr_debug("unregister_netdevice: device %s/%p never was registered\n",
11067	dev->name, dev);
11068
11069	WARN_ON(`1`);
11070	list_del(entry: &dev->unreg_list);
11071	continue;
11072	}
11073	dev->dismantle = true;
11074	BUG_ON(dev->reg_state != NETREG_REGISTERED);
11075	}
11076
11077	/ If device is running, close it first. /
11078	list_for_each_entry(dev, head, unreg_list)
11079	list_add_tail(new: &dev->close_list, head: &close_head);
11080	dev_close_many(&close_head, true);
11081
11082	list_for_each_entry(dev, head, unreg_list) {
11083	/ And unlink it from device chain. /
11084	unlist_netdevice(dev);
11085	WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
11086	}
11087	flush_all_backlogs();
11088
11089	synchronize_net();
11090
11091	list_for_each_entry(dev, head, unreg_list) {
11092	struct sk_buff *skb = NULL;
11093
11094	/ Shutdown queueing discipline. /
11095	dev_shutdown(dev);
11096	dev_tcx_uninstall(dev);
11097	dev_xdp_uninstall(dev);
11098	bpf_dev_bound_netdev_unregister(dev);
11099
11100	netdev_offload_xstats_disable_all(dev);
11101
11102	/ Notify protocols, that we are about to destroy*
11103	* this device. They should clean all the things.
11104	*/
11105	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
11106
11107	if (!dev->rtnl_link_ops \|\|
11108	dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
11109	skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, change: ~`0U`, event: `0`,
11110	GFP_KERNEL, NULL, new_ifindex: `0`,
11111	portid, nlh);
11112
11113	/*
11114	* Flush the unicast and multicast chains
11115	*/
11116	dev_uc_flush(dev);
11117	dev_mc_flush(dev);
11118
11119	netdev_name_node_alt_flush(dev);
11120	netdev_name_node_free(name_node: dev->name_node);
11121
11122	call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
11123
11124	if (dev->netdev_ops->ndo_uninit)
11125	dev->netdev_ops->ndo_uninit(dev);
11126
11127	if (skb)
11128	rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
11129
11130	/ Notifier chain MUST detach us all upper devices. /
11131	WARN_ON(netdev_has_any_upper_dev(dev));
11132	WARN_ON(netdev_has_any_lower_dev(dev));
11133
11134	/ Remove entries from kobject tree /
11135	netdev_unregister_kobject(dev);
11136	#ifdef CONFIG_XPS
11137	/ Remove XPS queueing entries /
11138	netif_reset_xps_queues_gt(dev, index: `0`);
11139	#endif
11140	}
11141
11142	synchronize_net();
11143
11144	list_for_each_entry(dev, head, unreg_list) {
11145	netdev_put(dev, tracker: &dev->dev_registered_tracker);
11146	net_set_todo(dev);
11147	cnt++;
11148	}
11149	atomic_add(i: cnt, v: &dev_unreg_count);
11150
11151	list_del(entry: head);
11152	}
11153
11154	/**
11155	* unregister_netdevice_many - unregister many devices
11156	* @head: list of devices
11157	*
11158	* Note: As most callers use a stack allocated list_head,
11159	* we force a list_del() to make sure stack wont be corrupted later.
11160	*/
11161	void unregister_netdevice_many(struct list_head *head)
11162	{
11163	unregister_netdevice_many_notify(head, portid: `0`, NULL);
11164	}
11165	EXPORT_SYMBOL(unregister_netdevice_many);
11166
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
11185
11186	/**
11187	* __dev_change_net_namespace - move device to different nethost namespace
11188	* @dev: device
11189	* @net: network namespace
11190	* @pat: If not NULL name pattern to try if the current device name
11191	* is already taken in the destination network namespace.
11192	* @new_ifindex: If not zero, specifies device index in the target
11193	* namespace.
11194	*
11195	* This function shuts down a device interface and moves it
11196	* to a new network namespace. On success 0 is returned, on
11197	* a failure a netagive errno code is returned.
11198	*
11199	* Callers must hold the rtnl semaphore.
11200	*/
11201
11202	int __dev_change_net_namespace(struct net_device dev, struct* net *net,
11203	const char pat, int* new_ifindex)
11204	{
11205	struct netdev_name_node *name_node;
11206	struct net *net_old = dev_net(dev);
11207	char new_name[IFNAMSIZ] = {};
11208	int err, new_nsid;
11209
11210	ASSERT_RTNL();
11211
11212	/ Don't allow namespace local devices to be moved. /
11213	err = -EINVAL;
11214	if (dev->features & NETIF_F_NETNS_LOCAL)
11215	goto out;
11216
11217	/ Ensure the device has been registrered /
11218	if (dev->reg_state != NETREG_REGISTERED)
11219	goto out;
11220
11221	/ Get out if there is nothing todo /
11222	err = `0`;
11223	if (net_eq(net1: net_old, net2: net))
11224	goto out;
11225
11226	/ Pick the destination device name, and ensure*
11227	* we can use it in the destination network namespace.
11228	*/
11229	err = -EEXIST;
11230	if (netdev_name_in_use(net, dev->name)) {
11231	/ We get here if we can't use the current device name /
11232	if (!pat)
11233	goto out;
11234	err = dev_prep_valid_name(net, dev, want_name: pat, out_name: new_name, EEXIST);
11235	if (err < `0`)
11236	goto out;
11237	}
11238	/ Check that none of the altnames conflicts. /
11239	err = -EEXIST;
11240	netdev_for_each_altname(dev, name_node)
11241	if (netdev_name_in_use(net, name_node->name))
11242	goto out;
11243
11244	/ Check that new_ifindex isn't used yet. /
11245	if (new_ifindex) {
11246	err = dev_index_reserve(net, ifindex: new_ifindex);
11247	if (err < `0`)
11248	goto out;
11249	} else {
11250	/ If there is an ifindex conflict assign a new one /
11251	err = dev_index_reserve(net, ifindex: dev->ifindex);
11252	if (err == -EBUSY)
11253	err = dev_index_reserve(net, ifindex: `0`);
11254	if (err < `0`)
11255	goto out;
11256	new_ifindex = err;
11257	}
11258
11259	/*
11260	* And now a mini version of register_netdevice unregister_netdevice.
11261	*/
11262
11263	/ If device is running close it first. /
11264	dev_close(dev);
11265
11266	/ And unlink it from device chain /
11267	unlist_netdevice(dev);
11268
11269	synchronize_net();
11270
11271	/ Shutdown queueing discipline. /
11272	dev_shutdown(dev);
11273
11274	/ Notify protocols, that we are about to destroy*
11275	* this device. They should clean all the things.
11276	*
11277	* Note that dev->reg_state stays at NETREG_REGISTERED.
11278	* This is wanted because this way 8021q and macvlan know
11279	* the device is just moving and can keep their slaves up.
11280	*/
11281	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
11282	rcu_barrier();
11283
11284	new_nsid = peernet2id_alloc(net: dev_net(dev), peer: net, GFP_KERNEL);
11285
11286	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, change: ~`0U`, GFP_KERNEL, new_nsid: &new_nsid,
11287	new_ifindex);
11288
11289	/*
11290	* Flush the unicast and multicast chains
11291	*/
11292	dev_uc_flush(dev);
11293	dev_mc_flush(dev);
11294
11295	/ Send a netdev-removed uevent to the old namespace /
11296	kobject_uevent(kobj: &dev->dev.kobj, action: KOBJ_REMOVE);
11297	netdev_adjacent_del_links(dev);
11298
11299	/ Move per-net netdevice notifiers that are following the netdevice /
11300	move_netdevice_notifiers_dev_net(dev, net);
11301
11302	/ Actually switch the network namespace /
11303	dev_net_set(dev, net);
11304	dev->ifindex = new_ifindex;
11305
11306	if (new_name[`0`]) / Rename the netdev to prepared name /
11307	strscpy(dev->name, new_name, IFNAMSIZ);
11308
11309	/ Fixup kobjects /
11310	dev_set_uevent_suppress(dev: &dev->dev, val: `1`);
11311	err = device_rename(dev: &dev->dev, new_name: dev->name);
11312	dev_set_uevent_suppress(dev: &dev->dev, val: `0`);
11313	WARN_ON(err);
11314
11315	/ Send a netdev-add uevent to the new namespace /
11316	kobject_uevent(kobj: &dev->dev.kobj, action: KOBJ_ADD);
11317	netdev_adjacent_add_links(dev);
11318
11319	/ Adapt owner in case owning user namespace of target network*
11320	* namespace is different from the original one.
11321	*/
11322	err = netdev_change_owner(dev, net_old, net_new: net);
11323	WARN_ON(err);
11324
11325	/ Add the device back in the hashes /
11326	list_netdevice(dev);
11327
11328	/ Notify protocols, that a new device appeared. /
11329	call_netdevice_notifiers(NETDEV_REGISTER, dev);
11330
11331	/*
11332	* Prevent userspace races by waiting until the network
11333	* device is fully setup before sending notifications.
11334	*/
11335	rtmsg_ifinfo(RTM_NEWLINK, dev, change: ~`0U`, GFP_KERNEL, portid: `0`, NULL);
11336
11337	synchronize_net();
11338	err = `0`;
11339	out:
11340	return err;
11341	}
11342	EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
11343
11344	static int dev_cpu_dead(unsigned int oldcpu)
11345	{
11346	struct sk_buff **list_skb;
11347	struct sk_buff *skb;
11348	unsigned int cpu;
11349	struct softnet_data sd, oldsd, *remsd = NULL;
11350
11351	local_irq_disable();
11352	cpu = smp_processor_id();
11353	sd = &per_cpu(softnet_data, cpu);
11354	oldsd = &per_cpu(softnet_data, oldcpu);
11355
11356	/ Find end of our completion_queue. /
11357	list_skb = &sd->completion_queue;
11358	while (*list_skb)
11359	list_skb = &(*list_skb)->next;
11360	/ Append completion queue from offline CPU. /
11361	*list_skb = oldsd->completion_queue;
11362	oldsd->completion_queue = NULL;
11363
11364	/ Append output queue from offline CPU. /
11365	if (oldsd->output_queue) {
11366	*sd->output_queue_tailp = oldsd->output_queue;
11367	sd->output_queue_tailp = oldsd->output_queue_tailp;
11368	oldsd->output_queue = NULL;
11369	oldsd->output_queue_tailp = &oldsd->output_queue;
11370	}
11371	/ Append NAPI poll list from offline CPU, with one exception :*
11372	* process_backlog() must be called by cpu owning percpu backlog.
11373	* We properly handle process_queue & input_pkt_queue later.
11374	*/
11375	while (!list_empty(head: &oldsd->poll_list)) {
11376	struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
11377	struct napi_struct,
11378	poll_list);
11379
11380	list_del_init(entry: &napi->poll_list);
11381	if (napi->poll == process_backlog)
11382	napi->state = `0`;
11383	else
11384	____napi_schedule(sd, napi);
11385	}
11386
11387	raise_softirq_irqoff(nr: NET_TX_SOFTIRQ);
11388	local_irq_enable();
11389
11390	#ifdef CONFIG_RPS
11391	remsd = oldsd->rps_ipi_list;
11392	oldsd->rps_ipi_list = NULL;
11393	#endif
11394	/ send out pending IPI's on offline CPU /
11395	net_rps_send_ipi(remsd);
11396
11397	/ Process offline CPU's input_pkt_queue /
11398	while ((skb = __skb_dequeue(list: &oldsd->process_queue))) {
11399	netif_rx(skb);
11400	input_queue_head_incr(sd: oldsd);
11401	}
11402	while ((skb = skb_dequeue(list: &oldsd->input_pkt_queue))) {
11403	netif_rx(skb);
11404	input_queue_head_incr(sd: oldsd);
11405	}
11406
11407	return `0`;
11408	}
11409
11410	/**
11411	* netdev_increment_features - increment feature set by one
11412	* @all: current feature set
11413	* @one: new feature set
11414	* @mask: mask feature set
11415	*
11416	* Computes a new feature set after adding a device with feature set
11417	* @one to the master device with current feature set @all. Will not
11418	* enable anything that is off in @mask. Returns the new feature set.
11419	*/
11420	netdev_features_t netdev_increment_features(netdev_features_t all,
11421	netdev_features_t one, netdev_features_t mask)
11422	{
11423	if (mask & NETIF_F_HW_CSUM)
11424	mask \|= NETIF_F_CSUM_MASK;
11425	mask \|= NETIF_F_VLAN_CHALLENGED;
11426
11427	all \|= one & (NETIF_F_ONE_FOR_ALL \| NETIF_F_CSUM_MASK) & mask;
11428	all &= one \| ~NETIF_F_ALL_FOR_ALL;
11429
11430	/ If one device supports hw checksumming, set for all. /
11431	if (all & NETIF_F_HW_CSUM)
11432	all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
11433
11434	return all;
11435	}
11436	EXPORT_SYMBOL(netdev_increment_features);
11437
11438	static struct hlist_head * __net_init netdev_create_hash(void)
11439	{
11440	int i;
11441	struct hlist_head *hash;
11442
11443	hash = kmalloc_array(NETDEV_HASHENTRIES, size: sizeof(*hash), GFP_KERNEL);
11444	if (hash != NULL)
11445	for (i = `0`; i < NETDEV_HASHENTRIES; i++)
11446	INIT_HLIST_HEAD(&hash[i]);
11447
11448	return hash;
11449	}
11450
11451	/ Initialize per network namespace state /
11452	static int __net_init netdev_init(struct net *net)
11453	{
11454	BUILD_BUG_ON(GRO_HASH_BUCKETS >
11455	`8` * sizeof_field(struct napi_struct, gro_bitmask));
11456
11457	INIT_LIST_HEAD(list: &net->dev_base_head);
11458
11459	net->dev_name_head = netdev_create_hash();
11460	if (net->dev_name_head == NULL)
11461	goto err_name;
11462
11463	net->dev_index_head = netdev_create_hash();
11464	if (net->dev_index_head == NULL)
11465	goto err_idx;
11466
11467	xa_init_flags(xa: &net->dev_by_index, XA_FLAGS_ALLOC1);
11468
11469	RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
11470
11471	return `0`;
11472
11473	err_idx:
11474	kfree(objp: net->dev_name_head);
11475	err_name:
11476	return -ENOMEM;
11477	}
11478
11479	/**
11480	* netdev_drivername - network driver for the device
11481	* @dev: network device
11482	*
11483	* Determine network driver for device.
11484	*/
11485	const char netdev_drivername(const* struct net_device *dev)
11486	{
11487	const struct device_driver *driver;
11488	const struct device *parent;
11489	const char *empty = "";
11490
11491	parent = dev->dev.parent;
11492	if (!parent)
11493	return empty;
11494
11495	driver = parent->driver;
11496	if (driver && driver->name)
11497	return driver->name;
11498	return empty;
11499	}
11500
11501	static void __netdev_printk(const char level, const* struct net_device *dev,
11502	struct va_format *vaf)
11503	{
11504	if (dev && dev->dev.parent) {
11505	dev_printk_emit(level: level[`1`] - `'0'`,
11506	dev: dev->dev.parent,
11507	fmt: "%s %s %s%s: %pV",
11508	dev_driver_string(dev: dev->dev.parent),
11509	dev_name(dev: dev->dev.parent),
11510	netdev_name(dev), netdev_reg_state(dev),
11511	vaf);
11512	} else if (dev) {
11513	printk("%s%s%s: %pV",
11514	level, netdev_name(dev), netdev_reg_state(dev), vaf);
11515	} else {
11516	printk("%s(NULL net_device): %pV", level, vaf);
11517	}
11518	}
11519
11520	void netdev_printk(const char level, const* struct net_device *dev,
11521	const char *format, ...)
11522	{
11523	struct va_format vaf;
11524	va_list args;
11525
11526	va_start(args, format);
11527
11528	vaf.fmt = format;
11529	vaf.va = &args;
11530
11531	__netdev_printk(level, dev, vaf: &vaf);
11532
11533	va_end(args);
11534	}
11535	EXPORT_SYMBOL(netdev_printk);
11536
11537	#define define_netdev_printk_level(func, level) \
11538	void func(const struct net_device dev, const char fmt, ...) \
11539	{ \
11540	struct va_format vaf; \
11541	va_list args; \
11542	\
11543	va_start(args, fmt); \
11544	\
11545	vaf.fmt = fmt; \
11546	vaf.va = &args; \
11547	\
11548	__netdev_printk(level, dev, &vaf); \
11549	\
11550	va_end(args); \
11551	} \
11552	EXPORT_SYMBOL(func);
11553
11554	define_netdev_printk_level(netdev_emerg, KERN_EMERG);
11555	define_netdev_printk_level(netdev_alert, KERN_ALERT);
11556	define_netdev_printk_level(netdev_crit, KERN_CRIT);
11557	define_netdev_printk_level(netdev_err, KERN_ERR);
11558	define_netdev_printk_level(netdev_warn, KERN_WARNING);
11559	define_netdev_printk_level(netdev_notice, KERN_NOTICE);
11560	define_netdev_printk_level(netdev_info, KERN_INFO);
11561
11562	static void __net_exit netdev_exit(struct net *net)
11563	{
11564	kfree(objp: net->dev_name_head);
11565	kfree(objp: net->dev_index_head);
11566	xa_destroy(&net->dev_by_index);
11567	if (net != &init_net)
11568	WARN_ON_ONCE(!list_empty(&net->dev_base_head));
11569	}
11570
11571	static struct pernet_operations __net_initdata netdev_net_ops = {
11572	.init = netdev_init,
11573	.exit = netdev_exit,
11574	};
11575
11576	static void __net_exit default_device_exit_net(struct net *net)
11577	{
11578	struct netdev_name_node name_node, tmp;
11579	struct net_device dev, aux;
11580	/*
11581	* Push all migratable network devices back to the
11582	* initial network namespace
11583	*/
11584	ASSERT_RTNL();
11585	for_each_netdev_safe(net, dev, aux) {
11586	int err;
11587	char fb_name[IFNAMSIZ];
11588
11589	/ Ignore unmoveable devices (i.e. loopback) /
11590	if (dev->features & NETIF_F_NETNS_LOCAL)
11591	continue;
11592
11593	/ Leave virtual devices for the generic cleanup /
11594	if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
11595	continue;
11596
11597	/ Push remaining network devices to init_net /
11598	snprintf(buf: fb_name, IFNAMSIZ, fmt: "dev%d", dev->ifindex);
11599	if (netdev_name_in_use(&init_net, fb_name))
11600	snprintf(buf: fb_name, IFNAMSIZ, fmt: "dev%%d");
11601
11602	netdev_for_each_altname_safe(dev, name_node, tmp)
11603	if (netdev_name_in_use(&init_net, name_node->name))
11604	__netdev_name_node_alt_destroy(name_node);
11605
11606	err = dev_change_net_namespace(dev, net: &init_net, pat: fb_name);
11607	if (err) {
11608	pr_emerg("%s: failed to move %s to init_net: %d\n",
11609	__func__, dev->name, err);
11610	BUG();
11611	}
11612	}
11613	}
11614
11615	static void __net_exit default_device_exit_batch(struct list_head *net_list)
11616	{
11617	/ At exit all network devices most be removed from a network*
11618	* namespace. Do this in the reverse order of registration.
11619	* Do this across as many network namespaces as possible to
11620	* improve batching efficiency.
11621	*/
11622	struct net_device *dev;
11623	struct net *net;
11624	LIST_HEAD(dev_kill_list);
11625
11626	rtnl_lock();
11627	list_for_each_entry(net, net_list, exit_list) {
11628	default_device_exit_net(net);
11629	cond_resched();
11630	}
11631
11632	list_for_each_entry(net, net_list, exit_list) {
11633	for_each_netdev_reverse(net, dev) {
11634	if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
11635	dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
11636	else
11637	unregister_netdevice_queue(dev, &dev_kill_list);
11638	}
11639	}
11640	unregister_netdevice_many(&dev_kill_list);
11641	rtnl_unlock();
11642	}
11643
11644	static struct pernet_operations __net_initdata default_device_ops = {
11645	.exit_batch = default_device_exit_batch,
11646	};
11647
11648	static void __init net_dev_struct_check(void)
11649	{
11650	/ TX read-mostly hotpath /
11651	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
11652	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
11653	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
11654	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
11655	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
11656	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
11657	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
11658	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
11659	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features);
11660	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
11661	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
11662	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
11663	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
11664	#ifdef CONFIG_XPS
11665	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
11666	#endif
11667	#ifdef CONFIG_NETFILTER_EGRESS
11668	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
11669	#endif
11670	#ifdef CONFIG_NET_XGRESS
11671	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
11672	#endif
11673	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, `160`);
11674
11675	/ TXRX read-mostly hotpath /
11676	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
11677	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, state);
11678	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
11679	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
11680	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
11681	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
11682	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, `46`);
11683
11684	/ RX read-mostly hotpath /
11685	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
11686	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
11687	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
11688	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
11689	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
11690	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
11691	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
11692	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
11693	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
11694	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
11695	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
11696	#ifdef CONFIG_NETPOLL
11697	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
11698	#endif
11699	#ifdef CONFIG_NET_XGRESS
11700	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
11701	#endif
11702	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, `104`);
11703	}
11704
11705	/*
11706	* Initialize the DEV module. At boot time this walks the device list and
11707	* unhooks any devices that fail to initialise (normally hardware not
11708	* present) and leaves us with a valid list of present and active devices.
11709	*
11710	*/
11711
/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
11713	#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
11714
11715	static int net_page_pool_create(int cpuid)
11716	{
11717	#if IS_ENABLED(CONFIG_PAGE_POOL)
11718	struct page_pool_params page_pool_params = {
11719	.pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
11720	.flags = PP_FLAG_SYSTEM_POOL,
11721	.nid = NUMA_NO_NODE,
11722	};
11723	struct page_pool *pp_ptr;
11724
11725	pp_ptr = page_pool_create_percpu(params: &page_pool_params, cpuid);
11726	if (IS_ERR(ptr: pp_ptr))
11727	return -ENOMEM;
11728
11729	per_cpu(system_page_pool, cpuid) = pp_ptr;
11730	#endif
11731	return `0`;
11732	}
11733
11734	/*
11735	* This is called single threaded during boot, so no need
11736	* to take the rtnl semaphore.
11737	*/
11738	static int __init net_dev_init(void)
11739	{
11740	int i, rc = -ENOMEM;
11741
11742	BUG_ON(!dev_boot_phase);
11743
11744	net_dev_struct_check();
11745
11746	if (dev_proc_init())
11747	goto out;
11748
11749	if (netdev_kobject_init())
11750	goto out;
11751
11752	for (i = `0`; i < PTYPE_HASH_SIZE; i++)
11753	INIT_LIST_HEAD(list: &ptype_base[i]);
11754
11755	if (register_pernet_subsys(&netdev_net_ops))
11756	goto out;
11757
11758	/*
11759	* Initialise the packet receive queues.
11760	*/
11761
11762	for_each_possible_cpu(i) {
11763	struct work_struct *flush = per_cpu_ptr(&flush_works, i);
11764	struct softnet_data *sd = &per_cpu(softnet_data, i);
11765
11766	INIT_WORK(flush, flush_backlog);
11767
11768	skb_queue_head_init(list: &sd->input_pkt_queue);
11769	skb_queue_head_init(list: &sd->process_queue);
11770	#ifdef CONFIG_XFRM_OFFLOAD
11771	skb_queue_head_init(list: &sd->xfrm_backlog);
11772	#endif
11773	INIT_LIST_HEAD(list: &sd->poll_list);
11774	sd->output_queue_tailp = &sd->output_queue;
11775	#ifdef CONFIG_RPS
11776	INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
11777	sd->cpu = i;
11778	#endif
11779	INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
11780	spin_lock_init(&sd->defer_lock);
11781
11782	init_gro_hash(napi: &sd->backlog);
11783	sd->backlog.poll = process_backlog;
11784	sd->backlog.weight = weight_p;
11785
11786	if (net_page_pool_create(cpuid: i))
11787	goto out;
11788	}
11789
11790	dev_boot_phase = `0`;
11791
11792	/ The loopback device is special if any other network devices*
11793	* is present in a network namespace the loopback device must
11794	* be present. Since we now dynamically allocate and free the
11795	* loopback device ensure this invariant is maintained by
11796	* keeping the loopback device as the first device on the
11797	* list of network devices. Ensuring the loopback devices
11798	* is the first device that appears and the last network device
11799	* that disappears.
11800	*/
11801	if (register_pernet_device(&loopback_net_ops))
11802	goto out;
11803
11804	if (register_pernet_device(&default_device_ops))
11805	goto out;
11806
11807	open_softirq(nr: NET_TX_SOFTIRQ, action: net_tx_action);
11808	open_softirq(nr: NET_RX_SOFTIRQ, action: net_rx_action);
11809
11810	rc = cpuhp_setup_state_nocalls(state: CPUHP_NET_DEV_DEAD, name: "net/dev:dead",
11811	NULL, teardown: dev_cpu_dead);
11812	WARN_ON(rc < `0`);
11813	rc = `0`;
11814	out:
11815	if (rc < `0`) {
11816	for_each_possible_cpu(i) {
11817	struct page_pool *pp_ptr;
11818
11819	pp_ptr = per_cpu(system_page_pool, i);
11820	if (!pp_ptr)
11821	continue;
11822
11823	page_pool_destroy(pool: pp_ptr);
11824	per_cpu(system_page_pool, i) = NULL;
11825	}
11826	}
11827
11828	return rc;
11829	}
11830
11831	subsys_initcall(net_dev_init);
11832

source code of linux/net/core/dev.c