1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _LINUX_SCHED_H |
3 | #define _LINUX_SCHED_H |
4 | |
5 | /* |
6 | * Define 'struct task_struct' and provide the main scheduler |
7 | * APIs (schedule(), wakeup variants, etc.) |
8 | */ |
9 | |
10 | #include <uapi/linux/sched.h> |
11 | |
12 | #include <asm/current.h> |
13 | #include <asm/processor.h> |
14 | #include <linux/thread_info.h> |
15 | #include <linux/preempt.h> |
16 | #include <linux/cpumask.h> |
17 | |
18 | #include <linux/cache.h> |
19 | #include <linux/irqflags_types.h> |
20 | #include <linux/smp_types.h> |
21 | #include <linux/pid_types.h> |
22 | #include <linux/sem_types.h> |
23 | #include <linux/shm.h> |
24 | #include <linux/kmsan_types.h> |
25 | #include <linux/mutex_types.h> |
26 | #include <linux/plist_types.h> |
27 | #include <linux/hrtimer_types.h> |
28 | #include <linux/timer_types.h> |
29 | #include <linux/seccomp_types.h> |
30 | #include <linux/nodemask_types.h> |
31 | #include <linux/refcount_types.h> |
32 | #include <linux/resource.h> |
33 | #include <linux/latencytop.h> |
34 | #include <linux/sched/prio.h> |
35 | #include <linux/sched/types.h> |
36 | #include <linux/signal_types.h> |
37 | #include <linux/syscall_user_dispatch_types.h> |
38 | #include <linux/mm_types_task.h> |
39 | #include <linux/task_io_accounting.h> |
40 | #include <linux/posix-timers_types.h> |
41 | #include <linux/restart_block.h> |
42 | #include <uapi/linux/rseq.h> |
43 | #include <linux/seqlock_types.h> |
44 | #include <linux/kcsan.h> |
45 | #include <linux/rv.h> |
46 | #include <linux/livepatch_sched.h> |
47 | #include <linux/uidgid_types.h> |
48 | #include <asm/kmap_size.h> |
49 | |
50 | /* task_struct member predeclarations (sorted alphabetically): */ |
51 | struct audit_context; |
52 | struct bio_list; |
53 | struct blk_plug; |
54 | struct bpf_local_storage; |
55 | struct bpf_run_ctx; |
56 | struct capture_control; |
57 | struct cfs_rq; |
58 | struct fs_struct; |
59 | struct futex_pi_state; |
60 | struct io_context; |
61 | struct io_uring_task; |
62 | struct mempolicy; |
63 | struct nameidata; |
64 | struct nsproxy; |
65 | struct perf_event_context; |
66 | struct pid_namespace; |
67 | struct pipe_inode_info; |
68 | struct rcu_node; |
69 | struct reclaim_state; |
70 | struct robust_list_head; |
71 | struct root_domain; |
72 | struct rq; |
73 | struct sched_attr; |
74 | struct sched_dl_entity; |
75 | struct seq_file; |
76 | struct sighand_struct; |
77 | struct signal_struct; |
78 | struct task_delay_info; |
79 | struct task_group; |
80 | struct task_struct; |
81 | struct user_event_mm; |
82 | |
83 | /* |
84 | * Task state bitmask. NOTE! These bits are also |
85 | * encoded in fs/proc/array.c: get_task_state(). |
86 | * |
87 | * We have two separate sets of flags: task->__state |
88 | * is about runnability, while task->exit_state are |
89 | * about the task exiting. Confusing, but this way |
90 | * modifying one set can't modify the other one by |
91 | * mistake. |
92 | */ |
93 | |
94 | /* Used in tsk->__state: */ |
95 | #define TASK_RUNNING 0x00000000 |
96 | #define TASK_INTERRUPTIBLE 0x00000001 |
97 | #define TASK_UNINTERRUPTIBLE 0x00000002 |
98 | #define __TASK_STOPPED 0x00000004 |
99 | #define __TASK_TRACED 0x00000008 |
100 | /* Used in tsk->exit_state: */ |
101 | #define EXIT_DEAD 0x00000010 |
102 | #define EXIT_ZOMBIE 0x00000020 |
103 | #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) |
104 | /* Used in tsk->__state again: */ |
105 | #define TASK_PARKED 0x00000040 |
106 | #define TASK_DEAD 0x00000080 |
107 | #define TASK_WAKEKILL 0x00000100 |
108 | #define TASK_WAKING 0x00000200 |
109 | #define TASK_NOLOAD 0x00000400 |
110 | #define TASK_NEW 0x00000800 |
111 | #define TASK_RTLOCK_WAIT 0x00001000 |
112 | #define TASK_FREEZABLE 0x00002000 |
113 | #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) |
114 | #define TASK_FROZEN 0x00008000 |
115 | #define TASK_STATE_MAX 0x00010000 |
116 | |
117 | #define TASK_ANY (TASK_STATE_MAX-1) |
118 | |
119 | /* |
120 | * DO NOT ADD ANY NEW USERS ! |
121 | */ |
122 | #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) |
123 | |
124 | /* Convenience macros for the sake of set_current_state: */ |
125 | #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) |
126 | #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) |
127 | #define TASK_TRACED __TASK_TRACED |
128 | |
129 | #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) |
130 | |
131 | /* Convenience macros for the sake of wake_up(): */ |
132 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) |
133 | |
134 | /* get_task_state(): */ |
135 | #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ |
136 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ |
137 | __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ |
138 | TASK_PARKED) |
139 | |
140 | #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) |
141 | |
142 | #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) |
143 | #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) |
144 | #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) |
145 | |
146 | /* |
147 | * Special states are those that do not use the normal wait-loop pattern. See |
148 | * the comment with set_special_state(). |
149 | */ |
150 | #define is_special_task_state(state) \ |
151 | ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) |
152 | |
153 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
154 | # define debug_normal_state_change(state_value) \ |
155 | do { \ |
156 | WARN_ON_ONCE(is_special_task_state(state_value)); \ |
157 | current->task_state_change = _THIS_IP_; \ |
158 | } while (0) |
159 | |
160 | # define debug_special_state_change(state_value) \ |
161 | do { \ |
162 | WARN_ON_ONCE(!is_special_task_state(state_value)); \ |
163 | current->task_state_change = _THIS_IP_; \ |
164 | } while (0) |
165 | |
166 | # define debug_rtlock_wait_set_state() \ |
167 | do { \ |
168 | current->saved_state_change = current->task_state_change;\ |
169 | current->task_state_change = _THIS_IP_; \ |
170 | } while (0) |
171 | |
172 | # define debug_rtlock_wait_restore_state() \ |
173 | do { \ |
174 | current->task_state_change = current->saved_state_change;\ |
175 | } while (0) |
176 | |
177 | #else |
178 | # define debug_normal_state_change(cond) do { } while (0) |
179 | # define debug_special_state_change(cond) do { } while (0) |
180 | # define debug_rtlock_wait_set_state() do { } while (0) |
181 | # define debug_rtlock_wait_restore_state() do { } while (0) |
182 | #endif |
183 | |
184 | /* |
185 | * set_current_state() includes a barrier so that the write of current->__state |
186 | * is correctly serialised wrt the caller's subsequent test of whether to |
187 | * actually sleep: |
188 | * |
189 | * for (;;) { |
190 | * set_current_state(TASK_UNINTERRUPTIBLE); |
191 | * if (CONDITION) |
192 | * break; |
193 | * |
194 | * schedule(); |
195 | * } |
196 | * __set_current_state(TASK_RUNNING); |
197 | * |
198 | * If the caller does not need such serialisation (because, for instance, the |
199 | * CONDITION test and condition change and wakeup are under the same lock) then |
200 | * use __set_current_state(). |
201 | * |
202 | * The above is typically ordered against the wakeup, which does: |
203 | * |
204 | * CONDITION = 1; |
205 | * wake_up_state(p, TASK_UNINTERRUPTIBLE); |
206 | * |
207 | * where wake_up_state()/try_to_wake_up() executes a full memory barrier before |
208 | * accessing p->__state. |
209 | * |
210 | * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, |
211 | * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a |
212 | * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). |
213 | * |
214 | * However, with slightly different timing the wakeup TASK_RUNNING store can |
215 | * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not |
216 | * a problem either because that will result in one extra go around the loop |
217 | * and our @cond test will save the day. |
218 | * |
219 | * Also see the comments of try_to_wake_up(). |
220 | */ |
221 | #define __set_current_state(state_value) \ |
222 | do { \ |
223 | debug_normal_state_change((state_value)); \ |
224 | WRITE_ONCE(current->__state, (state_value)); \ |
225 | } while (0) |
226 | |
227 | #define set_current_state(state_value) \ |
228 | do { \ |
229 | debug_normal_state_change((state_value)); \ |
230 | smp_store_mb(current->__state, (state_value)); \ |
231 | } while (0) |
232 | |
233 | /* |
234 | * set_special_state() should be used for those states when the blocking task |
235 | * can not use the regular condition based wait-loop. In that case we must |
236 | * serialize against wakeups such that any possible in-flight TASK_RUNNING |
237 | * stores will not collide with our state change. |
238 | */ |
239 | #define set_special_state(state_value) \ |
240 | do { \ |
241 | unsigned long flags; /* may shadow */ \ |
242 | \ |
	raw_spin_lock_irqsave(&current->pi_lock, flags);	\
244 | debug_special_state_change((state_value)); \ |
245 | WRITE_ONCE(current->__state, (state_value)); \ |
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
247 | } while (0) |
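
/*
 * Illustrative sketch (not a definition provided by this header): a caller
 * that cannot use the condition based wait-loop, for example the final
 * TASK_DEAD transition on task exit, would do something along the lines of:
 *
 *	set_special_state(TASK_DEAD);
 *	schedule();
 *
 * Holding ->pi_lock across the store inside set_special_state() guarantees
 * that a concurrent try_to_wake_up() cannot overwrite the special state
 * with TASK_RUNNING behind our back.
 */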
248 | |
249 | /* |
250 | * PREEMPT_RT specific variants for "sleeping" spin/rwlocks |
251 | * |
252 | * RT's spin/rwlock substitutions are state preserving. The state of the |
253 | * task when blocking on the lock is saved in task_struct::saved_state and |
254 | * restored after the lock has been acquired. These operations are |
255 | * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT |
256 | * lock related wakeups while the task is blocked on the lock are |
257 | * redirected to operate on task_struct::saved_state to ensure that these |
258 | * are not dropped. On restore task_struct::saved_state is set to |
259 | * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. |
260 | * |
261 | * The lock operation looks like this: |
262 | * |
263 | * current_save_and_set_rtlock_wait_state(); |
264 | * for (;;) { |
265 | * if (try_lock()) |
266 | * break; |
267 | * raw_spin_unlock_irq(&lock->wait_lock); |
268 | * schedule_rtlock(); |
269 | * raw_spin_lock_irq(&lock->wait_lock); |
270 | * set_current_state(TASK_RTLOCK_WAIT); |
271 | * } |
272 | * current_restore_rtlock_saved_state(); |
273 | */ |
274 | #define current_save_and_set_rtlock_wait_state() \ |
275 | do { \ |
276 | lockdep_assert_irqs_disabled(); \ |
	raw_spin_lock(&current->pi_lock);			\
278 | current->saved_state = current->__state; \ |
279 | debug_rtlock_wait_set_state(); \ |
280 | WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ |
	raw_spin_unlock(&current->pi_lock);			\
282 | } while (0); |
283 | |
284 | #define current_restore_rtlock_saved_state() \ |
285 | do { \ |
286 | lockdep_assert_irqs_disabled(); \ |
	raw_spin_lock(&current->pi_lock);			\
288 | debug_rtlock_wait_restore_state(); \ |
289 | WRITE_ONCE(current->__state, current->saved_state); \ |
290 | current->saved_state = TASK_RUNNING; \ |
	raw_spin_unlock(&current->pi_lock);			\
292 | } while (0); |
293 | |
294 | #define get_current_state() READ_ONCE(current->__state) |
295 | |
296 | /* |
 * Define the task command name length as an enum so that it is visible to
 * BPF programs.
299 | */ |
300 | enum { |
301 | TASK_COMM_LEN = 16, |
302 | }; |
303 | |
304 | extern void scheduler_tick(void); |
305 | |
306 | #define MAX_SCHEDULE_TIMEOUT LONG_MAX |
307 | |
308 | extern long schedule_timeout(long timeout); |
309 | extern long schedule_timeout_interruptible(long timeout); |
310 | extern long schedule_timeout_killable(long timeout); |
311 | extern long schedule_timeout_uninterruptible(long timeout); |
312 | extern long schedule_timeout_idle(long timeout); |
313 | asmlinkage void schedule(void); |
314 | extern void schedule_preempt_disabled(void); |
315 | asmlinkage void preempt_schedule_irq(void); |
316 | #ifdef CONFIG_PREEMPT_RT |
317 | extern void schedule_rtlock(void); |
318 | #endif |
319 | |
320 | extern int __must_check io_schedule_prepare(void); |
321 | extern void io_schedule_finish(int token); |
322 | extern long io_schedule_timeout(long timeout); |
323 | extern void io_schedule(void); |
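
/*
 * Illustrative sketch of the canonical timed-wait pattern built on the
 * primitives above (see also the comment near set_current_state()):
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	long remaining = schedule_timeout(msecs_to_jiffies(100));
 *
 * schedule_timeout() returns the number of jiffies left when the task is
 * woken early and 0 once the full timeout has elapsed; the
 * schedule_timeout_*() variants simply set the corresponding task state
 * before calling schedule_timeout().
 */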
324 | |
325 | /** |
326 | * struct prev_cputime - snapshot of system and user cputime |
327 | * @utime: time spent in user mode |
328 | * @stime: time spent in system mode |
329 | * @lock: protects the above two fields |
330 | * |
331 | * Stores previous user/system time values such that we can guarantee |
332 | * monotonicity. |
333 | */ |
334 | struct prev_cputime { |
335 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
336 | u64 utime; |
337 | u64 stime; |
338 | raw_spinlock_t lock; |
339 | #endif |
340 | }; |
341 | |
342 | enum vtime_state { |
343 | /* Task is sleeping or running in a CPU with VTIME inactive: */ |
344 | VTIME_INACTIVE = 0, |
345 | /* Task is idle */ |
346 | VTIME_IDLE, |
347 | /* Task runs in kernelspace in a CPU with VTIME active: */ |
348 | VTIME_SYS, |
349 | /* Task runs in userspace in a CPU with VTIME active: */ |
350 | VTIME_USER, |
	/* Task runs as a guest in a CPU with VTIME active: */
352 | VTIME_GUEST, |
353 | }; |
354 | |
355 | struct vtime { |
356 | seqcount_t seqcount; |
357 | unsigned long long starttime; |
358 | enum vtime_state state; |
359 | unsigned int cpu; |
360 | u64 utime; |
361 | u64 stime; |
362 | u64 gtime; |
363 | }; |
364 | |
365 | /* |
366 | * Utilization clamp constraints. |
367 | * @UCLAMP_MIN: Minimum utilization |
368 | * @UCLAMP_MAX: Maximum utilization |
369 | * @UCLAMP_CNT: Utilization clamp constraints count |
370 | */ |
371 | enum uclamp_id { |
372 | UCLAMP_MIN = 0, |
373 | UCLAMP_MAX, |
374 | UCLAMP_CNT |
375 | }; |
376 | |
377 | #ifdef CONFIG_SMP |
378 | extern struct root_domain def_root_domain; |
379 | extern struct mutex sched_domains_mutex; |
380 | #endif |
381 | |
382 | struct sched_param { |
383 | int sched_priority; |
384 | }; |
385 | |
386 | struct sched_info { |
387 | #ifdef CONFIG_SCHED_INFO |
388 | /* Cumulative counters: */ |
389 | |
390 | /* # of times we have run on this CPU: */ |
391 | unsigned long pcount; |
392 | |
393 | /* Time spent waiting on a runqueue: */ |
394 | unsigned long long run_delay; |
395 | |
396 | /* Timestamps: */ |
397 | |
398 | /* When did we last run on a CPU? */ |
399 | unsigned long long last_arrival; |
400 | |
401 | /* When were we last queued to run? */ |
402 | unsigned long long last_queued; |
403 | |
404 | #endif /* CONFIG_SCHED_INFO */ |
405 | }; |
406 | |
407 | /* |
408 | * Integer metrics need fixed point arithmetic, e.g., sched/fair |
409 | * has a few: load, load_avg, util_avg, freq, and capacity. |
410 | * |
411 | * We define a basic fixed point arithmetic range, and then formalize |
412 | * all these metrics based on that basic range. |
413 | */ |
414 | # define SCHED_FIXEDPOINT_SHIFT 10 |
415 | # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) |
416 | |
417 | /* Increase resolution of cpu_capacity calculations */ |
418 | # define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT |
419 | # define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) |
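
/*
 * Worked example: with SCHED_FIXEDPOINT_SHIFT == 10 these metrics use a
 * 1024-based fixed point representation. A CPU with roughly half the
 * compute capacity of the biggest CPU in the system ends up with
 * cpu_capacity ~= 512 out of SCHED_CAPACITY_SCALE, and a utilization of
 * 25% is expressed as ~256.
 */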
420 | |
421 | struct load_weight { |
422 | unsigned long weight; |
423 | u32 inv_weight; |
424 | }; |
425 | |
426 | /* |
427 | * The load/runnable/util_avg accumulates an infinite geometric series |
428 | * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). |
429 | * |
430 | * [load_avg definition] |
431 | * |
432 | * load_avg = runnable% * scale_load_down(load) |
433 | * |
434 | * [runnable_avg definition] |
435 | * |
436 | * runnable_avg = runnable% * SCHED_CAPACITY_SCALE |
437 | * |
438 | * [util_avg definition] |
439 | * |
440 | * util_avg = running% * SCHED_CAPACITY_SCALE |
441 | * |
442 | * where runnable% is the time ratio that a sched_entity is runnable and |
443 | * running% the time ratio that a sched_entity is running. |
444 | * |
445 | * For cfs_rq, they are the aggregated values of all runnable and blocked |
446 | * sched_entities. |
447 | * |
448 | * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU |
449 | * capacity scaling. The scaling is done through the rq_clock_pelt that is used |
450 | * for computing those signals (see update_rq_clock_pelt()) |
451 | * |
452 | * N.B., the above ratios (runnable% and running%) themselves are in the |
 * range of [0, 1]. To do fixed point arithmetic, we therefore scale them
 * up to as large a range as necessary. This is for example reflected by
455 | * util_avg's SCHED_CAPACITY_SCALE. |
456 | * |
457 | * [Overflow issue] |
458 | * |
459 | * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities |
460 | * with the highest load (=88761), always runnable on a single cfs_rq, |
461 | * and should not overflow as the number already hits PID_MAX_LIMIT. |
462 | * |
463 | * For all other cases (including 32-bit kernels), struct load_weight's |
464 | * weight will overflow first before we do, because: |
465 | * |
466 | * Max(load_avg) <= Max(load.weight) |
467 | * |
468 | * Then it is the load_weight's responsibility to consider overflow |
469 | * issues. |
470 | */ |
471 | struct sched_avg { |
472 | u64 last_update_time; |
473 | u64 load_sum; |
474 | u64 runnable_sum; |
475 | u32 util_sum; |
476 | u32 period_contrib; |
477 | unsigned long load_avg; |
478 | unsigned long runnable_avg; |
479 | unsigned long util_avg; |
480 | unsigned int util_est; |
481 | } ____cacheline_aligned; |
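
/*
 * Worked example (approximate, ignoring the details of the PELT decay): a
 * nice-0 task, whose scale_load_down()'ed weight is 1024, that is runnable
 * half of the time settles around load_avg ~= 512, while a task actually
 * running 25% of the time on a full-capacity CPU settles around
 * util_avg ~= 256 out of SCHED_CAPACITY_SCALE.
 */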
482 | |
483 | /* |
484 | * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg |
485 | * updates. When a task is dequeued, its util_est should not be updated if its |
486 | * util_avg has not been updated in the meantime. |
487 | * This information is mapped into the MSB bit of util_est at dequeue time. |
 * Since the maximum value of util_est for a task is 1024 (the PELT util_avg
 * of a task), it is safe to use the MSB.
490 | */ |
491 | #define UTIL_EST_WEIGHT_SHIFT 2 |
492 | #define UTIL_AVG_UNCHANGED 0x80000000 |
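
/*
 * For illustration, a sketch of how a consumer strips the flag before using
 * the estimate (the real helpers live in kernel/sched/fair.c):
 *
 *	unsigned int enqueued = READ_ONCE(p->se.avg.util_est);
 *	unsigned int value = enqueued & ~UTIL_AVG_UNCHANGED;
 */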
493 | |
494 | struct sched_statistics { |
495 | #ifdef CONFIG_SCHEDSTATS |
496 | u64 wait_start; |
497 | u64 wait_max; |
498 | u64 wait_count; |
499 | u64 wait_sum; |
500 | u64 iowait_count; |
501 | u64 iowait_sum; |
502 | |
503 | u64 sleep_start; |
504 | u64 sleep_max; |
505 | s64 sum_sleep_runtime; |
506 | |
507 | u64 block_start; |
508 | u64 block_max; |
509 | s64 sum_block_runtime; |
510 | |
511 | s64 exec_max; |
512 | u64 slice_max; |
513 | |
514 | u64 nr_migrations_cold; |
515 | u64 nr_failed_migrations_affine; |
516 | u64 nr_failed_migrations_running; |
517 | u64 nr_failed_migrations_hot; |
518 | u64 nr_forced_migrations; |
519 | |
520 | u64 nr_wakeups; |
521 | u64 nr_wakeups_sync; |
522 | u64 nr_wakeups_migrate; |
523 | u64 nr_wakeups_local; |
524 | u64 nr_wakeups_remote; |
525 | u64 nr_wakeups_affine; |
526 | u64 nr_wakeups_affine_attempts; |
527 | u64 nr_wakeups_passive; |
528 | u64 nr_wakeups_idle; |
529 | |
530 | #ifdef CONFIG_SCHED_CORE |
531 | u64 core_forceidle_sum; |
532 | #endif |
533 | #endif /* CONFIG_SCHEDSTATS */ |
534 | } ____cacheline_aligned; |
535 | |
536 | struct sched_entity { |
537 | /* For load-balancing: */ |
538 | struct load_weight load; |
539 | struct rb_node run_node; |
540 | u64 deadline; |
541 | u64 min_vruntime; |
542 | |
543 | struct list_head group_node; |
544 | unsigned int on_rq; |
545 | |
546 | u64 exec_start; |
547 | u64 sum_exec_runtime; |
548 | u64 prev_sum_exec_runtime; |
549 | u64 vruntime; |
550 | s64 vlag; |
551 | u64 slice; |
552 | |
553 | u64 nr_migrations; |
554 | |
555 | #ifdef CONFIG_FAIR_GROUP_SCHED |
556 | int depth; |
557 | struct sched_entity *parent; |
558 | /* rq on which this entity is (to be) queued: */ |
559 | struct cfs_rq *cfs_rq; |
560 | /* rq "owned" by this entity/group: */ |
561 | struct cfs_rq *my_q; |
562 | /* cached value of my_q->h_nr_running */ |
563 | unsigned long runnable_weight; |
564 | #endif |
565 | |
566 | #ifdef CONFIG_SMP |
567 | /* |
568 | * Per entity load average tracking. |
569 | * |
570 | * Put into separate cache line so it does not |
571 | * collide with read-mostly values above. |
572 | */ |
573 | struct sched_avg avg; |
574 | #endif |
575 | }; |
576 | |
577 | struct sched_rt_entity { |
578 | struct list_head run_list; |
579 | unsigned long timeout; |
580 | unsigned long watchdog_stamp; |
581 | unsigned int time_slice; |
582 | unsigned short on_rq; |
583 | unsigned short on_list; |
584 | |
585 | struct sched_rt_entity *back; |
586 | #ifdef CONFIG_RT_GROUP_SCHED |
587 | struct sched_rt_entity *parent; |
588 | /* rq on which this entity is (to be) queued: */ |
589 | struct rt_rq *rt_rq; |
590 | /* rq "owned" by this entity/group: */ |
591 | struct rt_rq *my_q; |
592 | #endif |
593 | } __randomize_layout; |
594 | |
595 | typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); |
596 | typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); |
597 | |
598 | struct sched_dl_entity { |
599 | struct rb_node rb_node; |
600 | |
601 | /* |
602 | * Original scheduling parameters. Copied here from sched_attr |
603 | * during sched_setattr(), they will remain the same until |
604 | * the next sched_setattr(). |
605 | */ |
606 | u64 dl_runtime; /* Maximum runtime for each instance */ |
607 | u64 dl_deadline; /* Relative deadline of each instance */ |
608 | u64 dl_period; /* Separation of two instances (period) */ |
609 | u64 dl_bw; /* dl_runtime / dl_period */ |
610 | u64 dl_density; /* dl_runtime / dl_deadline */ |
611 | |
612 | /* |
613 | * Actual scheduling parameters. Initialized with the values above, |
614 | * they are continuously updated during task execution. Note that |
615 | * the remaining runtime could be < 0 in case we are in overrun. |
616 | */ |
617 | s64 runtime; /* Remaining runtime for this instance */ |
618 | u64 deadline; /* Absolute deadline for this instance */ |
619 | unsigned int flags; /* Specifying the scheduler behaviour */ |
620 | |
621 | /* |
622 | * Some bool flags: |
623 | * |
624 | * @dl_throttled tells if we exhausted the runtime. If so, the |
625 | * task has to wait for a replenishment to be performed at the |
626 | * next firing of dl_timer. |
627 | * |
628 | * @dl_yielded tells if task gave up the CPU before consuming |
629 | * all its available runtime during the last job. |
630 | * |
631 | * @dl_non_contending tells if the task is inactive while still |
632 | * contributing to the active utilization. In other words, it |
633 | * indicates if the inactive timer has been armed and its handler |
634 | * has not been executed yet. This flag is useful to avoid race |
635 | * conditions between the inactive timer handler and the wakeup |
636 | * code. |
637 | * |
638 | * @dl_overrun tells if the task asked to be informed about runtime |
639 | * overruns. |
640 | */ |
641 | unsigned int dl_throttled : 1; |
642 | unsigned int dl_yielded : 1; |
643 | unsigned int dl_non_contending : 1; |
644 | unsigned int dl_overrun : 1; |
645 | unsigned int dl_server : 1; |
646 | |
647 | /* |
648 | * Bandwidth enforcement timer. Each -deadline task has its |
649 | * own bandwidth to be enforced, thus we need one timer per task. |
650 | */ |
651 | struct hrtimer dl_timer; |
652 | |
653 | /* |
654 | * Inactive timer, responsible for decreasing the active utilization |
655 | * at the "0-lag time". When a -deadline task blocks, it contributes |
656 | * to GRUB's active utilization until the "0-lag time", hence a |
657 | * timer is needed to decrease the active utilization at the correct |
658 | * time. |
659 | */ |
660 | struct hrtimer inactive_timer; |
661 | |
662 | /* |
663 | * Bits for DL-server functionality. Also see the comment near |
664 | * dl_server_update(). |
665 | * |
666 | * @rq the runqueue this server is for |
667 | * |
	 * @server_has_tasks() returns true if @server_pick() would return a
	 * runnable task.
670 | */ |
671 | struct rq *rq; |
672 | dl_server_has_tasks_f server_has_tasks; |
673 | dl_server_pick_f server_pick; |
674 | |
675 | #ifdef CONFIG_RT_MUTEXES |
676 | /* |
677 | * Priority Inheritance. When a DEADLINE scheduling entity is boosted |
678 | * pi_se points to the donor, otherwise points to the dl_se it belongs |
679 | * to (the original one/itself). |
680 | */ |
681 | struct sched_dl_entity *pi_se; |
682 | #endif |
683 | }; |
684 | |
685 | #ifdef CONFIG_UCLAMP_TASK |
686 | /* Number of utilization clamp buckets (shorter alias) */ |
687 | #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT |
688 | |
689 | /* |
690 | * Utilization clamp for a scheduling entity |
691 | * @value: clamp value "assigned" to a se |
692 | * @bucket_id: bucket index corresponding to the "assigned" value |
693 | * @active: the se is currently refcounted in a rq's bucket |
694 | * @user_defined: the requested clamp value comes from user-space |
695 | * |
696 | * The bucket_id is the index of the clamp bucket matching the clamp value |
697 | * which is pre-computed and stored to avoid expensive integer divisions from |
698 | * the fast path. |
699 | * |
700 | * The active bit is set whenever a task has got an "effective" value assigned, |
701 | * which can be different from the clamp value "requested" from user-space. |
 * This makes it possible to know that a task is refcounted in the rq's
 * bucket corresponding to the "effective" bucket_id.
704 | * |
705 | * The user_defined bit is set whenever a task has got a task-specific clamp |
706 | * value requested from userspace, i.e. the system defaults apply to this task |
 * just as a restriction. This allows default clamps to be relaxed when a
 * less restrictive task-specific value has been requested, which in turn
 * makes it possible to implement a "nice" semantic. For example, a task
 * running with a 20% default boost can still drop its own boosting to 0%.
711 | */ |
712 | struct uclamp_se { |
713 | unsigned int value : bits_per(SCHED_CAPACITY_SCALE); |
714 | unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); |
715 | unsigned int active : 1; |
716 | unsigned int user_defined : 1; |
717 | }; |
718 | #endif /* CONFIG_UCLAMP_TASK */ |
719 | |
720 | union rcu_special { |
721 | struct { |
722 | u8 blocked; |
723 | u8 need_qs; |
724 | u8 exp_hint; /* Hint for performance. */ |
725 | u8 need_mb; /* Readers need smp_mb(). */ |
726 | } b; /* Bits. */ |
727 | u32 s; /* Set of bits. */ |
728 | }; |
729 | |
730 | enum perf_event_task_context { |
731 | perf_invalid_context = -1, |
732 | perf_hw_context = 0, |
733 | perf_sw_context, |
734 | perf_nr_task_contexts, |
735 | }; |
736 | |
737 | struct wake_q_node { |
738 | struct wake_q_node *next; |
739 | }; |
740 | |
741 | struct kmap_ctrl { |
742 | #ifdef CONFIG_KMAP_LOCAL |
743 | int idx; |
744 | pte_t pteval[KM_MAX_IDX]; |
745 | #endif |
746 | }; |
747 | |
748 | struct task_struct { |
749 | #ifdef CONFIG_THREAD_INFO_IN_TASK |
750 | /* |
751 | * For reasons of header soup (see current_thread_info()), this |
752 | * must be the first element of task_struct. |
753 | */ |
754 | struct thread_info thread_info; |
755 | #endif |
756 | unsigned int __state; |
757 | |
758 | /* saved state for "spinlock sleepers" */ |
759 | unsigned int saved_state; |
760 | |
761 | /* |
762 | * This begins the randomizable portion of task_struct. Only |
763 | * scheduling-critical items should be added above here. |
764 | */ |
765 | randomized_struct_fields_start |
766 | |
767 | void *stack; |
768 | refcount_t usage; |
769 | /* Per task flags (PF_*), defined further below: */ |
770 | unsigned int flags; |
771 | unsigned int ptrace; |
772 | |
773 | #ifdef CONFIG_SMP |
774 | int on_cpu; |
775 | struct __call_single_node wake_entry; |
776 | unsigned int wakee_flips; |
777 | unsigned long wakee_flip_decay_ts; |
778 | struct task_struct *last_wakee; |
779 | |
780 | /* |
781 | * recent_used_cpu is initially set as the last CPU used by a task |
782 | * that wakes affine another task. Waker/wakee relationships can |
783 | * push tasks around a CPU where each wakeup moves to the next one. |
784 | * Tracking a recently used CPU allows a quick search for a recently |
785 | * used CPU that may be idle. |
786 | */ |
787 | int recent_used_cpu; |
788 | int wake_cpu; |
789 | #endif |
790 | int on_rq; |
791 | |
792 | int prio; |
793 | int static_prio; |
794 | int normal_prio; |
795 | unsigned int rt_priority; |
796 | |
797 | struct sched_entity se; |
798 | struct sched_rt_entity rt; |
799 | struct sched_dl_entity dl; |
800 | struct sched_dl_entity *dl_server; |
801 | const struct sched_class *sched_class; |
802 | |
803 | #ifdef CONFIG_SCHED_CORE |
804 | struct rb_node core_node; |
805 | unsigned long core_cookie; |
806 | unsigned int core_occupation; |
807 | #endif |
808 | |
809 | #ifdef CONFIG_CGROUP_SCHED |
810 | struct task_group *sched_task_group; |
811 | #endif |
812 | |
813 | #ifdef CONFIG_UCLAMP_TASK |
814 | /* |
815 | * Clamp values requested for a scheduling entity. |
816 | * Must be updated with task_rq_lock() held. |
817 | */ |
818 | struct uclamp_se uclamp_req[UCLAMP_CNT]; |
819 | /* |
820 | * Effective clamp values used for a scheduling entity. |
821 | * Must be updated with task_rq_lock() held. |
822 | */ |
823 | struct uclamp_se uclamp[UCLAMP_CNT]; |
824 | #endif |
825 | |
826 | struct sched_statistics stats; |
827 | |
828 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
829 | /* List of struct preempt_notifier: */ |
830 | struct hlist_head preempt_notifiers; |
831 | #endif |
832 | |
833 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
834 | unsigned int btrace_seq; |
835 | #endif |
836 | |
837 | unsigned int policy; |
838 | int nr_cpus_allowed; |
839 | const cpumask_t *cpus_ptr; |
840 | cpumask_t *user_cpus_ptr; |
841 | cpumask_t cpus_mask; |
842 | void *migration_pending; |
843 | #ifdef CONFIG_SMP |
844 | unsigned short migration_disabled; |
845 | #endif |
846 | unsigned short migration_flags; |
847 | |
848 | #ifdef CONFIG_PREEMPT_RCU |
849 | int rcu_read_lock_nesting; |
850 | union rcu_special rcu_read_unlock_special; |
851 | struct list_head rcu_node_entry; |
852 | struct rcu_node *rcu_blocked_node; |
853 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
854 | |
855 | #ifdef CONFIG_TASKS_RCU |
856 | unsigned long rcu_tasks_nvcsw; |
857 | u8 rcu_tasks_holdout; |
858 | u8 rcu_tasks_idx; |
859 | int rcu_tasks_idle_cpu; |
860 | struct list_head rcu_tasks_holdout_list; |
861 | int rcu_tasks_exit_cpu; |
862 | struct list_head rcu_tasks_exit_list; |
863 | #endif /* #ifdef CONFIG_TASKS_RCU */ |
864 | |
865 | #ifdef CONFIG_TASKS_TRACE_RCU |
866 | int trc_reader_nesting; |
867 | int trc_ipi_to_cpu; |
868 | union rcu_special trc_reader_special; |
869 | struct list_head trc_holdout_list; |
870 | struct list_head trc_blkd_node; |
871 | int trc_blkd_cpu; |
872 | #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ |
873 | |
874 | struct sched_info sched_info; |
875 | |
876 | struct list_head tasks; |
877 | #ifdef CONFIG_SMP |
878 | struct plist_node pushable_tasks; |
879 | struct rb_node pushable_dl_tasks; |
880 | #endif |
881 | |
882 | struct mm_struct *mm; |
883 | struct mm_struct *active_mm; |
884 | struct address_space *faults_disabled_mapping; |
885 | |
886 | int exit_state; |
887 | int exit_code; |
888 | int exit_signal; |
889 | /* The signal sent when the parent dies: */ |
890 | int pdeath_signal; |
891 | /* JOBCTL_*, siglock protected: */ |
892 | unsigned long jobctl; |
893 | |
894 | /* Used for emulating ABI behavior of previous Linux versions: */ |
895 | unsigned int personality; |
896 | |
897 | /* Scheduler bits, serialized by scheduler locks: */ |
898 | unsigned sched_reset_on_fork:1; |
899 | unsigned sched_contributes_to_load:1; |
900 | unsigned sched_migrated:1; |
901 | |
902 | /* Force alignment to the next boundary: */ |
903 | unsigned :0; |
904 | |
905 | /* Unserialized, strictly 'current' */ |
906 | |
907 | /* |
908 | * This field must not be in the scheduler word above due to wakelist |
909 | * queueing no longer being serialized by p->on_cpu. However: |
910 | * |
911 | * p->XXX = X; ttwu() |
912 | * schedule() if (p->on_rq && ..) // false |
913 | * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true |
914 | * deactivate_task() ttwu_queue_wakelist()) |
915 | * p->on_rq = 0; p->sched_remote_wakeup = Y; |
916 | * |
917 | * guarantees all stores of 'current' are visible before |
918 | * ->sched_remote_wakeup gets used, so it can be in this word. |
919 | */ |
920 | unsigned sched_remote_wakeup:1; |
921 | #ifdef CONFIG_RT_MUTEXES |
922 | unsigned sched_rt_mutex:1; |
923 | #endif |
924 | |
925 | /* Bit to tell TOMOYO we're in execve(): */ |
926 | unsigned in_execve:1; |
927 | unsigned in_iowait:1; |
928 | #ifndef TIF_RESTORE_SIGMASK |
929 | unsigned restore_sigmask:1; |
930 | #endif |
931 | #ifdef CONFIG_MEMCG |
932 | unsigned in_user_fault:1; |
933 | #endif |
934 | #ifdef CONFIG_LRU_GEN |
935 | /* whether the LRU algorithm may apply to this access */ |
936 | unsigned in_lru_fault:1; |
937 | #endif |
938 | #ifdef CONFIG_COMPAT_BRK |
939 | unsigned brk_randomized:1; |
940 | #endif |
941 | #ifdef CONFIG_CGROUPS |
942 | /* disallow userland-initiated cgroup migration */ |
943 | unsigned no_cgroup_migration:1; |
944 | /* task is frozen/stopped (used by the cgroup freezer) */ |
945 | unsigned frozen:1; |
946 | #endif |
947 | #ifdef CONFIG_BLK_CGROUP |
948 | unsigned use_memdelay:1; |
949 | #endif |
950 | #ifdef CONFIG_PSI |
951 | /* Stalled due to lack of memory */ |
952 | unsigned in_memstall:1; |
953 | #endif |
954 | #ifdef CONFIG_PAGE_OWNER |
955 | /* Used by page_owner=on to detect recursion in page tracking. */ |
956 | unsigned in_page_owner:1; |
957 | #endif |
958 | #ifdef CONFIG_EVENTFD |
959 | /* Recursion prevention for eventfd_signal() */ |
960 | unsigned in_eventfd:1; |
961 | #endif |
962 | #ifdef CONFIG_ARCH_HAS_CPU_PASID |
963 | unsigned pasid_activated:1; |
964 | #endif |
965 | #ifdef CONFIG_CPU_SUP_INTEL |
966 | unsigned reported_split_lock:1; |
967 | #endif |
968 | #ifdef CONFIG_TASK_DELAY_ACCT |
969 | /* delay due to memory thrashing */ |
970 | unsigned in_thrashing:1; |
971 | #endif |
972 | |
973 | unsigned long atomic_flags; /* Flags requiring atomic access. */ |
974 | |
975 | struct restart_block restart_block; |
976 | |
977 | pid_t pid; |
978 | pid_t tgid; |
979 | |
980 | #ifdef CONFIG_STACKPROTECTOR |
981 | /* Canary value for the -fstack-protector GCC feature: */ |
982 | unsigned long stack_canary; |
983 | #endif |
984 | /* |
985 | * Pointers to the (original) parent process, youngest child, younger sibling, |
986 | * older sibling, respectively. (p->father can be replaced with |
987 | * p->real_parent->pid) |
988 | */ |
989 | |
990 | /* Real parent process: */ |
991 | struct task_struct __rcu *real_parent; |
992 | |
993 | /* Recipient of SIGCHLD, wait4() reports: */ |
994 | struct task_struct __rcu *parent; |
995 | |
996 | /* |
997 | * Children/sibling form the list of natural children: |
998 | */ |
999 | struct list_head children; |
1000 | struct list_head sibling; |
1001 | struct task_struct *group_leader; |
1002 | |
1003 | /* |
1004 | * 'ptraced' is the list of tasks this task is using ptrace() on. |
1005 | * |
1006 | * This includes both natural children and PTRACE_ATTACH targets. |
1007 | * 'ptrace_entry' is this task's link on the p->parent->ptraced list. |
1008 | */ |
1009 | struct list_head ptraced; |
1010 | struct list_head ptrace_entry; |
1011 | |
1012 | /* PID/PID hash table linkage. */ |
1013 | struct pid *thread_pid; |
1014 | struct hlist_node pid_links[PIDTYPE_MAX]; |
1015 | struct list_head thread_node; |
1016 | |
1017 | struct completion *vfork_done; |
1018 | |
1019 | /* CLONE_CHILD_SETTID: */ |
1020 | int __user *set_child_tid; |
1021 | |
1022 | /* CLONE_CHILD_CLEARTID: */ |
1023 | int __user *clear_child_tid; |
1024 | |
1025 | /* PF_KTHREAD | PF_IO_WORKER */ |
1026 | void *worker_private; |
1027 | |
1028 | u64 utime; |
1029 | u64 stime; |
1030 | #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME |
1031 | u64 utimescaled; |
1032 | u64 stimescaled; |
1033 | #endif |
1034 | u64 gtime; |
1035 | struct prev_cputime prev_cputime; |
1036 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
1037 | struct vtime vtime; |
1038 | #endif |
1039 | |
1040 | #ifdef CONFIG_NO_HZ_FULL |
1041 | atomic_t tick_dep_mask; |
1042 | #endif |
1043 | /* Context switch counts: */ |
1044 | unsigned long nvcsw; |
1045 | unsigned long nivcsw; |
1046 | |
1047 | /* Monotonic time in nsecs: */ |
1048 | u64 start_time; |
1049 | |
1050 | /* Boot based time in nsecs: */ |
1051 | u64 start_boottime; |
1052 | |
1053 | /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ |
1054 | unsigned long min_flt; |
1055 | unsigned long maj_flt; |
1056 | |
1057 | /* Empty if CONFIG_POSIX_CPUTIMERS=n */ |
1058 | struct posix_cputimers posix_cputimers; |
1059 | |
1060 | #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK |
1061 | struct posix_cputimers_work posix_cputimers_work; |
1062 | #endif |
1063 | |
1064 | /* Process credentials: */ |
1065 | |
1066 | /* Tracer's credentials at attach: */ |
1067 | const struct cred __rcu *ptracer_cred; |
1068 | |
1069 | /* Objective and real subjective task credentials (COW): */ |
1070 | const struct cred __rcu *real_cred; |
1071 | |
1072 | /* Effective (overridable) subjective task credentials (COW): */ |
1073 | const struct cred __rcu *cred; |
1074 | |
1075 | #ifdef CONFIG_KEYS |
1076 | /* Cached requested key. */ |
1077 | struct key *cached_requested_key; |
1078 | #endif |
1079 | |
1080 | /* |
1081 | * executable name, excluding path. |
1082 | * |
	 * - normally initialized by setup_new_exec()
1084 | * - access it with [gs]et_task_comm() |
1085 | * - lock it with task_lock() |
1086 | */ |
1087 | char comm[TASK_COMM_LEN]; |
1088 | |
1089 | struct nameidata *nameidata; |
1090 | |
1091 | #ifdef CONFIG_SYSVIPC |
1092 | struct sysv_sem sysvsem; |
1093 | struct sysv_shm sysvshm; |
1094 | #endif |
1095 | #ifdef CONFIG_DETECT_HUNG_TASK |
1096 | unsigned long last_switch_count; |
1097 | unsigned long last_switch_time; |
1098 | #endif |
1099 | /* Filesystem information: */ |
1100 | struct fs_struct *fs; |
1101 | |
1102 | /* Open file information: */ |
1103 | struct files_struct *files; |
1104 | |
1105 | #ifdef CONFIG_IO_URING |
1106 | struct io_uring_task *io_uring; |
1107 | #endif |
1108 | |
1109 | /* Namespaces: */ |
1110 | struct nsproxy *nsproxy; |
1111 | |
1112 | /* Signal handlers: */ |
1113 | struct signal_struct *signal; |
1114 | struct sighand_struct __rcu *sighand; |
1115 | sigset_t blocked; |
1116 | sigset_t real_blocked; |
1117 | /* Restored if set_restore_sigmask() was used: */ |
1118 | sigset_t saved_sigmask; |
1119 | struct sigpending pending; |
1120 | unsigned long sas_ss_sp; |
1121 | size_t sas_ss_size; |
1122 | unsigned int sas_ss_flags; |
1123 | |
1124 | struct callback_head *task_works; |
1125 | |
1126 | #ifdef CONFIG_AUDIT |
1127 | #ifdef CONFIG_AUDITSYSCALL |
1128 | struct audit_context *audit_context; |
1129 | #endif |
1130 | kuid_t loginuid; |
1131 | unsigned int sessionid; |
1132 | #endif |
1133 | struct seccomp seccomp; |
1134 | struct syscall_user_dispatch syscall_dispatch; |
1135 | |
1136 | /* Thread group tracking: */ |
1137 | u64 parent_exec_id; |
1138 | u64 self_exec_id; |
1139 | |
1140 | /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ |
1141 | spinlock_t alloc_lock; |
1142 | |
1143 | /* Protection of the PI data structures: */ |
1144 | raw_spinlock_t pi_lock; |
1145 | |
1146 | struct wake_q_node wake_q; |
1147 | |
1148 | #ifdef CONFIG_RT_MUTEXES |
1149 | /* PI waiters blocked on a rt_mutex held by this task: */ |
1150 | struct rb_root_cached pi_waiters; |
1151 | /* Updated under owner's pi_lock and rq lock */ |
1152 | struct task_struct *pi_top_task; |
1153 | /* Deadlock detection and priority inheritance handling: */ |
1154 | struct rt_mutex_waiter *pi_blocked_on; |
1155 | #endif |
1156 | |
1157 | #ifdef CONFIG_DEBUG_MUTEXES |
1158 | /* Mutex deadlock detection: */ |
1159 | struct mutex_waiter *blocked_on; |
1160 | #endif |
1161 | |
1162 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
1163 | int non_block_count; |
1164 | #endif |
1165 | |
1166 | #ifdef CONFIG_TRACE_IRQFLAGS |
1167 | struct irqtrace_events irqtrace; |
1168 | unsigned int hardirq_threaded; |
1169 | u64 hardirq_chain_key; |
1170 | int softirqs_enabled; |
1171 | int softirq_context; |
1172 | int irq_config; |
1173 | #endif |
1174 | #ifdef CONFIG_PREEMPT_RT |
1175 | int softirq_disable_cnt; |
1176 | #endif |
1177 | |
1178 | #ifdef CONFIG_LOCKDEP |
1179 | # define MAX_LOCK_DEPTH 48UL |
1180 | u64 curr_chain_key; |
1181 | int lockdep_depth; |
1182 | unsigned int lockdep_recursion; |
1183 | struct held_lock held_locks[MAX_LOCK_DEPTH]; |
1184 | #endif |
1185 | |
1186 | #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) |
1187 | unsigned int in_ubsan; |
1188 | #endif |
1189 | |
1190 | /* Journalling filesystem info: */ |
1191 | void *journal_info; |
1192 | |
1193 | /* Stacked block device info: */ |
1194 | struct bio_list *bio_list; |
1195 | |
1196 | /* Stack plugging: */ |
1197 | struct blk_plug *plug; |
1198 | |
1199 | /* VM state: */ |
1200 | struct reclaim_state *reclaim_state; |
1201 | |
1202 | struct io_context *io_context; |
1203 | |
1204 | #ifdef CONFIG_COMPACTION |
1205 | struct capture_control *capture_control; |
1206 | #endif |
1207 | /* Ptrace state: */ |
1208 | unsigned long ptrace_message; |
1209 | kernel_siginfo_t *last_siginfo; |
1210 | |
1211 | struct task_io_accounting ioac; |
1212 | #ifdef CONFIG_PSI |
1213 | /* Pressure stall state */ |
1214 | unsigned int psi_flags; |
1215 | #endif |
1216 | #ifdef CONFIG_TASK_XACCT |
1217 | /* Accumulated RSS usage: */ |
	u64				acct_rss_mem1;
1219 | /* Accumulated virtual memory usage: */ |
1220 | u64 acct_vm_mem1; |
1221 | /* stime + utime since last update: */ |
1222 | u64 acct_timexpd; |
1223 | #endif |
1224 | #ifdef CONFIG_CPUSETS |
1225 | /* Protected by ->alloc_lock: */ |
1226 | nodemask_t mems_allowed; |
1227 | /* Sequence number to catch updates: */ |
1228 | seqcount_spinlock_t mems_allowed_seq; |
1229 | int cpuset_mem_spread_rotor; |
1230 | int cpuset_slab_spread_rotor; |
1231 | #endif |
1232 | #ifdef CONFIG_CGROUPS |
1233 | /* Control Group info protected by css_set_lock: */ |
1234 | struct css_set __rcu *cgroups; |
1235 | /* cg_list protected by css_set_lock and tsk->alloc_lock: */ |
1236 | struct list_head cg_list; |
1237 | #endif |
1238 | #ifdef CONFIG_X86_CPU_RESCTRL |
1239 | u32 closid; |
1240 | u32 rmid; |
1241 | #endif |
1242 | #ifdef CONFIG_FUTEX |
1243 | struct robust_list_head __user *robust_list; |
1244 | #ifdef CONFIG_COMPAT |
1245 | struct compat_robust_list_head __user *compat_robust_list; |
1246 | #endif |
1247 | struct list_head pi_state_list; |
1248 | struct futex_pi_state *pi_state_cache; |
1249 | struct mutex futex_exit_mutex; |
1250 | unsigned int futex_state; |
1251 | #endif |
1252 | #ifdef CONFIG_PERF_EVENTS |
1253 | struct perf_event_context *perf_event_ctxp; |
1254 | struct mutex perf_event_mutex; |
1255 | struct list_head perf_event_list; |
1256 | #endif |
1257 | #ifdef CONFIG_DEBUG_PREEMPT |
1258 | unsigned long preempt_disable_ip; |
1259 | #endif |
1260 | #ifdef CONFIG_NUMA |
1261 | /* Protected by alloc_lock: */ |
1262 | struct mempolicy *mempolicy; |
1263 | short il_prev; |
1264 | u8 il_weight; |
1265 | short pref_node_fork; |
1266 | #endif |
1267 | #ifdef CONFIG_NUMA_BALANCING |
1268 | int numa_scan_seq; |
1269 | unsigned int numa_scan_period; |
1270 | unsigned int numa_scan_period_max; |
1271 | int numa_preferred_nid; |
1272 | unsigned long numa_migrate_retry; |
1273 | /* Migration stamp: */ |
1274 | u64 node_stamp; |
1275 | u64 last_task_numa_placement; |
1276 | u64 last_sum_exec_runtime; |
1277 | struct callback_head numa_work; |
1278 | |
1279 | /* |
1280 | * This pointer is only modified for current in syscall and |
1281 | * pagefault context (and for tasks being destroyed), so it can be read |
1282 | * from any of the following contexts: |
1283 | * - RCU read-side critical section |
1284 | * - current->numa_group from everywhere |
1285 | * - task's runqueue locked, task not running |
1286 | */ |
1287 | struct numa_group __rcu *numa_group; |
1288 | |
1289 | /* |
1290 | * numa_faults is an array split into four regions: |
1291 | * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer |
1292 | * in this precise order. |
1293 | * |
1294 | * faults_memory: Exponential decaying average of faults on a per-node |
1295 | * basis. Scheduling placement decisions are made based on these |
1296 | * counts. The values remain static for the duration of a PTE scan. |
1297 | * faults_cpu: Track the nodes the process was running on when a NUMA |
1298 | * hinting fault was incurred. |
1299 | * faults_memory_buffer and faults_cpu_buffer: Record faults per node |
1300 | * during the current scan window. When the scan completes, the counts |
1301 | * in faults_memory and faults_cpu decay and these values are copied. |
1302 | */ |
1303 | unsigned long *numa_faults; |
1304 | unsigned long total_numa_faults; |
1305 | |
1306 | /* |
1307 | * numa_faults_locality tracks if faults recorded during the last |
1308 | * scan window were remote/local or failed to migrate. The task scan |
1309 | * period is adapted based on the locality of the faults with different |
1310 | * weights depending on whether they were shared or private faults |
1311 | */ |
1312 | unsigned long numa_faults_locality[3]; |
1313 | |
1314 | unsigned long numa_pages_migrated; |
1315 | #endif /* CONFIG_NUMA_BALANCING */ |
1316 | |
1317 | #ifdef CONFIG_RSEQ |
1318 | struct rseq __user *rseq; |
1319 | u32 rseq_len; |
1320 | u32 rseq_sig; |
1321 | /* |
1322 | * RmW on rseq_event_mask must be performed atomically |
1323 | * with respect to preemption. |
1324 | */ |
1325 | unsigned long rseq_event_mask; |
1326 | #endif |
1327 | |
1328 | #ifdef CONFIG_SCHED_MM_CID |
1329 | int mm_cid; /* Current cid in mm */ |
1330 | int last_mm_cid; /* Most recent cid in mm */ |
1331 | int migrate_from_cpu; |
1332 | int mm_cid_active; /* Whether cid bitmap is active */ |
1333 | struct callback_head cid_work; |
1334 | #endif |
1335 | |
1336 | struct tlbflush_unmap_batch tlb_ubc; |
1337 | |
1338 | /* Cache last used pipe for splice(): */ |
1339 | struct pipe_inode_info *splice_pipe; |
1340 | |
1341 | struct page_frag task_frag; |
1342 | |
1343 | #ifdef CONFIG_TASK_DELAY_ACCT |
1344 | struct task_delay_info *delays; |
1345 | #endif |
1346 | |
1347 | #ifdef CONFIG_FAULT_INJECTION |
1348 | int make_it_fail; |
1349 | unsigned int fail_nth; |
1350 | #endif |
1351 | /* |
1352 | * When (nr_dirtied >= nr_dirtied_pause), it's time to call |
1353 | * balance_dirty_pages() for a dirty throttling pause: |
1354 | */ |
1355 | int nr_dirtied; |
1356 | int nr_dirtied_pause; |
1357 | /* Start of a write-and-pause period: */ |
1358 | unsigned long dirty_paused_when; |
1359 | |
1360 | #ifdef CONFIG_LATENCYTOP |
1361 | int latency_record_count; |
1362 | struct latency_record latency_record[LT_SAVECOUNT]; |
1363 | #endif |
1364 | /* |
1365 | * Time slack values; these are used to round up poll() and |
1366 | * select() etc timeout values. These are in nanoseconds. |
1367 | */ |
1368 | u64 timer_slack_ns; |
1369 | u64 default_timer_slack_ns; |
1370 | |
1371 | #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) |
1372 | unsigned int kasan_depth; |
1373 | #endif |
1374 | |
1375 | #ifdef CONFIG_KCSAN |
1376 | struct kcsan_ctx kcsan_ctx; |
1377 | #ifdef CONFIG_TRACE_IRQFLAGS |
1378 | struct irqtrace_events kcsan_save_irqtrace; |
1379 | #endif |
1380 | #ifdef CONFIG_KCSAN_WEAK_MEMORY |
1381 | int kcsan_stack_depth; |
1382 | #endif |
1383 | #endif |
1384 | |
1385 | #ifdef CONFIG_KMSAN |
1386 | struct kmsan_ctx kmsan_ctx; |
1387 | #endif |
1388 | |
1389 | #if IS_ENABLED(CONFIG_KUNIT) |
1390 | struct kunit *kunit_test; |
1391 | #endif |
1392 | |
1393 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1394 | /* Index of current stored address in ret_stack: */ |
1395 | int curr_ret_stack; |
1396 | int curr_ret_depth; |
1397 | |
1398 | /* Stack of return addresses for return function tracing: */ |
1399 | struct ftrace_ret_stack *ret_stack; |
1400 | |
1401 | /* Timestamp for last schedule: */ |
1402 | unsigned long long ftrace_timestamp; |
1403 | |
1404 | /* |
1405 | * Number of functions that haven't been traced |
1406 | * because of depth overrun: |
1407 | */ |
1408 | atomic_t trace_overrun; |
1409 | |
1410 | /* Pause tracing: */ |
1411 | atomic_t tracing_graph_pause; |
1412 | #endif |
1413 | |
1414 | #ifdef CONFIG_TRACING |
1415 | /* Bitmask and counter of trace recursion: */ |
1416 | unsigned long trace_recursion; |
1417 | #endif /* CONFIG_TRACING */ |
1418 | |
1419 | #ifdef CONFIG_KCOV |
1420 | /* See kernel/kcov.c for more details. */ |
1421 | |
1422 | /* Coverage collection mode enabled for this task (0 if disabled): */ |
1423 | unsigned int kcov_mode; |
1424 | |
1425 | /* Size of the kcov_area: */ |
1426 | unsigned int kcov_size; |
1427 | |
1428 | /* Buffer for coverage collection: */ |
1429 | void *kcov_area; |
1430 | |
1431 | /* KCOV descriptor wired with this task or NULL: */ |
1432 | struct kcov *kcov; |
1433 | |
1434 | /* KCOV common handle for remote coverage collection: */ |
1435 | u64 kcov_handle; |
1436 | |
1437 | /* KCOV sequence number: */ |
1438 | int kcov_sequence; |
1439 | |
1440 | /* Collect coverage from softirq context: */ |
1441 | unsigned int kcov_softirq; |
1442 | #endif |
1443 | |
1444 | #ifdef CONFIG_MEMCG |
1445 | struct mem_cgroup *memcg_in_oom; |
1446 | gfp_t memcg_oom_gfp_mask; |
1447 | int memcg_oom_order; |
1448 | |
1449 | /* Number of pages to reclaim on returning to userland: */ |
1450 | unsigned int memcg_nr_pages_over_high; |
1451 | |
1452 | /* Used by memcontrol for targeted memcg charge: */ |
1453 | struct mem_cgroup *active_memcg; |
1454 | #endif |
1455 | |
1456 | #ifdef CONFIG_MEMCG_KMEM |
1457 | struct obj_cgroup *objcg; |
1458 | #endif |
1459 | |
1460 | #ifdef CONFIG_BLK_CGROUP |
1461 | struct gendisk *throttle_disk; |
1462 | #endif |
1463 | |
1464 | #ifdef CONFIG_UPROBES |
1465 | struct uprobe_task *utask; |
1466 | #endif |
1467 | #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) |
1468 | unsigned int sequential_io; |
1469 | unsigned int sequential_io_avg; |
1470 | #endif |
1471 | struct kmap_ctrl kmap_ctrl; |
1472 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
1473 | unsigned long task_state_change; |
1474 | # ifdef CONFIG_PREEMPT_RT |
1475 | unsigned long saved_state_change; |
1476 | # endif |
1477 | #endif |
1478 | struct rcu_head rcu; |
1479 | refcount_t rcu_users; |
1480 | int pagefault_disabled; |
1481 | #ifdef CONFIG_MMU |
1482 | struct task_struct *oom_reaper_list; |
1483 | struct timer_list oom_reaper_timer; |
1484 | #endif |
1485 | #ifdef CONFIG_VMAP_STACK |
1486 | struct vm_struct *stack_vm_area; |
1487 | #endif |
1488 | #ifdef CONFIG_THREAD_INFO_IN_TASK |
1489 | /* A live task holds one reference: */ |
1490 | refcount_t stack_refcount; |
1491 | #endif |
1492 | #ifdef CONFIG_LIVEPATCH |
1493 | int patch_state; |
1494 | #endif |
1495 | #ifdef CONFIG_SECURITY |
1496 | /* Used by LSM modules for access restriction: */ |
1497 | void *security; |
1498 | #endif |
1499 | #ifdef CONFIG_BPF_SYSCALL |
1500 | /* Used by BPF task local storage */ |
1501 | struct bpf_local_storage __rcu *bpf_storage; |
1502 | /* Used for BPF run context */ |
1503 | struct bpf_run_ctx *bpf_ctx; |
1504 | #endif |
1505 | |
1506 | #ifdef CONFIG_GCC_PLUGIN_STACKLEAK |
1507 | unsigned long lowest_stack; |
1508 | unsigned long prev_lowest_stack; |
1509 | #endif |
1510 | |
1511 | #ifdef CONFIG_X86_MCE |
1512 | void __user *mce_vaddr; |
1513 | __u64 mce_kflags; |
1514 | u64 mce_addr; |
1515 | __u64 mce_ripv : 1, |
1516 | mce_whole_page : 1, |
1517 | __mce_reserved : 62; |
1518 | struct callback_head mce_kill_me; |
1519 | int mce_count; |
1520 | #endif |
1521 | |
1522 | #ifdef CONFIG_KRETPROBES |
1523 | struct llist_head kretprobe_instances; |
1524 | #endif |
1525 | #ifdef CONFIG_RETHOOK |
1526 | struct llist_head rethooks; |
1527 | #endif |
1528 | |
1529 | #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH |
1530 | /* |
1531 | * If L1D flush is supported on mm context switch |
1532 | * then we use this callback head to queue kill work |
1533 | * to kill tasks that are not running on SMT disabled |
1534 | * cores |
1535 | */ |
1536 | struct callback_head l1d_flush_kill; |
1537 | #endif |
1538 | |
1539 | #ifdef CONFIG_RV |
1540 | /* |
	 * Per-task RV monitor. Currently fixed at RV_PER_TASK_MONITORS.
1542 | * If we find justification for more monitors, we can think |
1543 | * about adding more or developing a dynamic method. So far, |
1544 | * none of these are justified. |
1545 | */ |
1546 | union rv_task_monitor rv[RV_PER_TASK_MONITORS]; |
1547 | #endif |
1548 | |
1549 | #ifdef CONFIG_USER_EVENTS |
1550 | struct user_event_mm *user_event_mm; |
1551 | #endif |
1552 | |
1553 | /* |
1554 | * New fields for task_struct should be added above here, so that |
1555 | * they are included in the randomized portion of task_struct. |
1556 | */ |
1557 | randomized_struct_fields_end |
1558 | |
1559 | /* CPU-specific state of this task: */ |
1560 | struct thread_struct thread; |
1561 | |
1562 | /* |
1563 | * WARNING: on x86, 'thread_struct' contains a variable-sized |
1564 | * structure. It *MUST* be at the end of 'task_struct'. |
1565 | * |
1566 | * Do not put anything below here! |
1567 | */ |
1568 | }; |
1569 | |
1570 | #define TASK_REPORT_IDLE (TASK_REPORT + 1) |
1571 | #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) |
1572 | |
1573 | static inline unsigned int __task_state_index(unsigned int tsk_state, |
1574 | unsigned int tsk_exit_state) |
1575 | { |
1576 | unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; |
1577 | |
1578 | BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); |
1579 | |
1580 | if ((tsk_state & TASK_IDLE) == TASK_IDLE) |
1581 | state = TASK_REPORT_IDLE; |
1582 | |
1583 | /* |
1584 | * We're lying here, but rather than expose a completely new task state |
1585 | * to userspace, we can make this appear as if the task has gone through |
1586 | * a regular rt_mutex_lock() call. |
1587 | */ |
1588 | if (tsk_state & TASK_RTLOCK_WAIT) |
1589 | state = TASK_UNINTERRUPTIBLE; |
1590 | |
	return fls(state);
1592 | } |
1593 | |
1594 | static inline unsigned int task_state_index(struct task_struct *tsk) |
1595 | { |
	return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
1597 | } |
1598 | |
1599 | static inline char task_index_to_char(unsigned int state) |
1600 | { |
	static const char state_char[] = "RSDTtXZPI";
1602 | |
1603 | BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); |
1604 | |
1605 | return state_char[state]; |
1606 | } |
1607 | |
1608 | static inline char task_state_to_char(struct task_struct *tsk) |
1609 | { |
	return task_index_to_char(task_state_index(tsk));
1611 | } |
1612 | |
1613 | extern struct pid *cad_pid; |
1614 | |
1615 | /* |
1616 | * Per process flags |
1617 | */ |
1618 | #define PF_VCPU 0x00000001 /* I'm a virtual CPU */ |
1619 | #define PF_IDLE 0x00000002 /* I am an IDLE thread */ |
1620 | #define PF_EXITING 0x00000004 /* Getting shut down */ |
1621 | #define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */ |
1622 | #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ |
1623 | #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ |
1624 | #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ |
1625 | #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ |
1626 | #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ |
1627 | #define PF_DUMPCORE 0x00000200 /* Dumped core */ |
1628 | #define PF_SIGNALED 0x00000400 /* Killed by a signal */ |
1629 | #define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ |
1630 | #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ |
1631 | #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ |
1632 | #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ |
1633 | #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ |
1634 | #define PF__HOLE__00010000 0x00010000 |
1635 | #define PF_KSWAPD 0x00020000 /* I am kswapd */ |
1636 | #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ |
1637 | #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ |
1638 | #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, |
1639 | * I am cleaning dirty pages from some other bdi. */ |
1640 | #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ |
1641 | #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ |
1642 | #define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ |
1643 | #define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ |
1644 | #define PF__HOLE__02000000 0x02000000 |
1645 | #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ |
1646 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ |
1647 | #define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. |
1648 | * See memalloc_pin_save() */ |
1649 | #define PF_BLOCK_TS 0x20000000 /* block plug has a timestamp that needs updating */ |
1650 | #define PF__HOLE__40000000 0x40000000 |
1651 | #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ |
1652 | |
1653 | /* |
1654 | * Only the _current_ task can read/write tsk->flags, but other |
1655 | * tasks can access tsk->flags read-only, for example |
1656 | * with tsk_used_math() (like during threaded core dumping). |
1657 | * There is, however, an exception to this rule during ptrace |
1658 | * or during fork: the ptracer task is allowed to write to the |
1659 | * child->flags of its traced child (same goes for fork, the parent |
1660 | * can write to child->flags), because we're guaranteed the |
1661 | * child is not running and therefore not changing child->flags |
1662 | * at the same time the parent does. |
1663 | */ |
1664 | #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) |
1665 | #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) |
1666 | #define clear_used_math() clear_stopped_child_used_math(current) |
1667 | #define set_used_math() set_stopped_child_used_math(current) |
1668 | |
1669 | #define conditional_stopped_child_used_math(condition, child) \ |
1670 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) |
1671 | |
1672 | #define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) |
1673 | |
1674 | #define copy_to_stopped_child_used_math(child) \ |
1675 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) |
1676 | |
1677 | /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ |
1678 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) |
1679 | #define used_math() tsk_used_math(current) |
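
/*
 * Example (illustrative sketch): since tsk_used_math() returns either 0 or
 * PF_USED_MATH, treat it as a boolean and never compare it against 1;
 * fpu_init_state_for() is a hypothetical helper:
 *
 *	if (!tsk_used_math(current))
 *		fpu_init_state_for(current);
 */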
1680 | |
1681 | static __always_inline bool is_percpu_thread(void) |
1682 | { |
1683 | #ifdef CONFIG_SMP |
1684 | return (current->flags & PF_NO_SETAFFINITY) && |
1685 | (current->nr_cpus_allowed == 1); |
1686 | #else |
1687 | return true; |
1688 | #endif |
1689 | } |
1690 | |
1691 | /* Per-process atomic flags. */ |
1692 | #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ |
1693 | #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ |
1694 | #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ |
1695 | #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ |
1696 | #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled */ |
1697 | #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ |
1698 | #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ |
1699 | #define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */ |
1700 | |
1701 | #define TASK_PFA_TEST(name, func) \ |
1702 | static inline bool task_##func(struct task_struct *p) \ |
1703 | { return test_bit(PFA_##name, &p->atomic_flags); } |
1704 | |
1705 | #define TASK_PFA_SET(name, func) \ |
1706 | static inline void task_set_##func(struct task_struct *p) \ |
1707 | { set_bit(PFA_##name, &p->atomic_flags); } |
1708 | |
1709 | #define TASK_PFA_CLEAR(name, func) \ |
1710 | static inline void task_clear_##func(struct task_struct *p) \ |
1711 | { clear_bit(PFA_##name, &p->atomic_flags); } |
1712 | |
1713 | TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) |
1714 | TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) |
1715 | |
1716 | TASK_PFA_TEST(SPREAD_PAGE, spread_page) |
1717 | TASK_PFA_SET(SPREAD_PAGE, spread_page) |
1718 | TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) |
1719 | |
1720 | TASK_PFA_TEST(SPREAD_SLAB, spread_slab) |
1721 | TASK_PFA_SET(SPREAD_SLAB, spread_slab) |
1722 | TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) |
1723 | |
1724 | TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) |
1725 | TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) |
1726 | TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) |
1727 | |
1728 | TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) |
1729 | TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) |
1730 | TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) |
1731 | |
1732 | TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) |
1733 | TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) |
1734 | |
1735 | TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) |
1736 | TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) |
1737 | TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) |
1738 | |
1739 | TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) |
1740 | TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) |
1741 | |
1742 | static inline void |
1743 | current_restore_flags(unsigned long orig_flags, unsigned long flags) |
1744 | { |
1745 | current->flags &= ~flags; |
1746 | current->flags |= orig_flags & flags; |
1747 | } |
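
/*
 * Example (illustrative sketch): the save/modify/restore pattern for the PF_*
 * flags above, as used by helpers such as memalloc_noio_save() and
 * memalloc_noio_restore() in <linux/sched/mm.h>:
 *
 *	unsigned int noio_flags = current->flags & PF_MEMALLOC_NOIO;
 *
 *	current->flags |= PF_MEMALLOC_NOIO;
 *	...			(allocations now implicitly honour GFP_NOIO)
 *	current_restore_flags(noio_flags, PF_MEMALLOC_NOIO);
 */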
1748 | |
1749 | extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); |
1750 | extern int task_can_attach(struct task_struct *p); |
1751 | extern int dl_bw_alloc(int cpu, u64 dl_bw); |
1752 | extern void dl_bw_free(int cpu, u64 dl_bw); |
1753 | #ifdef CONFIG_SMP |
1754 | |
1755 | /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ |
1756 | extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); |
1757 | |
1758 | /** |
1759 | * set_cpus_allowed_ptr - set CPU affinity mask of a task |
1760 | * @p: the task |
1761 | * @new_mask: CPU affinity mask |
1762 | * |
1763 | * Return: zero if successful, or a negative error code |
1764 | */ |
1765 | extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); |
1766 | extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); |
1767 | extern void release_user_cpus_ptr(struct task_struct *p); |
1768 | extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); |
1769 | extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); |
1770 | extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); |
1771 | #else |
1772 | static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
1773 | { |
1774 | } |
1775 | static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
1776 | { |
1777 | if (!cpumask_test_cpu(0, new_mask)) |
1778 | return -EINVAL; |
1779 | return 0; |
1780 | } |
1781 | static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) |
1782 | { |
1783 | if (src->user_cpus_ptr) |
1784 | return -EINVAL; |
1785 | return 0; |
1786 | } |
1787 | static inline void release_user_cpus_ptr(struct task_struct *p) |
1788 | { |
1789 | WARN_ON(p->user_cpus_ptr); |
1790 | } |
1791 | |
1792 | static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) |
1793 | { |
1794 | return 0; |
1795 | } |
1796 | #endif |
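
/*
 * Example (illustrative sketch): restricting a newly created kthread to a
 * single CPU before waking it; worker_fn and cpu are stand-ins, and
 * kthread_bind() is the usual convenience wrapper for this pattern:
 *
 *	struct task_struct *tsk = kthread_create(worker_fn, NULL, "worker/%d", cpu);
 *
 *	if (!IS_ERR(tsk)) {
 *		set_cpus_allowed_ptr(tsk, cpumask_of(cpu));
 *		wake_up_process(tsk);
 *	}
 */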
1797 | |
1798 | extern int yield_to(struct task_struct *p, bool preempt); |
1799 | extern void set_user_nice(struct task_struct *p, long nice); |
1800 | extern int task_prio(const struct task_struct *p); |
1801 | |
1802 | /** |
1803 | * task_nice - return the nice value of a given task. |
1804 | * @p: the task in question. |
1805 | * |
1806 | * Return: The nice value [ -20 ... 0 ... 19 ]. |
1807 | */ |
1808 | static inline int task_nice(const struct task_struct *p) |
1809 | { |
1810 | return PRIO_TO_NICE((p)->static_prio); |
1811 | } |
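
/*
 * Example (illustrative sketch): for normal (non-RT) tasks static_prio lies
 * in [MAX_RT_PRIO, MAX_RT_PRIO + 39] = [100, 139] and PRIO_TO_NICE() simply
 * subtracts DEFAULT_PRIO (120), so:
 *
 *	static_prio 100 -> nice -20
 *	static_prio 120 -> nice   0
 *	static_prio 139 -> nice  19
 */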
1812 | |
1813 | extern int can_nice(const struct task_struct *p, const int nice); |
1814 | extern int task_curr(const struct task_struct *p); |
1815 | extern int idle_cpu(int cpu); |
1816 | extern int available_idle_cpu(int cpu); |
1817 | extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); |
1818 | extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); |
1819 | extern void sched_set_fifo(struct task_struct *p); |
1820 | extern void sched_set_fifo_low(struct task_struct *p); |
1821 | extern void sched_set_normal(struct task_struct *p, int nice); |
1822 | extern int sched_setattr(struct task_struct *, const struct sched_attr *); |
1823 | extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); |
1824 | extern struct task_struct *idle_task(int cpu); |
1825 | |
1826 | /** |
1827 | * is_idle_task - is the specified task an idle task? |
1828 | * @p: the task in question. |
1829 | * |
1830 | * Return: 1 if @p is an idle task. 0 otherwise. |
1831 | */ |
1832 | static __always_inline bool is_idle_task(const struct task_struct *p) |
1833 | { |
1834 | return !!(p->flags & PF_IDLE); |
1835 | } |
1836 | |
1837 | extern struct task_struct *curr_task(int cpu); |
1838 | extern void ia64_set_curr_task(int cpu, struct task_struct *p); |
1839 | |
1840 | void yield(void); |
1841 | |
1842 | union thread_union { |
1843 | struct task_struct task; |
1844 | #ifndef CONFIG_THREAD_INFO_IN_TASK |
1845 | struct thread_info thread_info; |
1846 | #endif |
1847 | unsigned long stack[THREAD_SIZE/sizeof(long)]; |
1848 | }; |
1849 | |
1850 | #ifndef CONFIG_THREAD_INFO_IN_TASK |
1851 | extern struct thread_info init_thread_info; |
1852 | #endif |
1853 | |
1854 | extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; |
1855 | |
1856 | #ifdef CONFIG_THREAD_INFO_IN_TASK |
1857 | # define task_thread_info(task) (&(task)->thread_info) |
1858 | #elif !defined(__HAVE_THREAD_FUNCTIONS) |
1859 | # define task_thread_info(task) ((struct thread_info *)(task)->stack) |
1860 | #endif |
1861 | |
1862 | /* |
1863 | * find a task by one of its numerical ids |
1864 | * |
1865 | * find_task_by_pid_ns(): |
1866 | * finds a task by its pid in the specified namespace |
1867 | * find_task_by_vpid(): |
1868 | * finds a task by its virtual pid |
1869 | * |
1870 | * see also find_vpid() etc in include/linux/pid.h |
1871 | */ |
1872 | |
1873 | extern struct task_struct *find_task_by_vpid(pid_t nr); |
1874 | extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); |
1875 | |
1876 | /* |
1877 | * find a task by its virtual pid and get the task struct |
1878 | */ |
1879 | extern struct task_struct *find_get_task_by_vpid(pid_t nr); |
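
/*
 * Example (illustrative sketch): find_task_by_vpid() returns an unreferenced
 * pointer and must be called under rcu_read_lock() (or tasklist_lock), while
 * find_get_task_by_vpid() takes a reference that the caller drops with
 * put_task_struct() (declared in <linux/sched/task.h>):
 *
 *	struct task_struct *p = find_get_task_by_vpid(nr);
 *
 *	if (!p)
 *		return -ESRCH;
 *	...
 *	put_task_struct(p);
 */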
1880 | |
1881 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); |
1882 | extern int wake_up_process(struct task_struct *tsk); |
1883 | extern void wake_up_new_task(struct task_struct *tsk); |
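
/*
 * Example (illustrative sketch): the classic sleep/wakeup pairing these
 * helpers serve; 'condition' stands for whatever state the waker establishes
 * before calling wake_up_process():
 *
 *	for (;;) {
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		if (condition)
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 */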
1884 | |
1885 | #ifdef CONFIG_SMP |
1886 | extern void kick_process(struct task_struct *tsk); |
1887 | #else |
1888 | static inline void kick_process(struct task_struct *tsk) { } |
1889 | #endif |
1890 | |
1891 | extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); |
1892 | |
1893 | static inline void set_task_comm(struct task_struct *tsk, const char *from) |
1894 | { |
1895 | __set_task_comm(tsk, from, false); |
1896 | } |
1897 | |
1898 | extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); |
1899 | #define get_task_comm(buf, tsk) ({ \ |
1900 | BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ |
1901 | __get_task_comm(buf, sizeof(buf), tsk); \ |
1902 | }) |
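
/*
 * Example (illustrative sketch): get_task_comm() requires a buffer of exactly
 * TASK_COMM_LEN bytes, which the BUILD_BUG_ON() above enforces at compile
 * time:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	get_task_comm(comm, current);
 *	pr_debug("running in %s\n", comm);
 */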
1903 | |
1904 | #ifdef CONFIG_SMP |
1905 | static __always_inline void scheduler_ipi(void) |
1906 | { |
1907 | /* |
1908 | * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting |
1909 | * TIF_NEED_RESCHED remotely (for the first time) will also send |
1910 | * this IPI. |
1911 | */ |
1912 | preempt_fold_need_resched(); |
1913 | } |
1914 | #else |
1915 | static inline void scheduler_ipi(void) { } |
1916 | #endif |
1917 | |
1918 | extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); |
1919 | |
1920 | /* |
1921 | * Set thread flags in other task's structures. |
1922 | * See asm/thread_info.h for TIF_xxxx flags available: |
1923 | */ |
1924 | static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) |
1925 | { |
1926 | set_ti_thread_flag(task_thread_info(tsk), flag); |
1927 | } |
1928 | |
1929 | static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) |
1930 | { |
1931 | clear_ti_thread_flag(task_thread_info(tsk), flag); |
1932 | } |
1933 | |
1934 | static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, |
1935 | bool value) |
1936 | { |
1937 | update_ti_thread_flag(task_thread_info(tsk), flag, value); |
1938 | } |
1939 | |
1940 | static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) |
1941 | { |
1942 | return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); |
1943 | } |
1944 | |
1945 | static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) |
1946 | { |
1947 | return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); |
1948 | } |
1949 | |
1950 | static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) |
1951 | { |
1952 | return test_ti_thread_flag(task_thread_info(tsk), flag); |
1953 | } |
1954 | |
1955 | static inline void set_tsk_need_resched(struct task_struct *tsk) |
1956 | { |
1957 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
1958 | } |
1959 | |
1960 | static inline void clear_tsk_need_resched(struct task_struct *tsk) |
1961 | { |
1962 | clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
1963 | } |
1964 | |
1965 | static inline int test_tsk_need_resched(struct task_struct *tsk) |
1966 | { |
1967 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); |
1968 | } |
1969 | |
1970 | /* |
1971 | * cond_resched() and cond_resched_lock(): latency reduction via |
1972 | * explicit rescheduling in places that are safe. The return |
1973 | * value indicates whether a reschedule was done in fact. |
1974 | * cond_resched_lock() will drop the spinlock before scheduling and re-acquire it afterwards. |
1975 | */ |
1976 | #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) |
1977 | extern int __cond_resched(void); |
1978 | |
1979 | #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) |
1980 | |
1981 | void sched_dynamic_klp_enable(void); |
1982 | void sched_dynamic_klp_disable(void); |
1983 | |
1984 | DECLARE_STATIC_CALL(cond_resched, __cond_resched); |
1985 | |
1986 | static __always_inline int _cond_resched(void) |
1987 | { |
1988 | return static_call_mod(cond_resched)(); |
1989 | } |
1990 | |
1991 | #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) |
1992 | |
1993 | extern int dynamic_cond_resched(void); |
1994 | |
1995 | static __always_inline int _cond_resched(void) |
1996 | { |
1997 | return dynamic_cond_resched(); |
1998 | } |
1999 | |
2000 | #else /* !CONFIG_PREEMPTION */ |
2001 | |
2002 | static inline int _cond_resched(void) |
2003 | { |
2004 | klp_sched_try_switch(); |
2005 | return __cond_resched(); |
2006 | } |
2007 | |
2008 | #endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ |
2009 | |
2010 | #else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */ |
2011 | |
2012 | static inline int _cond_resched(void) |
2013 | { |
2014 | klp_sched_try_switch(); |
2015 | return 0; |
2016 | } |
2017 | |
2018 | #endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */ |
2019 | |
2020 | #define cond_resched() ({ \ |
2021 | __might_resched(__FILE__, __LINE__, 0); \ |
2022 | _cond_resched(); \ |
2023 | }) |
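
/*
 * Example (illustrative sketch): a long-running loop in process context
 * yielding the CPU on non-preemptible kernels; process_item() and nr_items
 * are stand-ins for the caller's own work:
 *
 *	for (i = 0; i < nr_items; i++) {
 *		process_item(i);
 *		cond_resched();
 *	}
 */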
2024 | |
2025 | extern int __cond_resched_lock(spinlock_t *lock); |
2026 | extern int __cond_resched_rwlock_read(rwlock_t *lock); |
2027 | extern int __cond_resched_rwlock_write(rwlock_t *lock); |
2028 | |
2029 | #define MIGHT_RESCHED_RCU_SHIFT 8 |
2030 | #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) |
2031 | |
2032 | #ifndef CONFIG_PREEMPT_RT |
2033 | /* |
2034 | * Non-RT kernels have an elevated preempt count due to the held lock, |
2035 | * but are not allowed to be inside an RCU read-side critical section. |
2036 | */ |
2037 | # define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET |
2038 | #else |
2039 | /* |
2040 | * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in |
2041 | * cond_resched*lock() has to take that into account because it checks for |
2042 | * preempt_count() and rcu_preempt_depth(). |
2043 | */ |
2044 | # define PREEMPT_LOCK_RESCHED_OFFSETS \ |
2045 | (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) |
2046 | #endif |
2047 | |
2048 | #define cond_resched_lock(lock) ({ \ |
2049 | __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
2050 | __cond_resched_lock(lock); \ |
2051 | }) |
2052 | |
2053 | #define cond_resched_rwlock_read(lock) ({ \ |
2054 | __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
2055 | __cond_resched_rwlock_read(lock); \ |
2056 | }) |
2057 | |
2058 | #define cond_resched_rwlock_write(lock) ({ \ |
2059 | __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
2060 | __cond_resched_rwlock_write(lock); \ |
2061 | }) |
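
/*
 * Example (illustrative sketch): periodically dropping a contended spinlock
 * during a long scan; more_work() and do_one_chunk() are stand-ins, and any
 * state protected only by 'lock' may have changed whenever
 * cond_resched_lock() returns 1:
 *
 *	spin_lock(&lock);
 *	while (more_work()) {
 *		do_one_chunk();
 *		cond_resched_lock(&lock);
 *	}
 *	spin_unlock(&lock);
 */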
2062 | |
2063 | #ifdef CONFIG_PREEMPT_DYNAMIC |
2064 | |
2065 | extern bool preempt_model_none(void); |
2066 | extern bool preempt_model_voluntary(void); |
2067 | extern bool preempt_model_full(void); |
2068 | |
2069 | #else |
2070 | |
2071 | static inline bool preempt_model_none(void) |
2072 | { |
2073 | return IS_ENABLED(CONFIG_PREEMPT_NONE); |
2074 | } |
2075 | static inline bool preempt_model_voluntary(void) |
2076 | { |
2077 | return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); |
2078 | } |
2079 | static inline bool preempt_model_full(void) |
2080 | { |
2081 | return IS_ENABLED(CONFIG_PREEMPT); |
2082 | } |
2083 | |
2084 | #endif |
2085 | |
2086 | static inline bool preempt_model_rt(void) |
2087 | { |
2088 | return IS_ENABLED(CONFIG_PREEMPT_RT); |
2089 | } |
2090 | |
2091 | /* |
2092 | * Does the preemption model allow non-cooperative preemption? |
2093 | * |
2094 | * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with |
2095 | * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the |
2096 | * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the |
2097 | * PREEMPT_NONE model. |
2098 | */ |
2099 | static inline bool preempt_model_preemptible(void) |
2100 | { |
2101 | return preempt_model_full() || preempt_model_rt(); |
2102 | } |
2103 | |
2104 | static __always_inline bool need_resched(void) |
2105 | { |
2106 | return unlikely(tif_need_resched()); |
2107 | } |
2108 | |
2109 | /* |
2110 | * Wrappers for p->thread_info->cpu access. No-op on UP. |
2111 | */ |
2112 | #ifdef CONFIG_SMP |
2113 | |
2114 | static inline unsigned int task_cpu(const struct task_struct *p) |
2115 | { |
2116 | return READ_ONCE(task_thread_info(p)->cpu); |
2117 | } |
2118 | |
2119 | extern void set_task_cpu(struct task_struct *p, unsigned int cpu); |
2120 | |
2121 | #else |
2122 | |
2123 | static inline unsigned int task_cpu(const struct task_struct *p) |
2124 | { |
2125 | return 0; |
2126 | } |
2127 | |
2128 | static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) |
2129 | { |
2130 | } |
2131 | |
2132 | #endif /* CONFIG_SMP */ |
2133 | |
2134 | extern bool sched_task_on_rq(struct task_struct *p); |
2135 | extern unsigned long get_wchan(struct task_struct *p); |
2136 | extern struct task_struct *cpu_curr_snapshot(int cpu); |
2137 | |
2138 | #include <linux/spinlock.h> |
2139 | |
2140 | /* |
2141 | * In order to reduce various lock holder preemption latencies, provide an |
2142 | * interface to check whether a vCPU is currently running. |
2143 | * |
2144 | * This allows us to terminate optimistic spin loops and block, analogous to |
2145 | * the native optimistic spin heuristic of testing if the lock owner task is |
2146 | * running or not. |
2147 | */ |
2148 | #ifndef vcpu_is_preempted |
2149 | static inline bool vcpu_is_preempted(int cpu) |
2150 | { |
2151 | return false; |
2152 | } |
2153 | #endif |
2154 | |
2155 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); |
2156 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); |
2157 | |
2158 | #ifndef TASK_SIZE_OF |
2159 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2160 | #endif |
2161 | |
2162 | #ifdef CONFIG_SMP |
2163 | static inline bool owner_on_cpu(struct task_struct *owner) |
2164 | { |
2165 | /* |
2166 | * Because of the lock holder preemption issue, skip spinning if the |
2167 | * task is not on a CPU or its CPU has been preempted. |
2168 | */ |
2169 | return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); |
2170 | } |
2171 | |
2172 | /* Returns effective CPU energy utilization, as seen by the scheduler */ |
2173 | unsigned long sched_cpu_util(int cpu); |
2174 | #endif /* CONFIG_SMP */ |
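
/*
 * Example (illustrative sketch): the optimistic-spin shape used by mutex and
 * rwsem slow paths; 'lock->owner' stands for the primitive's owner field, and
 * spinning only pays off while the owner task is running on a CPU that has
 * not itself been preempted (see vcpu_is_preempted() above):
 *
 *	while (owner == READ_ONCE(lock->owner)) {
 *		if (!owner_on_cpu(owner) || need_resched())
 *			break;
 *		cpu_relax();
 *	}
 */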
2175 | |
2176 | #ifdef CONFIG_SCHED_CORE |
2177 | extern void sched_core_free(struct task_struct *tsk); |
2178 | extern void sched_core_fork(struct task_struct *p); |
2179 | extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, |
2180 | unsigned long uaddr); |
2181 | extern int sched_core_idle_cpu(int cpu); |
2182 | #else |
2183 | static inline void sched_core_free(struct task_struct *tsk) { } |
2184 | static inline void sched_core_fork(struct task_struct *p) { } |
2185 | static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } |
2186 | #endif |
2187 | |
2188 | extern void sched_set_stop_task(int cpu, struct task_struct *stop); |
2189 | |
2190 | #endif |
2191 | |