1 /* Copyright (C) 2005-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
33
34 #ifdef LIBGOMP_USE_PTHREADS
35 pthread_attr_t gomp_thread_attr;
36
37 /* This key is for the thread destructor. */
38 pthread_key_t gomp_thread_destructor;
39
40
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread struct gomp_thread gomp_tls_data;
44 #else
45 pthread_key_t gomp_tls_key;
46 #endif
47
48
49 /* This structure is used to communicate across pthread_create. */
50
51 struct gomp_thread_start_data
52 {
53 void (*fn) (void *);
54 void *fn_data;
55 struct gomp_team_state ts;
56 struct gomp_task *task;
57 struct gomp_thread_pool *thread_pool;
58 unsigned int place;
59 bool nested;
60 pthread_t handle;
61 };
62
63
64 /* This function is a pthread_create entry point. This contains the idle
65 loop in which a thread waits to be called up to become part of a team. */
66
67 static void *
68 gomp_thread_start (void *xdata)
69 {
70 struct gomp_thread_start_data *data = xdata;
71 struct gomp_thread *thr;
72 struct gomp_thread_pool *pool;
73 void (*local_fn) (void *);
74 void *local_data;
75
76 #if defined HAVE_TLS || defined USE_EMUTLS
77 thr = &gomp_tls_data;
78 #else
79 struct gomp_thread local_thr;
80 thr = &local_thr;
81 pthread_setspecific (gomp_tls_key, thr);
82 #endif
83 gomp_sem_init (&thr->release, 0);
84
85 /* Extract what we need from data. */
86 local_fn = data->fn;
87 local_data = data->fn_data;
88 thr->thread_pool = data->thread_pool;
89 thr->ts = data->ts;
90 thr->task = data->task;
91 thr->place = data->place;
92 #ifdef GOMP_NEEDS_THREAD_HANDLE
93 thr->handle = data->handle;
94 #endif
95
96 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
97
98 /* Make thread pool local. */
99 pool = thr->thread_pool;
100
101 if (data->nested)
102 {
103 struct gomp_team *team = thr->ts.team;
104 struct gomp_task *task = thr->task;
105
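/* A nested team always gets freshly created threads: sync with the
   team startup barrier, run the parallel region, take part in the
   team-end barriers, then fall through and let the thread exit. */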
106 gomp_barrier_wait (&team->barrier);
107
108 local_fn (local_data);
109 gomp_team_barrier_wait_final (&team->barrier);
110 gomp_finish_task (task);
111 gomp_barrier_wait_last (&team->barrier);
112 }
113 else
114 {
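/* Threads in the pool park on pool->threads_dock between parallel
   regions. Each time the dock barrier releases, the master thread has
   already stored the next region's fn/data into thr->fn and thr->data;
   a NULL fn means this thread is no longer needed and should exit. */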
115 pool->threads[thr->ts.team_id] = thr;
116
117 gomp_simple_barrier_wait (&pool->threads_dock);
118 do
119 {
120 struct gomp_team *team = thr->ts.team;
121 struct gomp_task *task = thr->task;
122
123 local_fn (local_data);
124 gomp_team_barrier_wait_final (&team->barrier);
125 gomp_finish_task (task);
126
127 gomp_simple_barrier_wait (&pool->threads_dock);
128
129 local_fn = thr->fn;
130 local_data = thr->data;
131 thr->fn = NULL;
132 }
133 while (local_fn);
134 }
135
136 gomp_sem_destroy (&thr->release);
137 pthread_detach (pthread_self ());
138 thr->thread_pool = NULL;
139 thr->task = NULL;
140 return NULL;
141 }
142 #endif
143
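/* Return the team cached in the current thread's pool, provided the
   thread is not inside a team and the cached team was created for
   exactly NTHREADS threads; otherwise return NULL so that a new team
   is allocated. */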
144 static inline struct gomp_team *
145 get_last_team (unsigned nthreads)
146 {
147 struct gomp_thread *thr = gomp_thread ();
148 if (thr->ts.team == NULL)
149 {
150 struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
151 struct gomp_team *last_team = pool->last_team;
152 if (last_team != NULL && last_team->nthreads == nthreads)
153 {
154 pool->last_team = NULL;
155 return last_team;
156 }
157 }
158 return NULL;
159 }
160
161 /* Create a new team data structure. */
162
163 struct gomp_team *
164 gomp_new_team (unsigned nthreads)
165 {
166 struct gomp_team *team;
167 int i;
168
169 team = get_last_team (nthreads);
170 if (team == NULL)
171 {
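/* The team structure is followed in memory by NTHREADS implicit_task
   entries and then NTHREADS ordered_release pointers (set up below). */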
172 size_t extra = sizeof (team->ordered_release[0])
173 + sizeof (team->implicit_task[0]);
174 team = team_malloc (sizeof (*team) + nthreads * extra);
175
176 #ifndef HAVE_SYNC_BUILTINS
177 gomp_mutex_init (&team->work_share_list_free_lock);
178 #endif
179 gomp_barrier_init (&team->barrier, nthreads);
180 gomp_mutex_init (&team->task_lock);
181
182 team->nthreads = nthreads;
183 }
184
185 team->work_share_chunk = 8;
186 #ifdef HAVE_SYNC_BUILTINS
187 team->single_count = 0;
188 #endif
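/* work_shares[0] holds the team's first work share; the remaining
   embedded entries (1 through 7) are chained into the allocation list
   for reuse by subsequent work-sharing constructs. */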
189 team->work_shares_to_free = &team->work_shares[0];
190 gomp_init_work_share (&team->work_shares[0], 0, nthreads);
191 team->work_shares[0].next_alloc = NULL;
192 team->work_share_list_free = NULL;
193 team->work_share_list_alloc = &team->work_shares[1];
194 for (i = 1; i < 7; i++)
195 team->work_shares[i].next_free = &team->work_shares[i + 1];
196 team->work_shares[i].next_free = NULL;
197
198 gomp_sem_init (&team->master_release, 0);
199 team->ordered_release = (void *) &team->implicit_task[nthreads];
200 team->ordered_release[0] = &team->master_release;
201
202 priority_queue_init (&team->task_queue);
203 team->task_count = 0;
204 team->task_queued_count = 0;
205 team->task_running_count = 0;
206 team->work_share_cancelled = 0;
207 team->team_cancelled = 0;
208
209 priority_queue_init (&team->task_detach_queue);
210 team->task_detach_count = 0;
211
212 return team;
213 }
214
215
216 /* Free a team data structure. */
217
218 static void
219 free_team (struct gomp_team *team)
220 {
221 #ifndef HAVE_SYNC_BUILTINS
222 gomp_mutex_destroy (&team->work_share_list_free_lock);
223 #endif
224 gomp_barrier_destroy (&team->barrier);
225 gomp_mutex_destroy (&team->task_lock);
226 priority_queue_free (&team->task_queue);
227 priority_queue_free (&team->task_detach_queue);
228 team_free (team);
229 }
230
231 static void
232 gomp_free_pool_helper (void *thread_pool)
233 {
234 struct gomp_thread *thr = gomp_thread ();
235 struct gomp_thread_pool *pool
236 = (struct gomp_thread_pool *) thread_pool;
237 gomp_simple_barrier_wait_last (&pool->threads_dock);
238 gomp_sem_destroy (&thr->release);
239 thr->thread_pool = NULL;
240 thr->task = NULL;
241 #ifdef LIBGOMP_USE_PTHREADS
242 pthread_detach (pthread_self ());
243 pthread_exit (NULL);
244 #elif defined(__nvptx__)
245 asm ("exit;");
246 #elif defined(__AMDGCN__)
247 asm ("s_dcache_wb\n\t"
248 "s_endpgm");
249 #else
250 #error gomp_free_pool_helper must terminate the thread
251 #endif
252 }
253
254 /* Free a thread pool and release its threads. */
255
256 void
257 gomp_free_thread (void *arg __attribute__((unused)))
258 {
259 struct gomp_thread *thr = gomp_thread ();
260 struct gomp_thread_pool *pool = thr->thread_pool;
261 if (pool)
262 {
263 if (pool->threads_used > 0)
264 {
265 int i;
266 for (i = 1; i < pool->threads_used; i++)
267 {
268 struct gomp_thread *nthr = pool->threads[i];
269 nthr->fn = gomp_free_pool_helper;
270 nthr->data = pool;
271 }
272 /* This barrier undocks threads docked on pool->threads_dock. */
273 gomp_simple_barrier_wait (&pool->threads_dock);
274 /* And this waits until all threads have called gomp_simple_barrier_wait_last
275 in gomp_free_pool_helper. */
276 gomp_simple_barrier_wait (&pool->threads_dock);
277 /* Now it is safe to destroy the barrier and free the pool. */
278 gomp_simple_barrier_destroy (&pool->threads_dock);
279
280 #ifdef HAVE_SYNC_BUILTINS
281 __sync_fetch_and_add (&gomp_managed_threads,
282 1L - pool->threads_used);
283 #else
284 gomp_mutex_lock (&gomp_managed_threads_lock);
285 gomp_managed_threads -= pool->threads_used - 1L;
286 gomp_mutex_unlock (&gomp_managed_threads_lock);
287 #endif
288 }
289 if (pool->last_team)
290 free_team (pool->last_team);
291 #ifndef __nvptx__
292 team_free (pool->threads);
293 team_free (pool);
294 #endif
295 thr->thread_pool = NULL;
296 }
297 if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
298 gomp_team_end ();
299 if (thr->task != NULL)
300 {
301 struct gomp_task *task = thr->task;
302 gomp_end_task ();
303 free (task);
304 }
305 }
306
307 /* Launch a team. */
308
309 #ifdef LIBGOMP_USE_PTHREADS
310 void
311 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
312 unsigned flags, struct gomp_team *team,
313 struct gomp_taskgroup *taskgroup)
314 {
315 struct gomp_thread_start_data *start_data;
316 struct gomp_thread *thr, *nthr;
317 struct gomp_task *task;
318 struct gomp_task_icv *icv;
319 bool nested;
320 struct gomp_thread_pool *pool;
321 unsigned i, n, old_threads_used = 0;
322 pthread_attr_t thread_attr, *attr;
323 unsigned long nthreads_var;
324 char bind, bind_var;
325 unsigned int s = 0, rest = 0, p = 0, k = 0;
326 unsigned int affinity_count = 0;
327 struct gomp_thread **affinity_thr = NULL;
328 bool force_display = false;
329
330 thr = gomp_thread ();
331 nested = thr->ts.level;
332 pool = thr->thread_pool;
333 task = thr->task;
334 icv = task ? &task->icv : &gomp_global_icv;
335 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
336 {
337 gomp_init_affinity ();
338 if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
339 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
340 thr->place);
341 }
342
343 /* Always save the previous state, even if this isn't a nested team.
344 In particular, we should save any work share state from an outer
345 orphaned work share construct. */
346 team->prev_ts = thr->ts;
347
348 thr->ts.team = team;
349 thr->ts.team_id = 0;
350 ++thr->ts.level;
351 if (nthreads > 1)
352 ++thr->ts.active_level;
353 thr->ts.work_share = &team->work_shares[0];
354 thr->ts.last_work_share = NULL;
355 #ifdef HAVE_SYNC_BUILTINS
356 thr->ts.single_count = 0;
357 #endif
358 thr->ts.static_trip = 0;
359 thr->task = &team->implicit_task[0];
360 #ifdef GOMP_NEEDS_THREAD_HANDLE
361 thr->handle = pthread_self ();
362 #endif
363 nthreads_var = icv->nthreads_var;
364 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
365 && thr->ts.level < gomp_nthreads_var_list_len)
366 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
367 bind_var = icv->bind_var;
368 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
369 bind_var = flags & 7;
370 bind = bind_var;
371 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
372 && thr->ts.level < gomp_bind_var_list_len)
373 bind_var = gomp_bind_var_list[thr->ts.level];
374 gomp_init_task (thr->task, task, icv);
375 thr->task->taskgroup = taskgroup;
376 team->implicit_task[0].icv.nthreads_var = nthreads_var;
377 team->implicit_task[0].icv.bind_var = bind_var;
378
379 if (nthreads == 1)
380 return;
381
382 i = 1;
383
384 if (__builtin_expect (gomp_places_list != NULL, 0))
385 {
386 /* Depending on the chosen proc_bind model, set the subpartition
387 for the master thread and initialize helper variables
388 P and optionally S, K and/or REST used by later place
389 computation for each additional thread. */
390 p = thr->place - 1;
391 switch (bind)
392 {
393 case omp_proc_bind_true:
394 case omp_proc_bind_close:
395 if (nthreads > thr->ts.place_partition_len)
396 {
397 /* T > P. S threads will be placed in each place,
398 and the final REST threads placed one by one
399 into the already occupied places. */
400 s = nthreads / thr->ts.place_partition_len;
401 rest = nthreads % thr->ts.place_partition_len;
402 }
403 else
404 s = 1;
405 k = 1;
406 break;
407 case omp_proc_bind_master:
408 /* Each thread will be bound to master's place. */
409 break;
410 case omp_proc_bind_spread:
411 if (nthreads <= thr->ts.place_partition_len)
412 {
413 /* T <= P. Each subpartition will have in between s
414 and s+1 places (subpartitions starting at or
415 after rest will have s places, earlier s+1 places),
416 each thread will be bound to the first place in
417 its subpartition (except for the master thread
418 that can be bound to another place in its
419 subpartition). */
420 s = thr->ts.place_partition_len / nthreads;
421 rest = thr->ts.place_partition_len % nthreads;
422 rest = (s + 1) * rest + thr->ts.place_partition_off;
423 if (p < rest)
424 {
425 p -= (p - thr->ts.place_partition_off) % (s + 1);
426 thr->ts.place_partition_len = s + 1;
427 }
428 else
429 {
430 p -= (p - rest) % s;
431 thr->ts.place_partition_len = s;
432 }
433 thr->ts.place_partition_off = p;
434 }
435 else
436 {
437 /* T > P. Each subpartition will have just a single
438 place and we'll place between s and s+1
439 threads into each subpartition. */
440 s = nthreads / thr->ts.place_partition_len;
441 rest = nthreads % thr->ts.place_partition_len;
442 thr->ts.place_partition_off = p;
443 thr->ts.place_partition_len = 1;
444 k = 1;
445 }
446 break;
447 }
448 }
449 else
450 bind = omp_proc_bind_false;
451
452 /* We only allow the reuse of idle threads for non-nested PARALLEL
453 regions. This appears to be implied by the semantics of
454 threadprivate variables, but perhaps that's reading too much into
455 things. Certainly it does prevent any locking problems, since
456 only the initial program thread will modify gomp_threads. */
457 if (!nested)
458 {
459 old_threads_used = pool->threads_used;
460
461 if (nthreads <= old_threads_used)
462 n = nthreads;
463 else if (old_threads_used == 0)
464 {
465 n = 0;
466 gomp_simple_barrier_init (&pool->threads_dock, nthreads);
467 }
468 else
469 {
470 n = old_threads_used;
471
472 /* Increase the barrier threshold to make sure all new
473 threads arrive before the team is released. */
474 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
475 }
476
477 /* Not true yet, but soon will be. We're going to release all
478 threads from the dock, and those that aren't part of the
479 team will exit. */
480 pool->threads_used = nthreads;
481
482 /* If necessary, expand the size of the gomp_threads array. It is
483 expected that changes in the number of threads are rare, thus we
484 make no effort to expand gomp_threads_size geometrically. */
485 if (nthreads >= pool->threads_size)
486 {
487 pool->threads_size = nthreads + 1;
488 pool->threads
489 = gomp_realloc (pool->threads,
490 pool->threads_size
491 * sizeof (struct gomp_thread *));
492 /* Add current (master) thread to threads[]. */
493 pool->threads[0] = thr;
494 }
495
496 /* Release existing idle threads. */
497 for (; i < n; ++i)
498 {
499 unsigned int place_partition_off = thr->ts.place_partition_off;
500 unsigned int place_partition_len = thr->ts.place_partition_len;
501 unsigned int place = 0;
502 if (__builtin_expect (gomp_places_list != NULL, 0))
503 {
504 switch (bind)
505 {
506 case omp_proc_bind_true:
507 case omp_proc_bind_close:
508 if (k == s)
509 {
510 ++p;
511 if (p == (team->prev_ts.place_partition_off
512 + team->prev_ts.place_partition_len))
513 p = team->prev_ts.place_partition_off;
514 k = 1;
515 if (i == nthreads - rest)
516 s = 1;
517 }
518 else
519 ++k;
520 break;
521 case omp_proc_bind_master:
522 break;
523 case omp_proc_bind_spread:
524 if (k == 0)
525 {
526 /* T <= P. */
527 if (p < rest)
528 p += s + 1;
529 else
530 p += s;
531 if (p == (team->prev_ts.place_partition_off
532 + team->prev_ts.place_partition_len))
533 p = team->prev_ts.place_partition_off;
534 place_partition_off = p;
535 if (p < rest)
536 place_partition_len = s + 1;
537 else
538 place_partition_len = s;
539 }
540 else
541 {
542 /* T > P. */
543 if (k == s)
544 {
545 ++p;
546 if (p == (team->prev_ts.place_partition_off
547 + team->prev_ts.place_partition_len))
548 p = team->prev_ts.place_partition_off;
549 k = 1;
550 if (i == nthreads - rest)
551 s = 1;
552 }
553 else
554 ++k;
555 place_partition_off = p;
556 place_partition_len = 1;
557 }
558 break;
559 }
560 if (affinity_thr != NULL
561 || (bind != omp_proc_bind_true
562 && pool->threads[i]->place != p + 1)
563 || pool->threads[i]->place <= place_partition_off
564 || pool->threads[i]->place > (place_partition_off
565 + place_partition_len))
566 {
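/* Either thread re-sorting has already started, or the pooled thread
   in this slot is not on the place we want. Build (once) an array
   indexed by place within the previous partition, holding the
   remaining pooled threads chained through their data fields, and
   pick a suitably placed thread from it instead. */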
567 unsigned int l;
568 force_display = true;
569 if (affinity_thr == NULL)
570 {
571 unsigned int j;
572
573 if (team->prev_ts.place_partition_len > 64)
574 affinity_thr
575 = gomp_malloc (team->prev_ts.place_partition_len
576 * sizeof (struct gomp_thread *));
577 else
578 affinity_thr
579 = gomp_alloca (team->prev_ts.place_partition_len
580 * sizeof (struct gomp_thread *));
581 memset (affinity_thr, '\0',
582 team->prev_ts.place_partition_len
583 * sizeof (struct gomp_thread *));
584 for (j = i; j < old_threads_used; j++)
585 {
586 if (pool->threads[j]->place
587 > team->prev_ts.place_partition_off
588 && (pool->threads[j]->place
589 <= (team->prev_ts.place_partition_off
590 + team->prev_ts.place_partition_len)))
591 {
592 l = pool->threads[j]->place - 1
593 - team->prev_ts.place_partition_off;
594 pool->threads[j]->data = affinity_thr[l];
595 affinity_thr[l] = pool->threads[j];
596 }
597 pool->threads[j] = NULL;
598 }
599 if (nthreads > old_threads_used)
600 memset (&pool->threads[old_threads_used],
601 '\0', ((nthreads - old_threads_used)
602 * sizeof (struct gomp_thread *)));
603 n = nthreads;
604 affinity_count = old_threads_used - i;
605 }
606 if (affinity_count == 0)
607 break;
608 l = p;
609 if (affinity_thr[l - team->prev_ts.place_partition_off]
610 == NULL)
611 {
612 if (bind != omp_proc_bind_true)
613 continue;
614 for (l = place_partition_off;
615 l < place_partition_off + place_partition_len;
616 l++)
617 if (affinity_thr[l - team->prev_ts.place_partition_off]
618 != NULL)
619 break;
620 if (l == place_partition_off + place_partition_len)
621 continue;
622 }
623 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
624 affinity_thr[l - team->prev_ts.place_partition_off]
625 = (struct gomp_thread *) nthr->data;
626 affinity_count--;
627 pool->threads[i] = nthr;
628 }
629 else
630 nthr = pool->threads[i];
631 place = p + 1;
632 }
633 else
634 nthr = pool->threads[i];
635 nthr->ts.team = team;
636 nthr->ts.work_share = &team->work_shares[0];
637 nthr->ts.last_work_share = NULL;
638 nthr->ts.team_id = i;
639 nthr->ts.level = team->prev_ts.level + 1;
640 nthr->ts.active_level = thr->ts.active_level;
641 nthr->ts.place_partition_off = place_partition_off;
642 nthr->ts.place_partition_len = place_partition_len;
643 nthr->ts.def_allocator = thr->ts.def_allocator;
644 #ifdef HAVE_SYNC_BUILTINS
645 nthr->ts.single_count = 0;
646 #endif
647 nthr->ts.static_trip = 0;
648 nthr->task = &team->implicit_task[i];
649 nthr->place = place;
650 gomp_init_task (nthr->task, task, icv);
651 team->implicit_task[i].icv.nthreads_var = nthreads_var;
652 team->implicit_task[i].icv.bind_var = bind_var;
653 nthr->task->taskgroup = taskgroup;
654 nthr->fn = fn;
655 nthr->data = data;
656 team->ordered_release[i] = &nthr->release;
657 }
658
659 if (__builtin_expect (affinity_thr != NULL, 0))
660 {
661 /* If AFFINITY_THR is non-NULL just because we had to
662 permute some threads in the pool, but we've managed
663 to find exactly as many old threads as we'd find
664 without affinity, we don't need to handle this
665 specially anymore. */
666 if (nthreads <= old_threads_used
667 ? (affinity_count == old_threads_used - nthreads)
668 : (i == old_threads_used))
669 {
670 if (team->prev_ts.place_partition_len > 64)
671 free (affinity_thr);
672 affinity_thr = NULL;
673 affinity_count = 0;
674 }
675 else
676 {
677 i = 1;
678 /* We are going to compute the places/subpartitions
679 again from the beginning. So, we need to reinitialize
680 the variables modified inside the loop by the switch (bind)
681 above, to the state they had after the initial
682 switch (bind). */
683 switch (bind)
684 {
685 case omp_proc_bind_true:
686 case omp_proc_bind_close:
687 if (nthreads > thr->ts.place_partition_len)
688 /* T > P. S has been changed, so it needs
689 to be recomputed. */
690 s = nthreads / thr->ts.place_partition_len;
691 k = 1;
692 p = thr->place - 1;
693 break;
694 case omp_proc_bind_master:
695 /* No vars have been changed. */
696 break;
697 case omp_proc_bind_spread:
698 p = thr->ts.place_partition_off;
699 if (k != 0)
700 {
701 /* T > P. */
702 s = nthreads / team->prev_ts.place_partition_len;
703 k = 1;
704 }
705 break;
706 }
707
708 /* Increase the barrier threshold to make sure all new
709 threads and all the threads we're going to let die
710 arrive before the team is released. */
711 if (affinity_count)
712 gomp_simple_barrier_reinit (&pool->threads_dock,
713 nthreads + affinity_count);
714 }
715 }
716
717 if (i == nthreads)
718 goto do_release;
719
720 }
721
722 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
723 {
724 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
725
726 if (old_threads_used == 0)
727 --diff;
728
729 #ifdef HAVE_SYNC_BUILTINS
730 __sync_fetch_and_add (&gomp_managed_threads, diff);
731 #else
732 gomp_mutex_lock (&gomp_managed_threads_lock);
733 gomp_managed_threads += diff;
734 gomp_mutex_unlock (&gomp_managed_threads_lock);
735 #endif
736 }
737
738 attr = &gomp_thread_attr;
739 if (__builtin_expect (gomp_places_list != NULL, 0))
740 {
741 size_t stacksize;
742 pthread_attr_init (&thread_attr);
743 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
744 pthread_attr_setstacksize (&thread_attr, stacksize);
745 attr = &thread_attr;
746 }
747
748 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
749 * (nthreads - i));
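/* One start_data slot per thread that still has to be created; each
   pthread_create below consumes the next slot. */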
750
751 /* Launch new threads. */
752 for (; i < nthreads; ++i)
753 {
754 int err;
755
756 start_data->ts.place_partition_off = thr->ts.place_partition_off;
757 start_data->ts.place_partition_len = thr->ts.place_partition_len;
758 start_data->place = 0;
759 if (__builtin_expect (gomp_places_list != NULL, 0))
760 {
761 switch (bind)
762 {
763 case omp_proc_bind_true:
764 case omp_proc_bind_close:
765 if (k == s)
766 {
767 ++p;
768 if (p == (team->prev_ts.place_partition_off
769 + team->prev_ts.place_partition_len))
770 p = team->prev_ts.place_partition_off;
771 k = 1;
772 if (i == nthreads - rest)
773 s = 1;
774 }
775 else
776 ++k;
777 break;
778 case omp_proc_bind_master:
779 break;
780 case omp_proc_bind_spread:
781 if (k == 0)
782 {
783 /* T <= P. */
784 if (p < rest)
785 p += s + 1;
786 else
787 p += s;
788 if (p == (team->prev_ts.place_partition_off
789 + team->prev_ts.place_partition_len))
790 p = team->prev_ts.place_partition_off;
791 start_data->ts.place_partition_off = p;
792 if (p < rest)
793 start_data->ts.place_partition_len = s + 1;
794 else
795 start_data->ts.place_partition_len = s;
796 }
797 else
798 {
799 /* T > P. */
800 if (k == s)
801 {
802 ++p;
803 if (p == (team->prev_ts.place_partition_off
804 + team->prev_ts.place_partition_len))
805 p = team->prev_ts.place_partition_off;
806 k = 1;
807 if (i == nthreads - rest)
808 s = 1;
809 }
810 else
811 ++k;
812 start_data->ts.place_partition_off = p;
813 start_data->ts.place_partition_len = 1;
814 }
815 break;
816 }
817 start_data->place = p + 1;
818 if (affinity_thr != NULL && pool->threads[i] != NULL)
819 continue;
820 gomp_init_thread_affinity (attr, p);
821 }
822
823 start_data->fn = fn;
824 start_data->fn_data = data;
825 start_data->ts.team = team;
826 start_data->ts.work_share = &team->work_shares[0];
827 start_data->ts.last_work_share = NULL;
828 start_data->ts.team_id = i;
829 start_data->ts.level = team->prev_ts.level + 1;
830 start_data->ts.active_level = thr->ts.active_level;
831 start_data->ts.def_allocator = thr->ts.def_allocator;
832 #ifdef HAVE_SYNC_BUILTINS
833 start_data->ts.single_count = 0;
834 #endif
835 start_data->ts.static_trip = 0;
836 start_data->task = &team->implicit_task[i];
837 gomp_init_task (start_data->task, task, icv);
838 team->implicit_task[i].icv.nthreads_var = nthreads_var;
839 team->implicit_task[i].icv.bind_var = bind_var;
840 start_data->task->taskgroup = taskgroup;
841 start_data->thread_pool = pool;
842 start_data->nested = nested;
843
844 attr = gomp_adjust_thread_attr (attr, &thread_attr);
845 err = pthread_create (&start_data->handle, attr, gomp_thread_start,
846 start_data);
847 start_data++;
848 if (err != 0)
849 gomp_fatal ("Thread creation failed: %s", strerror (err));
850 }
851
852 if (__builtin_expect (attr == &thread_attr, 0))
853 pthread_attr_destroy (&thread_attr);
854
855 do_release:
856 if (nested)
857 gomp_barrier_wait (&team->barrier);
858 else
859 gomp_simple_barrier_wait (&pool->threads_dock);
860
861 /* Decrease the barrier threshold to match the number of threads
862 that should arrive back at the end of this team. The extra
863 threads should be exiting. Note that we arrange for this test
864 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
865 the barrier as well as gomp_managed_threads was temporarily
866 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED,
867 AFFINITY_COUNT, if non-zero, will always be at least
868 OLD_THREADS_USED - NTHREADS. */
869 if (__builtin_expect (nthreads < old_threads_used, 0)
870 || __builtin_expect (affinity_count, 0))
871 {
872 long diff = (long) nthreads - (long) old_threads_used;
873
874 if (affinity_count)
875 diff = -affinity_count;
876
877 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
878
879 #ifdef HAVE_SYNC_BUILTINS
880 __sync_fetch_and_add (&gomp_managed_threads, diff);
881 #else
882 gomp_mutex_lock (&gomp_managed_threads_lock);
883 gomp_managed_threads += diff;
884 gomp_mutex_unlock (&gomp_managed_threads_lock);
885 #endif
886 }
887 if (__builtin_expect (gomp_display_affinity_var, 0))
888 {
889 if (nested
890 || nthreads != old_threads_used
891 || force_display)
892 {
893 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
894 thr->place);
895 if (nested)
896 {
897 start_data -= nthreads - 1;
898 for (i = 1; i < nthreads; ++i)
899 {
900 gomp_display_affinity_thread (
901 #ifdef LIBGOMP_USE_PTHREADS
902 start_data->handle,
903 #else
904 gomp_thread_self (),
905 #endif
906 &start_data->ts,
907 start_data->place);
908 start_data++;
909 }
910 }
911 else
912 {
913 for (i = 1; i < nthreads; ++i)
914 {
915 gomp_thread_handle handle
916 = gomp_thread_to_pthread_t (pool->threads[i]);
917 gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
918 pool->threads[i]->place);
919 }
920 }
921 }
922 }
923 if (__builtin_expect (affinity_thr != NULL, 0)
924 && team->prev_ts.place_partition_len > 64)
925 free (affinity_thr);
926 }
927 #endif
928
929
930 /* Terminate the current team. This is only to be called by the master
931 thread. We assume that we must wait for the other threads. */
932
933 void
934 gomp_team_end (void)
935 {
936 struct gomp_thread *thr = gomp_thread ();
937 struct gomp_team *team = thr->ts.team;
938
939 /* This barrier handles all pending explicit tasks.
940 As #pragma omp cancel parallel might leave the awaited count in
941 team->barrier in an inconsistent state, we need to use a different
942 counter here. */
943 gomp_team_barrier_wait_final (&team->barrier);
944 if (__builtin_expect (team->team_cancelled, 0))
945 {
946 struct gomp_work_share *ws = team->work_shares_to_free;
947 do
948 {
949 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
950 if (next_ws == NULL)
951 gomp_ptrlock_set (&ws->next_ws, ws);
952 gomp_fini_work_share (ws);
953 ws = next_ws;
954 }
955 while (ws != NULL);
956 }
957 else
958 gomp_fini_work_share (thr->ts.work_share);
959
960 gomp_end_task ();
961 thr->ts = team->prev_ts;
962
963 if (__builtin_expect (thr->ts.level != 0, 0))
964 {
965 #ifdef HAVE_SYNC_BUILTINS
966 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
967 #else
968 gomp_mutex_lock (&gomp_managed_threads_lock);
969 gomp_managed_threads -= team->nthreads - 1L;
970 gomp_mutex_unlock (&gomp_managed_threads_lock);
971 #endif
972 /* This barrier has gomp_barrier_wait_last counterparts
973 and ensures the team can be safely destroyed. */
974 gomp_barrier_wait (&team->barrier);
975 }
976
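/* Free any extra blocks of work shares allocated while the team ran;
   they are chained off work_shares[0] via next_alloc. */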
977 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
978 {
979 struct gomp_work_share *ws = team->work_shares[0].next_alloc;
980 do
981 {
982 struct gomp_work_share *next_ws = ws->next_alloc;
983 free (ws);
984 ws = next_ws;
985 }
986 while (ws != NULL);
987 }
988 gomp_sem_destroy (&team->master_release);
989
990 if (__builtin_expect (thr->ts.team != NULL, 0)
991 || __builtin_expect (team->nthreads == 1, 0))
992 free_team (team);
993 else
994 {
995 struct gomp_thread_pool *pool = thr->thread_pool;
996 if (pool->last_team)
997 free_team (pool->last_team);
998 pool->last_team = team;
999 gomp_release_thread_pool (pool);
1000 }
1001 }
1002
1003 #ifdef LIBGOMP_USE_PTHREADS
1004
1005 /* Constructors for this file. */
1006
1007 static void __attribute__((constructor))
1008 initialize_team (void)
1009 {
1010 #if !defined HAVE_TLS && !defined USE_EMUTLS
1011 static struct gomp_thread initial_thread_tls_data;
1012
1013 pthread_key_create (&gomp_tls_key, NULL);
1014 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
1015 #endif
1016
1017 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
1018 gomp_fatal ("could not create thread pool destructor.");
1019 }
1020
1021 static void __attribute__((destructor))
1022 team_destructor (void)
1023 {
1024 /* Without this, dlclose on libgomp could lead to subsequent
1025 crashes. */
1026 pthread_key_delete (gomp_thread_destructor);
1027 }
1028
1029 /* Similar to gomp_free_pool_helper, but doesn't detach the thread;
1030 gomp_pause_host will pthread_join these threads instead. */
1031
1032 static void
1033 gomp_pause_pool_helper (void *thread_pool)
1034 {
1035 struct gomp_thread *thr = gomp_thread ();
1036 struct gomp_thread_pool *pool
1037 = (struct gomp_thread_pool *) thread_pool;
1038 gomp_simple_barrier_wait_last (&pool->threads_dock);
1039 gomp_sem_destroy (&thr->release);
1040 thr->thread_pool = NULL;
1041 thr->task = NULL;
1042 pthread_exit (NULL);
1043 }
1044
1045 /* Stop and join all threads in the current thread pool and free it.
1046 Return non-zero on failure (i.e. when called from within a parallel region). */
1047
1048 int
1049 gomp_pause_host (void)
1050 {
1051 struct gomp_thread *thr = gomp_thread ();
1052 struct gomp_thread_pool *pool = thr->thread_pool;
1053 if (thr->ts.level)
1054 return -1;
1055 if (pool)
1056 {
1057 if (pool->threads_used > 0)
1058 {
1059 int i;
1060 pthread_t *thrs
1061 = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
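/* Record the pthread handles before releasing the dock; once released,
   the pool threads run gomp_pause_pool_helper and exit, and are joined
   below. */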
1062 for (i = 1; i < pool->threads_used; i++)
1063 {
1064 struct gomp_thread *nthr = pool->threads[i];
1065 nthr->fn = gomp_pause_pool_helper;
1066 nthr->data = pool;
1067 thrs[i] = gomp_thread_to_pthread_t (nthr);
1068 }
1069 /* This barrier undocks threads docked on pool->threads_dock. */
1070 gomp_simple_barrier_wait (&pool->threads_dock);
1071 /* And this waits until all threads have called gomp_simple_barrier_wait_last
1072 in gomp_pause_pool_helper. */
1073 gomp_simple_barrier_wait (&pool->threads_dock);
1074 /* Now it is safe to destroy the barrier and free the pool. */
1075 gomp_simple_barrier_destroy (&pool->threads_dock);
1076
1077 #ifdef HAVE_SYNC_BUILTINS
1078 __sync_fetch_and_add (&gomp_managed_threads,
1079 1L - pool->threads_used);
1080 #else
1081 gomp_mutex_lock (&gomp_managed_threads_lock);
1082 gomp_managed_threads -= pool->threads_used - 1L;
1083 gomp_mutex_unlock (&gomp_managed_threads_lock);
1084 #endif
1085 for (i = 1; i < pool->threads_used; i++)
1086 pthread_join (thrs[i], NULL);
1087 }
1088 if (pool->last_team)
1089 free_team (pool->last_team);
1090 #ifndef __nvptx__
1091 team_free (pool->threads);
1092 team_free (pool);
1093 #endif
1094 thr->thread_pool = NULL;
1095 }
1096 return 0;
1097 }
1098 #endif
1099
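/* Create an implicit initial task for the current thread from the
   global ICVs, register the thread destructor (when pthreads are used)
   and return a pointer to the new task's ICVs. */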
1100 struct gomp_task_icv *
1101 gomp_new_icv (void)
1102 {
1103 struct gomp_thread *thr = gomp_thread ();
1104 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
1105 gomp_init_task (task, NULL, &gomp_global_icv);
1106 thr->task = task;
1107 #ifdef LIBGOMP_USE_PTHREADS
1108 pthread_setspecific (gomp_thread_destructor, thr);
1109 #endif
1110 return &task->icv;
1111 }