libgomp: Fix up GOMP_task on s390x
libgomp/task.c
1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35
36 static inline void *
37 htab_alloc (size_t size)
38 {
39 return gomp_malloc (size);
40 }
41
42 static inline void
43 htab_free (void *ptr)
44 {
45 free (ptr);
46 }
47
48 #include "hashtab.h"
49
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53 return hash_pointer (element->addr);
54 }
55
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59 return x->addr == y->addr;
60 }
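/* hashtab.h is a type-generic template: it picks up the hash_entry_type
   typedef and the htab_alloc/htab_free/htab_hash/htab_eq helpers defined
   around the #include above, so the resulting hash table keys dependence
   entries purely by the address they refer to, and entries for the same
   address collide into a single chain.  */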
61
62 /* Create a new task data structure. */
63
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 struct gomp_task_icv *prev_icv)
67 {
68 /* It would seem that using memset here would be a win, but it turns
69 out that partially filling gomp_task allows us to keep the
70 overhead of task creation low. In the nqueens-1.c test, for a
71 sufficiently large N, we drop the overhead from 5-6% to 1%.
72
73 Note, the nqueens-1.c test in serial mode is a good test to
74 benchmark the overhead of creating tasks as there are millions of
75 tiny tasks created that all run undeferred. */
76 task->parent = parent_task;
77 task->icv = *prev_icv;
78 task->kind = GOMP_TASK_IMPLICIT;
79 task->taskwait = NULL;
80 task->in_tied_task = false;
81 task->final_task = false;
82 task->copy_ctors_done = false;
83 task->parent_depends_on = false;
84 priority_queue_init (&task->children_queue);
85 task->taskgroup = NULL;
86 task->dependers = NULL;
87 task->depend_hash = NULL;
88 task->depend_count = 0;
89 task->detach = false;
90 }
91
92 /* Clean up a task, after completing it. */
93
94 void
95 gomp_end_task (void)
96 {
97 struct gomp_thread *thr = gomp_thread ();
98 struct gomp_task *task = thr->task;
99
100 gomp_finish_task (task);
101 thr->task = task->parent;
102 }
103
104 /* Clear the parent field of every task in LIST. */
105
106 static inline void
107 gomp_clear_parent_in_list (struct priority_list *list)
108 {
109 struct priority_node *p = list->tasks;
110 if (p)
111 do
112 {
113 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
114 p = p->next;
115 }
116 while (p != list->tasks);
117 }
118
119 /* Splay tree version of gomp_clear_parent_in_list.
120
121 Clear the parent field of every task in NODE within SP, and free
122 the node when done. */
123
124 static void
125 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
126 {
127 if (!node)
128 return;
129 prio_splay_tree_node left = node->left, right = node->right;
130 gomp_clear_parent_in_list (&node->key.l);
131 #if _LIBGOMP_CHECKING_
132 memset (node, 0xaf, sizeof (*node));
133 #endif
134 /* No need to remove the node from the tree. We're nuking
135 everything, so just free the nodes and our caller can clear the
136 entire splay tree. */
137 free (node);
138 gomp_clear_parent_in_tree (sp, left);
139 gomp_clear_parent_in_tree (sp, right);
140 }
141
142 /* Clear the parent field of every task in Q and remove every task
143 from Q. */
144
145 static inline void
146 gomp_clear_parent (struct priority_queue *q)
147 {
148 if (priority_queue_multi_p (q))
149 {
150 gomp_clear_parent_in_tree (&q->t, q->t.root);
151 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
152 No need to remove anything. We can just nuke everything. */
153 q->t.root = NULL;
154 }
155 else
156 gomp_clear_parent_in_list (&q->l);
157 }
158
159 /* Helper function for GOMP_task and gomp_create_target_task.
160
161 For a TASK with in/out dependencies, fill in the various dependency
162 queues. PARENT is the parent of said task. DEPEND is as in
163 GOMP_task. */
164
165 static void
166 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
167 void **depend)
168 {
169 size_t ndepend = (uintptr_t) depend[0];
170 size_t i;
171 hash_entry_type ent;
172
173 if (ndepend)
174 {
175 /* depend[0] is total # */
176 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
177 /* ndepend - nout is # of in: */
178 for (i = 0; i < ndepend; i++)
179 {
180 task->depend[i].addr = depend[2 + i];
181 task->depend[i].is_in = i >= nout;
182 }
183 }
184 else
185 {
186 ndepend = (uintptr_t) depend[1]; /* total # */
187 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
188 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
189 /* For now we treat mutexinoutset like out, which is compliant, but
190 inefficient. */
191 size_t nin = (uintptr_t) depend[4]; /* # of in: */
192 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
193 size_t normal = nout + nmutexinoutset + nin;
194 size_t n = 0;
195 for (i = normal; i < ndepend; i++)
196 {
197 void **d = (void **) (uintptr_t) depend[5 + i];
198 switch ((uintptr_t) d[1])
199 {
200 case GOMP_DEPEND_OUT:
201 case GOMP_DEPEND_INOUT:
202 case GOMP_DEPEND_MUTEXINOUTSET:
203 break;
204 case GOMP_DEPEND_IN:
205 continue;
206 default:
207 gomp_fatal ("unknown omp_depend_t dependence type %d",
208 (int) (uintptr_t) d[1]);
209 }
210 task->depend[n].addr = d[0];
211 task->depend[n++].is_in = 0;
212 }
213 for (i = 0; i < normal; i++)
214 {
215 task->depend[n].addr = depend[5 + i];
216 task->depend[n++].is_in = i >= nout + nmutexinoutset;
217 }
218 for (i = normal; i < ndepend; i++)
219 {
220 void **d = (void **) (uintptr_t) depend[5 + i];
221 if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
222 continue;
223 task->depend[n].addr = d[0];
224 task->depend[n++].is_in = 1;
225 }
226 }
227 task->depend_count = ndepend;
228 task->num_dependees = 0;
229 if (parent->depend_hash == NULL)
230 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
231 for (i = 0; i < ndepend; i++)
232 {
233 task->depend[i].next = NULL;
234 task->depend[i].prev = NULL;
235 task->depend[i].task = task;
236 task->depend[i].redundant = false;
237 task->depend[i].redundant_out = false;
238
239 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
240 &task->depend[i], INSERT);
241 hash_entry_type out = NULL, last = NULL;
242 if (*slot)
243 {
244		  /* If multiple depend clauses of this task name the same address,
245		     all but the first one are redundant.  As inout/out come first,
246		     if any of them is inout/out, it will win, which is the right semantics.  */
247 if ((*slot)->task == task)
248 {
249 task->depend[i].redundant = true;
250 continue;
251 }
252 for (ent = *slot; ent; ent = ent->next)
253 {
254 if (ent->redundant_out)
255 break;
256
257 last = ent;
258
259 /* depend(in:...) doesn't depend on earlier depend(in:...). */
260 if (task->depend[i].is_in && ent->is_in)
261 continue;
262
263 if (!ent->is_in)
264 out = ent;
265
266 struct gomp_task *tsk = ent->task;
267 if (tsk->dependers == NULL)
268 {
269 tsk->dependers
270 = gomp_malloc (sizeof (struct gomp_dependers_vec)
271 + 6 * sizeof (struct gomp_task *));
272 tsk->dependers->n_elem = 1;
273 tsk->dependers->allocated = 6;
274 tsk->dependers->elem[0] = task;
275 task->num_dependees++;
276 continue;
277 }
278		  /* We already have some other dependency on tsk from an earlier
279 depend clause. */
280 else if (tsk->dependers->n_elem
281 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
282 == task))
283 continue;
284 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
285 {
286 tsk->dependers->allocated
287 = tsk->dependers->allocated * 2 + 2;
288 tsk->dependers
289 = gomp_realloc (tsk->dependers,
290 sizeof (struct gomp_dependers_vec)
291 + (tsk->dependers->allocated
292 * sizeof (struct gomp_task *)));
293 }
294 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
295 task->num_dependees++;
296 }
297 task->depend[i].next = *slot;
298 (*slot)->prev = &task->depend[i];
299 }
300 *slot = &task->depend[i];
301
302 /* There is no need to store more than one depend({,in}out:) task per
303 address in the hash table chain for the purpose of creation of
304 deferred tasks, because each out depends on all earlier outs, thus it
305 is enough to record just the last depend({,in}out:). For depend(in:),
306 we need to keep all of the previous ones not terminated yet, because
307 a later depend({,in}out:) might need to depend on all of them. So, if
308 the new task's clause is depend({,in}out:), we know there is at most
309 one other depend({,in}out:) clause in the list (out). For
310 non-deferred tasks we want to see all outs, so they are moved to the
311	     end of the chain; after the first redundant_out entry all following
312	     entries should be redundant_out.  */
313 if (!task->depend[i].is_in && out)
314 {
315 if (out != last)
316 {
317 out->next->prev = out->prev;
318 out->prev->next = out->next;
319 out->next = last->next;
320 out->prev = last;
321 last->next = out;
322 if (out->next)
323 out->next->prev = out;
324 }
325 out->redundant_out = true;
326 }
327 }
328 }
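/* As a sketch of the bookkeeping above: if an earlier deferred sibling task
   was created with depend(out: x) and the new TASK uses depend(in: x), the
   hash chain for &x still holds the earlier task's entry, so TASK is
   appended to that task's dependers vector and TASK->num_dependees becomes
   1.  TASK will then only be queued by
   gomp_task_run_post_handle_dependers once the earlier task has finished.
   (Illustrative walk-through only.)  */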
329
330 static bool
331 task_fulfilled_p (struct gomp_task *task)
332 {
333 return gomp_sem_getcount (&task->completion_sem) > 0;
334 }
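/* For detachable tasks (see the GOMP_TASK_FLAG_DETACH handling in
   GOMP_task below), the event handle handed back to the program is the
   address of the task's completion_sem, so fulfilling the event is
   expected to post that semaphore and make the check above succeed.
   A minimal user-side sketch, where do_work is purely illustrative:

     omp_event_handle_t ev;
     #pragma omp task detach (ev)
       do_work ();
     // ... later, possibly from another thread:
     omp_fulfill_event (ev);
*/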
335
336 /* Called when encountering an explicit task directive. If IF_CLAUSE is
337 false, then we must not delay in executing the task. If UNTIED is true,
338 then the task may be executed by any member of the team.
339
340 DEPEND is an array containing:
341 if depend[0] is non-zero, then:
342 depend[0]: number of depend elements.
343 depend[1]: number of depend elements of type "out/inout".
344 depend[2..N+1]: address of [1..N]th depend element.
345 otherwise, when depend[0] is zero, then:
346 depend[1]: number of depend elements.
347 depend[2]: number of depend elements of type "out/inout".
348 depend[3]: number of depend elements of type "mutexinoutset".
349 depend[4]: number of depend elements of type "in".
350 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
351 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
352 omp_depend_t objects. */
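/* For illustration, under the first (legacy) encoding a directive such as

     #pragma omp task depend(out: x) depend(in: y, z)

   is expected to arrive here with a vector laid out roughly as

     void *depend[] = { (void *) 3,   // total number of depend elements
			(void *) 1,   // number of out/inout elements
			&x,           // out/inout addresses come first
			&y, &z };     // in addresses follow

   whereas the second encoding (depend[0] == 0) additionally carries the
   mutexinoutset and in counts plus the addresses of any omp_depend_t
   objects.  The actual vectors are emitted by the compiler; this sketch
   only restates the contract documented above.  */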
353
354 void
355 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
356 long arg_size, long arg_align, bool if_clause, unsigned flags,
357 void **depend, int priority_arg, void *detach)
358 {
359 struct gomp_thread *thr = gomp_thread ();
360 struct gomp_team *team = thr->ts.team;
361 int priority = 0;
362
363 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
364 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
365 tied to one thread all the time. This means UNTIED tasks must be
366 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
367	 might be running on a different thread than FN.  */
368 if (cpyfn)
369 if_clause = false;
370 flags &= ~GOMP_TASK_FLAG_UNTIED;
371 #endif
372
373 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
374 if (__builtin_expect (gomp_cancel_var, 0) && team)
375 {
376 if (gomp_team_barrier_cancelled (&team->barrier))
377 return;
378 if (thr->task->taskgroup)
379 {
380 if (thr->task->taskgroup->cancelled)
381 return;
382 if (thr->task->taskgroup->workshare
383 && thr->task->taskgroup->prev
384 && thr->task->taskgroup->prev->cancelled)
385 return;
386 }
387 }
388
389 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
390 {
391 priority = priority_arg;
392 if (priority > gomp_max_task_priority_var)
393 priority = gomp_max_task_priority_var;
394 }
395
396 if (!if_clause || team == NULL
397 || (thr->task && thr->task->final_task)
398 || team->task_count > 64 * team->nthreads)
399 {
400 struct gomp_task task;
401
402 /* If there are depend clauses and earlier deferred sibling tasks
403	 with depend clauses, check whether there are any dependencies.  If
404	 there are, we need to wait for them.  There is no need to handle
405 depend clauses for non-deferred tasks other than this, because
406 the parent task is suspended until the child task finishes and thus
407 it can't start further child tasks. */
408 if ((flags & GOMP_TASK_FLAG_DEPEND)
409 && thr->task && thr->task->depend_hash)
410 gomp_task_maybe_wait_for_dependencies (depend);
411
412 gomp_init_task (&task, thr->task, gomp_icv (false));
413 task.kind = GOMP_TASK_UNDEFERRED;
414 task.final_task = (thr->task && thr->task->final_task)
415 || (flags & GOMP_TASK_FLAG_FINAL);
416 task.priority = priority;
417
418 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
419 {
420 task.detach = true;
421 gomp_sem_init (&task.completion_sem, 0);
422 *(void **) detach = &task.completion_sem;
423 if (data)
424 *(void **) data = &task.completion_sem;
425
426 gomp_debug (0, "New event: %p\n", &task.completion_sem);
427 }
428
429 if (thr->task)
430 {
431 task.in_tied_task = thr->task->in_tied_task;
432 task.taskgroup = thr->task->taskgroup;
433 }
434 thr->task = &task;
435 if (__builtin_expect (cpyfn != NULL, 0))
436 {
437 char buf[arg_size + arg_align - 1];
438 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
439 & ~(uintptr_t) (arg_align - 1));
440 cpyfn (arg, data);
441 fn (arg);
442 }
443 else
444 fn (data);
445
446 if (task.detach && !task_fulfilled_p (&task))
447 gomp_sem_wait (&task.completion_sem);
448
449 /* Access to "children" is normally done inside a task_lock
450 mutex region, but the only way this particular task.children
451 can be set is if this thread's task work function (fn)
452 creates children. So since the setter is *this* thread, we
453 need no barriers here when testing for non-NULL. We can have
454 task.children set by the current thread then changed by a
455 child thread, but seeing a stale non-NULL value is not a
456 problem. Once past the task_lock acquisition, this thread
457 will see the real value of task.children. */
458 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
459 {
460 gomp_mutex_lock (&team->task_lock);
461 gomp_clear_parent (&task.children_queue);
462 gomp_mutex_unlock (&team->task_lock);
463 }
464 gomp_end_task ();
465 }
466 else
467 {
468 struct gomp_task *task;
469 struct gomp_task *parent = thr->task;
470 struct gomp_taskgroup *taskgroup = parent->taskgroup;
471 char *arg;
472 bool do_wake;
473 size_t depend_size = 0;
474
475 if (flags & GOMP_TASK_FLAG_DEPEND)
476 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
477 * sizeof (struct gomp_task_depend_entry));
478 task = gomp_malloc (sizeof (*task) + depend_size
479 + arg_size + arg_align - 1);
480 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
481 & ~(uintptr_t) (arg_align - 1));
482 gomp_init_task (task, parent, gomp_icv (false));
483 task->priority = priority;
484 task->kind = GOMP_TASK_UNDEFERRED;
485 task->in_tied_task = parent->in_tied_task;
486 task->taskgroup = taskgroup;
487 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
488 {
489 task->detach = true;
490 gomp_sem_init (&task->completion_sem, 0);
491 *(void **) detach = &task->completion_sem;
492 if (data)
493 *(void **) data = &task->completion_sem;
494
495 gomp_debug (0, "New event: %p\n", &task->completion_sem);
496 }
497 thr->task = task;
498 if (cpyfn)
499 {
500 cpyfn (arg, data);
501 task->copy_ctors_done = true;
502 }
503 else
504 memcpy (arg, data, arg_size);
505 thr->task = parent;
506 task->kind = GOMP_TASK_WAITING;
507 task->fn = fn;
508 task->fn_data = arg;
509 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
510 gomp_mutex_lock (&team->task_lock);
511 /* If parallel or taskgroup has been cancelled, don't start new
512 tasks. */
513 if (__builtin_expect (gomp_cancel_var, 0)
514 && !task->copy_ctors_done)
515 {
516 if (gomp_team_barrier_cancelled (&team->barrier))
517 {
518 do_cancel:
519 gomp_mutex_unlock (&team->task_lock);
520 gomp_finish_task (task);
521 free (task);
522 return;
523 }
524 if (taskgroup)
525 {
526 if (taskgroup->cancelled)
527 goto do_cancel;
528 if (taskgroup->workshare
529 && taskgroup->prev
530 && taskgroup->prev->cancelled)
531 goto do_cancel;
532 }
533 }
534 if (taskgroup)
535 taskgroup->num_children++;
536 if (depend_size)
537 {
538 gomp_task_handle_depend (task, parent, depend);
539 if (task->num_dependees)
540 {
541 /* Tasks that depend on other tasks are not put into the
542 various waiting queues, so we are done for now. Said
543 tasks are instead put into the queues via
544 gomp_task_run_post_handle_dependers() after their
545 dependencies have been satisfied. After which, they
546		 dependencies have been satisfied, after which they
547 points. */
548 gomp_mutex_unlock (&team->task_lock);
549 return;
550 }
551 }
552
553 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
554 task, priority,
555 PRIORITY_INSERT_BEGIN,
556 /*adjust_parent_depends_on=*/false,
557 task->parent_depends_on);
558 if (taskgroup)
559 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
560 task, priority,
561 PRIORITY_INSERT_BEGIN,
562 /*adjust_parent_depends_on=*/false,
563 task->parent_depends_on);
564
565 priority_queue_insert (PQ_TEAM, &team->task_queue,
566 task, priority,
567 PRIORITY_INSERT_END,
568 /*adjust_parent_depends_on=*/false,
569 task->parent_depends_on);
570
571 ++team->task_count;
572 ++team->task_queued_count;
573 gomp_team_barrier_set_task_pending (&team->barrier);
574 do_wake = team->task_running_count + !parent->in_tied_task
575 < team->nthreads;
576 gomp_mutex_unlock (&team->task_lock);
577 if (do_wake)
578 gomp_team_barrier_wake (&team->barrier, 1);
579 }
580 }
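/* To summarise the two paths above: a task runs undeferred on this
   thread's stack (the first branch) when there is no team, the if clause
   evaluated to false, the parent is a final task, more than
   64 * nthreads tasks are already pending, or CPYFN forced if(0) under
   HAVE_BROKEN_POSIX_SEMAPHORES; otherwise the task is heap-allocated
   together with its argument block and dependence entries and pushed onto
   the parent's children queue, the taskgroup queue and the team-wide
   queue, to be run at a later scheduling point.  */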
581
582 ialias (GOMP_taskgroup_start)
583 ialias (GOMP_taskgroup_end)
584 ialias (GOMP_taskgroup_reduction_register)
585
586 #define TYPE long
587 #define UTYPE unsigned long
588 #define TYPE_is_long 1
589 #include "taskloop.c"
590 #undef TYPE
591 #undef UTYPE
592 #undef TYPE_is_long
593
594 #define TYPE unsigned long long
595 #define UTYPE TYPE
596 #define GOMP_taskloop GOMP_taskloop_ull
597 #include "taskloop.c"
598 #undef TYPE
599 #undef UTYPE
600 #undef GOMP_taskloop
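/* taskloop.c is written as a type-generic template: including it twice
   with the TYPE/UTYPE/GOMP_taskloop macro settings above produces both the
   signed long GOMP_taskloop and the unsigned long long GOMP_taskloop_ull
   entry points from a single implementation.  */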
601
602 static void inline
603 priority_queue_move_task_first (enum priority_queue_type type,
604 struct priority_queue *head,
605 struct gomp_task *task)
606 {
607 #if _LIBGOMP_CHECKING_
608 if (!priority_queue_task_in_queue_p (type, head, task))
609 gomp_fatal ("Attempt to move first missing task %p", task);
610 #endif
611 struct priority_list *list;
612 if (priority_queue_multi_p (head))
613 {
614 list = priority_queue_lookup_priority (head, task->priority);
615 #if _LIBGOMP_CHECKING_
616 if (!list)
617 gomp_fatal ("Unable to find priority %d", task->priority);
618 #endif
619 }
620 else
621 list = &head->l;
622 priority_list_remove (list, task_to_priority_node (type, task), 0);
623 priority_list_insert (type, list, task, task->priority,
624 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
625 task->parent_depends_on);
626 }
627
628 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
629 with team->task_lock held, or is executed in the thread that called
630 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
631 run before it acquires team->task_lock. */
632
633 static void
634 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
635 {
636 struct gomp_task *parent = task->parent;
637 if (parent)
638 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
639 task);
640
641 struct gomp_taskgroup *taskgroup = task->taskgroup;
642 if (taskgroup)
643 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
644 task);
645
646 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
647 PRIORITY_INSERT_BEGIN, false,
648 task->parent_depends_on);
649 task->kind = GOMP_TASK_WAITING;
650 if (parent && parent->taskwait)
651 {
652 if (parent->taskwait->in_taskwait)
653 {
654 /* One more task has had its dependencies met.
655 Inform any waiters. */
656 parent->taskwait->in_taskwait = false;
657 gomp_sem_post (&parent->taskwait->taskwait_sem);
658 }
659 else if (parent->taskwait->in_depend_wait)
660 {
661 /* One more task has had its dependencies met.
662 Inform any waiters. */
663 parent->taskwait->in_depend_wait = false;
664 gomp_sem_post (&parent->taskwait->taskwait_sem);
665 }
666 }
667 if (taskgroup && taskgroup->in_taskgroup_wait)
668 {
669 /* One more task has had its dependencies met.
670 Inform any waiters. */
671 taskgroup->in_taskgroup_wait = false;
672 gomp_sem_post (&taskgroup->taskgroup_sem);
673 }
674
675 ++team->task_queued_count;
676 gomp_team_barrier_set_task_pending (&team->barrier);
677 /* I'm afraid this can't be done after releasing team->task_lock,
678	 as gomp_target_task_completion is run from an unrelated thread and
679 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
680 the team could be gone already. */
681 if (team->nthreads > team->task_running_count)
682 gomp_team_barrier_wake (&team->barrier, 1);
683 }
684
685 /* Signal that a target task TTASK has completed the asynchronously
686 running phase and should be requeued as a task to handle the
687 variable unmapping. */
688
689 void
690 GOMP_PLUGIN_target_task_completion (void *data)
691 {
692 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
693 struct gomp_task *task = ttask->task;
694 struct gomp_team *team = ttask->team;
695
696 gomp_mutex_lock (&team->task_lock);
697 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
698 {
699 ttask->state = GOMP_TARGET_TASK_FINISHED;
700 gomp_mutex_unlock (&team->task_lock);
701 return;
702 }
703 ttask->state = GOMP_TARGET_TASK_FINISHED;
704 gomp_target_task_completion (team, task);
705 gomp_mutex_unlock (&team->task_lock);
706 }
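/* The plugin callback above and the thread that started the asynchronous
   region (gomp_create_target_task, gomp_barrier_handle_tasks, GOMP_taskwait,
   ...) coordinate through ttask->state under team->task_lock: whichever
   side observes that the other has already advanced the state (past
   GOMP_TARGET_TASK_READY_TO_RUN, or to GOMP_TARGET_TASK_FINISHED) performs
   the requeuing via gomp_target_task_completion, so each target task is
   requeued exactly once.  */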
707
708 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
709
710 /* Called for nowait target tasks. */
711
712 bool
713 gomp_create_target_task (struct gomp_device_descr *devicep,
714 void (*fn) (void *), size_t mapnum, void **hostaddrs,
715 size_t *sizes, unsigned short *kinds,
716 unsigned int flags, void **depend, void **args,
717 enum gomp_target_task_state state)
718 {
719 struct gomp_thread *thr = gomp_thread ();
720 struct gomp_team *team = thr->ts.team;
721
722 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
723 if (__builtin_expect (gomp_cancel_var, 0) && team)
724 {
725 if (gomp_team_barrier_cancelled (&team->barrier))
726 return true;
727 if (thr->task->taskgroup)
728 {
729 if (thr->task->taskgroup->cancelled)
730 return true;
731 if (thr->task->taskgroup->workshare
732 && thr->task->taskgroup->prev
733 && thr->task->taskgroup->prev->cancelled)
734 return true;
735 }
736 }
737
738 struct gomp_target_task *ttask;
739 struct gomp_task *task;
740 struct gomp_task *parent = thr->task;
741 struct gomp_taskgroup *taskgroup = parent->taskgroup;
742 bool do_wake;
743 size_t depend_size = 0;
744 uintptr_t depend_cnt = 0;
745 size_t tgt_align = 0, tgt_size = 0;
746
747 if (depend != NULL)
748 {
749 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
750 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
751 }
752 if (fn)
753 {
754	 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
755 firstprivate on the target task. */
756 size_t i;
757 for (i = 0; i < mapnum; i++)
758 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
759 {
760 size_t align = (size_t) 1 << (kinds[i] >> 8);
761 if (tgt_align < align)
762 tgt_align = align;
763 tgt_size = (tgt_size + align - 1) & ~(align - 1);
764 tgt_size += sizes[i];
765 }
766 if (tgt_align)
767 tgt_size += tgt_align - 1;
768 else
769 tgt_size = 0;
770 }
771
772 task = gomp_malloc (sizeof (*task) + depend_size
773 + sizeof (*ttask)
774 + mapnum * (sizeof (void *) + sizeof (size_t)
775 + sizeof (unsigned short))
776 + tgt_size);
777 gomp_init_task (task, parent, gomp_icv (false));
778 task->priority = 0;
779 task->kind = GOMP_TASK_WAITING;
780 task->in_tied_task = parent->in_tied_task;
781 task->taskgroup = taskgroup;
782 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
783 ttask->devicep = devicep;
784 ttask->fn = fn;
785 ttask->mapnum = mapnum;
786 ttask->args = args;
787 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
788 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
789 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
790 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
791 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
792 if (tgt_align)
793 {
794 char *tgt = (char *) &ttask->kinds[mapnum];
795 size_t i;
796 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
797 if (al)
798 tgt += tgt_align - al;
799 tgt_size = 0;
800 for (i = 0; i < mapnum; i++)
801 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
802 {
803 size_t align = (size_t) 1 << (kinds[i] >> 8);
804 tgt_size = (tgt_size + align - 1) & ~(align - 1);
805 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
806 ttask->hostaddrs[i] = tgt + tgt_size;
807 tgt_size = tgt_size + sizes[i];
808 }
809 }
810 ttask->flags = flags;
811 ttask->state = state;
812 ttask->task = task;
813 ttask->team = team;
814 task->fn = NULL;
815 task->fn_data = ttask;
816 task->final_task = 0;
817 gomp_mutex_lock (&team->task_lock);
818 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
819 if (__builtin_expect (gomp_cancel_var, 0))
820 {
821 if (gomp_team_barrier_cancelled (&team->barrier))
822 {
823 do_cancel:
824 gomp_mutex_unlock (&team->task_lock);
825 gomp_finish_task (task);
826 free (task);
827 return true;
828 }
829 if (taskgroup)
830 {
831 if (taskgroup->cancelled)
832 goto do_cancel;
833 if (taskgroup->workshare
834 && taskgroup->prev
835 && taskgroup->prev->cancelled)
836 goto do_cancel;
837 }
838 }
839 if (depend_size)
840 {
841 gomp_task_handle_depend (task, parent, depend);
842 if (task->num_dependees)
843 {
844 if (taskgroup)
845 taskgroup->num_children++;
846 gomp_mutex_unlock (&team->task_lock);
847 return true;
848 }
849 }
850 if (state == GOMP_TARGET_TASK_DATA)
851 {
852 gomp_task_run_post_handle_depend_hash (task);
853 gomp_mutex_unlock (&team->task_lock);
854 gomp_finish_task (task);
855 free (task);
856 return false;
857 }
858 if (taskgroup)
859 taskgroup->num_children++;
860 /* For async offloading, if we don't need to wait for dependencies,
861	 run the gomp_target_task_fn right away, essentially scheduling the
862 mapping part of the task in the current thread. */
863 if (devicep != NULL
864 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
865 {
866 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
867 PRIORITY_INSERT_END,
868 /*adjust_parent_depends_on=*/false,
869 task->parent_depends_on);
870 if (taskgroup)
871 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
872 task, 0, PRIORITY_INSERT_END,
873 /*adjust_parent_depends_on=*/false,
874 task->parent_depends_on);
875 task->pnode[PQ_TEAM].next = NULL;
876 task->pnode[PQ_TEAM].prev = NULL;
877 task->kind = GOMP_TASK_TIED;
878 ++team->task_count;
879 gomp_mutex_unlock (&team->task_lock);
880
881 thr->task = task;
882 gomp_target_task_fn (task->fn_data);
883 thr->task = parent;
884
885 gomp_mutex_lock (&team->task_lock);
886 task->kind = GOMP_TASK_ASYNC_RUNNING;
887 /* If GOMP_PLUGIN_target_task_completion has run already
888 in between gomp_target_task_fn and the mutex lock,
889 perform the requeuing here. */
890 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
891 gomp_target_task_completion (team, task);
892 else
893 ttask->state = GOMP_TARGET_TASK_RUNNING;
894 gomp_mutex_unlock (&team->task_lock);
895 return true;
896 }
897 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
898 PRIORITY_INSERT_BEGIN,
899 /*adjust_parent_depends_on=*/false,
900 task->parent_depends_on);
901 if (taskgroup)
902 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
903 PRIORITY_INSERT_BEGIN,
904 /*adjust_parent_depends_on=*/false,
905 task->parent_depends_on);
906 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
907 PRIORITY_INSERT_END,
908 /*adjust_parent_depends_on=*/false,
909 task->parent_depends_on);
910 ++team->task_count;
911 ++team->task_queued_count;
912 gomp_team_barrier_set_task_pending (&team->barrier);
913 do_wake = team->task_running_count + !parent->in_tied_task
914 < team->nthreads;
915 gomp_mutex_unlock (&team->task_lock);
916 if (do_wake)
917 gomp_team_barrier_wake (&team->barrier, 1);
918 return true;
919 }
920
921 /* Given a parent_depends_on task in LIST, move it to the front of its
922 priority so it is run as soon as possible.
923
924 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
925
926 We rearrange the queue such that all parent_depends_on tasks are
927 first, and last_parent_depends_on points to the last such task we
928 rearranged. For example, given the following tasks in a queue
929 where PD[123] are the parent_depends_on tasks:
930
931 task->children
932 |
933 V
934 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
935
936 We rearrange such that:
937
938 task->children
939 | +--- last_parent_depends_on
940 | |
941 V V
942 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
943
944 static void inline
945 priority_list_upgrade_task (struct priority_list *list,
946 struct priority_node *node)
947 {
948 struct priority_node *last_parent_depends_on
949 = list->last_parent_depends_on;
950 if (last_parent_depends_on)
951 {
952 node->prev->next = node->next;
953 node->next->prev = node->prev;
954 node->prev = last_parent_depends_on;
955 node->next = last_parent_depends_on->next;
956 node->prev->next = node;
957 node->next->prev = node;
958 }
959 else if (node != list->tasks)
960 {
961 node->prev->next = node->next;
962 node->next->prev = node->prev;
963 node->prev = list->tasks->prev;
964 node->next = list->tasks;
965 list->tasks = node;
966 node->prev->next = node;
967 node->next->prev = node;
968 }
969 list->last_parent_depends_on = node;
970 }
971
972 /* Given a parent_depends_on TASK in its parent's children_queue, move
973 it to the front of its priority so it is run as soon as possible.
974
975 PARENT is passed as an optimization.
976
977 (This function could be defined in priority_queue.c, but we want it
978 inlined, and putting it in priority_queue.h is not an option, given
979 that gomp_task has not been properly defined at that point). */
980
981 static void inline
982 priority_queue_upgrade_task (struct gomp_task *task,
983 struct gomp_task *parent)
984 {
985 struct priority_queue *head = &parent->children_queue;
986 struct priority_node *node = &task->pnode[PQ_CHILDREN];
987 #if _LIBGOMP_CHECKING_
988 if (!task->parent_depends_on)
989 gomp_fatal ("priority_queue_upgrade_task: task must be a "
990 "parent_depends_on task");
991 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
992 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
993 #endif
994 if (priority_queue_multi_p (head))
995 {
996 struct priority_list *list
997 = priority_queue_lookup_priority (head, task->priority);
998 priority_list_upgrade_task (list, node);
999 }
1000 else
1001 priority_list_upgrade_task (&head->l, node);
1002 }
1003
1004 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1005 the way in LIST so that other tasks can be considered for
1006 execution. LIST contains tasks of type TYPE.
1007
1008 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1009 if applicable. */
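/* For example, if the queue currently reads T1 -> T2 -> T3 with T1 about
   to run and T2 still WAITING, the result is T2 -> T3 -> T1: a task that
   is becoming TIED is moved behind the WAITING ones so the scheduler never
   picks it ahead of them again.  (T1/T2/T3 are illustrative names only.)  */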
1010
1011 static void inline
1012 priority_list_downgrade_task (enum priority_queue_type type,
1013 struct priority_list *list,
1014 struct gomp_task *child_task)
1015 {
1016 struct priority_node *node = task_to_priority_node (type, child_task);
1017 if (list->tasks == node)
1018 list->tasks = node->next;
1019 else if (node->next != list->tasks)
1020 {
1021 /* The task in NODE is about to become TIED and TIED tasks
1022 cannot come before WAITING tasks. If we're about to
1023 leave the queue in such an indeterminate state, rewire
1024 things appropriately. However, a TIED task at the end is
1025 perfectly fine. */
1026 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1027 if (next_task->kind == GOMP_TASK_WAITING)
1028 {
1029 /* Remove from list. */
1030 node->prev->next = node->next;
1031 node->next->prev = node->prev;
1032 /* Rewire at the end. */
1033 node->next = list->tasks;
1034 node->prev = list->tasks->prev;
1035 list->tasks->prev->next = node;
1036 list->tasks->prev = node;
1037 }
1038 }
1039
1040 /* If the current task is the last_parent_depends_on for its
1041 priority, adjust last_parent_depends_on appropriately. */
1042 if (__builtin_expect (child_task->parent_depends_on, 0)
1043 && list->last_parent_depends_on == node)
1044 {
1045 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1046 if (node->prev != node
1047 && prev_child->kind == GOMP_TASK_WAITING
1048 && prev_child->parent_depends_on)
1049 list->last_parent_depends_on = node->prev;
1050 else
1051 {
1052 /* There are no more parent_depends_on entries waiting
1053		 to run, so clear the list.  */
1054 list->last_parent_depends_on = NULL;
1055 }
1056 }
1057 }
1058
1059 /* Given a TASK in HEAD that is about to be executed, move it out of
1060 the way so that other tasks can be considered for execution. HEAD
1061 contains tasks of type TYPE.
1062
1063 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1064 if applicable.
1065
1066 (This function could be defined in priority_queue.c, but we want it
1067 inlined, and putting it in priority_queue.h is not an option, given
1068 that gomp_task has not been properly defined at that point). */
1069
1070 static void inline
1071 priority_queue_downgrade_task (enum priority_queue_type type,
1072 struct priority_queue *head,
1073 struct gomp_task *task)
1074 {
1075 #if _LIBGOMP_CHECKING_
1076 if (!priority_queue_task_in_queue_p (type, head, task))
1077 gomp_fatal ("Attempt to downgrade missing task %p", task);
1078 #endif
1079 if (priority_queue_multi_p (head))
1080 {
1081 struct priority_list *list
1082 = priority_queue_lookup_priority (head, task->priority);
1083 priority_list_downgrade_task (type, list, task);
1084 }
1085 else
1086 priority_list_downgrade_task (type, &head->l, task);
1087 }
1088
1089 /* Set up CHILD_TASK to execute.  This is done by setting the task to
1090 TIED, and updating all relevant queues so that CHILD_TASK is no
1091 longer chosen for scheduling. Also, remove CHILD_TASK from the
1092 overall team task queue entirely.
1093
1094 Return TRUE if task or its containing taskgroup has been
1095 cancelled. */
1096
1097 static inline bool
1098 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1099 struct gomp_team *team)
1100 {
1101 #if _LIBGOMP_CHECKING_
1102 if (child_task->parent)
1103 priority_queue_verify (PQ_CHILDREN,
1104 &child_task->parent->children_queue, true);
1105 if (child_task->taskgroup)
1106 priority_queue_verify (PQ_TASKGROUP,
1107 &child_task->taskgroup->taskgroup_queue, false);
1108 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1109 #endif
1110
1111 /* Task is about to go tied, move it out of the way. */
1112 if (parent)
1113 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1114 child_task);
1115
1116 /* Task is about to go tied, move it out of the way. */
1117 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1118 if (taskgroup)
1119 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1120 child_task);
1121
1122 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1123 MEMMODEL_RELAXED);
1124 child_task->pnode[PQ_TEAM].next = NULL;
1125 child_task->pnode[PQ_TEAM].prev = NULL;
1126 child_task->kind = GOMP_TASK_TIED;
1127
1128 if (--team->task_queued_count == 0)
1129 gomp_team_barrier_clear_task_pending (&team->barrier);
1130 if (__builtin_expect (gomp_cancel_var, 0)
1131 && !child_task->copy_ctors_done)
1132 {
1133 if (gomp_team_barrier_cancelled (&team->barrier))
1134 return true;
1135 if (taskgroup)
1136 {
1137 if (taskgroup->cancelled)
1138 return true;
1139 if (taskgroup->workshare
1140 && taskgroup->prev
1141 && taskgroup->prev->cancelled)
1142 return true;
1143 }
1144 }
1145 return false;
1146 }
1147
1148 static void
1149 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1150 {
1151 struct gomp_task *parent = child_task->parent;
1152 size_t i;
1153
1154 for (i = 0; i < child_task->depend_count; i++)
1155 if (!child_task->depend[i].redundant)
1156 {
1157 if (child_task->depend[i].next)
1158 child_task->depend[i].next->prev = child_task->depend[i].prev;
1159 if (child_task->depend[i].prev)
1160 child_task->depend[i].prev->next = child_task->depend[i].next;
1161 else
1162 {
1163 hash_entry_type *slot
1164 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1165 NO_INSERT);
1166 if (*slot != &child_task->depend[i])
1167 abort ();
1168 if (child_task->depend[i].next)
1169 *slot = child_task->depend[i].next;
1170 else
1171 htab_clear_slot (parent->depend_hash, slot);
1172 }
1173 }
1174 }
1175
1176 /* After a CHILD_TASK has been run, adjust the dependency queue for
1177 each task that depends on CHILD_TASK, to record the fact that there
1178 is one less dependency to worry about. If a task that depended on
1179 CHILD_TASK now has no dependencies, place it in the various queues
1180 so it gets scheduled to run.
1181
1182	 TEAM is the team to which CHILD_TASK belongs.  */
1183
1184 static size_t
1185 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1186 struct gomp_team *team)
1187 {
1188 struct gomp_task *parent = child_task->parent;
1189 size_t i, count = child_task->dependers->n_elem, ret = 0;
1190 for (i = 0; i < count; i++)
1191 {
1192 struct gomp_task *task = child_task->dependers->elem[i];
1193
1194 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1195 TASK's remaining dependencies. Once TASK has no other
1196 dependencies, put it into the various queues so it will get
1197 scheduled for execution. */
1198 if (--task->num_dependees != 0)
1199 continue;
1200
1201 struct gomp_taskgroup *taskgroup = task->taskgroup;
1202 if (parent)
1203 {
1204 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1205 task, task->priority,
1206 PRIORITY_INSERT_BEGIN,
1207 /*adjust_parent_depends_on=*/true,
1208 task->parent_depends_on);
1209 if (parent->taskwait)
1210 {
1211 if (parent->taskwait->in_taskwait)
1212 {
1213 /* One more task has had its dependencies met.
1214 Inform any waiters. */
1215 parent->taskwait->in_taskwait = false;
1216 gomp_sem_post (&parent->taskwait->taskwait_sem);
1217 }
1218 else if (parent->taskwait->in_depend_wait)
1219 {
1220 /* One more task has had its dependencies met.
1221 Inform any waiters. */
1222 parent->taskwait->in_depend_wait = false;
1223 gomp_sem_post (&parent->taskwait->taskwait_sem);
1224 }
1225 }
1226 }
1227 if (taskgroup)
1228 {
1229 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1230 task, task->priority,
1231 PRIORITY_INSERT_BEGIN,
1232 /*adjust_parent_depends_on=*/false,
1233 task->parent_depends_on);
1234 if (taskgroup->in_taskgroup_wait)
1235 {
1236 /* One more task has had its dependencies met.
1237 Inform any waiters. */
1238 taskgroup->in_taskgroup_wait = false;
1239 gomp_sem_post (&taskgroup->taskgroup_sem);
1240 }
1241 }
1242 priority_queue_insert (PQ_TEAM, &team->task_queue,
1243 task, task->priority,
1244 PRIORITY_INSERT_END,
1245 /*adjust_parent_depends_on=*/false,
1246 task->parent_depends_on);
1247 ++team->task_count;
1248 ++team->task_queued_count;
1249 ++ret;
1250 }
1251 free (child_task->dependers);
1252 child_task->dependers = NULL;
1253 if (ret > 1)
1254 gomp_team_barrier_set_task_pending (&team->barrier);
1255 return ret;
1256 }
1257
1258 static inline size_t
1259 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1260 struct gomp_team *team)
1261 {
1262 if (child_task->depend_count == 0)
1263 return 0;
1264
1265 /* If parent is gone already, the hash table is freed and nothing
1266	 will use the hash table anymore, so there is no need to remove anything from it.  */
1267 if (child_task->parent != NULL)
1268 gomp_task_run_post_handle_depend_hash (child_task);
1269
1270 if (child_task->dependers == NULL)
1271 return 0;
1272
1273 return gomp_task_run_post_handle_dependers (child_task, team);
1274 }
1275
1276 /* Remove CHILD_TASK from its parent. */
1277
1278 static inline void
1279 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1280 {
1281 struct gomp_task *parent = child_task->parent;
1282 if (parent == NULL)
1283 return;
1284
1285 /* If this was the last task the parent was depending on,
1286 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1287 clean up and return. */
1288 if (__builtin_expect (child_task->parent_depends_on, 0)
1289 && --parent->taskwait->n_depend == 0
1290 && parent->taskwait->in_depend_wait)
1291 {
1292 parent->taskwait->in_depend_wait = false;
1293 gomp_sem_post (&parent->taskwait->taskwait_sem);
1294 }
1295
1296 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1297 child_task, MEMMODEL_RELEASE)
1298 && parent->taskwait && parent->taskwait->in_taskwait)
1299 {
1300 parent->taskwait->in_taskwait = false;
1301 gomp_sem_post (&parent->taskwait->taskwait_sem);
1302 }
1303 child_task->pnode[PQ_CHILDREN].next = NULL;
1304 child_task->pnode[PQ_CHILDREN].prev = NULL;
1305 }
1306
1307 /* Remove CHILD_TASK from its taskgroup. */
1308
1309 static inline void
1310 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1311 {
1312 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1313 if (taskgroup == NULL)
1314 return;
1315 bool empty = priority_queue_remove (PQ_TASKGROUP,
1316 &taskgroup->taskgroup_queue,
1317 child_task, MEMMODEL_RELAXED);
1318 child_task->pnode[PQ_TASKGROUP].next = NULL;
1319 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1320 if (taskgroup->num_children > 1)
1321 --taskgroup->num_children;
1322 else
1323 {
1324 /* We access taskgroup->num_children in GOMP_taskgroup_end
1325	     outside of the task lock mutex region, so we
1326	     need a release barrier here to ensure memory
1327 written by child_task->fn above is flushed
1328 before the NULL is written. */
1329 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1330 }
1331 if (empty && taskgroup->in_taskgroup_wait)
1332 {
1333 taskgroup->in_taskgroup_wait = false;
1334 gomp_sem_post (&taskgroup->taskgroup_sem);
1335 }
1336 }
1337
1338 void
1339 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1340 {
1341 struct gomp_thread *thr = gomp_thread ();
1342 struct gomp_team *team = thr->ts.team;
1343 struct gomp_task *task = thr->task;
1344 struct gomp_task *child_task = NULL;
1345 struct gomp_task *to_free = NULL;
1346 int do_wake = 0;
1347
1348 gomp_mutex_lock (&team->task_lock);
1349 if (gomp_barrier_last_thread (state))
1350 {
1351 if (team->task_count == 0)
1352 {
1353 gomp_team_barrier_done (&team->barrier, state);
1354 gomp_mutex_unlock (&team->task_lock);
1355 gomp_team_barrier_wake (&team->barrier, 0);
1356 return;
1357 }
1358 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1359 }
1360
1361 while (1)
1362 {
1363 bool cancelled = false;
1364
1365 /* Look for a queued detached task with a fulfilled completion event
1366 that is ready to finish. */
1367 child_task = priority_queue_find (PQ_TEAM, &team->task_detach_queue,
1368 task_fulfilled_p);
1369 if (child_task)
1370 {
1371 priority_queue_remove (PQ_TEAM, &team->task_detach_queue,
1372 child_task, MEMMODEL_RELAXED);
1373 --team->task_detach_count;
1374 gomp_debug (0, "thread %d: found task with fulfilled event %p\n",
1375 thr->ts.team_id, &child_task->completion_sem);
1376
1377 if (to_free)
1378 {
1379 gomp_finish_task (to_free);
1380 free (to_free);
1381 to_free = NULL;
1382 }
1383 goto finish_cancelled;
1384 }
1385
1386 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1387 {
1388 bool ignored;
1389 child_task
1390 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1391 PQ_IGNORED, NULL,
1392 &ignored);
1393 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1394 team);
1395 if (__builtin_expect (cancelled, 0))
1396 {
1397 if (to_free)
1398 {
1399 gomp_finish_task (to_free);
1400 free (to_free);
1401 to_free = NULL;
1402 }
1403 goto finish_cancelled;
1404 }
1405 team->task_running_count++;
1406 child_task->in_tied_task = true;
1407 }
1408 gomp_mutex_unlock (&team->task_lock);
1409 if (do_wake)
1410 {
1411 gomp_team_barrier_wake (&team->barrier, do_wake);
1412 do_wake = 0;
1413 }
1414 if (to_free)
1415 {
1416 gomp_finish_task (to_free);
1417 free (to_free);
1418 to_free = NULL;
1419 }
1420 if (child_task)
1421 {
1422 thr->task = child_task;
1423 if (__builtin_expect (child_task->fn == NULL, 0))
1424 {
1425 if (gomp_target_task_fn (child_task->fn_data))
1426 {
1427 thr->task = task;
1428 gomp_mutex_lock (&team->task_lock);
1429 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1430 team->task_running_count--;
1431 struct gomp_target_task *ttask
1432 = (struct gomp_target_task *) child_task->fn_data;
1433 /* If GOMP_PLUGIN_target_task_completion has run already
1434 in between gomp_target_task_fn and the mutex lock,
1435 perform the requeuing here. */
1436 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1437 gomp_target_task_completion (team, child_task);
1438 else
1439 ttask->state = GOMP_TARGET_TASK_RUNNING;
1440 child_task = NULL;
1441 continue;
1442 }
1443 }
1444 else
1445 child_task->fn (child_task->fn_data);
1446 thr->task = task;
1447 }
1448 else
1449 return;
1450 gomp_mutex_lock (&team->task_lock);
1451 if (child_task)
1452 {
1453 if (child_task->detach && !task_fulfilled_p (child_task))
1454 {
1455 priority_queue_insert (PQ_TEAM, &team->task_detach_queue,
1456 child_task, child_task->priority,
1457 PRIORITY_INSERT_END,
1458 false, false);
1459 ++team->task_detach_count;
1460 gomp_debug (0, "thread %d: queueing task with event %p\n",
1461 thr->ts.team_id, &child_task->completion_sem);
1462 child_task = NULL;
1463 }
1464 else
1465 {
1466 finish_cancelled:;
1467 size_t new_tasks
1468 = gomp_task_run_post_handle_depend (child_task, team);
1469 gomp_task_run_post_remove_parent (child_task);
1470 gomp_clear_parent (&child_task->children_queue);
1471 gomp_task_run_post_remove_taskgroup (child_task);
1472 to_free = child_task;
1473 child_task = NULL;
1474 if (!cancelled)
1475 team->task_running_count--;
1476 if (new_tasks > 1)
1477 {
1478 do_wake = team->nthreads - team->task_running_count;
1479 if (do_wake > new_tasks)
1480 do_wake = new_tasks;
1481 }
1482 if (--team->task_count == 0
1483 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1484 {
1485 gomp_team_barrier_done (&team->barrier, state);
1486 gomp_mutex_unlock (&team->task_lock);
1487 gomp_team_barrier_wake (&team->barrier, 0);
1488 gomp_mutex_lock (&team->task_lock);
1489 }
1490 }
1491 }
1492 }
1493 }
1494
1495 /* Called when encountering a taskwait directive.
1496
1497 Wait for all children of the current task. */
1498
1499 void
1500 GOMP_taskwait (void)
1501 {
1502 struct gomp_thread *thr = gomp_thread ();
1503 struct gomp_team *team = thr->ts.team;
1504 struct gomp_task *task = thr->task;
1505 struct gomp_task *child_task = NULL;
1506 struct gomp_task *to_free = NULL;
1507 struct gomp_taskwait taskwait;
1508 int do_wake = 0;
1509
1510 /* The acquire barrier on load of task->children here synchronizes
1511 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1512 not necessary that we synchronize with other non-NULL writes at
1513 this point, but we must ensure that all writes to memory by a
1514 child thread task work function are seen before we exit from
1515 GOMP_taskwait. */
1516 if (task == NULL
1517 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1518 return;
1519
1520 memset (&taskwait, 0, sizeof (taskwait));
1521 bool child_q = false;
1522 gomp_mutex_lock (&team->task_lock);
1523 while (1)
1524 {
1525 bool cancelled = false;
1526 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1527 {
1528 bool destroy_taskwait = task->taskwait != NULL;
1529 task->taskwait = NULL;
1530 gomp_mutex_unlock (&team->task_lock);
1531 if (to_free)
1532 {
1533 gomp_finish_task (to_free);
1534 free (to_free);
1535 }
1536 if (destroy_taskwait)
1537 gomp_sem_destroy (&taskwait.taskwait_sem);
1538 return;
1539 }
1540 struct gomp_task *next_task
1541 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1542 PQ_TEAM, &team->task_queue, &child_q);
1543 if (next_task->kind == GOMP_TASK_WAITING)
1544 {
1545 child_task = next_task;
1546 cancelled
1547 = gomp_task_run_pre (child_task, task, team);
1548 if (__builtin_expect (cancelled, 0))
1549 {
1550 if (to_free)
1551 {
1552 gomp_finish_task (to_free);
1553 free (to_free);
1554 to_free = NULL;
1555 }
1556 goto finish_cancelled;
1557 }
1558 }
1559 else
1560 {
1561 /* All tasks we are waiting for are either running in other
1562 threads, or they are tasks that have not had their
1563 dependencies met (so they're not even in the queue). Wait
1564 for them. */
1565 if (task->taskwait == NULL)
1566 {
1567 taskwait.in_depend_wait = false;
1568 gomp_sem_init (&taskwait.taskwait_sem, 0);
1569 task->taskwait = &taskwait;
1570 }
1571 taskwait.in_taskwait = true;
1572 }
1573 gomp_mutex_unlock (&team->task_lock);
1574 if (do_wake)
1575 {
1576 gomp_team_barrier_wake (&team->barrier, do_wake);
1577 do_wake = 0;
1578 }
1579 if (to_free)
1580 {
1581 gomp_finish_task (to_free);
1582 free (to_free);
1583 to_free = NULL;
1584 }
1585 if (child_task)
1586 {
1587 thr->task = child_task;
1588 if (__builtin_expect (child_task->fn == NULL, 0))
1589 {
1590 if (gomp_target_task_fn (child_task->fn_data))
1591 {
1592 thr->task = task;
1593 gomp_mutex_lock (&team->task_lock);
1594 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1595 struct gomp_target_task *ttask
1596 = (struct gomp_target_task *) child_task->fn_data;
1597 /* If GOMP_PLUGIN_target_task_completion has run already
1598 in between gomp_target_task_fn and the mutex lock,
1599 perform the requeuing here. */
1600 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1601 gomp_target_task_completion (team, child_task);
1602 else
1603 ttask->state = GOMP_TARGET_TASK_RUNNING;
1604 child_task = NULL;
1605 continue;
1606 }
1607 }
1608 else
1609 child_task->fn (child_task->fn_data);
1610 thr->task = task;
1611 }
1612 else
1613 gomp_sem_wait (&taskwait.taskwait_sem);
1614 gomp_mutex_lock (&team->task_lock);
1615 if (child_task)
1616 {
1617 finish_cancelled:;
1618 size_t new_tasks
1619 = gomp_task_run_post_handle_depend (child_task, team);
1620
1621 if (child_q)
1622 {
1623 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1624 child_task, MEMMODEL_RELAXED);
1625 child_task->pnode[PQ_CHILDREN].next = NULL;
1626 child_task->pnode[PQ_CHILDREN].prev = NULL;
1627 }
1628
1629 gomp_clear_parent (&child_task->children_queue);
1630
1631 gomp_task_run_post_remove_taskgroup (child_task);
1632
1633 to_free = child_task;
1634 child_task = NULL;
1635 team->task_count--;
1636 if (new_tasks > 1)
1637 {
1638 do_wake = team->nthreads - team->task_running_count
1639 - !task->in_tied_task;
1640 if (do_wake > new_tasks)
1641 do_wake = new_tasks;
1642 }
1643 }
1644 }
1645 }
1646
1647 /* Called when encountering a taskwait directive with depend clause(s).
1648	 Wait as if it were a mergeable included task construct with an empty body.  */
1649
1650 void
1651 GOMP_taskwait_depend (void **depend)
1652 {
1653 struct gomp_thread *thr = gomp_thread ();
1654 struct gomp_team *team = thr->ts.team;
1655
1656 /* If parallel or taskgroup has been cancelled, return early. */
1657 if (__builtin_expect (gomp_cancel_var, 0) && team)
1658 {
1659 if (gomp_team_barrier_cancelled (&team->barrier))
1660 return;
1661 if (thr->task->taskgroup)
1662 {
1663 if (thr->task->taskgroup->cancelled)
1664 return;
1665 if (thr->task->taskgroup->workshare
1666 && thr->task->taskgroup->prev
1667 && thr->task->taskgroup->prev->cancelled)
1668 return;
1669 }
1670 }
1671
1672 if (thr->task && thr->task->depend_hash)
1673 gomp_task_maybe_wait_for_dependencies (depend);
1674 }
1675
1676 /* An undeferred task is about to run. Wait for all tasks that this
1677 undeferred task depends on.
1678
1679 This is done by first putting all known ready dependencies
1680 (dependencies that have their own dependencies met) at the top of
1681 the scheduling queues. Then we iterate through these imminently
1682 ready tasks (and possibly other high priority tasks), and run them.
1683 If we run out of ready dependencies to execute, we either wait for
1684 the remaining dependencies to finish, or wait for them to get
1685 scheduled so we can run them.
1686
1687 DEPEND is as in GOMP_task. */
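/* For example, a construct such as

     #pragma omp taskwait depend(in: x)

   reaches this function via GOMP_taskwait_depend and only returns once
   every earlier sibling task registered with depend(out: x) or
   depend(inout: x) has completed; an undeferred task with depend clauses
   takes the same route from GOMP_task.  */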
1688
1689 void
1690 gomp_task_maybe_wait_for_dependencies (void **depend)
1691 {
1692 struct gomp_thread *thr = gomp_thread ();
1693 struct gomp_task *task = thr->task;
1694 struct gomp_team *team = thr->ts.team;
1695 struct gomp_task_depend_entry elem, *ent = NULL;
1696 struct gomp_taskwait taskwait;
1697 size_t orig_ndepend = (uintptr_t) depend[0];
1698 size_t nout = (uintptr_t) depend[1];
1699 size_t ndepend = orig_ndepend;
1700 size_t normal = ndepend;
1701 size_t n = 2;
1702 size_t i;
1703 size_t num_awaited = 0;
1704 struct gomp_task *child_task = NULL;
1705 struct gomp_task *to_free = NULL;
1706 int do_wake = 0;
1707
1708 if (ndepend == 0)
1709 {
1710 ndepend = nout;
1711 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1712 normal = nout + (uintptr_t) depend[4];
1713 n = 5;
1714 }
1715 gomp_mutex_lock (&team->task_lock);
1716 for (i = 0; i < ndepend; i++)
1717 {
1718 elem.addr = depend[i + n];
1719 elem.is_in = i >= nout;
1720 if (__builtin_expect (i >= normal, 0))
1721 {
1722 void **d = (void **) elem.addr;
1723 switch ((uintptr_t) d[1])
1724 {
1725 case GOMP_DEPEND_IN:
1726 break;
1727 case GOMP_DEPEND_OUT:
1728 case GOMP_DEPEND_INOUT:
1729 case GOMP_DEPEND_MUTEXINOUTSET:
1730 elem.is_in = 0;
1731 break;
1732 default:
1733 gomp_fatal ("unknown omp_depend_t dependence type %d",
1734 (int) (uintptr_t) d[1]);
1735 }
1736 elem.addr = d[0];
1737 }
1738 ent = htab_find (task->depend_hash, &elem);
1739 for (; ent; ent = ent->next)
1740 if (elem.is_in && ent->is_in)
1741 continue;
1742 else
1743 {
1744 struct gomp_task *tsk = ent->task;
1745 if (!tsk->parent_depends_on)
1746 {
1747 tsk->parent_depends_on = true;
1748 ++num_awaited;
1749 /* If dependency TSK itself has no dependencies and is
1750 ready to run, move it up front so that we run it as
1751 soon as possible. */
1752 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1753 priority_queue_upgrade_task (tsk, task);
1754 }
1755 }
1756 }
1757 if (num_awaited == 0)
1758 {
1759 gomp_mutex_unlock (&team->task_lock);
1760 return;
1761 }
1762
1763 memset (&taskwait, 0, sizeof (taskwait));
1764 taskwait.n_depend = num_awaited;
1765 gomp_sem_init (&taskwait.taskwait_sem, 0);
1766 task->taskwait = &taskwait;
1767
1768 while (1)
1769 {
1770 bool cancelled = false;
1771 if (taskwait.n_depend == 0)
1772 {
1773 task->taskwait = NULL;
1774 gomp_mutex_unlock (&team->task_lock);
1775 if (to_free)
1776 {
1777 gomp_finish_task (to_free);
1778 free (to_free);
1779 }
1780 gomp_sem_destroy (&taskwait.taskwait_sem);
1781 return;
1782 }
1783
1784 /* Theoretically when we have multiple priorities, we should
1785	 choose between the highest priority item in
1786 task->children_queue and team->task_queue here, so we should
1787 use priority_queue_next_task(). However, since we are
1788 running an undeferred task, perhaps that makes all tasks it
1789 depends on undeferred, thus a priority of INF? This would
1790 make it unnecessary to take anything into account here,
1791	 other than the dependencies.
1792
1793 On the other hand, if we want to use priority_queue_next_task(),
1794 care should be taken to only use priority_queue_remove()
1795 below if the task was actually removed from the children
1796 queue. */
1797 bool ignored;
1798 struct gomp_task *next_task
1799 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1800 PQ_IGNORED, NULL, &ignored);
1801
1802 if (next_task->kind == GOMP_TASK_WAITING)
1803 {
1804 child_task = next_task;
1805 cancelled
1806 = gomp_task_run_pre (child_task, task, team);
1807 if (__builtin_expect (cancelled, 0))
1808 {
1809 if (to_free)
1810 {
1811 gomp_finish_task (to_free);
1812 free (to_free);
1813 to_free = NULL;
1814 }
1815 goto finish_cancelled;
1816 }
1817 }
1818 else
1819 /* All tasks we are waiting for are either running in other
1820 threads, or they are tasks that have not had their
1821 dependencies met (so they're not even in the queue). Wait
1822 for them. */
1823 taskwait.in_depend_wait = true;
1824 gomp_mutex_unlock (&team->task_lock);
1825 if (do_wake)
1826 {
1827 gomp_team_barrier_wake (&team->barrier, do_wake);
1828 do_wake = 0;
1829 }
1830 if (to_free)
1831 {
1832 gomp_finish_task (to_free);
1833 free (to_free);
1834 to_free = NULL;
1835 }
1836 if (child_task)
1837 {
1838 thr->task = child_task;
1839 if (__builtin_expect (child_task->fn == NULL, 0))
1840 {
1841 if (gomp_target_task_fn (child_task->fn_data))
1842 {
1843 thr->task = task;
1844 gomp_mutex_lock (&team->task_lock);
1845 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1846 struct gomp_target_task *ttask
1847 = (struct gomp_target_task *) child_task->fn_data;
1848 /* If GOMP_PLUGIN_target_task_completion has run already
1849 in between gomp_target_task_fn and the mutex lock,
1850 perform the requeuing here. */
1851 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1852 gomp_target_task_completion (team, child_task);
1853 else
1854 ttask->state = GOMP_TARGET_TASK_RUNNING;
1855 child_task = NULL;
1856 continue;
1857 }
1858 }
1859 else
1860 child_task->fn (child_task->fn_data);
1861 thr->task = task;
1862 }
1863 else
1864 gomp_sem_wait (&taskwait.taskwait_sem);
1865 gomp_mutex_lock (&team->task_lock);
1866 if (child_task)
1867 {
1868 finish_cancelled:;
1869 size_t new_tasks
1870 = gomp_task_run_post_handle_depend (child_task, team);
1871 if (child_task->parent_depends_on)
1872 --taskwait.n_depend;
1873
1874 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1875 child_task, MEMMODEL_RELAXED);
1876 child_task->pnode[PQ_CHILDREN].next = NULL;
1877 child_task->pnode[PQ_CHILDREN].prev = NULL;
1878
1879 gomp_clear_parent (&child_task->children_queue);
1880 gomp_task_run_post_remove_taskgroup (child_task);
1881 to_free = child_task;
1882 child_task = NULL;
1883 team->task_count--;
1884 if (new_tasks > 1)
1885 {
1886 do_wake = team->nthreads - team->task_running_count
1887 - !task->in_tied_task;
1888 if (do_wake > new_tasks)
1889 do_wake = new_tasks;
1890 }
1891 }
1892 }
1893 }
1894
1895 /* Called when encountering a taskyield directive. */
1896
1897 void
1898 GOMP_taskyield (void)
1899 {
1900 /* Nothing at the moment. */
1901 }
1902
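/* Allocate and initialize a new taskgroup whose parent is PREV.
   Task reductions registered on PREV are inherited by the new
   taskgroup.  */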
1903 static inline struct gomp_taskgroup *
1904 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1905 {
1906 struct gomp_taskgroup *taskgroup
1907 = gomp_malloc (sizeof (struct gomp_taskgroup));
1908 taskgroup->prev = prev;
1909 priority_queue_init (&taskgroup->taskgroup_queue);
1910 taskgroup->reductions = prev ? prev->reductions : NULL;
1911 taskgroup->in_taskgroup_wait = false;
1912 taskgroup->cancelled = false;
1913 taskgroup->workshare = false;
1914 taskgroup->num_children = 0;
1915 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1916 return taskgroup;
1917 }
1918
1919 void
1920 GOMP_taskgroup_start (void)
1921 {
1922 struct gomp_thread *thr = gomp_thread ();
1923 struct gomp_team *team = thr->ts.team;
1924 struct gomp_task *task = thr->task;
1925
1926 /* If team is NULL, all tasks are executed as
1927 GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1928 taskgroup and their descendant tasks will have finished
1929 by the time GOMP_taskgroup_end is called. */
1930 if (team == NULL)
1931 return;
1932 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1933 }
1934
1935 void
1936 GOMP_taskgroup_end (void)
1937 {
1938 struct gomp_thread *thr = gomp_thread ();
1939 struct gomp_team *team = thr->ts.team;
1940 struct gomp_task *task = thr->task;
1941 struct gomp_taskgroup *taskgroup;
1942 struct gomp_task *child_task = NULL;
1943 struct gomp_task *to_free = NULL;
1944 int do_wake = 0;
1945
1946 if (team == NULL)
1947 return;
1948 taskgroup = task->taskgroup;
1949 if (__builtin_expect (taskgroup == NULL, 0)
1950 && thr->ts.level == 0)
1951 {
1952 /* This can happen if GOMP_taskgroup_start is called when
1953 thr->ts.team == NULL, but inside the taskgroup there
1954 is a #pragma omp target nowait that creates an implicit
1955 team with a single thread. In this case, we want to wait
1956 for all outstanding tasks in this team. */
1957 gomp_team_barrier_wait (&team->barrier);
1958 return;
1959 }
1960
1961 /* The acquire barrier on load of taskgroup->num_children here
1962 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1963 It is not necessary that we synchronize with other non-0 writes at
1964 this point, but we must ensure that all writes to memory by a
1965 child thread task work function are seen before we exit from
1966 GOMP_taskgroup_end. */
1967 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1968 goto finish;
1969
1970 bool unused;
1971 gomp_mutex_lock (&team->task_lock);
1972 while (1)
1973 {
1974 bool cancelled = false;
1975 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1976 MEMMODEL_RELAXED))
1977 {
1978 if (taskgroup->num_children)
1979 {
1980 if (priority_queue_empty_p (&task->children_queue,
1981 MEMMODEL_RELAXED))
1982 goto do_wait;
1983 child_task
1984 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1985 PQ_TEAM, &team->task_queue,
1986 &unused);
1987 }
1988 else
1989 {
1990 gomp_mutex_unlock (&team->task_lock);
1991 if (to_free)
1992 {
1993 gomp_finish_task (to_free);
1994 free (to_free);
1995 }
1996 goto finish;
1997 }
1998 }
1999 else
2000 child_task
2001 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2002 PQ_TEAM, &team->task_queue, &unused);
2003 if (child_task->kind == GOMP_TASK_WAITING)
2004 {
2005 cancelled
2006 = gomp_task_run_pre (child_task, child_task->parent, team);
2007 if (__builtin_expect (cancelled, 0))
2008 {
2009 if (to_free)
2010 {
2011 gomp_finish_task (to_free);
2012 free (to_free);
2013 to_free = NULL;
2014 }
2015 goto finish_cancelled;
2016 }
2017 }
2018 else
2019 {
2020 child_task = NULL;
2021 do_wait:
2022 /* All tasks we are waiting for are either running in other
2023 threads, or they are tasks that have not had their
2024 dependencies met (so they're not even in the queue). Wait
2025 for them. */
2026 taskgroup->in_taskgroup_wait = true;
2027 }
2028 gomp_mutex_unlock (&team->task_lock);
2029 if (do_wake)
2030 {
2031 gomp_team_barrier_wake (&team->barrier, do_wake);
2032 do_wake = 0;
2033 }
2034 if (to_free)
2035 {
2036 gomp_finish_task (to_free);
2037 free (to_free);
2038 to_free = NULL;
2039 }
2040 if (child_task)
2041 {
2042 thr->task = child_task;
2043 if (__builtin_expect (child_task->fn == NULL, 0))
2044 {
2045 if (gomp_target_task_fn (child_task->fn_data))
2046 {
2047 thr->task = task;
2048 gomp_mutex_lock (&team->task_lock);
2049 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2050 struct gomp_target_task *ttask
2051 = (struct gomp_target_task *) child_task->fn_data;
2052 /* If GOMP_PLUGIN_target_task_completion has run already
2053 in between gomp_target_task_fn and the mutex lock,
2054 perform the requeuing here. */
2055 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2056 gomp_target_task_completion (team, child_task);
2057 else
2058 ttask->state = GOMP_TARGET_TASK_RUNNING;
2059 child_task = NULL;
2060 continue;
2061 }
2062 }
2063 else
2064 child_task->fn (child_task->fn_data);
2065 thr->task = task;
2066 }
2067 else
2068 gomp_sem_wait (&taskgroup->taskgroup_sem);
2069 gomp_mutex_lock (&team->task_lock);
2070 if (child_task)
2071 {
2072 finish_cancelled:;
2073 size_t new_tasks
2074 = gomp_task_run_post_handle_depend (child_task, team);
2075 gomp_task_run_post_remove_parent (child_task);
2076 gomp_clear_parent (&child_task->children_queue);
2077 gomp_task_run_post_remove_taskgroup (child_task);
2078 to_free = child_task;
2079 child_task = NULL;
2080 team->task_count--;
2081 if (new_tasks > 1)
2082 {
2083 do_wake = team->nthreads - team->task_running_count
2084 - !task->in_tied_task;
2085 if (do_wake > new_tasks)
2086 do_wake = new_tasks;
2087 }
2088 }
2089 }
2090
2091 finish:
2092 task->taskgroup = taskgroup->prev;
2093 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2094 free (taskgroup);
2095 }
2096
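/* Register the task reduction arrays in DATA (a chain linked through the
   next pointers, see the layout description before
   GOMP_taskgroup_reduction_register below), chaining them onto OLD.  For
   worksharing task reductions ORIG is non-NULL and points to arrays whose
   per-thread memory has already been allocated by another thread;
   otherwise private storage for NTHREADS threads is allocated here.  A
   hash table over all registered reduction variables is built so that
   GOMP_task_reduction_remap can map an original address to the matching
   per-thread copy.  */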
2097 static inline __attribute__((always_inline)) void
2098 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2099 unsigned nthreads)
2100 {
2101 size_t total_cnt = 0;
2102 uintptr_t *d = data;
2103 struct htab *old_htab = NULL, *new_htab;
2104 do
2105 {
2106 if (__builtin_expect (orig != NULL, 0))
2107 {
2108 /* For worksharing task reductions, memory has been allocated
2109 already by some other thread that encountered the construct
2110 earlier. */
2111 d[2] = orig[2];
2112 d[6] = orig[6];
2113 orig = (uintptr_t *) orig[4];
2114 }
2115 else
2116 {
2117 size_t sz = d[1] * nthreads;
2118 /* Should use omp_alloc if d[3] is not -1. */
2119 void *ptr = gomp_aligned_alloc (d[2], sz);
2120 memset (ptr, '\0', sz);
2121 d[2] = (uintptr_t) ptr;
2122 d[6] = d[2] + sz;
2123 }
2124 d[5] = 0;
2125 total_cnt += d[0];
2126 if (d[4] == 0)
2127 {
2128 d[4] = (uintptr_t) old;
2129 break;
2130 }
2131 else
2132 d = (uintptr_t *) d[4];
2133 }
2134 while (1);
2135 if (old && old[5])
2136 {
2137 old_htab = (struct htab *) old[5];
2138 total_cnt += htab_elements (old_htab);
2139 }
2140 new_htab = htab_create (total_cnt);
2141 if (old_htab)
2142 {
2143 /* Copy old hash table, like in htab_expand. */
2144 hash_entry_type *p, *olimit;
2145 new_htab->n_elements = htab_elements (old_htab);
2146 olimit = old_htab->entries + old_htab->size;
2147 p = old_htab->entries;
2148 do
2149 {
2150 hash_entry_type x = *p;
2151 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2152 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2153 p++;
2154 }
2155 while (p < olimit);
2156 }
2157 d = data;
2158 do
2159 {
2160 size_t j;
2161 for (j = 0; j < d[0]; ++j)
2162 {
2163 uintptr_t *p = d + 7 + j * 3;
2164 p[2] = (uintptr_t) d;
2165 /* Ugly hack: hash_entry_type is defined for the task dependencies,
2166 which hash on their first element, a pointer. Here we likewise
2167 need to hash on the first sizeof (uintptr_t) bytes, which also
2168 contain a pointer. Hide the cast from the compiler. */
2169 hash_entry_type n;
2170 __asm ("" : "=g" (n) : "0" (p));
2171 *htab_find_slot (&new_htab, n, INSERT) = n;
2172 }
2173 if (d[4] == (uintptr_t) old)
2174 break;
2175 else
2176 d = (uintptr_t *) d[4];
2177 }
2178 while (1);
2179 d[5] = (uintptr_t) new_htab;
2180 }
2181
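/* Create an artificial team of a single thread, so that entry points
   which need a team and an implicit task (e.g. orphaned taskgroup
   reductions) can be used outside of any parallel region.  */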
2182 static void
2183 gomp_create_artificial_team (void)
2184 {
2185 struct gomp_thread *thr = gomp_thread ();
2186 struct gomp_task_icv *icv;
2187 struct gomp_team *team = gomp_new_team (1);
2188 struct gomp_task *task = thr->task;
2189 icv = task ? &task->icv : &gomp_global_icv;
2190 team->prev_ts = thr->ts;
2191 thr->ts.team = team;
2192 thr->ts.team_id = 0;
2193 thr->ts.work_share = &team->work_shares[0];
2194 thr->ts.last_work_share = NULL;
2195 #ifdef HAVE_SYNC_BUILTINS
2196 thr->ts.single_count = 0;
2197 #endif
2198 thr->ts.static_trip = 0;
2199 thr->task = &team->implicit_task[0];
2200 gomp_init_task (thr->task, NULL, icv);
2201 if (task)
2202 {
2203 thr->task = task;
2204 gomp_end_task ();
2205 free (task);
2206 thr->task = &team->implicit_task[0];
2207 }
2208 #ifdef LIBGOMP_USE_PTHREADS
2209 else
2210 pthread_setspecific (gomp_thread_destructor, thr);
2211 #endif
2212 }
2213
2214 /* The format of data is:
2215 data[0] cnt
2216 data[1] size
2217 data[2] alignment (on output array pointer)
2218 data[3] allocator (-1 if malloc allocator)
2219 data[4] next pointer
2220 data[5] used internally (htab pointer)
2221 data[6] used internally (end of array)
2222 cnt times
2223 ent[0] address
2224 ent[1] offset
2225 ent[2] used internally (pointer to data[0])
2226 The entries are sorted by increasing offset, so that a binary
2227 search can be performed. Normally, data[8] is 0; the exception is
2228 worksharing construct task reductions in a cancellable parallel,
2229 where at offset 0 there should be space for a pointer and an integer
2230 which are used internally. */
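/* As an illustrative example (the numbers are hypothetical): with cnt = 1,
   a single 8-byte reduction variable and nthreads = 4,
   gomp_reduction_register allocates 4 * 8 bytes, stores the base address
   in data[2] and the end in data[6]; the thread ID then determines where
   each private copy lives, data[2] + id * data[1] + offset, which is
   exactly the computation GOMP_task_reduction_remap performs below.  */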
2231
2232 void
2233 GOMP_taskgroup_reduction_register (uintptr_t *data)
2234 {
2235 struct gomp_thread *thr = gomp_thread ();
2236 struct gomp_team *team = thr->ts.team;
2237 struct gomp_task *task;
2238 unsigned nthreads;
2239 if (__builtin_expect (team == NULL, 0))
2240 {
2241 /* The task reduction code needs a team and task, so for
2242 orphaned taskgroups just create the implicit team. */
2243 gomp_create_artificial_team ();
2244 ialias_call (GOMP_taskgroup_start) ();
2245 team = thr->ts.team;
2246 }
2247 nthreads = team->nthreads;
2248 task = thr->task;
2249 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2250 task->taskgroup->reductions = data;
2251 }
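/* User-level sketch of what drives the entry point above and its
   unregister counterpart below (the actual calls are emitted by the
   compiler; f is a hypothetical function):

     int x = 0;
     #pragma omp taskgroup task_reduction (+: x)
     {
       #pragma omp task in_reduction (+: x)
       x += f ();
     }
 */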
2252
2253 void
2254 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2255 {
2256 uintptr_t *d = data;
2257 htab_free ((struct htab *) data[5]);
2258 do
2259 {
2260 gomp_aligned_free ((void *) d[2]);
2261 d = (uintptr_t *) d[4];
2262 }
2263 while (d && !d[5]);
2264 }
2265 ialias (GOMP_taskgroup_reduction_unregister)
2266
2267 /* For i = 0 to cnt-1, remap ptrs[i], which is either the address of
2268 the original list item or the address of a previously remapped
2269 original list item, to the address of the private copy and store
2270 that to ptrs[i]. For i < cntorig, additionally set ptrs[cnt+i] to
2271 the address of the original list item. */
2272
2273 void
2274 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2275 {
2276 struct gomp_thread *thr = gomp_thread ();
2277 struct gomp_task *task = thr->task;
2278 unsigned id = thr->ts.team_id;
2279 uintptr_t *data = task->taskgroup->reductions;
2280 uintptr_t *d;
2281 struct htab *reduction_htab = (struct htab *) data[5];
2282 size_t i;
2283 for (i = 0; i < cnt; ++i)
2284 {
2285 hash_entry_type ent, n;
2286 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2287 n = htab_find (reduction_htab, ent);
2288 if (n)
2289 {
2290 uintptr_t *p;
2291 __asm ("" : "=g" (p) : "0" (n));
2292 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2293 p[1] is the offset within the allocated chunk for each
2294 thread, p[2] is the array registered with
2295 GOMP_taskgroup_reduction_register, d[2] is the base of the
2296 allocated memory and d[1] is the size of the allocated chunk
2297 for one thread. */
2298 d = (uintptr_t *) p[2];
2299 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2300 if (__builtin_expect (i < cntorig, 0))
2301 ptrs[cnt + i] = (void *) p[0];
2302 continue;
2303 }
2304 d = data;
2305 while (d != NULL)
2306 {
2307 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2308 break;
2309 d = (uintptr_t *) d[4];
2310 }
2311 if (d == NULL)
2312 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2313 "task modifier for %p", ptrs[i]);
2314 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2315 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2316 if (__builtin_expect (i < cntorig, 0))
2317 {
2318 size_t lo = 0, hi = d[0] - 1;
2319 while (lo <= hi)
2320 {
2321 size_t m = (lo + hi) / 2;
2322 if (d[7 + 3 * m + 1] < off)
2323 lo = m + 1;
2324 else if (d[7 + 3 * m + 1] == off)
2325 {
2326 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2327 break;
2328 }
2329 else
2330 hi = m - 1;
2331 }
2332 if (lo > hi)
2333 gomp_fatal ("couldn't find matching task_reduction or reduction "
2334 "with task modifier for %p", ptrs[i]);
2335 }
2336 }
2337 }
2338
2339 struct gomp_taskgroup *
2340 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2341 {
2342 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2343 gomp_reduction_register (data, NULL, NULL, nthreads);
2344 taskgroup->reductions = data;
2345 return taskgroup;
2346 }
2347
2348 void
2349 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2350 {
2351 struct gomp_thread *thr = gomp_thread ();
2352 struct gomp_team *team = thr->ts.team;
2353 struct gomp_task *task = thr->task;
2354 unsigned nthreads = team->nthreads;
2355 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2356 task->taskgroup->reductions = data;
2357 }
2358
2359 void
2360 gomp_workshare_taskgroup_start (void)
2361 {
2362 struct gomp_thread *thr = gomp_thread ();
2363 struct gomp_team *team = thr->ts.team;
2364 struct gomp_task *task;
2365
2366 if (team == NULL)
2367 {
2368 gomp_create_artificial_team ();
2369 team = thr->ts.team;
2370 }
2371 task = thr->task;
2372 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2373 task->taskgroup->workshare = true;
2374 }
2375
2376 void
2377 GOMP_workshare_task_reduction_unregister (bool cancelled)
2378 {
2379 struct gomp_thread *thr = gomp_thread ();
2380 struct gomp_task *task = thr->task;
2381 struct gomp_team *team = thr->ts.team;
2382 uintptr_t *data = task->taskgroup->reductions;
2383 ialias_call (GOMP_taskgroup_end) ();
2384 if (thr->ts.team_id == 0)
2385 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2386 else
2387 htab_free ((struct htab *) data[5]);
2388
2389 if (!cancelled)
2390 gomp_team_barrier_wait (&team->barrier);
2391 }
2392
2393 int
2394 omp_in_final (void)
2395 {
2396 struct gomp_thread *thr = gomp_thread ();
2397 return thr->task && thr->task->final_task;
2398 }
2399
2400 ialias (omp_in_final)
2401
2402 void
2403 omp_fulfill_event (omp_event_handle_t event)
2404 {
2405 gomp_sem_t *sem = (gomp_sem_t *) event;
2406 struct gomp_thread *thr = gomp_thread ();
2407 struct gomp_team *team = thr ? thr->ts.team : NULL;
2408
2409 if (gomp_sem_getcount (sem) > 0)
2410 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", sem);
2411
2412 gomp_debug (0, "omp_fulfill_event: %p\n", sem);
2413 gomp_sem_post (sem);
2414 if (team)
2415 gomp_team_barrier_wake (&team->barrier, 1);
2416 }
2417
2418 ialias (omp_fulfill_event)
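/* User-level sketch of the detach clause that pairs with
   omp_fulfill_event above.  begin_async is a hypothetical helper that
   stashes EV and later, e.g. from a completion callback, calls
   omp_fulfill_event (ev); only then is the detached task considered
   complete:

     omp_event_handle_t ev;
     #pragma omp task detach (ev)
       begin_async (ev);
 */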