2014-08-04  Jakub Jelinek  <jakub@redhat.com>

	* task.c (GOMP_taskgroup_end): If taskgroup->num_children
	is not zero, but taskgroup->children is NULL and there are
	any task->children, schedule those instead of waiting.

2014-08-01  Jakub Jelinek  <jakub@redhat.com>

	* libgomp.h (struct gomp_task_depend_entry): Add redundant_out field.
	(struct gomp_taskwait): New type.
	(struct gomp_task): Add taskwait and parent_depends_on, remove
	in_taskwait and taskwait_sem fields.
	(gomp_finish_task): Don't destroy taskwait_sem.
	* task.c (gomp_init_task): Don't init in_taskwait, instead init
	taskwait and parent_depends_on.
	(GOMP_task): For if (0) tasks with depend clause that depend on
	earlier tasks, don't defer them; instead call
	gomp_task_maybe_wait_for_dependencies to wait for the dependencies.
	Initialize redundant_out field; for redundant out entries just
	move them to the end of the linked list instead of removing them
	completely, and set redundant_out flag instead of redundant.
	(gomp_task_run_pre): Update last_parent_depends_on if scheduling
	that task.
	(gomp_task_run_post_handle_dependers): If parent is in
	gomp_task_maybe_wait_for_dependencies and the newly runnable task
	is not parent_depends_on, queue it in the parent->children linked
	list after all runnable tasks with parent_depends_on set.
	Adjust for addition of taskwait indirection.
	(gomp_task_run_post_remove_parent): If parent is in
	gomp_task_maybe_wait_for_dependencies and the task to be removed
	is parent_depends_on, decrement n_depend and if needed awake
	parent.  Adjust for addition of taskwait indirection.
	(GOMP_taskwait): Adjust for addition of taskwait indirection.
	(gomp_task_maybe_wait_for_dependencies): New function.

2013-10-14  Jakub Jelinek  <jakub@redhat.com>

	* env.c (parse_bind_var): Initialize value to avoid a
	(false positive) warning.

2013-10-12  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/58691
	* config/linux/proc.c (gomp_cpuset_popcount): Add unused attribute
	to check variable.
	(gomp_init_num_threads): Move i variable declaration into
	#ifdef CPU_ALLOC_SIZE block.
	* config/linux/affinity.c (gomp_affinity_init_level): Test
	gomp_places_list_len == 0 rather than gomp_places_list == 0
	when checking for topology reading error.
	* team.c (gomp_team_start): Don't handle bind == omp_proc_bind_false.
	* env.c (parse_affinity): Add ignore argument; if true, don't populate
	gomp_places_list, only parse the env var and always return false.
	(parse_places_var): Likewise.  Don't check gomp_global_icv.bind_var.
	(initialize_env): Always parse OMP_PLACES and GOMP_CPU_AFFINITY env
	vars, default to OMP_PROC_BIND=true if OMP_PROC_BIND wasn't specified
	and either of these variables was parsed correctly into a places
	list.

2013-10-11  Thomas Schwinge  <thomas@codesourcery.com>

	* testsuite/libgomp.c/lib-1.c (main): Add missing error check.
	* testsuite/libgomp.fortran/lib1.f90: Likewise.
	* testsuite/libgomp.fortran/lib2.f: Likewise.
	* testsuite/libgomp.fortran/lib3.f: Likewise.

2013-10-11  Jakub Jelinek  <jakub@redhat.com>
	    Tobias Burnus  <burnus@net-b.de>
	    Richard Henderson  <rth@redhat.com>

	* target.c: New file.
	* Makefile.am (libgomp_la_SOURCES): Add target.c.
	* Makefile.in: Regenerated.
	* libgomp_g.h (GOMP_task): Add depend argument.
	(GOMP_barrier_cancel, GOMP_loop_end_cancel,
	GOMP_sections_end_cancel, GOMP_target, GOMP_target_data,
	GOMP_target_end_data, GOMP_target_update, GOMP_teams,
	GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic,
	GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime,
	GOMP_parallel, GOMP_cancel, GOMP_cancellation_point,
	GOMP_taskgroup_start, GOMP_taskgroup_end,
	GOMP_parallel_sections): New prototypes.
	* fortran.c (omp_is_initial_device): Add ialias_redirect.
	(omp_is_initial_device_): New function.
	(ULP, STR1, STR2, ialias_redirect): Removed.
	(omp_get_cancellation_, omp_get_proc_bind_, omp_set_default_device_,
	omp_set_default_device_8_, omp_get_default_device_,
	omp_get_num_devices_, omp_get_num_teams_, omp_get_team_num_): New
	functions.
	* libgomp.map (GOMP_barrier_cancel, GOMP_loop_end_cancel,
	GOMP_sections_end_cancel, GOMP_target, GOMP_target_data,
	GOMP_target_end_data, GOMP_target_update, GOMP_teams): Export
	@@GOMP_4.0.
	(omp_is_initial_device, omp_is_initial_device_, omp_get_cancellation,
	omp_get_cancellation_, omp_get_proc_bind, omp_get_proc_bind_,
	omp_set_default_device, omp_set_default_device_,
	omp_set_default_device_8_, omp_get_default_device,
	omp_get_default_device_, omp_get_num_devices, omp_get_num_devices_,
	omp_get_num_teams, omp_get_num_teams_, omp_get_team_num,
	omp_get_team_num_): Export @@OMP_4.0.
	* team.c (struct gomp_thread_start_data): Add place field.
	(gomp_thread_start): Clear thr->thread_pool and
	thr->task before returning.  Use gomp_team_barrier_wait_final
	instead of gomp_team_barrier_wait.  Initialize thr->place.
	(gomp_new_team): Initialize work_shares_to_free, work_share_cancelled,
	team_cancelled and task_queued_count fields.
	(gomp_free_pool_helper): Clear thr->thread_pool and thr->task
	before calling pthread_exit.
	(gomp_free_thread): No longer static.  Use
	gomp_managed_threads_lock instead of gomp_remaining_threads_lock.
	(gomp_team_start): Add flags argument.  Set
	thr->thread_pool->threads_busy to nthreads immediately after creating
	new pool.  Use gomp_managed_threads_lock instead of
	gomp_remaining_threads_lock.  Handle OpenMP 4.0 affinity.
	(gomp_team_end): Use gomp_managed_threads_lock instead of
	gomp_remaining_threads_lock.  Use gomp_team_barrier_wait_final instead
	of gomp_team_barrier_wait.  If team->team_cancelled, call
	gomp_fini_work_share on the ws chain starting at
	team->work_shares_to_free rather than thr->ts.work_share.
	(initialize_team): Don't call gomp_sem_init here.
	* sections.c (GOMP_parallel_sections_start): Adjust gomp_team_start
	caller.
	(GOMP_parallel_sections, GOMP_sections_end_cancel): New functions.
	* env.c (gomp_global_icv): Add default_device_var, target_data and
	bind_var initializers.
	(gomp_cpu_affinity, gomp_cpu_affinity_len): Remove.
	(gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list,
	gomp_places_list_len): New variables.
	(parse_bind_var, parse_one_place, parse_places_var): New functions.
	(parse_affinity): Rewritten to construct OMP_PLACES list with unit
	sized places.
	(gomp_cancel_var): New global variable.
	(parse_int): New function.
	(handle_omp_display_env): New function.
	(initialize_env): Use it.  Initialize default_device_var.
	Parse OMP_CANCELLATION env var.  Use parse_bind_var to parse
	OMP_PROC_BIND instead of parse_boolean.  Use parse_places_var for
	OMP_PLACES parsing.  Don't call parse_affinity if OMP_PLACES has
	been successfully parsed (and call gomp_init_affinity in that case).
	(omp_get_cancellation, omp_get_proc_bind, omp_set_default_device,
	omp_get_default_device, omp_get_num_devices, omp_get_num_teams,
	omp_get_team_num, omp_is_initial_device): New functions.
	* libgomp.h: Include stdlib.h.
	(ialias_ulp, ialias_str1, ialias_str2, ialias_redirect, ialias_call):
	Define.
	(struct target_mem_desc): Forward declare.
	(struct gomp_task_icv): Add default_device_var, target_data, bind_var
	and thread_limit_var fields.
	(gomp_get_num_devices): New prototype.
	(gomp_cancel_var): New extern decl.
	(struct gomp_team): Add work_shares_to_free, work_share_cancelled,
	team_cancelled and task_queued_count fields.  Add comments about
	task_{,queued_,running_}count.
	(gomp_cancel_kind): New enum.
	(gomp_work_share_end_cancel): New prototype.
	(struct gomp_task): Add next_taskgroup, prev_taskgroup, taskgroup,
	copy_ctors_done, dependers, depend_hash, depend_count, num_dependees
	and depend fields.
	(struct gomp_taskgroup): New type.
	(struct gomp_task_depend_entry,
	struct gomp_dependers_vec): New types.
	(gomp_finish_task): Free depend_hash if non-NULL.
	(struct gomp_team_state): Add place_partition_off
	and place_partition_len fields.
	(gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list,
	gomp_places_list_len): New extern decls.
	(struct gomp_thread): Add place field.
	(gomp_cpu_affinity, gomp_cpu_affinity_len): Remove.
	(gomp_init_thread_affinity): Add place argument.
	(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
	gomp_affinity_remove_cpu, gomp_affinity_copy_place,
	gomp_affinity_same_place, gomp_affinity_finalize_place_list,
	gomp_affinity_init_level, gomp_affinity_print_place): New
	prototypes.
	(gomp_team_start): Add flags argument.
	(gomp_thread_limit_var, gomp_remaining_threads_count,
	gomp_remaining_threads_lock): Remove.
	(gomp_managed_threads_lock): New variable.
	(struct gomp_thread_pool): Add threads_busy field.
	(gomp_free_thread): New prototype.
	* task.c: Include hashtab.h.
	(hash_entry_type): New typedef.
	(htab_alloc, htab_free, htab_hash, htab_eq): New inlines.
	(gomp_init_task): Clear dependers, depend_hash, depend_count,
	copy_ctors_done and taskgroup fields.
	(GOMP_task): Add depend argument, handle depend clauses.  If
	gomp_team_barrier_cancelled or if its taskgroup has been
	cancelled, don't queue or start new tasks.  Set copy_ctors_done
	field if needed.  Initialize taskgroup field.  If copy_ctors_done
	and already cancelled, don't discard the task.  If taskgroup is
	non-NULL, enqueue the task into taskgroup queue.  Increment
	num_children field in taskgroup.  Increment task_queued_count.
	(gomp_task_run_pre, gomp_task_run_post_remove_parent,
	gomp_task_run_post_remove_taskgroup): New inline functions.
	(gomp_task_run_post_handle_depend_hash,
	gomp_task_run_post_handle_dependers,
	gomp_task_run_post_handle_depend): New functions.
	(GOMP_taskwait): Use them.  If more than one new task
	has been queued, wake other threads if needed.
	(gomp_barrier_handle_tasks): Likewise.  If
	gomp_team_barrier_cancelled, don't start any new tasks, just free
	all tasks.
	(GOMP_taskgroup_start, GOMP_taskgroup_end): New functions.
	* loop.c (gomp_parallel_loop_start): Add flags argument, pass it
	through to gomp_team_start.
	(GOMP_parallel_loop_static_start, GOMP_parallel_loop_dynamic_start,
	GOMP_parallel_loop_guided_start, GOMP_parallel_loop_runtime_start):
	Adjust gomp_parallel_loop_start callers.
	(GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic,
	GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime,
	GOMP_loop_end_cancel): New functions.
	(GOMP_parallel_end): Add ialias_redirect.
	* hashtab.h: New file.
	* work.c (gomp_work_share_end, gomp_work_share_end_nowait): Set
	team->work_shares_to_free to thr->ts.work_share before calling
	free_work_share.
	(gomp_work_share_end_cancel): New function.
	* config/linux/proc.c: Include errno.h.
	(gomp_get_cpuset_size, gomp_cpuset_size, gomp_cpusetp): New variables.
	(gomp_cpuset_popcount): Add cpusetsize argument, use it instead of
	sizeof (cpu_set_t) to determine number of iterations.  Fix up check
	extern decl.  Use CPU_COUNT_S if available, or CPU_COUNT if
	gomp_cpuset_size is sizeof (cpu_set_t).
	(gomp_init_num_threads): Initialize gomp_cpuset_size,
	gomp_get_cpuset_size and gomp_cpusetp here, use gomp_cpusetp instead
	of &cpuset and pass gomp_cpuset_size instead of sizeof (cpu_set_t)
	to pthread_getaffinity_np.  Free and clear gomp_cpusetp if it didn't
	contain any logical CPUs.
	(get_num_procs): Don't call pthread_getaffinity_np if gomp_cpusetp
	is NULL.  Use gomp_cpusetp instead of &cpuset and pass
	gomp_get_cpuset_size instead of sizeof (cpu_set_t) to
	pthread_getaffinity_np.  Check gomp_places_list instead of
	gomp_cpu_affinity.  Adjust gomp_cpuset_popcount caller.
	* config/linux/bar.c (gomp_barrier_wait_end,
	gomp_barrier_wait_last): Use BAR_* defines.
	(gomp_team_barrier_wait_end): Likewise.  Clear BAR_CANCELLED
	from state where needed.  Set work_share_cancelled to 0 on last
	thread.
	(gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel_end,
	gomp_team_barrier_wait_cancel, gomp_team_barrier_cancel): New
	functions.
	* config/linux/proc.h (gomp_cpuset_popcount): Add attribute_hidden.
	Add cpusetsize argument.
	(gomp_cpuset_size, gomp_cpusetp): Declare.
	* config/linux/affinity.c: Include errno.h, stdio.h and string.h.
	(affinity_counter): Remove.
	(CPU_ISSET_S, CPU_ZERO_S, CPU_SET_S, CPU_CLR_S): Define
	if CPU_ALLOC_SIZE isn't defined.
	(gomp_init_affinity): Rewritten.  If gomp_places_list is NULL, try
	to silently create OMP_PLACES=threads; if it is non-NULL afterwards,
	bind the current thread to the first place.
	(gomp_init_thread_affinity): Rewritten.  Add place argument, just
	call pthread_attr_setaffinity_np with gomp_places_list[place].
	(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
	gomp_affinity_remove_cpu, gomp_affinity_copy_place,
	gomp_affinity_same_place, gomp_affinity_finalize_place_list,
	gomp_affinity_init_level, gomp_affinity_print_place): New functions.
	* config/linux/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST,
	BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define.
	(gomp_barrier_t): Add awaited_final field.
	(gomp_barrier_init): Initialize awaited_final field.
	(gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel,
	gomp_team_barrier_wait_cancel_end, gomp_team_barrier_cancel): New
	prototypes.
	(gomp_barrier_wait_start): Preserve BAR_CANCELLED bit.  Use BAR_*
	defines.
	(gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final_start,
	gomp_team_barrier_cancelled): New inline functions.
	(gomp_barrier_last_thread,
	gomp_team_barrier_set_task_pending,
	gomp_team_barrier_clear_task_pending,
	gomp_team_barrier_set_waiting_for_tasks,
	gomp_team_barrier_waiting_for_tasks,
	gomp_team_barrier_done): Use BAR_* defines.
	* config/posix/bar.c (gomp_barrier_init): Clear cancellable field.
	(gomp_barrier_wait_end): Use BAR_* defines.
	(gomp_team_barrier_wait_end): Clear BAR_CANCELLED from state.
	Set work_share_cancelled to 0 on last thread, use __atomic_load_n.
	Use BAR_* defines.
	(gomp_team_barrier_wait_cancel_end, gomp_team_barrier_wait_cancel,
	gomp_team_barrier_cancel): New functions.
	* config/posix/affinity.c (gomp_init_thread_affinity): Add place
	argument.
	(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
	gomp_affinity_remove_cpu, gomp_affinity_copy_place,
	gomp_affinity_same_place, gomp_affinity_finalize_place_list,
	gomp_affinity_init_level, gomp_affinity_print_place): New stubs.
	* config/posix/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST,
	BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define.
	(gomp_barrier_t): Add cancellable field.
	(gomp_team_barrier_wait_cancel, gomp_team_barrier_wait_cancel_end,
	gomp_team_barrier_cancel): New prototypes.
	(gomp_barrier_wait_start): Preserve BAR_CANCELLED bit.
	(gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final,
	gomp_team_barrier_cancelled): New inline functions.
	(gomp_barrier_wait_start, gomp_barrier_last_thread,
	gomp_team_barrier_set_task_pending,
	gomp_team_barrier_clear_task_pending,
	gomp_team_barrier_set_waiting_for_tasks,
	gomp_team_barrier_waiting_for_tasks,
	gomp_team_barrier_done): Use BAR_* defines.
	* barrier.c (GOMP_barrier_cancel): New function.
	* parallel.c (GOMP_parallel, GOMP_cancel, GOMP_cancellation_point):
	New functions.
	(gomp_resolve_num_threads): Adjust for thread_limit now being in
	icv->thread_limit_var.  Use UINT_MAX instead of ULONG_MAX as
	infinity.  If not nested, just return the minimum of max_num_threads
	and icv->thread_limit_var and if thr->thread_pool, set threads_busy
	to the returned value.  Otherwise, don't atomically update
	gomp_remaining_threads_count, but instead thr->thread_pool->threads_busy.
	(GOMP_parallel_end): Adjust for thread_limit now being in
	icv->thread_limit_var.  Use UINT_MAX instead of ULONG_MAX as
	infinity.  Adjust threads_busy in the pool rather than
	gomp_remaining_threads_count.  Remember team->nthreads and call
	gomp_team_end before adjusting threads_busy; if not nested,
	afterwards just set it to 1 non-atomically.  Add ialias.
	(GOMP_parallel_start): Adjust gomp_team_start caller.
	* testsuite/libgomp.c/atomic-14.c: Add parens to make it valid.

2012-11-21  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/55411
	* team.c (gomp_free_thread): Decrease gomp_managed_threads
	if pool had any threads_used.

2011-11-30  Alan Modra  <amodra@gmail.com>

	PR libgomp/51298
	* task.c (gomp_barrier_handle_tasks): Regain lock so as to not
	double unlock.
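For orientation before the diff itself: the task-dependence behavior described
in the 2014-08-01 task.c entries above corresponds to OpenMP 4.0 user code
like the following sketch.  It is purely illustrative (not part of the patch)
and the variable names are invented:

    #include <omp.h>

    int
    main (void)
    {
      int x = 0;
      #pragma omp parallel
      #pragma omp single
      {
        #pragma omp task depend(out: x)   /* deferred child task */
        x = 1;

        /* An if (0) task is undeferred; with a depend clause it must first
           wait for the out dependence above, which is the case routed
           through gomp_task_maybe_wait_for_dependencies.  */
        #pragma omp task if (0) depend(in: x)
        x++;
      }
      return x == 2 ? 0 : 1;
    }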
--- libgomp/hashtab.h.jj	2014-05-15 13:12:53.904857203 +0200
+++ libgomp/hashtab.h	2014-05-15 13:12:53.904857203 +0200
@@ -0,0 +1,443 @@
+/* An expandable hash table datatype.
+   Copyright (C) 1999-2013
+   Free Software Foundation, Inc.
+   Contributed by Vladimir Makarov <vmakarov@cygnus.com>.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* The hash table code copied from include/hashtab.[hc] and adjusted,
+   so that the hash table entries are in the flexible array at the end
+   of the control structure, no callbacks are used and the elements in the
+   table are of the hash_entry_type type.
+   Before including this file, define hash_entry_type type and
+   htab_alloc and htab_free functions.  After including it, define
+   htab_hash and htab_eq inline functions.  */
+
+/* This package implements basic hash table functionality.  It is possible
+   to search for an entry, create an entry and destroy an entry.
+
+   Elements in the table are generic pointers.
+
+   The size of the table is not fixed; if the occupancy of the table
+   grows too high the hash table will be expanded.
+
+   The abstract data implementation is based on generalized Algorithm D
+   from Knuth's book "The art of computer programming".  Hash table is
+   expanded by creation of new hash table and transferring elements from
+   the old table to the new table.  */
+
+/* The type for a hash code.  */
+typedef unsigned int hashval_t;
+
+static inline hashval_t htab_hash (hash_entry_type);
+static inline bool htab_eq (hash_entry_type, hash_entry_type);
+
+/* This macro defines reserved value for empty table entry.  */
+
+#define HTAB_EMPTY_ENTRY    ((hash_entry_type) 0)
+
+/* This macro defines reserved value for table entry which contained
+   a deleted element.  */
+
+#define HTAB_DELETED_ENTRY  ((hash_entry_type) 1)
+
+/* Hash tables are of the following type.  The structure
+   (implementation) of this type is not needed for using the hash
+   tables.  All work with hash table should be executed only through
+   functions mentioned below.  The size of this structure is subject to
+   change.  */
+
+struct htab {
+  /* Current size (in entries) of the hash table.  */
+  size_t size;
+
+  /* Current number of elements including also deleted elements.  */
+  size_t n_elements;
+
+  /* Current number of deleted elements in the table.  */
+  size_t n_deleted;
+
+  /* Current size (in entries) of the hash table, as an index into the
+     table of primes.  */
+  unsigned int size_prime_index;
+
+  /* Table itself.  */
+  hash_entry_type entries[];
+};
+
+typedef struct htab *htab_t;
+
+/* An enum saying whether we insert into the hash table or not.  */
+enum insert_option {NO_INSERT, INSERT};
+
+/* Table of primes and multiplicative inverses.
+
+   Note that these are not minimally reduced inverses.  Unlike when generating
+   code to divide by a constant, we want to be able to use the same algorithm
+   all the time.  All of these inverses (are implied to) have bit 32 set.
+
+   For the record, the function that computed the table is in
+   libiberty/hashtab.c.  */
+
+struct prime_ent
+{
+  hashval_t prime;
+  hashval_t inv;
+  hashval_t inv_m2;	/* inverse of prime-2 */
+  hashval_t shift;
+};
+
+static struct prime_ent const prime_tab[] = {
+  {          7, 0x24924925, 0x9999999b, 2 },
+  {         13, 0x3b13b13c, 0x745d1747, 3 },
+  {         31, 0x08421085, 0x1a7b9612, 4 },
+  {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
+  {        127, 0x02040811, 0x0624dd30, 6 },
+  {        251, 0x05197f7e, 0x073260a5, 7 },
+  {        509, 0x01824366, 0x02864fc8, 8 },
+  {       1021, 0x00c0906d, 0x014191f7, 9 },
+  {       2039, 0x0121456f, 0x0161e69e, 10 },
+  {       4093, 0x00300902, 0x00501908, 11 },
+  {       8191, 0x00080041, 0x00180241, 12 },
+  {      16381, 0x000c0091, 0x00140191, 13 },
+  {      32749, 0x002605a5, 0x002a06e6, 14 },
+  {      65521, 0x000f00e2, 0x00110122, 15 },
+  {     131071, 0x00008001, 0x00018003, 16 },
+  {     262139, 0x00014002, 0x0001c004, 17 },
+  {     524287, 0x00002001, 0x00006001, 18 },
+  {    1048573, 0x00003001, 0x00005001, 19 },
+  {    2097143, 0x00004801, 0x00005801, 20 },
+  {    4194301, 0x00000c01, 0x00001401, 21 },
+  {    8388593, 0x00001e01, 0x00002201, 22 },
+  {   16777213, 0x00000301, 0x00000501, 23 },
+  {   33554393, 0x00001381, 0x00001481, 24 },
+  {   67108859, 0x00000141, 0x000001c1, 25 },
+  {  134217689, 0x000004e1, 0x00000521, 26 },
+  {  268435399, 0x00000391, 0x000003b1, 27 },
+  {  536870909, 0x00000019, 0x00000029, 28 },
+  { 1073741789, 0x0000008d, 0x00000095, 29 },
+  { 2147483647, 0x00000003, 0x00000007, 30 },
+  /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
+  { 0xfffffffb, 0x00000006, 0x00000008, 31 }
+};
+
+/* The following function returns an index into the above table of the
+   nearest prime number which is greater than N, and near a power of two.  */
+
+static unsigned int
+higher_prime_index (unsigned long n)
+{
+  unsigned int low = 0;
+  unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
+
+  while (low != high)
+    {
+      unsigned int mid = low + (high - low) / 2;
+      if (n > prime_tab[mid].prime)
+	low = mid + 1;
+      else
+	high = mid;
+    }
+
+  /* If we've run out of primes, abort.  */
+  if (n > prime_tab[low].prime)
+    abort ();
+
+  return low;
+}
+
+/* Return the current size of given hash table.  */
+
+static inline size_t
+htab_size (htab_t htab)
+{
+  return htab->size;
+}
+
+/* Return the current number of elements in given hash table.  */
+
+static inline size_t
+htab_elements (htab_t htab)
+{
+  return htab->n_elements - htab->n_deleted;
+}
+
+/* Return X % Y.  */
+
+static inline hashval_t
+htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
+{
+  /* The multiplicative inverses computed above are for 32-bit types, and
+     require that we be able to compute a highpart multiply.  */
+  if (sizeof (hashval_t) * __CHAR_BIT__ <= 32)
+    {
+      hashval_t t1, t2, t3, t4, q, r;
+
+      t1 = ((unsigned long long)x * inv) >> 32;
+      t2 = x - t1;
+      t3 = t2 >> 1;
+      t4 = t1 + t3;
+      q  = t4 >> shift;
+      r  = x - (q * y);
+
+      return r;
+    }
+
+  /* Otherwise just use the native division routines.  */
+  return x % y;
+}
+
+/* Compute the primary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return htab_mod_1 (hash, p->prime, p->inv, p->shift);
+}
+
+/* Compute the secondary hash for HASH given HTAB's current size.  */
+
+static inline hashval_t
+htab_mod_m2 (hashval_t hash, htab_t htab)
+{
+  const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+  return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
+}
+
+/* Create hash table of size SIZE.  */
+
+static htab_t
+htab_create (size_t size)
+{
+  htab_t result;
+  unsigned int size_prime_index;
+
+  size_prime_index = higher_prime_index (size);
+  size = prime_tab[size_prime_index].prime;
+
+  result = (htab_t) htab_alloc (sizeof (struct htab)
+				+ size * sizeof (hash_entry_type));
+  result->size = size;
+  result->n_elements = 0;
+  result->n_deleted = 0;
+  result->size_prime_index = size_prime_index;
+  memset (result->entries, 0, size * sizeof (hash_entry_type));
+  return result;
+}
+
+/* Similar to htab_find_slot, but without several unwanted side effects:
+   - Does not call htab_eq when it finds an existing entry.
+   - Does not change the count of elements in the hash table.
+   This function also assumes there are no deleted entries in the table.
+   HASH is the hash value for the element to be inserted.  */
+
+static hash_entry_type *
+find_empty_slot_for_expand (htab_t htab, hashval_t hash)
+{
+  hashval_t index = htab_mod (hash, htab);
+  size_t size = htab_size (htab);
+  hash_entry_type *slot = htab->entries + index;
+  hashval_t hash2;
+
+  if (*slot == HTAB_EMPTY_ENTRY)
+    return slot;
+  else if (*slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      slot = htab->entries + index;
+      if (*slot == HTAB_EMPTY_ENTRY)
+	return slot;
+      else if (*slot == HTAB_DELETED_ENTRY)
+	abort ();
+    }
+}
+
+/* The following function changes size of memory allocated for the
+   entries and repeatedly inserts the table elements.  The occupancy
+   of the table after the call will be about 50%.  Naturally the hash
+   table must already exist.  Remember also that the place of the
+   table entries is changed.  */
+
+static htab_t
+htab_expand (htab_t htab)
+{
+  htab_t nhtab;
+  hash_entry_type *olimit;
+  hash_entry_type *p;
+  size_t osize, elts;
+
+  osize = htab->size;
+  olimit = htab->entries + osize;
+  elts = htab_elements (htab);
+
+  /* Resize only when table after removal of unused elements is either
+     too full or too empty.  */
+  if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
+    nhtab = htab_create (elts * 2);
+  else
+    nhtab = htab_create (osize - 1);
+  nhtab->n_elements = htab->n_elements - htab->n_deleted;
+
+  p = htab->entries;
+  do
+    {
+      hash_entry_type x = *p;
+
+      if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+	*find_empty_slot_for_expand (nhtab, htab_hash (x)) = x;
+
+      p++;
+    }
+  while (p < olimit);
+
+  htab_free (htab);
+  return nhtab;
+}
+
+/* This function searches for a hash table entry equal to the given
+   element.  It cannot be used to insert or delete an element.  */
+
+static hash_entry_type
+htab_find (htab_t htab, const hash_entry_type element)
+{
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+
+  size = htab_size (htab);
+  index = htab_mod (hash, htab);
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY
+      || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+    return entry;
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY
+	  || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+	return entry;
+    }
+}
+
+/* This function searches for a hash table slot containing an entry
+   equal to the given element.  To delete an entry, call this with
+   insert=NO_INSERT, then call htab_clear_slot on the slot returned
+   (possibly after doing some checks).  To insert an entry, call this
+   with insert=INSERT, then write the value you want into the returned
+   slot.  */
+
+static hash_entry_type *
+htab_find_slot (htab_t *htabp, const hash_entry_type element,
+		enum insert_option insert)
+{
+  hash_entry_type *first_deleted_slot;
+  hashval_t index, hash2, hash = htab_hash (element);
+  size_t size;
+  hash_entry_type entry;
+  htab_t htab = *htabp;
+
+  size = htab_size (htab);
+  if (insert == INSERT && size * 3 <= htab->n_elements * 4)
+    {
+      htab = *htabp = htab_expand (htab);
+      size = htab_size (htab);
+    }
+
+  index = htab_mod (hash, htab);
+
+  first_deleted_slot = NULL;
+
+  entry = htab->entries[index];
+  if (entry == HTAB_EMPTY_ENTRY)
+    goto empty_entry;
+  else if (entry == HTAB_DELETED_ENTRY)
+    first_deleted_slot = &htab->entries[index];
+  else if (htab_eq (entry, element))
+    return &htab->entries[index];
+
+  hash2 = htab_mod_m2 (hash, htab);
+  for (;;)
+    {
+      index += hash2;
+      if (index >= size)
+	index -= size;
+
+      entry = htab->entries[index];
+      if (entry == HTAB_EMPTY_ENTRY)
+	goto empty_entry;
+      else if (entry == HTAB_DELETED_ENTRY)
+	{
+	  if (!first_deleted_slot)
+	    first_deleted_slot = &htab->entries[index];
+	}
+      else if (htab_eq (entry, element))
+	return &htab->entries[index];
+    }
+
+ empty_entry:
+  if (insert == NO_INSERT)
+    return NULL;
+
+  if (first_deleted_slot)
+    {
+      htab->n_deleted--;
+      *first_deleted_slot = HTAB_EMPTY_ENTRY;
+      return first_deleted_slot;
+    }
+
+  htab->n_elements++;
+  return &htab->entries[index];
+}
+
+/* This function clears a specified slot in a hash table.  It is
+   useful when you've already done the lookup and don't want to do it
+   again.  */
+
+static inline void
+htab_clear_slot (htab_t htab, hash_entry_type *slot)
+{
+  if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
+      || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
+    abort ();
+
+  *slot = HTAB_DELETED_ENTRY;
+  htab->n_deleted++;
+}
+
+/* Returns a hash code for pointer P.  Simplified version of evahash.  */
+
+static inline hashval_t
+hash_pointer (const void *p)
+{
+  uintptr_t v = (uintptr_t) p;
+  if (sizeof (v) > sizeof (hashval_t))
+    v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__);
+  return v;
+}
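The header above is used by defining the entry type and the allocators before
the include, and the hash/equality hooks after it, as its opening comment
states; task.c does exactly that for struct gomp_task_depend_entry.  A
minimal standalone sketch of the same contract, with invented names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* Required before the include: the element type and allocators.  */
    typedef struct entry { uintptr_t addr; } *hash_entry_type;
    static void *htab_alloc (size_t size) { return malloc (size); }
    static void htab_free (void *ptr) { free (ptr); }

    #include "hashtab.h"

    /* Required after the include: the hash and equality hooks.  */
    static inline hashval_t
    htab_hash (hash_entry_type element)
    {
      return hash_pointer ((void *) element->addr);
    }

    static inline bool
    htab_eq (hash_entry_type x, hash_entry_type y)
    {
      return x->addr == y->addr;
    }

    int
    main (void)
    {
      htab_t ht = htab_create (8);
      struct entry e = { 0x1234 };
      /* INSERT may expand the table, hence the pointer to ht.  */
      hash_entry_type *slot = htab_find_slot (&ht, &e, INSERT);
      *slot = &e;
      return htab_find (ht, &e) == &e ? 0 : 1;
    }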
--- libgomp/fortran.c.jj	2014-05-15 11:39:33.603782318 +0200
+++ libgomp/fortran.c	2014-05-15 13:12:46.507895135 +0200
@@ -31,11 +31,6 @@
 
 #ifdef HAVE_ATTRIBUTE_ALIAS
 /* Use internal aliases if possible.  */
-# define ULP	STR1(__USER_LABEL_PREFIX__)
-# define STR1(x)	STR2(x)
-# define STR2(x)	#x
-# define ialias_redirect(fn) \
-  extern __typeof (fn) fn __asm__ (ULP "gomp_ialias_" #fn) attribute_hidden;
 # ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
 ialias_redirect (omp_init_lock)
 ialias_redirect (omp_init_nest_lock)
@@ -70,6 +65,14 @@ ialias_redirect (omp_get_ancestor_thread
 ialias_redirect (omp_get_team_size)
 ialias_redirect (omp_get_active_level)
 ialias_redirect (omp_in_final)
+ialias_redirect (omp_get_cancellation)
+ialias_redirect (omp_get_proc_bind)
+ialias_redirect (omp_set_default_device)
+ialias_redirect (omp_get_default_device)
+ialias_redirect (omp_get_num_devices)
+ialias_redirect (omp_get_num_teams)
+ialias_redirect (omp_get_team_num)
+ialias_redirect (omp_is_initial_device)
 #endif
 
 #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
@@ -435,3 +438,57 @@ omp_in_final_ (void)
 {
   return omp_in_final ();
 }
+
+int32_t
+omp_get_cancellation_ (void)
+{
+  return omp_get_cancellation ();
+}
+
+int32_t
+omp_get_proc_bind_ (void)
+{
+  return omp_get_proc_bind ();
+}
+
+void
+omp_set_default_device_ (const int32_t *device_num)
+{
+  return omp_set_default_device (*device_num);
+}
+
+void
+omp_set_default_device_8_ (const int64_t *device_num)
+{
+  return omp_set_default_device (TO_INT (*device_num));
+}
+
+int32_t
+omp_get_default_device_ (void)
+{
+  return omp_get_default_device ();
+}
+
+int32_t
+omp_get_num_devices_ (void)
+{
+  return omp_get_num_devices ();
+}
+
+int32_t
+omp_get_num_teams_ (void)
+{
+  return omp_get_num_teams ();
+}
+
+int32_t
+omp_get_team_num_ (void)
+{
+  return omp_get_team_num ();
+}
+
+int32_t
+omp_is_initial_device_ (void)
+{
+  return omp_is_initial_device ();
+}
--- libgomp/Makefile.in.jj	2014-04-24 21:38:15.591686821 +0200
+++ libgomp/Makefile.in	2014-05-15 13:12:46.026898921 +0200
@@ -86,7 +86,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier
 	error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
 	parallel.lo sections.lo single.lo task.lo team.lo work.lo \
 	lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
-	fortran.lo affinity.lo
+	fortran.lo affinity.lo target.lo
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -300,7 +300,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
 	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
-	time.c fortran.c affinity.c
+	time.c fortran.c affinity.c target.c
 
 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
@@ -448,6 +448,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
--- libgomp/target.c.jj	2014-05-15 13:12:53.904857203 +0200
+++ libgomp/target.c	2014-05-15 13:12:53.904857203 +0200
@@ -0,0 +1,96 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file handles the target and teams constructs.  For now there are
+   no offload devices and target regions fall back to host execution.  */
+
+#include "libgomp.h"
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+attribute_hidden int
+gomp_get_num_devices (void)
+{
+  return 0;
+}
+
+/* Called when encountering a target directive.  If DEVICE
+   is -1, it means use device-var ICV.  If it is -2 (or any other value
+   larger than the last available hw device), use host fallback.
+   FN is address of host code, OPENMP_TARGET contains value of the
+   __OPENMP_TARGET__ symbol in the shared library or binary that invokes
+   GOMP_target.  HOSTADDRS, SIZES and KINDS are arrays
+   with MAPNUM entries, with addresses of the host objects,
+   sizes of the host objects (resp. for pointer kind pointer bias
+   and assumed sizeof (void *) size) and kinds.  */
+
+void
+GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
+	     size_t mapnum, void **hostaddrs, size_t *sizes,
+	     unsigned char *kinds)
+{
+  /* Host fallback.  */
+  struct gomp_thread old_thr, *thr = gomp_thread ();
+  old_thr = *thr;
+  memset (thr, '\0', sizeof (*thr));
+  if (gomp_places_list)
+    {
+      thr->place = old_thr.place;
+      thr->ts.place_partition_len = gomp_places_list_len;
+    }
+  fn (hostaddrs);
+  gomp_free_thread (thr);
+  *thr = old_thr;
+}
+
+void
+GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
+		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+}
+
+void
+GOMP_target_end_data (void)
+{
+}
+
+void
+GOMP_target_update (int device, const void *openmp_target, size_t mapnum,
+		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+}
+
+void
+GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+{
+  if (thread_limit)
+    {
+      struct gomp_task_icv *icv = gomp_icv (true);
+      icv->thread_limit_var
+	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+    }
+  (void) num_teams;
+}
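Since gomp_get_num_devices returns 0 at this stage, every target region
resolves to the host-fallback path in GOMP_target above.  An illustrative
user program (not part of the patch) that observes this:

    #include <omp.h>
    #include <stdio.h>

    int
    main (void)
    {
      int initial = 0;
      #pragma omp target map(from: initial)
      initial = omp_is_initial_device ();
      /* With no offload devices the region runs through the host
         fallback, so this prints 1.  */
      printf ("%d\n", initial);
      return 0;
    }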
--- libgomp/config/posix/affinity.c.jj	2009-04-14 15:41:09.000000000 +0200
+++ libgomp/config/posix/affinity.c	2014-05-15 13:12:46.504895015 +0200
@@ -32,7 +32,84 @@ gomp_init_affinity (void)
 }
 
 void
-gomp_init_thread_affinity (pthread_attr_t *attr)
+gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
 {
   (void) attr;
+  (void) place;
+}
+
+void **
+gomp_affinity_alloc (unsigned long count, bool quiet)
+{
+  (void) count;
+  if (!quiet)
+    gomp_error ("Affinity not supported on this configuration");
+  return NULL;
+}
+
+void
+gomp_affinity_init_place (void *p)
+{
+  (void) p;
+}
+
+bool
+gomp_affinity_add_cpus (void *p, unsigned long num,
+			unsigned long len, long stride, bool quiet)
+{
+  (void) p;
+  (void) num;
+  (void) len;
+  (void) stride;
+  (void) quiet;
+  return false;
+}
+
+bool
+gomp_affinity_remove_cpu (void *p, unsigned long num)
+{
+  (void) p;
+  (void) num;
+  return false;
+}
+
+bool
+gomp_affinity_copy_place (void *p, void *q, long stride)
+{
+  (void) p;
+  (void) q;
+  (void) stride;
+  return false;
+}
+
+bool
+gomp_affinity_same_place (void *p, void *q)
+{
+  (void) p;
+  (void) q;
+  return false;
+}
+
+bool
+gomp_affinity_finalize_place_list (bool quiet)
+{
+  (void) quiet;
+  return false;
+}
+
+bool
+gomp_affinity_init_level (int level, unsigned long count, bool quiet)
+{
+  (void) level;
+  (void) count;
+  (void) quiet;
+  if (!quiet)
+    gomp_error ("Affinity not supported on this configuration");
+  return false;
+}
+
+void
+gomp_affinity_print_place (void *p)
+{
+  (void) p;
 }
--- libgomp/config/posix/bar.c.jj	2009-04-14 15:41:07.000000000 +0200
+++ libgomp/config/posix/bar.c	2014-05-15 15:02:28.966690479 +0200
@@ -42,6 +42,7 @@ gomp_barrier_init (gomp_barrier_t *bar,
   bar->total = count;
   bar->arrived = 0;
   bar->generation = 0;
+  bar->cancellable = false;
 }
 
 void
@@ -72,7 +73,7 @@ gomp_barrier_wait_end (gomp_barrier_t *b
 {
   unsigned int n;
 
-  if (state & 1)
+  if (state & BAR_WAS_LAST)
     {
       n = --bar->arrived;
       if (n > 0)
@@ -113,12 +114,14 @@ gomp_team_barrier_wait_end (gomp_barrier
 {
   unsigned int n;
 
-  if (state & 1)
+  state &= ~BAR_CANCELLED;
+  if (state & BAR_WAS_LAST)
     {
       n = --bar->arrived;
       struct gomp_thread *thr = gomp_thread ();
       struct gomp_team *team = thr->ts.team;
 
+      team->work_share_cancelled = 0;
       if (team->task_count)
	{
	  gomp_barrier_handle_tasks (state);
@@ -128,7 +131,7 @@ gomp_team_barrier_wait_end (gomp_barrier
	  return;
	}
 
-      bar->generation = state + 3;
+      bar->generation = state + BAR_INCR - BAR_WAS_LAST;
       if (n > 0)
	{
	  do
@@ -141,13 +144,18 @@ gomp_team_barrier_wait_end (gomp_barrier
   else
     {
       gomp_mutex_unlock (&bar->mutex1);
+      int gen;
       do
	{
	  gomp_sem_wait (&bar->sem1);
-	  if (bar->generation & 1)
-	    gomp_barrier_handle_tasks (state);
+	  gen = bar->generation;
+	  if (gen & BAR_TASK_PENDING)
+	    {
+	      gomp_barrier_handle_tasks (state);
+	      gen = bar->generation;
+	    }
	}
-      while (bar->generation != state + 4);
+      while (gen != state + BAR_INCR);
 
 #ifdef HAVE_SYNC_BUILTINS
       n = __sync_add_and_fetch (&bar->arrived, -1);
@@ -162,6 +170,81 @@ gomp_team_barrier_wait_end (gomp_barrier
     }
 }
 
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+				   gomp_barrier_state_t state)
+{
+  unsigned int n;
+
+  if (state & BAR_WAS_LAST)
+    {
+      bar->cancellable = false;
+      n = --bar->arrived;
+      struct gomp_thread *thr = gomp_thread ();
+      struct gomp_team *team = thr->ts.team;
+
+      team->work_share_cancelled = 0;
+      if (team->task_count)
+	{
+	  gomp_barrier_handle_tasks (state);
+	  if (n > 0)
+	    gomp_sem_wait (&bar->sem2);
+	  gomp_mutex_unlock (&bar->mutex1);
+	  return false;
+	}
+
+      bar->generation = state + BAR_INCR - BAR_WAS_LAST;
+      if (n > 0)
+	{
+	  do
+	    gomp_sem_post (&bar->sem1);
+	  while (--n != 0);
+	  gomp_sem_wait (&bar->sem2);
+	}
+      gomp_mutex_unlock (&bar->mutex1);
+    }
+  else
+    {
+      if (state & BAR_CANCELLED)
+	{
+	  gomp_mutex_unlock (&bar->mutex1);
+	  return true;
+	}
+      bar->cancellable = true;
+      gomp_mutex_unlock (&bar->mutex1);
+      int gen;
+      do
+	{
+	  gomp_sem_wait (&bar->sem1);
+	  gen = bar->generation;
+	  if (gen & BAR_CANCELLED)
+	    break;
+	  if (gen & BAR_TASK_PENDING)
+	    {
+	      gomp_barrier_handle_tasks (state);
+	      gen = bar->generation;
+	      if (gen & BAR_CANCELLED)
+		break;
+	    }
+	}
+      while (gen != state + BAR_INCR);
+
+#ifdef HAVE_SYNC_BUILTINS
+      n = __sync_add_and_fetch (&bar->arrived, -1);
+#else
+      gomp_mutex_lock (&bar->mutex2);
+      n = --bar->arrived;
+      gomp_mutex_unlock (&bar->mutex2);
+#endif
+
+      if (n == 0)
+	gomp_sem_post (&bar->sem2);
+      if (gen & BAR_CANCELLED)
+	return true;
+    }
+  return false;
+}
+
 void
 gomp_team_barrier_wait (gomp_barrier_t *barrier)
 {
@@ -176,3 +259,40 @@ gomp_team_barrier_wake (gomp_barrier_t *
   while (count-- > 0)
     gomp_sem_post (&bar->sem1);
 }
+
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+  gomp_barrier_state_t state = gomp_barrier_wait_cancel_start (bar);
+  return gomp_team_barrier_wait_cancel_end (bar, state);
+}
+
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+  if (team->barrier.generation & BAR_CANCELLED)
+    return;
+  gomp_mutex_lock (&team->barrier.mutex1);
+  gomp_mutex_lock (&team->task_lock);
+  if (team->barrier.generation & BAR_CANCELLED)
+    {
+      gomp_mutex_unlock (&team->task_lock);
+      gomp_mutex_unlock (&team->barrier.mutex1);
+      return;
+    }
+  team->barrier.generation |= BAR_CANCELLED;
+  gomp_mutex_unlock (&team->task_lock);
+  if (team->barrier.cancellable)
+    {
+      int n = team->barrier.arrived;
+      if (n > 0)
+	{
+	  do
+	    gomp_sem_post (&team->barrier.sem1);
+	  while (--n != 0);
+	  gomp_sem_wait (&team->barrier.sem2);
+	}
+      team->barrier.cancellable = false;
+    }
+  gomp_mutex_unlock (&team->barrier.mutex1);
+}
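The cancellable barrier added above is what a cancelled parallel region
unwinds through.  A user-level sketch (not part of the patch; it requires
OMP_CANCELLATION=true in the environment, which the env.c changes parse):

    #include <omp.h>
    #include <stdio.h>

    int
    main (void)
    {
      #pragma omp parallel
      {
        if (omp_get_thread_num () == 0)
          {
            #pragma omp cancel parallel
          }
        /* Other threads observe the cancellation here or at the
           cancellable implicit barrier at the end of the region.  */
        #pragma omp cancellation point parallel
        printf ("thread %d not cancelled\n", omp_get_thread_num ());
      }
      return 0;
    }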
--- libgomp/config/posix/bar.h.jj	2009-04-14 15:41:09.000000000 +0200
+++ libgomp/config/posix/bar.h	2014-05-15 13:12:46.506895099 +0200
@@ -43,9 +43,20 @@ typedef struct
   unsigned total;
   unsigned arrived;
   unsigned generation;
+  bool cancellable;
 } gomp_barrier_t;
+
 typedef unsigned int gomp_barrier_state_t;
 
+/* The generation field contains a counter in the high bits, with a few
+   low bits dedicated to flags.  Note that TASK_PENDING and WAS_LAST can
+   share space because WAS_LAST is never stored back to generation.  */
+#define BAR_TASK_PENDING	1
+#define BAR_WAS_LAST		1
+#define BAR_WAITING_FOR_TASK	2
+#define BAR_CANCELLED		4
+#define BAR_INCR		8
+
 extern void gomp_barrier_init (gomp_barrier_t *, unsigned);
 extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned);
 extern void gomp_barrier_destroy (gomp_barrier_t *);
@@ -55,22 +66,47 @@ extern void gomp_barrier_wait_end (gomp_
 extern void gomp_team_barrier_wait (gomp_barrier_t *);
 extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
					 gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+					       gomp_barrier_state_t);
 extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team;
+extern void gomp_team_barrier_cancel (struct gomp_team *);
 
 static inline gomp_barrier_state_t
 gomp_barrier_wait_start (gomp_barrier_t *bar)
 {
   unsigned int ret;
   gomp_mutex_lock (&bar->mutex1);
-  ret = bar->generation & ~3;
-  ret += ++bar->arrived == bar->total;
+  ret = bar->generation & (-BAR_INCR | BAR_CANCELLED);
+  if (++bar->arrived == bar->total)
+    ret |= BAR_WAS_LAST;
+  return ret;
+}
+
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+  unsigned int ret;
+  gomp_mutex_lock (&bar->mutex1);
+  ret = bar->generation & (-BAR_INCR | BAR_CANCELLED);
+  if (ret & BAR_CANCELLED)
+    return ret;
+  if (++bar->arrived == bar->total)
+    ret |= BAR_WAS_LAST;
   return ret;
 }
 
+static inline void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+  gomp_team_barrier_wait (bar);
+}
+
 static inline bool
 gomp_barrier_last_thread (gomp_barrier_state_t state)
 {
-  return state & 1;
+  return state & BAR_WAS_LAST;
 }
 
 static inline void
@@ -85,31 +121,37 @@ gomp_barrier_wait_last (gomp_barrier_t *
 static inline void
 gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
 {
-  bar->generation |= 1;
+  bar->generation |= BAR_TASK_PENDING;
 }
 
 static inline void
 gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
 {
-  bar->generation &= ~1;
+  bar->generation &= ~BAR_TASK_PENDING;
 }
 
 static inline void
 gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
 {
-  bar->generation |= 2;
+  bar->generation |= BAR_WAITING_FOR_TASK;
 }
 
 static inline bool
 gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
 {
-  return (bar->generation & 2) != 0;
+  return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
 }
 
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+  return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
+}
+
 static inline void
 gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  bar->generation = (state & ~3) + 4;
+  bar->generation = (state & -BAR_INCR) + BAR_INCR;
 }
 
 #endif /* GOMP_BARRIER_H */
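A minimal sketch of the generation-word layout described in the comment
above: the counter advances in BAR_INCR steps while the low bits carry
flags.  The defines are copied from the header; the rest is illustrative:

    #include <assert.h>

    #define BAR_TASK_PENDING	1
    #define BAR_WAS_LAST	1
    #define BAR_WAITING_FOR_TASK	2
    #define BAR_CANCELLED	4
    #define BAR_INCR	8

    int
    main (void)
    {
      unsigned int gen = 0;
      gen |= BAR_TASK_PENDING | BAR_CANCELLED;  /* set some flag bits */
      /* As in gomp_team_barrier_done: keep the counter, clear the flag
         bits (the low three), and advance one generation.  */
      gen = (gen & -BAR_INCR) + BAR_INCR;
      assert (gen == BAR_INCR);  /* flags gone, counter bumped */
      return 0;
    }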
--- libgomp/config/linux/proc.h.jj	2014-05-15 11:39:34.000000000 +0200
+++ libgomp/config/linux/proc.h	2014-05-15 13:12:46.487894100 +0200
@@ -28,7 +28,10 @@
 #include <sched.h>
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-extern unsigned long gomp_cpuset_popcount (cpu_set_t *);
+extern unsigned long gomp_cpuset_size attribute_hidden;
+extern cpu_set_t *gomp_cpusetp attribute_hidden;
+extern unsigned long gomp_cpuset_popcount (unsigned long, cpu_set_t *)
+  attribute_hidden;
 #endif
 
 #endif /* GOMP_PROC_H */
--- libgomp/config/linux/affinity.c.jj	2014-05-15 13:12:46.027898917 +0200
+++ libgomp/config/linux/affinity.c	2014-05-15 15:10:28.251141091 +0200
@@ -30,90 +30,328 @@
 #endif
 #include "libgomp.h"
 #include "proc.h"
+#include <errno.h>
 #include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
 #include <unistd.h>
+#include <limits.h>
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
 
-static unsigned int affinity_counter;
+#ifndef CPU_ALLOC_SIZE
+#define CPU_ISSET_S(idx, size, set) CPU_ISSET(idx, set)
+#define CPU_ZERO_S(size, set) CPU_ZERO(set)
+#define CPU_SET_S(idx, size, set) CPU_SET(idx, set)
+#define CPU_CLR_S(idx, size, set) CPU_CLR(idx, set)
+#endif
 
 void
 gomp_init_affinity (void)
 {
-  cpu_set_t cpuset, cpusetnew;
-  size_t idx, widx;
-  unsigned long cpus = 0;
-
-  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset))
-    {
-      gomp_error ("could not get CPU affinity set");
-      free (gomp_cpu_affinity);
-      gomp_cpu_affinity = NULL;
-      gomp_cpu_affinity_len = 0;
-      return;
-    }
-
-  CPU_ZERO (&cpusetnew);
-  if (gomp_cpu_affinity_len == 0)
-    {
-      unsigned long count = gomp_cpuset_popcount (&cpuset);
-      if (count >= 65536)
-	count = 65536;
-      gomp_cpu_affinity = malloc (count * sizeof (unsigned short));
-      if (gomp_cpu_affinity == NULL)
+  if (gomp_places_list == NULL)
+    {
+      if (!gomp_affinity_init_level (1, ULONG_MAX, true))
+	return;
+    }
+
+  struct gomp_thread *thr = gomp_thread ();
+  pthread_setaffinity_np (pthread_self (), gomp_cpuset_size,
+			  (cpu_set_t *) gomp_places_list[0]);
+  thr->place = 1;
+  thr->ts.place_partition_off = 0;
+  thr->ts.place_partition_len = gomp_places_list_len;
+}
+
+void
+gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
+{
+  pthread_attr_setaffinity_np (attr, gomp_cpuset_size,
+			       (cpu_set_t *) gomp_places_list[place]);
+}
+
+void **
+gomp_affinity_alloc (unsigned long count, bool quiet)
+{
+  unsigned long i;
+  void **ret;
+  char *p;
+
+  if (gomp_cpusetp == NULL)
+    {
+      if (!quiet)
+	gomp_error ("Could not get CPU affinity set");
+      return NULL;
+    }
+
+  ret = malloc (count * sizeof (void *) + count * gomp_cpuset_size);
+  if (ret == NULL)
+    {
+      if (!quiet)
+	gomp_error ("Out of memory trying to allocate places list");
+      return NULL;
+    }
+
+  p = (char *) (ret + count);
+  for (i = 0; i < count; i++, p += gomp_cpuset_size)
+    ret[i] = p;
+  return ret;
+}
+
+void
+gomp_affinity_init_place (void *p)
+{
+  cpu_set_t *cpusetp = (cpu_set_t *) p;
+  CPU_ZERO_S (gomp_cpuset_size, cpusetp);
+}
+
+bool
+gomp_affinity_add_cpus (void *p, unsigned long num,
+			unsigned long len, long stride, bool quiet)
+{
+  cpu_set_t *cpusetp = (cpu_set_t *) p;
+  unsigned long max = 8 * gomp_cpuset_size;
+  for (;;)
+    {
+      if (num >= max)
	{
-	  gomp_error ("not enough memory to store CPU affinity list");
-	  return;
+	  if (!quiet)
+	    gomp_error ("Logical CPU number %lu out of range", num);
+	  return false;
	}
-  for (widx = idx = 0; widx < count && idx < 65536; idx++)
-    if (CPU_ISSET (idx, &cpuset))
+      CPU_SET_S (num, gomp_cpuset_size, cpusetp);
+      if (--len == 0)
+	return true;
+      if ((stride < 0 && num + stride > num)
+	  || (stride > 0 && num + stride < num))
+	{
+	  if (!quiet)
+	    gomp_error ("Logical CPU number %lu+%ld out of range",
+			num, stride);
+	  return false;
+	}
+      num += stride;
+    }
+}
+
+bool
+gomp_affinity_remove_cpu (void *p, unsigned long num)
+{
+  cpu_set_t *cpusetp = (cpu_set_t *) p;
+  if (num >= 8 * gomp_cpuset_size)
+    {
+      gomp_error ("Logical CPU number %lu out of range", num);
+      return false;
+    }
+  if (!CPU_ISSET_S (num, gomp_cpuset_size, cpusetp))
+    {
+      gomp_error ("Logical CPU %lu to be removed is not in the set", num);
+      return false;
+    }
+  CPU_CLR_S (num, gomp_cpuset_size, cpusetp);
+  return true;
+}
+
+bool
+gomp_affinity_copy_place (void *p, void *q, long stride)
+{
+  unsigned long i, max = 8 * gomp_cpuset_size;
+  cpu_set_t *destp = (cpu_set_t *) p;
+  cpu_set_t *srcp = (cpu_set_t *) q;
+
+  CPU_ZERO_S (gomp_cpuset_size, destp);
+  for (i = 0; i < max; i++)
+    if (CPU_ISSET_S (i, gomp_cpuset_size, srcp))
+      {
+	if ((stride < 0 && i + stride > i)
+	    || (stride > 0 && (i + stride < i || i + stride >= max)))
+	  {
+	    gomp_error ("Logical CPU number %lu+%ld out of range", i, stride);
+	    return false;
+	  }
+	CPU_SET_S (i + stride, gomp_cpuset_size, destp);
+      }
+  return true;
+}
+
+bool
+gomp_affinity_same_place (void *p, void *q)
+{
+#ifdef CPU_EQUAL_S
+  return CPU_EQUAL_S (gomp_cpuset_size, (cpu_set_t *) p, (cpu_set_t *) q);
+#else
+  return memcmp (p, q, gomp_cpuset_size) == 0;
+#endif
+}
+
+bool
+gomp_affinity_finalize_place_list (bool quiet)
+{
+  unsigned long i, j;
+
+  for (i = 0, j = 0; i < gomp_places_list_len; i++)
+    {
+      cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[i];
+      bool nonempty = false;
+#ifdef CPU_AND_S
+      CPU_AND_S (gomp_cpuset_size, cpusetp, cpusetp, gomp_cpusetp);
+      nonempty = gomp_cpuset_popcount (gomp_cpuset_size, cpusetp) != 0;
+#else
+      unsigned long k, max = gomp_cpuset_size / sizeof (cpusetp->__bits[0]);
+      for (k = 0; k < max; k++)
+	if ((cpusetp->__bits[k] &= gomp_cpusetp->__bits[k]) != 0)
+	  nonempty = true;
+#endif
+      if (nonempty)
+	gomp_places_list[j++] = gomp_places_list[i];
+    }
+
+  if (j == 0)
+    {
+      if (!quiet)
+	gomp_error ("None of the places contain usable logical CPUs");
+      return false;
+    }
+  else if (j < gomp_places_list_len)
+    {
+      if (!quiet)
+	gomp_error ("Number of places reduced from %ld to %ld because some "
+		    "places didn't contain any usable logical CPUs",
+		    gomp_places_list_len, j);
+      gomp_places_list_len = j;
+    }
+  return true;
+}
+
+bool
+gomp_affinity_init_level (int level, unsigned long count, bool quiet)
+{
+  unsigned long i, max = 8 * gomp_cpuset_size;
+
+  if (gomp_cpusetp)
+    {
+      unsigned long maxcount
+	= gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
+      if (count > maxcount)
+	count = maxcount;
+    }
+  gomp_places_list = gomp_affinity_alloc (count, quiet);
+  gomp_places_list_len = 0;
+  if (gomp_places_list == NULL)
+    return false;
+  /* SMT (threads).  */
+  if (level == 1)
+    {
+      for (i = 0; i < max && gomp_places_list_len < count; i++)
+	if (CPU_ISSET_S (i, gomp_cpuset_size, gomp_cpusetp))
	  {
-	    cpus++;
-	    gomp_cpu_affinity[widx++] = idx;
+	    gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
+	    gomp_affinity_add_cpus (gomp_places_list[gomp_places_list_len],
+				    i, 1, 0, true);
+	    ++gomp_places_list_len;
	  }
+      return true;
     }
   else
-    for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++)
-      if (gomp_cpu_affinity[idx] < CPU_SETSIZE
-	  && CPU_ISSET (gomp_cpu_affinity[idx], &cpuset))
+    {
+      char name[sizeof ("/sys/devices/system/cpu/cpu/topology/"
+			"thread_siblings_list") + 3 * sizeof (unsigned long)];
+      size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1;
+      cpu_set_t *copy = gomp_alloca (gomp_cpuset_size);
+      FILE *f;
+      char *line = NULL;
+      size_t linelen = 0;
+
+      memcpy (name, "/sys/devices/system/cpu/cpu", prefix_len);
+      memcpy (copy, gomp_cpusetp, gomp_cpuset_size);
+      for (i = 0; i < max && gomp_places_list_len < count; i++)
+	if (CPU_ISSET_S (i, gomp_cpuset_size, copy))
+	  {
+	    sprintf (name + prefix_len, "%lu/topology/%s_siblings_list",
+		     i, level == 2 ? "thread" : "core");
+	    f = fopen (name, "r");
+	    if (f != NULL)
+	      {
+		if (getline (&line, &linelen, f) > 0)
+		  {
+		    char *p = line;
+		    bool seen_i = false;
+		    void *pl = gomp_places_list[gomp_places_list_len];
+		    gomp_affinity_init_place (pl);
+		    while (*p && *p != '\n')
+		      {
+			unsigned long first, last;
+			errno = 0;
+			first = strtoul (p, &p, 10);
+			if (errno)
+			  break;
+			last = first;
+			if (*p == '-')
+			  {
+ {
|
||
+ errno = 0;
|
||
+ last = strtoul (p + 1, &p, 10);
|
||
+ if (errno || last < first)
|
||
+ break;
|
||
+ }
|
||
+ for (; first <= last; first++)
|
||
+ if (CPU_ISSET_S (first, gomp_cpuset_size, copy)
|
||
+ && gomp_affinity_add_cpus (pl, first, 1, 0,
|
||
+ true))
|
||
+ {
|
||
+ CPU_CLR_S (first, gomp_cpuset_size, copy);
|
||
+ if (first == i)
|
||
+ seen_i = true;
|
||
+ }
|
||
+ if (*p == ',')
|
||
+ ++p;
|
||
+ }
|
||
+ if (seen_i)
|
||
+ gomp_places_list_len++;
|
||
+ }
|
||
+ fclose (f);
|
||
+ }
|
||
+ }
|
||
+ if (gomp_places_list_len == 0)
|
||
{
|
||
- if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpusetnew))
|
||
- {
|
||
- cpus++;
|
||
- CPU_SET (gomp_cpu_affinity[idx], &cpusetnew);
|
||
- }
|
||
- gomp_cpu_affinity[widx++] = gomp_cpu_affinity[idx];
|
||
+ if (!quiet)
|
||
+ gomp_error ("Error reading %s topology",
|
||
+ level == 2 ? "core" : "socket");
|
||
+ free (gomp_places_list);
|
||
+ gomp_places_list = NULL;
|
||
+ return false;
|
||
}
|
||
-
|
||
- if (widx == 0)
|
||
- {
|
||
- gomp_error ("no CPUs left for affinity setting");
|
||
- free (gomp_cpu_affinity);
|
||
- gomp_cpu_affinity = NULL;
|
||
- gomp_cpu_affinity_len = 0;
|
||
- return;
|
||
- }
|
||
-
|
||
- gomp_cpu_affinity_len = widx;
|
||
- if (cpus < gomp_available_cpus)
|
||
- gomp_available_cpus = cpus;
|
||
- CPU_ZERO (&cpuset);
|
||
- CPU_SET (gomp_cpu_affinity[0], &cpuset);
|
||
- pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset);
|
||
- affinity_counter = 1;
|
||
+ return true;
|
||
+ }
|
||
+ return false;
|
||
}
|
||
|
||
void
|
||
-gomp_init_thread_affinity (pthread_attr_t *attr)
|
||
+gomp_affinity_print_place (void *p)
|
||
{
|
||
- unsigned int cpu;
|
||
- cpu_set_t cpuset;
|
||
+ unsigned long i, max = 8 * gomp_cpuset_size, len;
|
||
+ cpu_set_t *cpusetp = (cpu_set_t *) p;
|
||
+ bool notfirst = false;
|
||
|
||
- cpu = __sync_fetch_and_add (&affinity_counter, 1);
|
||
- cpu %= gomp_cpu_affinity_len;
|
||
- CPU_ZERO (&cpuset);
|
||
- CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
|
||
- pthread_attr_setaffinity_np (attr, sizeof (cpu_set_t), &cpuset);
|
||
+ for (i = 0, len = 0; i < max; i++)
|
||
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
|
||
+ {
|
||
+ if (len == 0)
|
||
+ {
|
||
+ if (notfirst)
|
||
+ fputc (',', stderr);
|
||
+ notfirst = true;
|
||
+ fprintf (stderr, "%lu", i);
|
||
+ }
|
||
+ ++len;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (len > 1)
|
||
+ fprintf (stderr, ":%lu", len);
|
||
+ len = 0;
|
||
+ }
|
||
+ if (len > 1)
|
||
+ fprintf (stderr, ":%lu", len);
|
||
}
|
||
|
||
#else
|
||
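
The sysfs *_siblings_list files read above use the kernel's CPU list format,
e.g. "0-3,8-11".  For illustration only (not part of the patch), the
standalone program below mirrors the strtoul/range loop that
gomp_affinity_init_level uses to walk such a list; the parse_siblings helper
name is invented for the sketch.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Walk a CPU list such as "0-3,8-11" and report each CPU, following
   the same errno/strtoul pattern as the code above.  */
static void
parse_siblings (const char *line)
{
  const char *p = line;
  while (*p && *p != '\n')
    {
      char *end;
      unsigned long first, last;
      errno = 0;
      first = strtoul (p, &end, 10);
      if (errno || end == p)
        break;
      p = end;
      last = first;
      if (*p == '-')
        {
          errno = 0;
          last = strtoul (p + 1, &end, 10);
          if (errno || last < first)
            break;
          p = end;
        }
      for (; first <= last; first++)
        printf ("CPU %lu\n", first);
      if (*p == ',')
        ++p;
    }
}

int
main (void)
{
  parse_siblings ("0-3,8-11\n");  /* prints CPUs 0 1 2 3 8 9 10 11 */
  return 0;
}
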
--- libgomp/config/linux/bar.c.jj 2009-04-14 15:41:02.000000000 +0200
+++ libgomp/config/linux/bar.c 2014-05-15 14:35:41.458322771 +0200
@@ -33,12 +33,12 @@
 void
 gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  if (__builtin_expect ((state & 1) != 0, 0))
+  if (__builtin_expect ((state & BAR_WAS_LAST) != 0, 0))
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       bar->awaited = bar->total;
       atomic_write_barrier ();
-      bar->generation += 4;
+      bar->generation += BAR_INCR;
       futex_wake ((int *) &bar->generation, INT_MAX);
     }
   else
@@ -68,7 +68,7 @@ void
 gomp_barrier_wait_last (gomp_barrier_t *bar)
 {
   gomp_barrier_state_t state = gomp_barrier_wait_start (bar);
-  if (state & 1)
+  if (state & BAR_WAS_LAST)
     gomp_barrier_wait_end (bar, state);
 }
 
@@ -81,38 +81,42 @@ gomp_team_barrier_wake (gomp_barrier_t *
 void
 gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  unsigned int generation;
+  unsigned int generation, gen;
 
-  if (__builtin_expect ((state & 1) != 0, 0))
+  if (__builtin_expect ((state & BAR_WAS_LAST) != 0, 0))
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       struct gomp_thread *thr = gomp_thread ();
       struct gomp_team *team = thr->ts.team;
       bar->awaited = bar->total;
       atomic_write_barrier ();
+      team->work_share_cancelled = 0;
       if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
-          state &= ~1;
+          state &= ~BAR_WAS_LAST;
        }
       else
        {
-          bar->generation = state + 3;
+          state &= ~BAR_CANCELLED;
+          state += BAR_INCR - BAR_WAS_LAST;
+          bar->generation = state;
          futex_wake ((int *) &bar->generation, INT_MAX);
          return;
        }
     }
 
   generation = state;
+  state &= ~BAR_CANCELLED;
   do
     {
      do_wait ((int *) &bar->generation, generation);
-      if (__builtin_expect (bar->generation & 1, 0))
+      gen = bar->generation;
+      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        gomp_barrier_handle_tasks (state);
-      if ((bar->generation & 2))
-        generation |= 2;
+      generation |= (gen & BAR_WAITING_FOR_TASK);
     }
-  while (bar->generation != state + 4);
+  while (gen != state + BAR_INCR);
 }
 
 void
@@ -120,3 +124,86 @@ gomp_team_barrier_wait (gomp_barrier_t *
 {
   gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
 }
+
+void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+    bar->awaited_final = bar->total;
+  gomp_team_barrier_wait_end (bar, state);
+}
+
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+                                   gomp_barrier_state_t state)
+{
+  unsigned int generation, gen;
+
+  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+    {
+      /* Next time we'll be awaiting TOTAL threads again.  */
+      /* BAR_CANCELLED should never be set in state here, because
+         cancellation means that at least one of the threads has been
+         cancelled, thus on a cancellable barrier we should never see
+         all threads to arrive.  */
+      struct gomp_thread *thr = gomp_thread ();
+      struct gomp_team *team = thr->ts.team;
+
+      bar->awaited = bar->total;
+      team->work_share_cancelled = 0;
+      if (__builtin_expect (team->task_count, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          state &= ~BAR_WAS_LAST;
+        }
+      else
+        {
+          state += BAR_INCR - BAR_WAS_LAST;
+          bar->generation = state;
+          futex_wake ((int *) &bar->generation, INT_MAX);
+          return false;
+        }
+    }
+
+  if (__builtin_expect (state & BAR_CANCELLED, 0))
+    return true;
+
+  generation = state;
+  do
+    {
+      do_wait ((int *) &bar->generation, generation);
+      gen = bar->generation;
+      if (__builtin_expect (gen & BAR_CANCELLED, 0))
+        return true;
+      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          gen = bar->generation;
+        }
+      generation |= gen & BAR_WAITING_FOR_TASK;
+    }
+  while (gen != state + BAR_INCR);
+
+  return false;
+}
+
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+}
+
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+  gomp_mutex_lock (&team->task_lock);
+  if (team->barrier.generation & BAR_CANCELLED)
+    {
+      gomp_mutex_unlock (&team->task_lock);
+      return;
+    }
+  team->barrier.generation |= BAR_CANCELLED;
+  gomp_mutex_unlock (&team->task_lock);
+  futex_wake ((int *) &team->barrier.generation, INT_MAX);
+}
--- libgomp/config/linux/proc.c.jj 2014-05-15 11:39:34.000780199 +0200
+++ libgomp/config/linux/proc.c 2014-05-15 13:12:46.487894100 +0200
@@ -31,6 +31,7 @@
 #endif
 #include "libgomp.h"
 #include "proc.h"
+#include <errno.h>
 #include <stdlib.h>
 #include <unistd.h>
 #ifdef HAVE_GETLOADAVG
@@ -40,19 +41,28 @@
 #endif
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
+unsigned long gomp_cpuset_size;
+static unsigned long gomp_get_cpuset_size;
+cpu_set_t *gomp_cpusetp;
+
 unsigned long
-gomp_cpuset_popcount (cpu_set_t *cpusetp)
+gomp_cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
 {
-#ifdef CPU_COUNT
-  /* glibc 2.6 and above provide a macro for this.  */
-  return CPU_COUNT (cpusetp);
+#ifdef CPU_COUNT_S
+  /* glibc 2.7 and above provide a macro for this.  */
+  return CPU_COUNT_S (cpusetsize, cpusetp);
 #else
+#ifdef CPU_COUNT
+  if (cpusetsize == sizeof (cpu_set_t))
+    /* glibc 2.6 and above provide a macro for this.  */
+    return CPU_COUNT (cpusetp);
+#endif
   size_t i;
   unsigned long ret = 0;
-  extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)];
+  extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)
+                   ? 1 : -1] __attribute__((unused));
 
-  (void) check;
-  for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++)
+  for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
     {
      unsigned long int mask = cpusetp->__bits[i];
      if (mask == 0)
@@ -71,16 +81,63 @@ void
 gomp_init_num_threads (void)
 {
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-  cpu_set_t cpuset;
+#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
+  gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
+  gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size);
+#else
+  gomp_cpuset_size = sizeof (cpu_set_t);
+#endif
 
-  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0)
+  gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size);
+  do
     {
-      /* Count only the CPUs this process can use.  */
-      gomp_global_icv.nthreads_var = gomp_cpuset_popcount (&cpuset);
-      if (gomp_global_icv.nthreads_var == 0)
-        gomp_global_icv.nthreads_var = 1;
-      return;
+      int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+                                        gomp_cpusetp);
+      if (ret == 0)
+        {
+          /* Count only the CPUs this process can use.  */
+          gomp_global_icv.nthreads_var
+            = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
+          if (gomp_global_icv.nthreads_var == 0)
+            break;
+          gomp_get_cpuset_size = gomp_cpuset_size;
+#ifdef CPU_ALLOC_SIZE
+          unsigned long i;
+          for (i = gomp_cpuset_size * 8; i; i--)
+            if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp))
+              break;
+          gomp_cpuset_size = CPU_ALLOC_SIZE (i);
+#endif
+          return;
+        }
+      if (ret != EINVAL)
+        break;
+#ifdef CPU_ALLOC_SIZE
+      if (gomp_cpuset_size < sizeof (cpu_set_t))
+        gomp_cpuset_size = sizeof (cpu_set_t);
+      else
+        gomp_cpuset_size = gomp_cpuset_size * 2;
+      if (gomp_cpuset_size < 8 * sizeof (cpu_set_t))
+        gomp_cpusetp
+          = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size);
+      else
+        {
+          /* Avoid gomp_fatal if too large memory allocation would be
+             requested, e.g. kernel returning EINVAL all the time.  */
+          void *p = realloc (gomp_cpusetp, gomp_cpuset_size);
+          if (p == NULL)
+            break;
+          gomp_cpusetp = (cpu_set_t *) p;
+        }
+#else
+      break;
+#endif
    }
+  while (1);
+  gomp_cpuset_size = 0;
+  gomp_global_icv.nthreads_var = 1;
+  free (gomp_cpusetp);
+  gomp_cpusetp = NULL;
 #endif
 #ifdef _SC_NPROCESSORS_ONLN
   gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
@@ -91,15 +148,14 @@ static int
 get_num_procs (void)
 {
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-  cpu_set_t cpuset;
-
-  if (gomp_cpu_affinity == NULL)
+  if (gomp_places_list == NULL)
     {
      /* Count only the CPUs this process can use.  */
-      if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset),
-                                  &cpuset) == 0)
+      if (gomp_cpusetp
+          && pthread_getaffinity_np (pthread_self (), gomp_get_cpuset_size,
+                                     gomp_cpusetp) == 0)
        {
-          int ret = gomp_cpuset_popcount (&cpuset);
+          int ret = gomp_cpuset_popcount (gomp_get_cpuset_size, gomp_cpusetp);
          return ret != 0 ? ret : 1;
        }
     }
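
For illustration only (not part of the patch): the glibc dynamically sized
cpu_set_t interface that the new gomp_init_num_threads is built on.
CPU_ALLOC, CPU_ALLOC_SIZE, CPU_ZERO_S, CPU_COUNT_S and
pthread_getaffinity_np are the real glibc/NPTL interfaces used above; the
EINVAL grow-and-retry loop from the patch is only summarized in a comment.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  long nconf = sysconf (_SC_NPROCESSORS_CONF);
  if (nconf < 1)
    nconf = 1;
  size_t setsize = CPU_ALLOC_SIZE (nconf);  /* bytes needed for nconf CPUs */
  cpu_set_t *set = CPU_ALLOC (nconf);
  if (set == NULL)
    return 1;
  CPU_ZERO_S (setsize, set);
  /* The patch sizes the set from _SC_NPROCESSORS_CONF and, when this call
     fails with EINVAL (set too small for the kernel's possible CPU count),
     doubles the size and retries.  */
  if (pthread_getaffinity_np (pthread_self (), setsize, set) == 0)
    printf ("usable CPUs: %d\n", CPU_COUNT_S (setsize, set));
  CPU_FREE (set);
  return 0;
}
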
--- libgomp/config/linux/bar.h.jj 2009-04-14 15:41:02.000000000 +0200
+++ libgomp/config/linux/bar.h 2014-05-15 15:00:19.995379742 +0200
@@ -38,13 +38,25 @@ typedef struct
   unsigned total __attribute__((aligned (64)));
   unsigned generation;
   unsigned awaited __attribute__((aligned (64)));
+  unsigned awaited_final;
 } gomp_barrier_t;
+
 typedef unsigned int gomp_barrier_state_t;
 
+/* The generation field contains a counter in the high bits, with a few
+   low bits dedicated to flags.  Note that TASK_PENDING and WAS_LAST can
+   share space because WAS_LAST is never stored back to generation.  */
+#define BAR_TASK_PENDING     1
+#define BAR_WAS_LAST         1
+#define BAR_WAITING_FOR_TASK 2
+#define BAR_CANCELLED        4
+#define BAR_INCR             8
+
 static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
 {
   bar->total = count;
   bar->awaited = count;
+  bar->awaited_final = count;
   bar->generation = 0;
 }
 
@@ -62,24 +74,52 @@ extern void gomp_barrier_wait (gomp_barr
 extern void gomp_barrier_wait_last (gomp_barrier_t *);
 extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
 extern void gomp_team_barrier_wait (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
 extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
                                         gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+                                               gomp_barrier_state_t);
 extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team;
+extern void gomp_team_barrier_cancel (struct gomp_team *);
 
 static inline gomp_barrier_state_t
 gomp_barrier_wait_start (gomp_barrier_t *bar)
 {
-  unsigned int ret = bar->generation & ~3;
+  unsigned int ret = bar->generation;
+  ret &= -BAR_INCR | BAR_CANCELLED;
   /* Do we need any barrier here or is __sync_add_and_fetch acting
      as the needed LoadLoad barrier already?  */
-  ret += __sync_add_and_fetch (&bar->awaited, -1) == 0;
+  if (__sync_add_and_fetch (&bar->awaited, -1) == 0)
+    ret |= BAR_WAS_LAST;
   return ret;
 }
 
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+  return gomp_barrier_wait_start (bar);
+}
+
+/* This is like gomp_barrier_wait_start, except it decrements
+   bar->awaited_final rather than bar->awaited and should be used
+   for the gomp_team_end barrier only.  */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_final_start (gomp_barrier_t *bar)
+{
+  unsigned int ret = bar->generation;
+  ret &= -BAR_INCR | BAR_CANCELLED;
+  /* See above gomp_barrier_wait_start comment.  */
+  if (__sync_add_and_fetch (&bar->awaited_final, -1) == 0)
+    ret |= BAR_WAS_LAST;
+  return ret;
+}
+
 static inline bool
 gomp_barrier_last_thread (gomp_barrier_state_t state)
 {
-  return state & 1;
+  return state & BAR_WAS_LAST;
 }
 
 /* All the inlines below must be called with team->task_lock
@@ -88,31 +128,37 @@ gomp_barrier_last_thread (gomp_barrier_s
 static inline void
 gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
 {
-  bar->generation |= 1;
+  bar->generation |= BAR_TASK_PENDING;
 }
 
 static inline void
 gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
 {
-  bar->generation &= ~1;
+  bar->generation &= ~BAR_TASK_PENDING;
 }
 
 static inline void
 gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
 {
-  bar->generation |= 2;
+  bar->generation |= BAR_WAITING_FOR_TASK;
 }
 
 static inline bool
 gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
 {
-  return (bar->generation & 2) != 0;
+  return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
+}
+
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+  return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
 }
 
 static inline void
 gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  bar->generation = (state & ~3) + 4;
+  bar->generation = (state & -BAR_INCR) + BAR_INCR;
 }
 
 #endif /* GOMP_BARRIER_H */
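
For illustration only (not part of the patch): how the BAR_* constants
defined above pack a generation counter and flag bits into one word.  The
counter occupies the bits at and above BAR_INCR, so masking with -BAR_INCR
(all-ones above the three flag bits) strips the flags, and advancing the
barrier is a plain += BAR_INCR.

#include <stdio.h>

#define BAR_TASK_PENDING     1
#define BAR_WAS_LAST         1
#define BAR_WAITING_FOR_TASK 2
#define BAR_CANCELLED        4
#define BAR_INCR             8

int
main (void)
{
  /* Generation 3 with the cancelled and task-pending flags set.  */
  unsigned int generation = 3 * BAR_INCR | BAR_CANCELLED | BAR_TASK_PENDING;

  printf ("counter   = %u\n", (generation & -BAR_INCR) / BAR_INCR);  /* 3 */
  printf ("advanced  = %u\n",
          ((generation & -BAR_INCR) + BAR_INCR) / BAR_INCR);         /* 4 */
  printf ("cancelled = %d\n", (generation & BAR_CANCELLED) != 0);    /* 1 */
  return 0;
}
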
--- libgomp/work.c.jj 2009-04-14 15:40:59.000000000 +0200
+++ libgomp/work.c 2014-05-15 13:12:53.903857207 +0200
@@ -221,7 +221,10 @@ gomp_work_share_end (void)
   if (gomp_barrier_last_thread (bstate))
     {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
-        free_work_share (team, thr->ts.last_work_share);
+        {
+          team->work_shares_to_free = thr->ts.work_share;
+          free_work_share (team, thr->ts.last_work_share);
+        }
     }
 
   gomp_team_barrier_wait_end (&team->barrier, bstate);
@@ -229,6 +232,32 @@ gomp_work_share_end (void)
 }
 
 /* The current thread is done with its current work sharing construct.
+   This version implies a cancellable barrier at the end of the work-share.  */
+
+bool
+gomp_work_share_end_cancel (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  gomp_barrier_state_t bstate;
+
+  /* Cancellable work sharing constructs cannot be orphaned.  */
+  bstate = gomp_barrier_wait_cancel_start (&team->barrier);
+
+  if (gomp_barrier_last_thread (bstate))
+    {
+      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
+        {
+          team->work_shares_to_free = thr->ts.work_share;
+          free_work_share (team, thr->ts.last_work_share);
+        }
+    }
+  thr->ts.last_work_share = NULL;
+
+  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
+}
+
+/* The current thread is done with its current work sharing construct.
    This version does NOT imply a barrier at the end of the work-share.  */
 
 void
@@ -259,6 +288,9 @@ gomp_work_share_end_nowait (void)
 #endif
 
   if (completed == team->nthreads)
-    free_work_share (team, thr->ts.last_work_share);
+    {
+      team->work_shares_to_free = thr->ts.work_share;
+      free_work_share (team, thr->ts.last_work_share);
+    }
   thr->ts.last_work_share = NULL;
 }
--- libgomp/barrier.c.jj 2009-04-14 15:41:11.000000000 +0200
+++ libgomp/barrier.c 2014-05-15 13:12:46.027898917 +0200
@@ -39,3 +39,15 @@ GOMP_barrier (void)
 
   gomp_team_barrier_wait (&team->barrier);
 }
+
+bool
+GOMP_barrier_cancel (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+  /* The compiler transforms to barrier_cancel when it sees that the
+     barrier is within a construct that can cancel.  Thus we should
+     never have an orphaned cancellable barrier.  */
+  return gomp_team_barrier_wait_cancel (&team->barrier);
+}
--- libgomp/libgomp_g.h.jj 2014-05-15 11:39:33.593782372 +0200
+++ libgomp/libgomp_g.h 2014-05-15 13:17:57.845227007 +0200
@@ -33,6 +33,7 @@
 /* barrier.c */
 
 extern void GOMP_barrier (void);
+extern bool GOMP_barrier_cancel (void);
 
 /* critical.c */
 
@@ -76,9 +77,22 @@ extern void GOMP_parallel_loop_guided_st
                                               unsigned, long, long, long, long);
 extern void GOMP_parallel_loop_runtime_start (void (*)(void *), void *,
                                               unsigned, long, long, long);
+extern void GOMP_parallel_loop_static (void (*)(void *), void *,
+                                       unsigned, long, long, long, long,
+                                       unsigned);
+extern void GOMP_parallel_loop_dynamic (void (*)(void *), void *,
+                                        unsigned, long, long, long, long,
+                                        unsigned);
+extern void GOMP_parallel_loop_guided (void (*)(void *), void *,
+                                       unsigned, long, long, long, long,
+                                       unsigned);
+extern void GOMP_parallel_loop_runtime (void (*)(void *), void *,
+                                        unsigned, long, long, long,
+                                        unsigned);
 
 extern void GOMP_loop_end (void);
 extern void GOMP_loop_end_nowait (void);
+extern bool GOMP_loop_end_cancel (void);
 
 /* loop_ull.c */
 
@@ -158,12 +172,18 @@ extern void GOMP_ordered_end (void);
 extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
 extern void GOMP_parallel_end (void);
 
+extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
+extern bool GOMP_cancel (int, bool);
+extern bool GOMP_cancellation_point (int);
+
 /* task.c */
 
 extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
-                       long, long, bool, unsigned);
+                       long, long, bool, unsigned, void **);
 extern void GOMP_taskwait (void);
 extern void GOMP_taskyield (void);
+extern void GOMP_taskgroup_start (void);
+extern void GOMP_taskgroup_end (void);
 extern int omp_in_final (void);
 
 /* sections.c */
@@ -172,8 +192,11 @@ extern unsigned GOMP_sections_start (uns
 extern unsigned GOMP_sections_next (void);
 extern void GOMP_parallel_sections_start (void (*) (void *), void *,
                                           unsigned, unsigned);
+extern void GOMP_parallel_sections (void (*) (void *), void *,
+                                    unsigned, unsigned, unsigned);
 extern void GOMP_sections_end (void);
 extern void GOMP_sections_end_nowait (void);
+extern bool GOMP_sections_end_cancel (void);
 
 /* single.c */
 
@@ -181,4 +204,15 @@ extern bool GOMP_single_start (void);
 extern void *GOMP_single_copy_start (void);
 extern void GOMP_single_copy_end (void *);
 
+/* target.c */
+
+extern void GOMP_target (int, void (*) (void *), const void *,
+                         size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_data (int, const void *,
+                              size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_end_data (void);
+extern void GOMP_target_update (int, const void *,
+                                size_t, void **, size_t *, unsigned char *);
+extern void GOMP_teams (unsigned int, unsigned int);
+
 #endif /* LIBGOMP_G_H */
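
For illustration only (not part of the patch): roughly what a compiler
emits for "#pragma omp parallel" against these entry points, contrasting the
old GOMP_parallel_start/GOMP_parallel_end pair with the new combined
GOMP_parallel declared above.  The outlined function main_omp_fn_0 and
struct omp_data are invented names for the sketch.

extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
extern void GOMP_parallel_end (void);
extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);

struct omp_data { int n; };

/* Outlined body of the parallel region; runs once in every thread.  */
static void
main_omp_fn_0 (void *data)
{
  (void) data;
}

void
old_lowering (struct omp_data *d)
{
  GOMP_parallel_start (main_omp_fn_0, d, 0);
  main_omp_fn_0 (d);            /* the master runs the body itself */
  GOMP_parallel_end ();
}

void
new_lowering (struct omp_data *d)
{
  /* The extra flags argument carries e.g. the proc_bind clause;
     0 means none was given.  */
  GOMP_parallel (main_omp_fn_0, d, 0, 0);
}
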
--- libgomp/parallel.c.jj 2014-05-15 11:39:34.076779789 +0200
+++ libgomp/parallel.c 2014-05-15 13:26:04.367640970 +0200
@@ -37,18 +37,19 @@
 unsigned
 gomp_resolve_num_threads (unsigned specified, unsigned count)
 {
-  struct gomp_thread *thread = gomp_thread();
+  struct gomp_thread *thr = gomp_thread ();
   struct gomp_task_icv *icv;
   unsigned threads_requested, max_num_threads, num_threads;
-  unsigned long remaining;
+  unsigned long busy;
+  struct gomp_thread_pool *pool;
 
   icv = gomp_icv (false);
 
   if (specified == 1)
     return 1;
-  else if (thread->ts.active_level >= 1 && !icv->nest_var)
+  else if (thr->ts.active_level >= 1 && !icv->nest_var)
     return 1;
-  else if (thread->ts.active_level >= gomp_max_active_levels_var)
+  else if (thr->ts.active_level >= gomp_max_active_levels_var)
     return 1;
 
   /* If NUM_THREADS not specified, use nthreads_var.  */
@@ -72,30 +73,46 @@ gomp_resolve_num_threads (unsigned speci
       max_num_threads = count;
     }
 
-  /* ULONG_MAX stands for infinity.  */
-  if (__builtin_expect (gomp_thread_limit_var == ULONG_MAX, 1)
+  /* UINT_MAX stands for infinity.  */
+  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;
 
+  /* The threads_busy counter lives in thread_pool, if there
+     isn't a thread_pool yet, there must be just one thread
+     in the contention group.  If thr->team is NULL, this isn't
+     nested parallel, so there is just one thread in the
+     contention group as well, no need to handle it atomically.  */
+  pool = thr->thread_pool;
+  if (thr->ts.team == NULL)
+    {
+      num_threads = max_num_threads;
+      if (num_threads > icv->thread_limit_var)
+        num_threads = icv->thread_limit_var;
+      if (pool)
+        pool->threads_busy = num_threads;
+      return num_threads;
+    }
+
 #ifdef HAVE_SYNC_BUILTINS
   do
     {
-      remaining = gomp_remaining_threads_count;
+      busy = pool->threads_busy;
      num_threads = max_num_threads;
-      if (num_threads > remaining)
-        num_threads = remaining + 1;
+      if (icv->thread_limit_var - busy + 1 < num_threads)
+        num_threads = icv->thread_limit_var - busy + 1;
    }
-  while (__sync_val_compare_and_swap (&gomp_remaining_threads_count,
-                                      remaining, remaining - num_threads + 1)
-         != remaining);
+  while (__sync_val_compare_and_swap (&pool->threads_busy,
+                                      busy, busy + num_threads - 1)
+         != busy);
 #else
-  gomp_mutex_lock (&gomp_remaining_threads_lock);
+  gomp_mutex_lock (&gomp_managed_threads_lock);
   num_threads = max_num_threads;
-  remaining = gomp_remaining_threads_count;
-  if (num_threads > remaining)
-    num_threads = remaining + 1;
-  gomp_remaining_threads_count -= num_threads - 1;
-  gomp_mutex_unlock (&gomp_remaining_threads_lock);
+  busy = pool->threads_busy;
+  if (icv->thread_limit_var - busy + 1 < num_threads)
+    num_threads = icv->thread_limit_var - busy + 1;
+  pool->threads_busy += num_threads - 1;
+  gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
 
   return num_threads;
@@ -105,31 +122,113 @@ void
 GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
 {
   num_threads = gomp_resolve_num_threads (num_threads, 0);
-  gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads));
+  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
 }
 
 void
 GOMP_parallel_end (void)
 {
-  if (__builtin_expect (gomp_thread_limit_var != ULONG_MAX, 0))
+  struct gomp_task_icv *icv = gomp_icv (false);
+  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
     {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
-      if (team && team->nthreads > 1)
+      unsigned int nthreads = team ? team->nthreads : 1;
+      gomp_team_end ();
+      if (nthreads > 1)
        {
+          /* If not nested, there is just one thread in the
+             contention group left, no need for atomicity.  */
+          if (thr->ts.team == NULL)
+            thr->thread_pool->threads_busy = 1;
+          else
+            {
 #ifdef HAVE_SYNC_BUILTINS
-          __sync_fetch_and_add (&gomp_remaining_threads_count,
-                                1UL - team->nthreads);
+              __sync_fetch_and_add (&thr->thread_pool->threads_busy,
+                                    1UL - nthreads);
 #else
-          gomp_mutex_lock (&gomp_remaining_threads_lock);
-          gomp_remaining_threads_count -= team->nthreads - 1;
-          gomp_mutex_unlock (&gomp_remaining_threads_lock);
+              gomp_mutex_lock (&gomp_managed_threads_lock);
+              thr->thread_pool->threads_busy -= nthreads - 1;
+              gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
+            }
        }
     }
-  gomp_team_end ();
+  else
+    gomp_team_end ();
+}
+ialias (GOMP_parallel_end)
+
+void
+GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
+{
+  num_threads = gomp_resolve_num_threads (num_threads, 0);
+  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
+  fn (data);
+  ialias_call (GOMP_parallel_end) ();
+}
+
+bool
+GOMP_cancellation_point (int which)
+{
+  if (!gomp_cancel_var)
+    return false;
+
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
+    {
+      if (team == NULL)
+        return false;
+      return team->work_share_cancelled != 0;
+    }
+  else if (which & GOMP_CANCEL_TASKGROUP)
+    {
+      if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
+        return true;
+      /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
+         as #pragma omp cancel parallel also cancels all explicit
+         tasks.  */
+    }
+  if (team)
+    return gomp_team_barrier_cancelled (&team->barrier);
+  return false;
 }
+ialias (GOMP_cancellation_point)
+
+bool
+GOMP_cancel (int which, bool do_cancel)
+{
+  if (!gomp_cancel_var)
+    return false;
+
+  if (!do_cancel)
+    return ialias_call (GOMP_cancellation_point) (which);
 
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
+    {
+      /* In orphaned worksharing region, all we want to cancel
+         is current thread.  */
+      if (team != NULL)
+        team->work_share_cancelled = 1;
+      return true;
+    }
+  else if (which & GOMP_CANCEL_TASKGROUP)
+    {
+      if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
+        {
+          gomp_mutex_lock (&team->task_lock);
+          thr->task->taskgroup->cancelled = true;
+          gomp_mutex_unlock (&team->task_lock);
+        }
+      return true;
+    }
+  team->team_cancelled = 1;
+  gomp_team_barrier_cancel (team);
+  return true;
+}
 
 /* The public OpenMP API for thread and team related inquiries.  */
 
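
For illustration only (not part of the patch): an OpenMP C program that
exercises these cancellation entry points.  With OMP_CANCELLATION=true
(gomp_cancel_var), "#pragma omp cancel for" below is lowered to a GOMP_cancel
call and "#pragma omp cancellation point for" to a GOMP_cancellation_point
call; build with gcc -fopenmp.

#include <stdio.h>

int
main (void)
{
  int found = -1;
  #pragma omp parallel
  #pragma omp for
  for (int i = 0; i < 1000000; i++)
    {
      if (i == 12345)           /* stand-in for a search predicate */
        {
          #pragma omp critical
          found = i;
          #pragma omp cancel for
        }
      #pragma omp cancellation point for
    }
  printf ("found %d\n", found);
  return 0;
}
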
--- libgomp/libgomp.map.jj 2014-05-15 11:39:33.606782302 +0200
+++ libgomp/libgomp.map 2014-05-15 13:12:46.508895170 +0200
@@ -108,6 +108,27 @@ OMP_3.1 {
         omp_in_final_;
 } OMP_3.0;
 
+OMP_4.0 {
+  global:
+        omp_get_cancellation;
+        omp_get_cancellation_;
+        omp_get_proc_bind;
+        omp_get_proc_bind_;
+        omp_set_default_device;
+        omp_set_default_device_;
+        omp_set_default_device_8_;
+        omp_get_default_device;
+        omp_get_default_device_;
+        omp_get_num_devices;
+        omp_get_num_devices_;
+        omp_get_num_teams;
+        omp_get_num_teams_;
+        omp_get_team_num;
+        omp_get_team_num_;
+        omp_is_initial_device;
+        omp_is_initial_device_;
+} OMP_3.1;
+
 GOMP_1.0 {
   global:
         GOMP_atomic_end;
@@ -179,3 +200,25 @@ GOMP_3.0 {
   global:
         GOMP_taskyield;
 } GOMP_2.0;
+
+GOMP_4.0 {
+  global:
+        GOMP_barrier_cancel;
+        GOMP_cancel;
+        GOMP_cancellation_point;
+        GOMP_loop_end_cancel;
+        GOMP_parallel_loop_dynamic;
+        GOMP_parallel_loop_guided;
+        GOMP_parallel_loop_runtime;
+        GOMP_parallel_loop_static;
+        GOMP_parallel_sections;
+        GOMP_parallel;
+        GOMP_sections_end_cancel;
+        GOMP_taskgroup_start;
+        GOMP_taskgroup_end;
+        GOMP_target;
+        GOMP_target_data;
+        GOMP_target_end_data;
+        GOMP_target_update;
+        GOMP_teams;
+} GOMP_3.0;
--- libgomp/env.c.jj 2014-05-15 11:39:33.634782155 +0200
+++ libgomp/env.c 2014-05-15 13:12:46.507895135 +0200
@@ -30,6 +30,10 @@
 #include "libgomp_f.h"
 #include <ctype.h>
 #include <stdlib.h>
+#include <stdio.h>
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>  /* For PRIu64.  */
+#endif
 #ifdef STRING_WITH_STRINGS
 # include <string.h>
 # include <strings.h>
@@ -51,23 +55,28 @@
 
 struct gomp_task_icv gomp_global_icv = {
   .nthreads_var = 1,
+  .thread_limit_var = UINT_MAX,
   .run_sched_var = GFS_DYNAMIC,
   .run_sched_modifier = 1,
+  .default_device_var = 0,
   .dyn_var = false,
-  .nest_var = false
+  .nest_var = false,
+  .bind_var = omp_proc_bind_false,
+  .target_data = NULL
 };
 
-unsigned short *gomp_cpu_affinity;
-size_t gomp_cpu_affinity_len;
 unsigned long gomp_max_active_levels_var = INT_MAX;
-unsigned long gomp_thread_limit_var = ULONG_MAX;
-unsigned long gomp_remaining_threads_count;
+bool gomp_cancel_var = false;
 #ifndef HAVE_SYNC_BUILTINS
-gomp_mutex_t gomp_remaining_threads_lock;
+gomp_mutex_t gomp_managed_threads_lock;
 #endif
 unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
 unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
 unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+char *gomp_bind_var_list;
+unsigned long gomp_bind_var_list_len;
+void **gomp_places_list;
+unsigned long gomp_places_list_len;
 
 /* Parse the OMP_SCHEDULE environment variable.  */
 
@@ -179,6 +188,24 @@ parse_unsigned_long (const char *name, u
   return false;
 }
 
+/* Parse a positive int environment variable.  Return true if one was
+   present and it was successfully parsed.  */
+
+static bool
+parse_int (const char *name, int *pvalue, bool allow_zero)
+{
+  unsigned long value;
+  if (!parse_unsigned_long (name, &value, allow_zero))
+    return false;
+  if (value > INT_MAX)
+    {
+      gomp_error ("Invalid value for environment variable %s", name);
+      return false;
+    }
+  *pvalue = (int) value;
+  return true;
+}
+
 /* Parse an unsigned long list environment variable.  Return true if one was
    present and it was successfully parsed.  */
 
@@ -268,6 +295,416 @@ parse_unsigned_long_list (const char *na
   return false;
 }
 
+/* Parse environment variable set to a boolean or list of omp_proc_bind_t
+   enum values.  Return true if one was present and it was successfully
+   parsed.  */
+
+static bool
+parse_bind_var (const char *name, char *p1stvalue,
+                char **pvalues, unsigned long *pnvalues)
+{
+  char *env;
+  char value = omp_proc_bind_false, *values = NULL;
+  int i;
+  static struct proc_bind_kinds
+  {
+    const char name[7];
+    const char len;
+    omp_proc_bind_t kind;
+  } kinds[] =
+  {
+    { "false", 5, omp_proc_bind_false },
+    { "true", 4, omp_proc_bind_true },
+    { "master", 6, omp_proc_bind_master },
+    { "close", 5, omp_proc_bind_close },
+    { "spread", 6, omp_proc_bind_spread }
+  };
+
+  env = getenv (name);
+  if (env == NULL)
+    return false;
+
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env == '\0')
+    goto invalid;
+
+  for (i = 0; i < 5; i++)
+    if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0)
+      {
+        value = kinds[i].kind;
+        env += kinds[i].len;
+        break;
+      }
+  if (i == 5)
+    goto invalid;
+
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env != '\0')
+    {
+      if (*env == ',')
+        {
+          unsigned long nvalues = 0, nalloced = 0;
+
+          if (value == omp_proc_bind_false
+              || value == omp_proc_bind_true)
+            goto invalid;
+
+          do
+            {
+              env++;
+              if (nvalues == nalloced)
+                {
+                  char *n;
+                  nalloced = nalloced ? nalloced * 2 : 16;
+                  n = realloc (values, nalloced);
+                  if (n == NULL)
+                    {
+                      free (values);
+                      gomp_error ("Out of memory while trying to parse"
+                                  " environment variable %s", name);
+                      return false;
+                    }
+                  values = n;
+                  if (nvalues == 0)
+                    values[nvalues++] = value;
+                }
+
+              while (isspace ((unsigned char) *env))
+                ++env;
+              if (*env == '\0')
+                goto invalid;
+
+              for (i = 2; i < 5; i++)
+                if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0)
+                  {
+                    value = kinds[i].kind;
+                    env += kinds[i].len;
+                    break;
+                  }
+              if (i == 5)
+                goto invalid;
+
+              values[nvalues++] = value;
+              while (isspace ((unsigned char) *env))
+                ++env;
+              if (*env == '\0')
+                break;
+              if (*env != ',')
+                goto invalid;
+            }
+          while (1);
+          *p1stvalue = values[0];
+          *pvalues = values;
+          *pnvalues = nvalues;
+          return true;
+        }
+      goto invalid;
+    }
+
+  *p1stvalue = value;
+  return true;
+
+ invalid:
+  free (values);
+  gomp_error ("Invalid value for environment variable %s", name);
+  return false;
+}
+
+static bool
+parse_one_place (char **envp, bool *negatep, unsigned long *lenp,
+                 long *stridep)
+{
+  char *env = *envp, *start;
+  void *p = gomp_places_list ? gomp_places_list[gomp_places_list_len] : NULL;
+  unsigned long len = 1;
+  long stride = 1;
+  int pass;
+  bool any_negate = false;
+  *negatep = false;
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env == '!')
+    {
+      *negatep = true;
+      ++env;
+      while (isspace ((unsigned char) *env))
+        ++env;
+    }
+  if (*env != '{')
+    return false;
+  ++env;
+  while (isspace ((unsigned char) *env))
+    ++env;
+  start = env;
+  for (pass = 0; pass < (any_negate ? 2 : 1); pass++)
+    {
+      env = start;
+      do
+        {
+          unsigned long this_num, this_len = 1;
+          long this_stride = 1;
+          bool this_negate = (*env == '!');
+          if (this_negate)
+            {
+              if (gomp_places_list)
+                any_negate = true;
+              ++env;
+              while (isspace ((unsigned char) *env))
+                ++env;
+            }
+
+          errno = 0;
+          this_num = strtoul (env, &env, 10);
+          if (errno)
+            return false;
+          while (isspace ((unsigned char) *env))
+            ++env;
+          if (*env == ':')
+            {
+              ++env;
+              while (isspace ((unsigned char) *env))
+                ++env;
+              errno = 0;
+              this_len = strtoul (env, &env, 10);
+              if (errno || this_len == 0)
+                return false;
+              while (isspace ((unsigned char) *env))
+                ++env;
+              if (*env == ':')
+                {
+                  ++env;
+                  while (isspace ((unsigned char) *env))
+                    ++env;
+                  errno = 0;
+                  this_stride = strtol (env, &env, 10);
+                  if (errno)
+                    return false;
+                  while (isspace ((unsigned char) *env))
+                    ++env;
+                }
+            }
+          if (this_negate && this_len != 1)
+            return false;
+          if (gomp_places_list && pass == this_negate)
+            {
+              if (this_negate)
+                {
+                  if (!gomp_affinity_remove_cpu (p, this_num))
+                    return false;
+                }
+              else if (!gomp_affinity_add_cpus (p, this_num, this_len,
+                                                this_stride, false))
+                return false;
+            }
+          if (*env == '}')
+            break;
+          if (*env != ',')
+            return false;
+          ++env;
+        }
+      while (1);
+    }
+
+  ++env;
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env == ':')
+    {
+      ++env;
+      while (isspace ((unsigned char) *env))
+        ++env;
+      errno = 0;
+      len = strtoul (env, &env, 10);
+      if (errno || len == 0 || len >= 65536)
+        return false;
+      while (isspace ((unsigned char) *env))
+        ++env;
+      if (*env == ':')
+        {
+          ++env;
+          while (isspace ((unsigned char) *env))
+            ++env;
+          errno = 0;
+          stride = strtol (env, &env, 10);
+          if (errno)
+            return false;
+          while (isspace ((unsigned char) *env))
+            ++env;
+        }
+    }
+  if (*negatep && len != 1)
+    return false;
+  *envp = env;
+  *lenp = len;
+  *stridep = stride;
+  return true;
+}
+
+static bool
+parse_places_var (const char *name, bool ignore)
+{
+  char *env = getenv (name), *end;
+  bool any_negate = false;
+  int level = 0;
+  unsigned long count = 0;
+  if (env == NULL)
+    return false;
+
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env == '\0')
+    goto invalid;
+
+  if (strncasecmp (env, "threads", 7) == 0)
+    {
+      env += 7;
+      level = 1;
+    }
+  else if (strncasecmp (env, "cores", 5) == 0)
+    {
+      env += 5;
+      level = 2;
+    }
+  else if (strncasecmp (env, "sockets", 7) == 0)
+    {
+      env += 7;
+      level = 3;
+    }
+  if (level)
+    {
+      count = ULONG_MAX;
+      while (isspace ((unsigned char) *env))
+        ++env;
+      if (*env != '\0')
+        {
+          if (*env++ != '(')
+            goto invalid;
+          while (isspace ((unsigned char) *env))
+            ++env;
+
+          errno = 0;
+          count = strtoul (env, &end, 10);
+          if (errno)
+            goto invalid;
+          env = end;
+          while (isspace ((unsigned char) *env))
+            ++env;
+          if (*env != ')')
+            goto invalid;
+          ++env;
+          while (isspace ((unsigned char) *env))
+            ++env;
+          if (*env != '\0')
+            goto invalid;
+        }
+
+      if (ignore)
+        return false;
+
+      return gomp_affinity_init_level (level, count, false);
+    }
+
+  count = 0;
+  end = env;
+  do
+    {
+      bool negate;
+      unsigned long len;
+      long stride;
+      if (!parse_one_place (&end, &negate, &len, &stride))
+        goto invalid;
+      if (negate)
+        {
+          if (!any_negate)
+            count++;
+          any_negate = true;
+        }
+      else
+        count += len;
+      if (count > 65536)
+        goto invalid;
+      if (*end == '\0')
+        break;
+      if (*end != ',')
+        goto invalid;
+      end++;
+    }
+  while (1);
+
+  if (ignore)
+    return false;
+
+  gomp_places_list_len = 0;
+  gomp_places_list = gomp_affinity_alloc (count, false);
+  if (gomp_places_list == NULL)
+    return false;
+
+  do
+    {
+      bool negate;
+      unsigned long len;
+      long stride;
+      gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
+      if (!parse_one_place (&env, &negate, &len, &stride))
+        goto invalid;
+      if (negate)
+        {
+          void *p;
+          for (count = 0; count < gomp_places_list_len; count++)
+            if (gomp_affinity_same_place
+                  (gomp_places_list[count],
+                   gomp_places_list[gomp_places_list_len]))
+              break;
+          if (count == gomp_places_list_len)
+            {
+              gomp_error ("Trying to remove a non-existing place from list "
+                          "of places");
+              goto invalid;
+            }
+          p = gomp_places_list[count];
+          memmove (&gomp_places_list[count],
+                   &gomp_places_list[count + 1],
+                   (gomp_places_list_len - count - 1) * sizeof (void *));
+          --gomp_places_list_len;
+          gomp_places_list[gomp_places_list_len] = p;
+        }
+      else if (len == 1)
+        ++gomp_places_list_len;
+      else
+        {
+          for (count = 0; count < len - 1; count++)
+            if (!gomp_affinity_copy_place
+                  (gomp_places_list[gomp_places_list_len + count + 1],
+                   gomp_places_list[gomp_places_list_len + count],
+                   stride))
+              goto invalid;
+          gomp_places_list_len += len;
+        }
+      if (*env == '\0')
+        break;
+      env++;
+    }
+  while (1);
+
+  if (gomp_places_list_len == 0)
+    {
+      gomp_error ("All places have been removed");
+      goto invalid;
+    }
+  if (!gomp_affinity_finalize_place_list (false))
+    goto invalid;
+  return true;
+
+ invalid:
+  free (gomp_places_list);
+  gomp_places_list = NULL;
+  gomp_places_list_len = 0;
+  gomp_error ("Invalid value for environment variable %s", name);
+  return false;
+}
+
 /* Parse the OMP_STACKSIZE environment variable.  Return true if one was
    present and it was successfully parsed.  */
 
@@ -473,86 +910,95 @@ parse_wait_policy (void)
   present and it was successfully parsed.  */
 
 static bool
-parse_affinity (void)
+parse_affinity (bool ignore)
 {
-  char *env, *end;
+  char *env, *end, *start;
+  int pass;
   unsigned long cpu_beg, cpu_end, cpu_stride;
-  unsigned short *cpus = NULL;
-  size_t allocated = 0, used = 0, needed;
+  size_t count = 0, needed;
 
   env = getenv ("GOMP_CPU_AFFINITY");
   if (env == NULL)
     return false;
 
-  do
+  start = env;
+  for (pass = 0; pass < 2; pass++)
     {
-      while (*env == ' ' || *env == '\t')
-        env++;
-
-      cpu_beg = strtoul (env, &end, 0);
-      cpu_end = cpu_beg;
-      cpu_stride = 1;
-      if (env == end || cpu_beg >= 65536)
-        goto invalid;
+      env = start;
+      if (pass == 1)
+        {
+          if (ignore)
+            return false;
 
-      env = end;
-      if (*env == '-')
+          gomp_places_list_len = 0;
+          gomp_places_list = gomp_affinity_alloc (count, true);
+          if (gomp_places_list == NULL)
+            return false;
+        }
+      do
        {
-          cpu_end = strtoul (++env, &end, 0);
-          if (env == end || cpu_end >= 65536 || cpu_end < cpu_beg)
+          while (isspace ((unsigned char) *env))
+            ++env;
+
+          errno = 0;
+          cpu_beg = strtoul (env, &end, 0);
+          if (errno || cpu_beg >= 65536)
            goto invalid;
+          cpu_end = cpu_beg;
+          cpu_stride = 1;
 
          env = end;
-          if (*env == ':')
+          if (*env == '-')
            {
-              cpu_stride = strtoul (++env, &end, 0);
-              if (env == end || cpu_stride == 0 || cpu_stride >= 65536)
+              errno = 0;
+              cpu_end = strtoul (++env, &end, 0);
+              if (errno || cpu_end >= 65536 || cpu_end < cpu_beg)
                goto invalid;
 
              env = end;
-            }
-        }
+              if (*env == ':')
+                {
+                  errno = 0;
+                  cpu_stride = strtoul (++env, &end, 0);
+                  if (errno || cpu_stride == 0 || cpu_stride >= 65536)
+                    goto invalid;
 
-      needed = (cpu_end - cpu_beg) / cpu_stride + 1;
-      if (used + needed >= allocated)
-        {
-          unsigned short *new_cpus;
+                  env = end;
+                }
+            }
 
-          if (allocated < 64)
-            allocated = 64;
-          if (allocated > needed)
-            allocated <<= 1;
+          needed = (cpu_end - cpu_beg) / cpu_stride + 1;
+          if (pass == 0)
+            count += needed;
          else
-            allocated += 2 * needed;
-          new_cpus = realloc (cpus, allocated * sizeof (unsigned short));
-          if (new_cpus == NULL)
            {
-              free (cpus);
-              gomp_error ("not enough memory to store GOMP_CPU_AFFINITY list");
-              return false;
+              while (needed--)
+                {
+                  void *p = gomp_places_list[gomp_places_list_len];
+                  gomp_affinity_init_place (p);
+                  if (gomp_affinity_add_cpus (p, cpu_beg, 1, 0, true))
+                    ++gomp_places_list_len;
+                  cpu_beg += cpu_stride;
+                }
            }
 
-          cpus = new_cpus;
-        }
+          while (isspace ((unsigned char) *env))
+            ++env;
 
-      while (needed--)
-        {
-          cpus[used++] = cpu_beg;
-          cpu_beg += cpu_stride;
+          if (*env == ',')
+            env++;
+          else if (*env == '\0')
+            break;
        }
-
-      while (*env == ' ' || *env == '\t')
-        env++;
-
-      if (*env == ',')
-        env++;
-      else if (*env == '\0')
-        break;
+      while (1);
     }
-  while (1);
 
-  gomp_cpu_affinity = cpus;
-  gomp_cpu_affinity_len = used;
+  if (gomp_places_list_len == 0)
+    {
+      free (gomp_places_list);
+      gomp_places_list = NULL;
+      return false;
+    }
   return true;
 
 invalid:
@@ -560,12 +1006,160 @@ parse_affinity (void)
   return false;
 }
 
+
+static void
+handle_omp_display_env (unsigned long stacksize, int wait_policy)
+{
+  const char *env;
+  bool display = false;
+  bool verbose = false;
+  int i;
+
+  env = getenv ("OMP_DISPLAY_ENV");
+  if (env == NULL)
+    return;
+
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (strncasecmp (env, "true", 4) == 0)
+    {
+      display = true;
+      env += 4;
+    }
+  else if (strncasecmp (env, "false", 5) == 0)
+    {
+      display = false;
+      env += 5;
+    }
+  else if (strncasecmp (env, "verbose", 7) == 0)
+    {
+      display = true;
+      verbose = true;
+      env += 7;
+    }
+  else
+    env = "X";
+  while (isspace ((unsigned char) *env))
+    ++env;
+  if (*env != '\0')
+    gomp_error ("Invalid value for environment variable OMP_DISPLAY_ENV");
+
+  if (!display)
+    return;
+
+  fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr);
+
+  fputs ("  _OPENMP = '201307'\n", stderr);
+  fprintf (stderr, "  OMP_DYNAMIC = '%s'\n",
+           gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
+  fprintf (stderr, "  OMP_NESTED = '%s'\n",
+           gomp_global_icv.nest_var ? "TRUE" : "FALSE");
+
+  fprintf (stderr, "  OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
+  for (i = 1; i < gomp_nthreads_var_list_len; i++)
+    fprintf (stderr, ",%lu", gomp_nthreads_var_list[i]);
+  fputs ("'\n", stderr);
+
+  fprintf (stderr, "  OMP_SCHEDULE = '");
+  switch (gomp_global_icv.run_sched_var)
+    {
+    case GFS_RUNTIME:
+      fputs ("RUNTIME", stderr);
+      break;
+    case GFS_STATIC:
+      fputs ("STATIC", stderr);
+      break;
+    case GFS_DYNAMIC:
+      fputs ("DYNAMIC", stderr);
+      break;
+    case GFS_GUIDED:
+      fputs ("GUIDED", stderr);
+      break;
+    case GFS_AUTO:
+      fputs ("AUTO", stderr);
+      break;
+    }
+  fputs ("'\n", stderr);
+
+  fputs ("  OMP_PROC_BIND = '", stderr);
+  switch (gomp_global_icv.bind_var)
+    {
+    case omp_proc_bind_false:
+      fputs ("FALSE", stderr);
+      break;
+    case omp_proc_bind_true:
+      fputs ("TRUE", stderr);
+      break;
+    case omp_proc_bind_master:
+      fputs ("MASTER", stderr);
+      break;
+    case omp_proc_bind_close:
+      fputs ("CLOSE", stderr);
+      break;
+    case omp_proc_bind_spread:
+      fputs ("SPREAD", stderr);
+      break;
+    }
+  for (i = 1; i < gomp_bind_var_list_len; i++)
+    switch (gomp_bind_var_list[i])
+      {
+      case omp_proc_bind_master:
+        fputs (",MASTER", stderr);
+        break;
+      case omp_proc_bind_close:
+        fputs (",CLOSE", stderr);
+        break;
+      case omp_proc_bind_spread:
+        fputs (",SPREAD", stderr);
+        break;
+      }
+  fputs ("'\n", stderr);
+  fputs ("  OMP_PLACES = '", stderr);
+  for (i = 0; i < gomp_places_list_len; i++)
+    {
+      fputs ("{", stderr);
+      gomp_affinity_print_place (gomp_places_list[i]);
+      fputs (i + 1 == gomp_places_list_len ? "}" : "},", stderr);
+    }
+  fputs ("'\n", stderr);
+
+  fprintf (stderr, "  OMP_STACKSIZE = '%lu'\n", stacksize);
+
+  /* GOMP's default value is actually neither active nor passive.  */
+  fprintf (stderr, "  OMP_WAIT_POLICY = '%s'\n",
+           wait_policy > 0 ? "ACTIVE" : "PASSIVE");
+  fprintf (stderr, "  OMP_THREAD_LIMIT = '%u'\n",
+           gomp_global_icv.thread_limit_var);
+  fprintf (stderr, "  OMP_MAX_ACTIVE_LEVELS = '%lu'\n",
+           gomp_max_active_levels_var);
+
+  fprintf (stderr, "  OMP_CANCELLATION = '%s'\n",
+           gomp_cancel_var ? "TRUE" : "FALSE");
+  fprintf (stderr, "  OMP_DEFAULT_DEVICE = '%d'\n",
+           gomp_global_icv.default_device_var);
+
+  if (verbose)
+    {
+      fputs ("  GOMP_CPU_AFFINITY = ''\n", stderr);
+      fprintf (stderr, "  GOMP_STACKSIZE = '%lu'\n", stacksize);
+#ifdef HAVE_INTTYPES_H
+      fprintf (stderr, "  GOMP_SPINCOUNT = '%"PRIu64"'\n",
+               (uint64_t) gomp_spin_count_var);
+#else
+      fprintf (stderr, "  GOMP_SPINCOUNT = '%lu'\n",
+               (unsigned long) gomp_spin_count_var);
+#endif
+    }
+
+  fputs ("OPENMP DISPLAY ENVIRONMENT END\n", stderr);
+}
+
+
 static void __attribute__((constructor))
 initialize_env (void)
 {
-  unsigned long stacksize;
+  unsigned long thread_limit_var, stacksize;
   int wait_policy;
-  bool bind_var = false;
 
   /* Do a compile time check that mkomp_h.pl did good job.  */
   omp_check_defines ();
@@ -573,14 +1167,17 @@ initialize_env (void)
   parse_schedule ();
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
-  parse_boolean ("OMP_PROC_BIND", &bind_var);
+  parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
+  parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
                       true);
-  parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false);
-  if (gomp_thread_limit_var != ULONG_MAX)
-    gomp_remaining_threads_count = gomp_thread_limit_var - 1;
+  if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
+    {
+      gomp_global_icv.thread_limit_var
+        = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var;
+    }
 #ifndef HAVE_SYNC_BUILTINS
-  gomp_mutex_init (&gomp_remaining_threads_lock);
+  gomp_mutex_init (&gomp_managed_threads_lock);
 #endif
   gomp_init_num_threads ();
   gomp_available_cpus = gomp_global_icv.nthreads_var;
@@ -589,7 +1186,34 @@ initialize_env (void)
                                    &gomp_nthreads_var_list,
                                    &gomp_nthreads_var_list_len))
     gomp_global_icv.nthreads_var = gomp_available_cpus;
-  if (parse_affinity () || bind_var)
+  bool ignore = false;
+  if (parse_bind_var ("OMP_PROC_BIND",
+                      &gomp_global_icv.bind_var,
+                      &gomp_bind_var_list,
+                      &gomp_bind_var_list_len)
+      && gomp_global_icv.bind_var == omp_proc_bind_false)
+    ignore = true;
+  /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always
+     parsed if present in the environment.  If OMP_PROC_BIND was set
+     explicitly to false, don't populate places list though.  If places
+     list was successfully set from OMP_PLACES, only parse but don't process
+     GOMP_CPU_AFFINITY.  If OMP_PROC_BIND was not set in the environment,
+     default to OMP_PROC_BIND=true if OMP_PLACES or GOMP_CPU_AFFINITY
+     was successfully parsed into a places list, otherwise to
+     OMP_PROC_BIND=false.  */
+  if (parse_places_var ("OMP_PLACES", ignore))
+    {
+      if (gomp_global_icv.bind_var == omp_proc_bind_false)
+        gomp_global_icv.bind_var = true;
+      ignore = true;
+    }
+  if (parse_affinity (ignore))
+    {
+      if (gomp_global_icv.bind_var == omp_proc_bind_false)
+        gomp_global_icv.bind_var = true;
+      ignore = true;
+    }
+  if (gomp_global_icv.bind_var != omp_proc_bind_false)
    gomp_init_affinity ();
   wait_policy = parse_wait_policy ();
   if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
@@ -640,6 +1264,8 @@ initialize_env (void)
      if (err != 0)
        gomp_error ("Stack size change failed: %s", strerror (err));
    }
+
+  handle_omp_display_env (stacksize, wait_policy);
 }
 
 
@@ -723,7 +1349,8 @@ omp_get_max_threads (void)
 int
 omp_get_thread_limit (void)
 {
-  return gomp_thread_limit_var > INT_MAX ? INT_MAX : gomp_thread_limit_var;
+  struct gomp_task_icv *icv = gomp_icv (false);
+  return icv->thread_limit_var > INT_MAX ? INT_MAX : icv->thread_limit_var;
 }
 
 void
@@ -739,6 +1366,60 @@ omp_get_max_active_levels (void)
   return gomp_max_active_levels_var;
 }
 
+int
+omp_get_cancellation (void)
+{
+  return gomp_cancel_var;
+}
+
+omp_proc_bind_t
+omp_get_proc_bind (void)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  return icv->bind_var;
+}
+
+void
+omp_set_default_device (int device_num)
+{
+  struct gomp_task_icv *icv = gomp_icv (true);
+  icv->default_device_var = device_num >= 0 ? device_num : 0;
+}
+
+int
+omp_get_default_device (void)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  return icv->default_device_var;
+}
+
+int
+omp_get_num_devices (void)
+{
+  return gomp_get_num_devices ();
+}
+
+int
+omp_get_num_teams (void)
+{
+  /* Hardcoded to 1 on host, MIC, HSAIL?  Maybe variable on PTX.  */
+  return 1;
+}
+
+int
+omp_get_team_num (void)
+{
+  /* Hardcoded to 0 on host, MIC, HSAIL?  Maybe variable on PTX.  */
+  return 0;
+}
+
+int
+omp_is_initial_device (void)
+{
+  return 1;
+ /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
|
||
+ return 1;
|
||
+}
|
||
+
|
||
ialias (omp_set_dynamic)
|
||
ialias (omp_set_nested)
|
||
ialias (omp_set_num_threads)
|
||
@@ -750,3 +1431,11 @@ ialias (omp_get_max_threads)
|
||
ialias (omp_get_thread_limit)
|
||
ialias (omp_set_max_active_levels)
|
||
ialias (omp_get_max_active_levels)
|
||
+ialias (omp_get_cancellation)
|
||
+ialias (omp_get_proc_bind)
|
||
+ialias (omp_set_default_device)
|
||
+ialias (omp_get_default_device)
|
||
+ialias (omp_get_num_devices)
|
||
+ialias (omp_get_num_teams)
|
||
+ialias (omp_get_team_num)
|
||
+ialias (omp_is_initial_device)
|
||
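[The initialize_env hunks above give the three affinity-related variables a fixed
precedence.  As a rough standalone model of that decision -- the parsed() helper
and the simplified string check are illustrative only, not libgomp API:]

  #include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  /* Hypothetical stand-in for libgomp's env var parsers: "parsed" just
     means the variable is present; the real parsers are much stricter.  */
  static bool parsed (const char *name) { return getenv (name) != NULL; }

  int
  main (void)
  {
    bool explicit_false = parsed ("OMP_PROC_BIND")
			  && strcmp (getenv ("OMP_PROC_BIND"), "false") == 0;
    /* OMP_PLACES is always parsed, but only populates the places list
       when OMP_PROC_BIND isn't an explicit false.  */
    bool have_places = !explicit_false && parsed ("OMP_PLACES");
    /* GOMP_CPU_AFFINITY is likewise always parsed, but only takes
       effect if OMP_PLACES didn't already set the places list.  */
    bool have_affinity = !explicit_false && !have_places
			 && parsed ("GOMP_CPU_AFFINITY");
    /* An unset OMP_PROC_BIND defaults to true iff a places list exists.  */
    bool bind = parsed ("OMP_PROC_BIND") ? !explicit_false
					 : (have_places || have_affinity);
    printf ("bind=%d places=%d affinity=%d\n",
	    bind, have_places, have_affinity);
    return 0;
  }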
--- libgomp/libgomp.h.jj	2014-05-15 11:39:33.640782119 +0200
+++ libgomp/libgomp.h	2014-08-06 11:50:36.614893741 +0200
@@ -39,6 +39,7 @@

 #include <pthread.h>
 #include <stdbool.h>
+#include <stdlib.h>

 #ifdef HAVE_ATTRIBUTE_VISIBILITY
 # pragma GCC visibility push(hidden)
@@ -190,6 +191,10 @@ struct gomp_team_state
   /* Active nesting level.  Only active parallel regions are counted.  */
   unsigned active_level;

+  /* Place-partition-var, offset and length into gomp_places_list array.  */
+  unsigned place_partition_off;
+  unsigned place_partition_len;
+
 #ifdef HAVE_SYNC_BUILTINS
   /* Number of single stmts encountered.  */
   unsigned long single_count;
@@ -203,30 +208,40 @@ struct gomp_team_state
   unsigned long static_trip;
 };

-/* These are the OpenMP 3.0 Internal Control Variables described in
+struct target_mem_desc;
+
+/* These are the OpenMP 4.0 Internal Control Variables described in
   section 2.3.1.  Those described as having one copy per task are
   stored within the structure; those described as having one copy
   for the whole program are (naturally) global variables.  */
-
+
 struct gomp_task_icv
 {
   unsigned long nthreads_var;
   enum gomp_schedule_type run_sched_var;
   int run_sched_modifier;
+  int default_device_var;
+  unsigned int thread_limit_var;
   bool dyn_var;
   bool nest_var;
+  char bind_var;
+  /* Internal ICV.  */
+  struct target_mem_desc *target_data;
 };

 extern struct gomp_task_icv gomp_global_icv;
-extern unsigned long gomp_thread_limit_var;
-extern unsigned long gomp_remaining_threads_count;
 #ifndef HAVE_SYNC_BUILTINS
-extern gomp_mutex_t gomp_remaining_threads_lock;
+extern gomp_mutex_t gomp_managed_threads_lock;
 #endif
 extern unsigned long gomp_max_active_levels_var;
+extern bool gomp_cancel_var;
 extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
 extern unsigned long gomp_available_cpus, gomp_managed_threads;
 extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+extern char *gomp_bind_var_list;
+extern unsigned long gomp_bind_var_list_len;
+extern void **gomp_places_list;
+extern unsigned long gomp_places_list_len;

 enum gomp_task_kind
 {
@@ -236,6 +251,39 @@ enum gomp_task_kind
   GOMP_TASK_TIED
 };

+struct gomp_task;
+struct gomp_taskgroup;
+struct htab;
+
+struct gomp_task_depend_entry
+{
+  void *addr;
+  struct gomp_task_depend_entry *next;
+  struct gomp_task_depend_entry *prev;
+  struct gomp_task *task;
+  bool is_in;
+  bool redundant;
+  bool redundant_out;
+};
+
+struct gomp_dependers_vec
+{
+  size_t n_elem;
+  size_t allocated;
+  struct gomp_task *elem[];
+};
+
+/* Used when in GOMP_taskwait or in gomp_task_maybe_wait_for_dependencies.  */
+
+struct gomp_taskwait
+{
+  bool in_taskwait;
+  bool in_depend_wait;
+  size_t n_depend;
+  struct gomp_task *last_parent_depends_on;
+  gomp_sem_t taskwait_sem;
+};
+
 /* This structure describes a "task" to be run by a thread.  */

 struct gomp_task
@@ -246,14 +294,33 @@ struct gomp_task
   struct gomp_task *prev_child;
   struct gomp_task *next_queue;
   struct gomp_task *prev_queue;
+  struct gomp_task *next_taskgroup;
+  struct gomp_task *prev_taskgroup;
+  struct gomp_taskgroup *taskgroup;
+  struct gomp_dependers_vec *dependers;
+  struct htab *depend_hash;
+  struct gomp_taskwait *taskwait;
+  size_t depend_count;
+  size_t num_dependees;
   struct gomp_task_icv icv;
   void (*fn) (void *);
   void *fn_data;
   enum gomp_task_kind kind;
-  bool in_taskwait;
   bool in_tied_task;
   bool final_task;
-  gomp_sem_t taskwait_sem;
+  bool copy_ctors_done;
+  bool parent_depends_on;
+  struct gomp_task_depend_entry depend[];
+};
+
+struct gomp_taskgroup
+{
+  struct gomp_taskgroup *prev;
+  struct gomp_task *children;
+  bool in_taskgroup_wait;
+  bool cancelled;
+  gomp_sem_t taskgroup_sem;
+  size_t num_children;
 };

 /* This structure describes a "team" of threads.  These are the threads
@@ -282,6 +349,12 @@ struct gomp_team
     of the threads in the team.  */
   gomp_sem_t **ordered_release;

+  /* List of work shares on which gomp_fini_work_share hasn't been
+     called yet.  If the team hasn't been cancelled, this should be
+     equal to each thr->ts.work_share, but otherwise it can be a possibly
+     long list of workshares.  */
+  struct gomp_work_share *work_shares_to_free;
+
   /* List of gomp_work_share structs chained through next_free fields.
     This is populated and taken off only by the first thread in the
     team encountering a new work sharing construct, in a critical
@@ -313,8 +386,20 @@ struct gomp_team

   gomp_mutex_t task_lock;
   struct gomp_task *task_queue;
-  int task_count;
-  int task_running_count;
+  /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team.  */
+  unsigned int task_count;
+  /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled.  */
+  unsigned int task_queued_count;
+  /* Number of GOMP_TASK_{WAITING,TIED} tasks currently running
+     directly in gomp_barrier_handle_tasks; tasks spawned
+     from e.g. GOMP_taskwait or GOMP_taskgroup_end don't count, even when
+     that is called from a task run from gomp_barrier_handle_tasks.
+     task_running_count should be always <= team->nthreads,
+     and if current task isn't in_tied_task, then it will be
+     even < team->nthreads.  */
+  unsigned int task_running_count;
+  int work_share_cancelled;
+  int team_cancelled;

   /* This array contains structures for implicit tasks.  */
   struct gomp_task implicit_task[];
@@ -339,7 +424,11 @@ struct gomp_thread
   /* This semaphore is used for ordered loops.  */
   gomp_sem_t release;

-  /* user pthread thread pool */
+  /* Place this thread is bound to plus one, or zero if not bound
+     to any place.  */
+  unsigned int place;
+
+  /* User pthread thread pool */
   struct gomp_thread_pool *thread_pool;
 };

@@ -352,11 +441,23 @@ struct gomp_thread_pool
   unsigned threads_size;
   unsigned threads_used;
   struct gomp_team *last_team;
+  /* Number of threads running in this contention group.  */
+  unsigned long threads_busy;

   /* This barrier holds and releases threads waiting in threads.  */
   gomp_barrier_t threads_dock;
 };

+enum gomp_cancel_kind
+{
+  GOMP_CANCEL_PARALLEL = 1,
+  GOMP_CANCEL_LOOP = 2,
+  GOMP_CANCEL_FOR = GOMP_CANCEL_LOOP,
+  GOMP_CANCEL_DO = GOMP_CANCEL_LOOP,
+  GOMP_CANCEL_SECTIONS = 4,
+  GOMP_CANCEL_TASKGROUP = 8
+};
+
 /* ... and here is that TLS data.  */

 #ifdef HAVE_TLS
@@ -391,17 +492,22 @@ static inline struct gomp_task_icv *gomp
 /* The attributes to be used during thread creation.  */
 extern pthread_attr_t gomp_thread_attr;

-/* Other variables.  */
-
-extern unsigned short *gomp_cpu_affinity;
-extern size_t gomp_cpu_affinity_len;
-
 /* Function prototypes.  */

 /* affinity.c */

 extern void gomp_init_affinity (void);
-extern void gomp_init_thread_affinity (pthread_attr_t *);
+extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int);
+extern void **gomp_affinity_alloc (unsigned long, bool);
+extern void gomp_affinity_init_place (void *);
+extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long,
+				    long, bool);
+extern bool gomp_affinity_remove_cpu (void *, unsigned long);
+extern bool gomp_affinity_copy_place (void *, void *, long);
+extern bool gomp_affinity_same_place (void *, void *);
+extern bool gomp_affinity_finalize_place_list (bool);
+extern bool gomp_affinity_init_level (int, unsigned long, bool);
+extern void gomp_affinity_print_place (void *);

 /* alloc.c */

@@ -475,15 +581,21 @@ extern void gomp_barrier_handle_tasks (g
 static void inline
 gomp_finish_task (struct gomp_task *task)
 {
-  gomp_sem_destroy (&task->taskwait_sem);
+  if (__builtin_expect (task->depend_hash != NULL, 0))
+    free (task->depend_hash);
 }

 /* team.c */

 extern struct gomp_team *gomp_new_team (unsigned);
 extern void gomp_team_start (void (*) (void *), void *, unsigned,
-			     struct gomp_team *);
+			     unsigned, struct gomp_team *);
 extern void gomp_team_end (void);
+extern void gomp_free_thread (void *);
+
+/* target.c */
+
+extern int gomp_get_num_devices (void);

 /* work.c */

@@ -491,6 +603,7 @@ extern void gomp_init_work_share (struct
 extern void gomp_fini_work_share (struct gomp_work_share *);
 extern bool gomp_work_share_start (bool);
 extern void gomp_work_share_end (void);
+extern bool gomp_work_share_end_cancel (void);
 extern void gomp_work_share_end_nowait (void);

 static inline void
@@ -513,6 +626,26 @@ gomp_work_share_init_done (void)
 #define _LIBGOMP_OMP_LOCK_DEFINED 1
 #include "omp.h.in"

+typedef enum omp_proc_bind_t
+{
+  omp_proc_bind_false = 0,
+  omp_proc_bind_true = 1,
+  omp_proc_bind_master = 2,
+  omp_proc_bind_close = 3,
+  omp_proc_bind_spread = 4
+} omp_proc_bind_t;
+
+extern int omp_get_cancellation (void) __GOMP_NOTHROW;
+extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
+
+extern void omp_set_default_device (int) __GOMP_NOTHROW;
+extern int omp_get_default_device (void) __GOMP_NOTHROW;
+extern int omp_get_num_devices (void) __GOMP_NOTHROW;
+extern int omp_get_num_teams (void) __GOMP_NOTHROW;
+extern int omp_get_team_num (void) __GOMP_NOTHROW;
+
+extern int omp_is_initial_device (void) __GOMP_NOTHROW;
+
 #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
     || !defined (HAVE_ATTRIBUTE_ALIAS) \
     || !defined (HAVE_AS_SYMVER_DIRECTIVE) \
@@ -568,11 +701,19 @@ extern int gomp_test_nest_lock_25 (omp_n
 #endif

 #ifdef HAVE_ATTRIBUTE_ALIAS
+# define ialias_ulp	ialias_str1(__USER_LABEL_PREFIX__)
+# define ialias_str1(x)	ialias_str2(x)
+# define ialias_str2(x)	#x
 # define ialias(fn) \
   extern __typeof (fn) gomp_ialias_##fn \
     __attribute__ ((alias (#fn))) attribute_hidden;
+# define ialias_redirect(fn) \
+  extern __typeof (fn) fn __asm__ (ialias_ulp "gomp_ialias_" #fn) attribute_hidden;
+# define ialias_call(fn) gomp_ialias_ ## fn
 #else
 # define ialias(fn)
+# define ialias_redirect(fn)
+# define ialias_call(fn) fn
 #endif

 #endif /* LIBGOMP_H */
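[The new list fields above -- next_child/prev_child, next_taskgroup/prev_taskgroup,
next_queue/prev_queue -- all follow one shape: a circular, doubly linked list whose
head pointer names the newest element.  A minimal self-contained model of the push
operation that task.c performs later in this patch (node/push_front are
illustrative names):]

  #include <stdio.h>

  struct node { struct node *next, *prev; int id; };

  /* Push N at the head of the circular list; head->prev stays the
     oldest element, so both ends remain reachable in O(1).  */
  static void
  push_front (struct node **head, struct node *n)
  {
    if (*head)
      {
	n->next = *head;
	n->prev = (*head)->prev;
	n->next->prev = n;
	n->prev->next = n;
      }
    else
      n->next = n->prev = n;
    *head = n;
  }

  int
  main (void)
  {
    struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 }, *head = 0;
    push_front (&head, &a);
    push_front (&head, &b);
    push_front (&head, &c);
    struct node *it = head;
    do { printf ("%d ", it->id); it = it->next; } while (it != head);
    /* Prints: 3 2 1 -- newest first, matching parent->children.  */
    return 0;
  }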
--- libgomp/loop.c.jj	2009-04-14 15:40:59.000000000 +0200
+++ libgomp/loop.c	2014-05-15 13:12:46.509895204 +0200
@@ -439,14 +439,14 @@ static void
 gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
-			  long chunk_size)
+			  long chunk_size, unsigned int flags)
 {
   struct gomp_team *team;

   num_threads = gomp_resolve_num_threads (num_threads, 0);
   team = gomp_new_team (num_threads);
   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
-  gomp_team_start (fn, data, num_threads, team);
+  gomp_team_start (fn, data, num_threads, flags, team);
 }

 void
@@ -455,7 +455,7 @@ GOMP_parallel_loop_static_start (void (*
				 long incr, long chunk_size)
 {
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    GFS_STATIC, chunk_size);
+			    GFS_STATIC, chunk_size, 0);
 }

 void
@@ -464,7 +464,7 @@ GOMP_parallel_loop_dynamic_start (void (
				  long incr, long chunk_size)
 {
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    GFS_DYNAMIC, chunk_size);
+			    GFS_DYNAMIC, chunk_size, 0);
 }

 void
@@ -473,7 +473,7 @@ GOMP_parallel_loop_guided_start (void (*
				 long incr, long chunk_size)
 {
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    GFS_GUIDED, chunk_size);
+			    GFS_GUIDED, chunk_size, 0);
 }

 void
@@ -483,11 +483,59 @@ GOMP_parallel_loop_runtime_start (void (
 {
   struct gomp_task_icv *icv = gomp_icv (false);
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    icv->run_sched_var, icv->run_sched_modifier);
+			    icv->run_sched_var, icv->run_sched_modifier, 0);
+}
+
+ialias_redirect (GOMP_parallel_end)
+
+void
+GOMP_parallel_loop_static (void (*fn) (void *), void *data,
+			   unsigned num_threads, long start, long end,
+			   long incr, long chunk_size, unsigned flags)
+{
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    GFS_STATIC, chunk_size, flags);
+  fn (data);
+  GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
+			    unsigned num_threads, long start, long end,
+			    long incr, long chunk_size, unsigned flags)
+{
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    GFS_DYNAMIC, chunk_size, flags);
+  fn (data);
+  GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
+			   unsigned num_threads, long start, long end,
+			   long incr, long chunk_size, unsigned flags)
+{
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    GFS_GUIDED, chunk_size, flags);
+  fn (data);
+  GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+			    unsigned num_threads, long start, long end,
+			    long incr, unsigned flags)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    icv->run_sched_var, icv->run_sched_modifier,
+			    flags);
+  fn (data);
+  GOMP_parallel_end ();
 }

 /* The GOMP_loop_end* routines are called after the thread is told that
-   all loop iterations are complete.  This first version synchronizes
+   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

 void
@@ -496,6 +544,12 @@ GOMP_loop_end (void)
   gomp_work_share_end ();
 }

+bool
+GOMP_loop_end_cancel (void)
+{
+  return gomp_work_share_end_cancel ();
+}
+
 void
 GOMP_loop_end_nowait (void)
 {
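[The new combined entry points fold what used to be three steps -- the *_start
call, the body invocation, and GOMP_parallel_end -- into one runtime call plus an
inlined body call.  A sketch of what a compiler could emit for
#pragma omp parallel for schedule(static) proc_bind(spread); the outlined
subfunction and context struct are illustrative, while the flags encoding
(proc_bind in the low three bits) matches the flags & 7 use in team.c below:]

  extern void GOMP_parallel_loop_static (void (*) (void *), void *,
					 unsigned, long, long, long,
					 long, unsigned);

  struct ctx { int *a; };

  /* Outlined loop body; each implicit task fetches its static chunk
     from the runtime and iterates it.  */
  static void
  subfn (void *data)
  {
    struct ctx *c = data;
    (void) c;
    /* ... iterate this thread's share of [0, 1024) ... */
  }

  void
  run (struct ctx *c)
  {
    /* num_threads 0 = use the ICVs; chunk_size 0 = default;
       flags 4 = omp_proc_bind_spread.  */
    GOMP_parallel_loop_static (subfn, c, 0, 0, 1024, 1, 0, 4);
  }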
--- libgomp/sections.c.jj	2014-04-24 21:36:03.346367722 +0200
+++ libgomp/sections.c	2014-05-15 13:12:46.510895235 +0200
@@ -139,11 +139,27 @@ GOMP_parallel_sections_start (void (*fn)
   num_threads = gomp_resolve_num_threads (num_threads, count);
   team = gomp_new_team (num_threads);
   gomp_sections_init (&team->work_shares[0], count);
-  gomp_team_start (fn, data, num_threads, team);
+  gomp_team_start (fn, data, num_threads, 0, team);
+}
+
+ialias_redirect (GOMP_parallel_end)
+
+void
+GOMP_parallel_sections (void (*fn) (void *), void *data,
+			unsigned num_threads, unsigned count, unsigned flags)
+{
+  struct gomp_team *team;
+
+  num_threads = gomp_resolve_num_threads (num_threads, count);
+  team = gomp_new_team (num_threads);
+  gomp_sections_init (&team->work_shares[0], count);
+  gomp_team_start (fn, data, num_threads, flags, team);
+  fn (data);
+  GOMP_parallel_end ();
 }

 /* The GOMP_section_end* routines are called after the thread is told
-   that all sections are complete.  This first version synchronizes
+   that all sections are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

 void
@@ -152,6 +168,12 @@ GOMP_sections_end (void)
   gomp_work_share_end ();
 }

+bool
+GOMP_sections_end_cancel (void)
+{
+  return gomp_work_share_end_cancel ();
+}
+
 void
 GOMP_sections_end_nowait (void)
 {
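[GOMP_sections_end_cancel gives the epilogue of a cancellable sections region a
boolean result.  Roughly, such a region could be lowered as below; the
GOMP_sections_start/GOMP_sections_next signatures are quoted from libgomp's
pre-existing interface, not from this patch, so treat them as an assumption:]

  #include <stdbool.h>

  extern unsigned GOMP_sections_start (unsigned);
  extern unsigned GOMP_sections_next (void);
  extern bool GOMP_sections_end_cancel (void);

  void
  sections_body (void)
  {
    unsigned i;
    for (i = GOMP_sections_start (2); i; i = GOMP_sections_next ())
      switch (i)
	{
	case 1: /* first #pragma omp section */ break;
	case 2: /* second #pragma omp section */ break;
	}
    /* Returns true when the construct was cancelled; the compiler then
       branches to the end of the enclosing parallel region.  */
    if (GOMP_sections_end_cancel ())
      return;
  }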
--- libgomp/team.c.jj	2014-05-15 11:39:33.625782203 +0200
+++ libgomp/team.c	2014-05-15 13:12:46.513895332 +0200
@@ -54,6 +54,7 @@ struct gomp_thread_start_data
   struct gomp_team_state ts;
   struct gomp_task *task;
   struct gomp_thread_pool *thread_pool;
+  unsigned int place;
   bool nested;
 };

@@ -85,6 +86,7 @@ gomp_thread_start (void *xdata)
   thr->thread_pool = data->thread_pool;
   thr->ts = data->ts;
   thr->task = data->task;
+  thr->place = data->place;

   thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

@@ -99,7 +101,7 @@ gomp_thread_start (void *xdata)
       gomp_barrier_wait (&team->barrier);

       local_fn (local_data);
-      gomp_team_barrier_wait (&team->barrier);
+      gomp_team_barrier_wait_final (&team->barrier);
       gomp_finish_task (task);
       gomp_barrier_wait_last (&team->barrier);
     }
@@ -114,7 +116,7 @@ gomp_thread_start (void *xdata)
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
-	  gomp_team_barrier_wait (&team->barrier);
+	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);
@@ -127,6 +129,8 @@ gomp_thread_start (void *xdata)
     }

   gomp_sem_destroy (&thr->release);
+  thr->thread_pool = NULL;
+  thr->task = NULL;
   return NULL;
 }

@@ -150,6 +154,7 @@ gomp_new_team (unsigned nthreads)
 #else
   gomp_mutex_init (&team->work_share_list_free_lock);
 #endif
+  team->work_shares_to_free = &team->work_shares[0];
   gomp_init_work_share (&team->work_shares[0], false, nthreads);
   team->work_shares[0].next_alloc = NULL;
   team->work_share_list_free = NULL;
@@ -168,7 +173,10 @@ gomp_new_team (unsigned nthreads)
   gomp_mutex_init (&team->task_lock);
   team->task_queue = NULL;
   team->task_count = 0;
+  team->task_queued_count = 0;
   team->task_running_count = 0;
+  team->work_share_cancelled = 0;
+  team->team_cancelled = 0;

   return team;
 }
@@ -200,16 +208,19 @@ static struct gomp_thread_pool *gomp_new
 static void
 gomp_free_pool_helper (void *thread_pool)
 {
+  struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool
     = (struct gomp_thread_pool *) thread_pool;
   gomp_barrier_wait_last (&pool->threads_dock);
-  gomp_sem_destroy (&gomp_thread ()->release);
+  gomp_sem_destroy (&thr->release);
+  thr->thread_pool = NULL;
+  thr->task = NULL;
   pthread_exit (NULL);
 }

 /* Free a thread pool and release its threads.  */

-static void
+void
 gomp_free_thread (void *arg __attribute__((unused)))
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -232,6 +243,15 @@ gomp_free_thread (void *arg __attribute_
       gomp_barrier_wait (&pool->threads_dock);
       /* Now it is safe to destroy the barrier and free the pool.  */
       gomp_barrier_destroy (&pool->threads_dock);
+
+#ifdef HAVE_SYNC_BUILTINS
+      __sync_fetch_and_add (&gomp_managed_threads,
+			    1L - pool->threads_used);
+#else
+      gomp_mutex_lock (&gomp_managed_threads_lock);
+      gomp_managed_threads -= pool->threads_used - 1L;
+      gomp_mutex_unlock (&gomp_managed_threads_lock);
+#endif
     }
   free (pool->threads);
   if (pool->last_team)
@@ -251,7 +271,7 @@ gomp_free_thread (void *arg __attribute_

 void
 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
-		 struct gomp_team *team)
+		 unsigned flags, struct gomp_team *team)
 {
   struct gomp_thread_start_data *start_data;
   struct gomp_thread *thr, *nthr;
@@ -262,17 +282,24 @@ gomp_team_start (void (*fn) (void *), vo
   unsigned i, n, old_threads_used = 0;
   pthread_attr_t thread_attr, *attr;
   unsigned long nthreads_var;
+  char bind, bind_var;
+  unsigned int s = 0, rest = 0, p = 0, k = 0;
+  unsigned int affinity_count = 0;
+  struct gomp_thread **affinity_thr = NULL;

   thr = gomp_thread ();
   nested = thr->ts.team != NULL;
   if (__builtin_expect (thr->thread_pool == NULL, 0))
     {
       thr->thread_pool = gomp_new_thread_pool ();
+      thr->thread_pool->threads_busy = nthreads;
       pthread_setspecific (gomp_thread_destructor, thr);
     }
   pool = thr->thread_pool;
   task = thr->task;
   icv = task ? &task->icv : &gomp_global_icv;
+  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
+    gomp_init_affinity ();

   /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
@@ -295,14 +322,90 @@ gomp_team_start (void (*fn) (void *), vo
   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
+  bind_var = icv->bind_var;
+  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
+    bind_var = flags & 7;
+  bind = bind_var;
+  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
+      && thr->ts.level < gomp_bind_var_list_len)
+    bind_var = gomp_bind_var_list[thr->ts.level];
   gomp_init_task (thr->task, task, icv);
   team->implicit_task[0].icv.nthreads_var = nthreads_var;
+  team->implicit_task[0].icv.bind_var = bind_var;

   if (nthreads == 1)
     return;

   i = 1;

+  if (__builtin_expect (gomp_places_list != NULL, 0))
+    {
+      /* Depending on chosen proc_bind model, set subpartition
+	 for the master thread and initialize helper variables
+	 P and optionally S, K and/or REST used by later place
+	 computation for each additional thread.  */
+      p = thr->place - 1;
+      switch (bind)
+	{
+	case omp_proc_bind_true:
+	case omp_proc_bind_close:
+	  if (nthreads > thr->ts.place_partition_len)
+	    {
+	      /* T > P.  S threads will be placed in each place,
+		 and the final REM threads placed one by one
+		 into the already occupied places.  */
+	      s = nthreads / thr->ts.place_partition_len;
+	      rest = nthreads % thr->ts.place_partition_len;
+	    }
+	  else
+	    s = 1;
+	  k = 1;
+	  break;
+	case omp_proc_bind_master:
+	  /* Each thread will be bound to master's place.  */
+	  break;
+	case omp_proc_bind_spread:
+	  if (nthreads <= thr->ts.place_partition_len)
+	    {
+	      /* T <= P.  Each subpartition will have in between s
+		 and s+1 places (subpartitions starting at or
+		 after rest will have s places, earlier s+1 places),
+		 each thread will be bound to the first place in
+		 its subpartition (except for the master thread
+		 that can be bound to another place in its
+		 subpartition).  */
+	      s = thr->ts.place_partition_len / nthreads;
+	      rest = thr->ts.place_partition_len % nthreads;
+	      rest = (s + 1) * rest + thr->ts.place_partition_off;
+	      if (p < rest)
+		{
+		  p -= (p - thr->ts.place_partition_off) % (s + 1);
+		  thr->ts.place_partition_len = s + 1;
+		}
+	      else
+		{
+		  p -= (p - rest) % s;
+		  thr->ts.place_partition_len = s;
+		}
+	      thr->ts.place_partition_off = p;
+	    }
+	  else
+	    {
+	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
+	      s = nthreads / thr->ts.place_partition_len;
+	      rest = nthreads % thr->ts.place_partition_len;
+	      thr->ts.place_partition_off = p;
+	      thr->ts.place_partition_len = 1;
+	      k = 1;
+	    }
+	  break;
+	}
+    }
+  else
+    bind = omp_proc_bind_false;
+
   /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
@@ -333,47 +436,244 @@ gomp_team_start (void (*fn) (void *), vo
     team will exit.  */
   pool->threads_used = nthreads;

+  /* If necessary, expand the size of the gomp_threads array.  It is
+     expected that changes in the number of threads are rare, thus we
+     make no effort to expand gomp_threads_size geometrically.  */
+  if (nthreads >= pool->threads_size)
+    {
+      pool->threads_size = nthreads + 1;
+      pool->threads
+	= gomp_realloc (pool->threads,
+			pool->threads_size
+			* sizeof (struct gomp_thread_data *));
+    }
+
   /* Release existing idle threads.  */
   for (; i < n; ++i)
     {
-      nthr = pool->threads[i];
+      unsigned int place_partition_off = thr->ts.place_partition_off;
+      unsigned int place_partition_len = thr->ts.place_partition_len;
+      unsigned int place = 0;
+      if (__builtin_expect (gomp_places_list != NULL, 0))
+	{
+	  switch (bind)
+	    {
+	    case omp_proc_bind_true:
+	    case omp_proc_bind_close:
+	      if (k == s)
+		{
+		  ++p;
+		  if (p == (team->prev_ts.place_partition_off
+			    + team->prev_ts.place_partition_len))
+		    p = team->prev_ts.place_partition_off;
+		  k = 1;
+		  if (i == nthreads - rest)
+		    s = 1;
+		}
+	      else
+		++k;
+	      break;
+	    case omp_proc_bind_master:
+	      break;
+	    case omp_proc_bind_spread:
+	      if (k == 0)
+		{
+		  /* T <= P.  */
+		  if (p < rest)
+		    p += s + 1;
+		  else
+		    p += s;
+		  if (p == (team->prev_ts.place_partition_off
+			    + team->prev_ts.place_partition_len))
+		    p = team->prev_ts.place_partition_off;
+		  place_partition_off = p;
+		  if (p < rest)
+		    place_partition_len = s + 1;
+		  else
+		    place_partition_len = s;
+		}
+	      else
+		{
+		  /* T > P.  */
+		  if (k == s)
+		    {
+		      ++p;
+		      if (p == (team->prev_ts.place_partition_off
+				+ team->prev_ts.place_partition_len))
+			p = team->prev_ts.place_partition_off;
+		      k = 1;
+		      if (i == nthreads - rest)
+			s = 1;
+		    }
+		  else
+		    ++k;
+		  place_partition_off = p;
+		  place_partition_len = 1;
+		}
+	      break;
+	    }
+	  if (affinity_thr != NULL
+	      || (bind != omp_proc_bind_true
+		  && pool->threads[i]->place != p + 1)
+	      || pool->threads[i]->place <= place_partition_off
+	      || pool->threads[i]->place > (place_partition_off
+					    + place_partition_len))
+	    {
+	      unsigned int l;
+	      if (affinity_thr == NULL)
+		{
+		  unsigned int j;
+
+		  if (team->prev_ts.place_partition_len > 64)
+		    affinity_thr
+		      = gomp_malloc (team->prev_ts.place_partition_len
+				     * sizeof (struct gomp_thread *));
+		  else
+		    affinity_thr
+		      = gomp_alloca (team->prev_ts.place_partition_len
+				     * sizeof (struct gomp_thread *));
+		  memset (affinity_thr, '\0',
+			  team->prev_ts.place_partition_len
+			  * sizeof (struct gomp_thread *));
+		  for (j = i; j < old_threads_used; j++)
+		    {
+		      if (pool->threads[j]->place
+			  > team->prev_ts.place_partition_off
+			  && (pool->threads[j]->place
+			      <= (team->prev_ts.place_partition_off
+				  + team->prev_ts.place_partition_len)))
+			{
+			  l = pool->threads[j]->place - 1
+			      - team->prev_ts.place_partition_off;
+			  pool->threads[j]->data = affinity_thr[l];
+			  affinity_thr[l] = pool->threads[j];
+			}
+		      pool->threads[j] = NULL;
+		    }
+		  if (nthreads > old_threads_used)
+		    memset (&pool->threads[old_threads_used],
+			    '\0', ((nthreads - old_threads_used)
+				   * sizeof (struct gomp_thread *)));
+		  n = nthreads;
+		  affinity_count = old_threads_used - i;
+		}
+	      if (affinity_count == 0)
+		break;
+	      l = p;
+	      if (affinity_thr[l - team->prev_ts.place_partition_off]
+		  == NULL)
+		{
+		  if (bind != omp_proc_bind_true)
+		    continue;
+		  for (l = place_partition_off;
+		       l < place_partition_off + place_partition_len;
+		       l++)
+		    if (affinity_thr[l - team->prev_ts.place_partition_off]
+			!= NULL)
+		      break;
+		  if (l == place_partition_off + place_partition_len)
+		    continue;
+		}
+	      nthr = affinity_thr[l - team->prev_ts.place_partition_off];
+	      affinity_thr[l - team->prev_ts.place_partition_off]
+		= (struct gomp_thread *) nthr->data;
+	      affinity_count--;
+	      pool->threads[i] = nthr;
+	    }
+	  else
+	    nthr = pool->threads[i];
+	  place = p + 1;
+	}
+      else
+	nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
+      nthr->ts.place_partition_off = place_partition_off;
+      nthr->ts.place_partition_len = place_partition_len;
 #ifdef HAVE_SYNC_BUILTINS
      nthr->ts.single_count = 0;
 #endif
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
+      nthr->place = place;
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
+      team->implicit_task[i].icv.bind_var = bind_var;
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }

+  if (__builtin_expect (affinity_thr != NULL, 0))
+    {
+      /* If AFFINITY_THR is non-NULL just because we had to
+	 permute some threads in the pool, but we've managed
+	 to find exactly as many old threads as we'd find
+	 without affinity, we don't need to handle this
+	 specially anymore.  */
+      if (nthreads <= old_threads_used
+	  ? (affinity_count == old_threads_used - nthreads)
+	  : (i == old_threads_used))
+	{
+	  if (team->prev_ts.place_partition_len > 64)
+	    free (affinity_thr);
+	  affinity_thr = NULL;
+	  affinity_count = 0;
+	}
+      else
+	{
+	  i = 1;
+	  /* We are going to compute the places/subpartitions
+	     again from the beginning.  So, we need to reinitialize
+	     vars modified by the switch (bind) above inside
+	     of the loop, to the state they had after the initial
+	     switch (bind).  */
+	  switch (bind)
+	    {
+	    case omp_proc_bind_true:
+	    case omp_proc_bind_close:
+	      if (nthreads > thr->ts.place_partition_len)
+		/* T > P.  S has been changed, so needs
+		   to be recomputed.  */
+		s = nthreads / thr->ts.place_partition_len;
+	      k = 1;
+	      p = thr->place - 1;
+	      break;
+	    case omp_proc_bind_master:
+	      /* No vars have been changed.  */
+	      break;
+	    case omp_proc_bind_spread:
+	      p = thr->ts.place_partition_off;
+	      if (k != 0)
+		{
+		  /* T > P.  */
+		  s = nthreads / team->prev_ts.place_partition_len;
+		  k = 1;
+		}
+	      break;
+	    }
+
+	  /* Increase the barrier threshold to make sure all new
+	     threads and all the threads we're going to let die
+	     arrive before the team is released.  */
+	  if (affinity_count)
+	    gomp_barrier_reinit (&pool->threads_dock,
+				 nthreads + affinity_count);
+	}
+    }
+
   if (i == nthreads)
     goto do_release;

-  /* If necessary, expand the size of the gomp_threads array.  It is
-     expected that changes in the number of threads are rare, thus we
-     make no effort to expand gomp_threads_size geometrically.  */
-  if (nthreads >= pool->threads_size)
-    {
-      pool->threads_size = nthreads + 1;
-      pool->threads
-	= gomp_realloc (pool->threads,
-			pool->threads_size
-			* sizeof (struct gomp_thread_data *));
-    }
 }

-  if (__builtin_expect (nthreads > old_threads_used, 0))
+  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
-      long diff = (long) nthreads - (long) old_threads_used;
+      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;
@@ -381,14 +681,14 @@ gomp_team_start (void (*fn) (void *), vo
 #ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
 #else
-      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
-      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+      gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
    }

  attr = &gomp_thread_attr;
-  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
+  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
@@ -402,11 +702,78 @@ gomp_team_start (void (*fn) (void *), vo
			    * (nthreads-i));

  /* Launch new threads.  */
-  for (; i < nthreads; ++i, ++start_data)
+  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

+      start_data->ts.place_partition_off = thr->ts.place_partition_off;
+      start_data->ts.place_partition_len = thr->ts.place_partition_len;
+      start_data->place = 0;
+      if (__builtin_expect (gomp_places_list != NULL, 0))
+	{
+	  switch (bind)
+	    {
+	    case omp_proc_bind_true:
+	    case omp_proc_bind_close:
+	      if (k == s)
+		{
+		  ++p;
+		  if (p == (team->prev_ts.place_partition_off
+			    + team->prev_ts.place_partition_len))
+		    p = team->prev_ts.place_partition_off;
+		  k = 1;
+		  if (i == nthreads - rest)
+		    s = 1;
+		}
+	      else
+		++k;
+	      break;
+	    case omp_proc_bind_master:
+	      break;
+	    case omp_proc_bind_spread:
+	      if (k == 0)
+		{
+		  /* T <= P.  */
+		  if (p < rest)
+		    p += s + 1;
+		  else
+		    p += s;
+		  if (p == (team->prev_ts.place_partition_off
+			    + team->prev_ts.place_partition_len))
+		    p = team->prev_ts.place_partition_off;
+		  start_data->ts.place_partition_off = p;
+		  if (p < rest)
+		    start_data->ts.place_partition_len = s + 1;
+		  else
+		    start_data->ts.place_partition_len = s;
+		}
+	      else
+		{
+		  /* T > P.  */
+		  if (k == s)
+		    {
+		      ++p;
+		      if (p == (team->prev_ts.place_partition_off
+				+ team->prev_ts.place_partition_len))
+			p = team->prev_ts.place_partition_off;
+		      k = 1;
+		      if (i == nthreads - rest)
+			s = 1;
+		    }
+		  else
+		    ++k;
+		  start_data->ts.place_partition_off = p;
+		  start_data->ts.place_partition_len = 1;
+		}
+	      break;
+	    }
+	  start_data->place = p + 1;
+	  if (affinity_thr != NULL && pool->threads[i] != NULL)
+	    continue;
+	  gomp_init_thread_affinity (attr, p);
+	}
+
      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
@@ -422,18 +789,16 @@ gomp_team_start (void (*fn) (void *), vo
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
+      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

-      if (gomp_cpu_affinity != NULL)
-	gomp_init_thread_affinity (attr);
-
-      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
+      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

-  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
+  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
@@ -442,21 +807,32 @@ gomp_team_start (void (*fn) (void *), vo
  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
-     to never be true for nested teams.  */
-  if (__builtin_expect (nthreads < old_threads_used, 0))
+     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
+     the barrier as well as gomp_managed_threads was temporarily
+     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
+     AFFINITY_COUNT if non-zero will be always at least
+     OLD_THREADS_COUNT - NTHREADS.  */
+  if (__builtin_expect (nthreads < old_threads_used, 0)
+      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

+      if (affinity_count)
+	diff = -affinity_count;
+
      gomp_barrier_reinit (&pool->threads_dock, nthreads);

 #ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
 #else
-      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
-      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+      gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
    }
+  if (__builtin_expect (affinity_thr != NULL, 0)
+      && team->prev_ts.place_partition_len > 64)
+    free (affinity_thr);
 }


@@ -469,9 +845,26 @@ gomp_team_end (void)
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

-  /* This barrier handles all pending explicit threads.  */
-  gomp_team_barrier_wait (&team->barrier);
-  gomp_fini_work_share (thr->ts.work_share);
+  /* This barrier handles all pending explicit threads.
+     As #pragma omp cancel parallel might get awaited count in
+     team->barrier in an inconsistent state, we need to use a different
+     counter here.  */
+  gomp_team_barrier_wait_final (&team->barrier);
+  if (__builtin_expect (team->team_cancelled, 0))
+    {
+      struct gomp_work_share *ws = team->work_shares_to_free;
+      do
+	{
+	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
+	  if (next_ws == NULL)
+	    gomp_ptrlock_set (&ws->next_ws, ws);
+	  gomp_fini_work_share (ws);
+	  ws = next_ws;
+	}
+      while (ws != NULL);
+    }
+  else
+    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;
@@ -481,9 +874,9 @@ gomp_team_end (void)
 #ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
 #else
-      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
-      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+      gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
@@ -524,8 +917,6 @@ gomp_team_end (void)
 static void __attribute__((constructor))
 initialize_team (void)
 {
-  struct gomp_thread *thr;
-
 #ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

@@ -535,13 +926,6 @@ initialize_team (void)

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
-
-#ifdef HAVE_TLS
-  thr = &gomp_tls_data;
-#else
-  thr = &initial_thread_tls_data;
-#endif
-  gomp_sem_init (&thr->release, 0);
 }

 static void __attribute__((destructor))
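[The distribution arithmetic above is compact; a standalone, runnable model of
the proc_bind(close) case may help.  T threads over the P places of the
master's partition get S = T/P threads per place, and the final T%P "rest"
threads are folded one by one into the already occupied places.  Partition
offset 0 is assumed for simplicity:]

  #include <stdio.h>

  int
  main (void)
  {
    unsigned nthreads = 10, nplaces = 4;
    unsigned s = nthreads / nplaces, rest = nthreads % nplaces;
    unsigned p = 0, k = 1, i;
    printf ("thread 0 -> place 0\n");	/* master keeps its place */
    for (i = 1; i < nthreads; i++)
      {
	if (k == s)
	  {
	    /* Current place has its S threads; advance, wrapping at
	       the end of the partition.  */
	    if (++p == nplaces)
	      p = 0;
	    k = 1;
	    if (i == nthreads - rest)
	      s = 1;	/* leftover threads: one more pass, one per place */
	  }
	else
	  ++k;
	printf ("thread %u -> place %u\n", i, p);
      }
    /* For 10 threads over 4 places this prints 3,3,2,2 threads
       on places 0..3 respectively.  */
    return 0;
  }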
--- libgomp/testsuite/libgomp.fortran/lib1.f90.jj	2009-01-14 12:03:59.000000000 +0100
+++ libgomp/testsuite/libgomp.fortran/lib1.f90	2014-05-15 13:12:53.902857213 +0200
@@ -66,6 +66,7 @@
 !$omp parallel reduction (.or.:l) if (.true.)
   l = .not. omp_in_parallel ()
 !$omp end parallel
+  if (l) call abort

   e = omp_get_wtime ()
   if (d .gt. e) call abort
--- libgomp/testsuite/libgomp.fortran/lib3.f.jj	2009-01-14 12:03:59.000000000 +0100
+++ libgomp/testsuite/libgomp.fortran/lib3.f	2014-05-15 13:12:53.903857207 +0200
@@ -66,6 +66,7 @@ C$OMP END PARALLEL
 C$OMP PARALLEL REDUCTION (.OR.:L) IF (.TRUE.)
       L = .NOT. OMP_IN_PARALLEL ()
 C$OMP END PARALLEL
+      IF (L) CALL ABORT

       E = OMP_GET_WTIME ()
       IF (D .GT. E) CALL ABORT
--- libgomp/testsuite/libgomp.fortran/lib2.f.jj	2009-01-14 12:03:59.000000000 +0100
+++ libgomp/testsuite/libgomp.fortran/lib2.f	2014-05-15 13:12:53.902857213 +0200
@@ -66,6 +66,7 @@ C$OMP END PARALLEL
 C$OMP PARALLEL REDUCTION (.OR.:L) IF (.TRUE.)
       L = .NOT. OMP_IN_PARALLEL ()
 C$OMP END PARALLEL
+      IF (L) CALL ABORT

       E = OMP_GET_WTIME ()
       IF (D .GT. E) CALL ABORT
--- libgomp/testsuite/libgomp.c/lib-1.c.jj	2009-01-14 12:03:59.000000000 +0100
+++ libgomp/testsuite/libgomp.c/lib-1.c	2014-05-15 13:12:53.886857306 +0200
@@ -85,6 +85,8 @@ main (void)
     l = ! omp_in_parallel ();
 #pragma omp parallel reduction (|:l) if (1)
     l = ! omp_in_parallel ();
+  if (l)
+    abort ();

   e = omp_get_wtime ();
   if (d > e)
--- libgomp/Makefile.am.jj	2009-01-14 12:04:00.000000000 +0100
+++ libgomp/Makefile.am	2014-05-15 13:12:46.026898921 +0200
@@ -33,7 +33,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
-	time.c fortran.c affinity.c
+	time.c fortran.c affinity.c target.c

 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
--- libgomp/task.c.jj 2014-05-15 11:39:33.650782065 +0200
|
||
+++ libgomp/task.c 2014-08-06 11:59:40.869212552 +0200
|
||
@@ -29,6 +29,33 @@
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
+typedef struct gomp_task_depend_entry *hash_entry_type;
|
||
+
|
||
+static inline void *
|
||
+htab_alloc (size_t size)
|
||
+{
|
||
+ return gomp_malloc (size);
|
||
+}
|
||
+
|
||
+static inline void
|
||
+htab_free (void *ptr)
|
||
+{
|
||
+ free (ptr);
|
||
+}
|
||
+
|
||
+#include "hashtab.h"
|
||
+
|
||
+static inline hashval_t
|
||
+htab_hash (hash_entry_type element)
|
||
+{
|
||
+ return hash_pointer (element->addr);
|
||
+}
|
||
+
|
||
+static inline bool
|
||
+htab_eq (hash_entry_type x, hash_entry_type y)
|
||
+{
|
||
+ return x->addr == y->addr;
|
||
+}
|
||
|
||
/* Create a new task data structure. */
|
||
|
||
@@ -39,11 +66,16 @@ gomp_init_task (struct gomp_task *task,
|
||
task->parent = parent_task;
|
||
task->icv = *prev_icv;
|
||
task->kind = GOMP_TASK_IMPLICIT;
|
||
- task->in_taskwait = false;
|
||
+ task->taskwait = NULL;
|
||
task->in_tied_task = false;
|
||
task->final_task = false;
|
||
+ task->copy_ctors_done = false;
|
||
+ task->parent_depends_on = false;
|
||
task->children = NULL;
|
||
- gomp_sem_init (&task->taskwait_sem, 0);
|
||
+ task->taskgroup = NULL;
|
||
+ task->dependers = NULL;
|
||
+ task->depend_hash = NULL;
|
||
+ task->depend_count = 0;
|
||
}
|
||
|
||
/* Clean up a task, after completing it. */
|
||
@@ -72,13 +104,16 @@ gomp_clear_parent (struct gomp_task *chi
|
||
while (task != children);
|
||
}
|
||
|
||
+static void gomp_task_maybe_wait_for_dependencies (void **depend);
|
||
+
|
||
/* Called when encountering an explicit task directive. If IF_CLAUSE is
|
||
false, then we must not delay in executing the task. If UNTIED is true,
|
||
then the task may be executed by any member of the team. */
|
||
|
||
void
|
||
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
||
- long arg_size, long arg_align, bool if_clause, unsigned flags)
|
||
+ long arg_size, long arg_align, bool if_clause, unsigned flags,
|
||
+ void **depend)
|
||
{
|
||
struct gomp_thread *thr = gomp_thread ();
|
||
struct gomp_team *team = thr->ts.team;
|
||
@@ -94,17 +129,35 @@ GOMP_task (void (*fn) (void *), void *da
|
||
flags &= ~1;
|
||
#endif
|
||
|
||
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
|
||
+ if (team
|
||
+ && (gomp_team_barrier_cancelled (&team->barrier)
|
||
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
|
||
+ return;
|
||
+
|
||
if (!if_clause || team == NULL
|
||
|| (thr->task && thr->task->final_task)
|
||
|| team->task_count > 64 * team->nthreads)
|
||
{
|
||
struct gomp_task task;
|
||
|
||
+ /* If there are depend clauses and earlier deferred sibling tasks
|
||
+ with depend clauses, check if there isn't a dependency. If there
|
||
+ is, we need to wait for them. There is no need to handle
|
||
+ depend clauses for non-deferred tasks other than this, because
|
||
+ the parent task is suspended until the child task finishes and thus
|
||
+ it can't start further child tasks. */
|
||
+ if ((flags & 8) && thr->task && thr->task->depend_hash)
|
||
+ gomp_task_maybe_wait_for_dependencies (depend);
|
||
+
|
||
gomp_init_task (&task, thr->task, gomp_icv (false));
|
||
task.kind = GOMP_TASK_IFFALSE;
|
||
task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
|
||
if (thr->task)
|
||
- task.in_tied_task = thr->task->in_tied_task;
|
||
+ {
|
||
+ task.in_tied_task = thr->task->in_tied_task;
|
||
+ task.taskgroup = thr->task->taskgroup;
|
||
+ }
|
||
thr->task = &task;
|
||
if (__builtin_expect (cpyfn != NULL, 0))
|
||
{
|
||
@@ -128,27 +181,174 @@ GOMP_task (void (*fn) (void *), void *da
|
||
{
|
||
struct gomp_task *task;
|
||
struct gomp_task *parent = thr->task;
|
||
+ struct gomp_taskgroup *taskgroup = parent->taskgroup;
|
||
char *arg;
|
||
bool do_wake;
|
||
+ size_t depend_size = 0;
|
||
|
||
- task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
|
||
- arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
|
||
+ if (flags & 8)
|
||
+ depend_size = ((uintptr_t) depend[0]
|
||
+ * sizeof (struct gomp_task_depend_entry));
|
||
+ task = gomp_malloc (sizeof (*task) + depend_size
|
||
+ + arg_size + arg_align - 1);
|
||
+ arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
|
||
& ~(uintptr_t) (arg_align - 1));
|
||
gomp_init_task (task, parent, gomp_icv (false));
|
||
task->kind = GOMP_TASK_IFFALSE;
|
||
task->in_tied_task = parent->in_tied_task;
|
||
+ task->taskgroup = taskgroup;
|
||
thr->task = task;
|
||
if (cpyfn)
|
||
- cpyfn (arg, data);
|
||
+ {
|
||
+ cpyfn (arg, data);
|
||
+ task->copy_ctors_done = true;
|
||
+ }
|
||
else
|
||
memcpy (arg, data, arg_size);
|
||
thr->task = parent;
|
||
task->kind = GOMP_TASK_WAITING;
|
||
task->fn = fn;
|
||
task->fn_data = arg;
|
||
- task->in_tied_task = true;
|
||
task->final_task = (flags & 2) >> 1;
|
||
gomp_mutex_lock (&team->task_lock);
|
||
+ /* If parallel or taskgroup has been cancelled, don't start new
|
||
+ tasks. */
|
||
+ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
|
||
+ || (taskgroup && taskgroup->cancelled))
|
||
+ && !task->copy_ctors_done, 0))
|
||
+ {
|
||
+ gomp_mutex_unlock (&team->task_lock);
|
||
+ gomp_finish_task (task);
|
||
+ free (task);
|
||
+ return;
|
||
+ }
|
||
+ if (taskgroup)
|
||
+ taskgroup->num_children++;
|
||
+ if (depend_size)
|
||
+ {
|
||
+ size_t ndepend = (uintptr_t) depend[0];
|
||
+ size_t nout = (uintptr_t) depend[1];
|
||
+ size_t i;
|
||
+ hash_entry_type ent;
|
||
+
|
||
+ task->depend_count = ndepend;
|
||
+ task->num_dependees = 0;
|
||
+ if (parent->depend_hash == NULL)
|
||
+ parent->depend_hash
|
||
+ = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
|
||
+ for (i = 0; i < ndepend; i++)
|
||
+ {
|
||
+ task->depend[i].addr = depend[2 + i];
|
||
+ task->depend[i].next = NULL;
|
||
+ task->depend[i].prev = NULL;
|
||
+ task->depend[i].task = task;
|
||
+ task->depend[i].is_in = i >= nout;
|
||
+ task->depend[i].redundant = false;
|
||
+ task->depend[i].redundant_out = false;
|
||
+
|
||
+ hash_entry_type *slot
|
||
+ = htab_find_slot (&parent->depend_hash, &task->depend[i],
|
||
+ INSERT);
|
||
+ hash_entry_type out = NULL, last = NULL;
|
||
+ if (*slot)
|
||
+ {
|
||
+ /* If multiple depends on the same task are the
|
||
+ same, all but the first one are redundant.
|
||
+ As inout/out come first, if any of them is
|
||
+ inout/out, it will win, which is the right
|
||
+ semantics. */
|
||
+ if ((*slot)->task == task)
|
||
+ {
|
||
+ task->depend[i].redundant = true;
|
||
+ continue;
|
||
+ }
|
||
+ for (ent = *slot; ent; ent = ent->next)
|
||
+ {
|
||
+ if (ent->redundant_out)
|
||
+ break;
|
||
+
|
||
+ last = ent;
|
||
+
|
||
+ /* depend(in:...) doesn't depend on earlier
|
||
+ depend(in:...). */
|
||
+ if (i >= nout && ent->is_in)
|
||
+ continue;
|
||
+
|
||
+ if (!ent->is_in)
|
||
+ out = ent;
|
||
+
|
||
+ struct gomp_task *tsk = ent->task;
|
||
+ if (tsk->dependers == NULL)
|
||
+ {
|
||
+ tsk->dependers
|
||
+ = gomp_malloc (sizeof (struct gomp_dependers_vec)
|
||
+ + 6 * sizeof (struct gomp_task *));
|
||
+ tsk->dependers->n_elem = 1;
|
||
+ tsk->dependers->allocated = 6;
|
||
+ tsk->dependers->elem[0] = task;
|
||
+ task->num_dependees++;
|
||
+ continue;
|
||
+ }
|
||
+ /* We already have some other dependency on tsk
|
||
+ from earlier depend clause. */
|
||
+ else if (tsk->dependers->n_elem
|
||
+ && (tsk->dependers->elem[tsk->dependers->n_elem
|
||
+ - 1]
|
||
+ == task))
|
||
+ continue;
|
||
+ else if (tsk->dependers->n_elem
|
||
+ == tsk->dependers->allocated)
|
||
+ {
|
||
+ tsk->dependers->allocated
|
||
+ = tsk->dependers->allocated * 2 + 2;
|
||
+ tsk->dependers
|
||
+ = gomp_realloc (tsk->dependers,
|
||
+ sizeof (struct gomp_dependers_vec)
|
||
+ + (tsk->dependers->allocated
|
||
+ * sizeof (struct gomp_task *)));
|
||
+ }
|
||
+ tsk->dependers->elem[tsk->dependers->n_elem++] = task;
|
||
+ task->num_dependees++;
|
||
+ }
|
||
+ task->depend[i].next = *slot;
|
||
+ (*slot)->prev = &task->depend[i];
|
||
+ }
|
||
+ *slot = &task->depend[i];
|
||
+
|
||
+ /* There is no need to store more than one depend({,in}out:)
|
||
+ task per address in the hash table chain for the purpose
|
||
+ of creation of deferred tasks, because each out
|
||
+ depends on all earlier outs, thus it is enough to record
|
||
+ just the last depend({,in}out:). For depend(in:), we need
|
||
+ to keep all of the previous ones not terminated yet, because
|
||
+ a later depend({,in}out:) might need to depend on all of
|
||
+ them. So, if the new task's clause is depend({,in}out:),
|
||
+ we know there is at most one other depend({,in}out:) clause
|
||
+ in the list (out). For non-deferred tasks we want to see
|
||
+ all outs, so they are moved to the end of the chain,
|
||
+ after first redundant_out entry all following entries
|
||
+ should be redundant_out. */
|
||
+ if (!task->depend[i].is_in && out)
|
||
+ {
|
||
+ if (out != last)
|
||
+ {
|
||
+ out->next->prev = out->prev;
|
||
+ out->prev->next = out->next;
|
||
+ out->next = last->next;
|
||
+ out->prev = last;
|
||
+ last->next = out;
|
||
+ if (out->next)
|
||
+ out->next->prev = out;
|
||
+ }
|
||
+ out->redundant_out = true;
|
||
+ }
|
||
+ }
|
||
+ if (task->num_dependees)
|
||
+ {
|
||
+ gomp_mutex_unlock (&team->task_lock);
|
||
+ return;
|
||
+ }
|
||
+ }
|
||
       if (parent->children)
         {
           task->next_child = parent->children;
@@ -162,6 +362,22 @@ GOMP_task (void (*fn) (void *), void *da
           task->prev_child = task;
         }
       parent->children = task;
+      if (taskgroup)
+        {
+          if (taskgroup->children)
+            {
+              task->next_taskgroup = taskgroup->children;
+              task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+              task->next_taskgroup->prev_taskgroup = task;
+              task->prev_taskgroup->next_taskgroup = task;
+            }
+          else
+            {
+              task->next_taskgroup = task;
+              task->prev_taskgroup = task;
+            }
+          taskgroup->children = task;
+        }
       if (team->task_queue)
         {
           task->next_queue = team->task_queue;
@@ -176,6 +392,7 @@ GOMP_task (void (*fn) (void *), void *da
           team->task_queue = task;
         }
       ++team->task_count;
+      ++team->task_queued_count;
       gomp_team_barrier_set_task_pending (&team->barrier);
       do_wake = team->task_running_count + !parent->in_tied_task
                 < team->nthreads;
@@ -185,6 +402,255 @@ GOMP_task (void (*fn) (void *), void *da
     }
 }
 
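For reference (annotation, not patch content): the depend vector decoded in
GOMP_task above, and read back below by gomp_task_maybe_wait_for_dependencies
via depend[0], depend[1] and depend[i + 2], is laid out as the clause count,
then the out/inout count, then the dependence addresses with the out/inout
entries first.  A compiler lowering of, say,
    #pragma omp task depend(inout: x) depend(in: y)
would therefore pass roughly this hypothetical vector:

    #include <stdint.h>

    extern int x, y;
    void *depend_vec[] = { (void *) (uintptr_t) 2,  /* ndepend: two clauses */
                           (void *) (uintptr_t) 1,  /* nout: one inout */
                           &x,        /* out/inout addresses come first */
                           &y };      /* then the in addresses */
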
+static inline bool
+gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
+                   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
+{
+  if (parent)
+    {
+      if (parent->children == child_task)
+        parent->children = child_task->next_child;
+      if (__builtin_expect (child_task->parent_depends_on, 0)
+          && parent->taskwait->last_parent_depends_on == child_task)
+        {
+          if (child_task->prev_child->kind == GOMP_TASK_WAITING
+              && child_task->prev_child->parent_depends_on)
+            parent->taskwait->last_parent_depends_on = child_task->prev_child;
+          else
+            parent->taskwait->last_parent_depends_on = NULL;
+        }
+    }
+  if (taskgroup && taskgroup->children == child_task)
+    taskgroup->children = child_task->next_taskgroup;
+  child_task->prev_queue->next_queue = child_task->next_queue;
+  child_task->next_queue->prev_queue = child_task->prev_queue;
+  if (team->task_queue == child_task)
+    {
+      if (child_task->next_queue != child_task)
+        team->task_queue = child_task->next_queue;
+      else
+        team->task_queue = NULL;
+    }
+  child_task->kind = GOMP_TASK_TIED;
+  if (--team->task_queued_count == 0)
+    gomp_team_barrier_clear_task_pending (&team->barrier);
+  if ((gomp_team_barrier_cancelled (&team->barrier)
+       || (taskgroup && taskgroup->cancelled))
+      && !child_task->copy_ctors_done)
+    return true;
+  return false;
+}
+
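A side note on the list discipline visible in gomp_task_run_pre above: the
next_queue/prev_queue links (and likewise next_child/prev_child and
next_taskgroup/prev_taskgroup) form circular doubly-linked lists, so unlinking
needs no NULL checks and only the head pointer must be patched when the head
itself is removed.  A stand-alone sketch, with illustrative types rather than
libgomp's:

    #include <stddef.h>

    struct node { struct node *next, *prev; };

    /* Splice N out of the circular list headed by *HEAD.  */
    static void
    remove_node (struct node **head, struct node *n)
    {
      n->prev->next = n->next;
      n->next->prev = n->prev;
      if (*head == n)
        *head = (n->next != n) ? n->next : NULL;
    }
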
+static void
+gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i;
+
+  for (i = 0; i < child_task->depend_count; i++)
+    if (!child_task->depend[i].redundant)
+      {
+        if (child_task->depend[i].next)
+          child_task->depend[i].next->prev = child_task->depend[i].prev;
+        if (child_task->depend[i].prev)
+          child_task->depend[i].prev->next = child_task->depend[i].next;
+        else
+          {
+            hash_entry_type *slot
+              = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
+                                NO_INSERT);
+            if (*slot != &child_task->depend[i])
+              abort ();
+            if (child_task->depend[i].next)
+              *slot = child_task->depend[i].next;
+            else
+              htab_clear_slot (parent->depend_hash, slot);
+          }
+      }
+}
+
+static size_t
+gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+                                     struct gomp_team *team)
+{
+  struct gomp_task *parent = child_task->parent;
+  size_t i, count = child_task->dependers->n_elem, ret = 0;
+  for (i = 0; i < count; i++)
+    {
+      struct gomp_task *task = child_task->dependers->elem[i];
+      if (--task->num_dependees != 0)
+        continue;
+
+      struct gomp_taskgroup *taskgroup = task->taskgroup;
+      if (parent)
+        {
+          if (parent->children)
+            {
+              /* If parent is in gomp_task_maybe_wait_for_dependencies
+                 and it doesn't need to wait for this task, put it after
+                 all ready to run tasks it needs to wait for.  */
+              if (parent->taskwait && parent->taskwait->last_parent_depends_on
+                  && !task->parent_depends_on)
+                {
+                  struct gomp_task *last_parent_depends_on
+                    = parent->taskwait->last_parent_depends_on;
+                  task->next_child = last_parent_depends_on->next_child;
+                  task->prev_child = last_parent_depends_on;
+                }
+              else
+                {
+                  task->next_child = parent->children;
+                  task->prev_child = parent->children->prev_child;
+                  parent->children = task;
+                }
+              task->next_child->prev_child = task;
+              task->prev_child->next_child = task;
+            }
+          else
+            {
+              task->next_child = task;
+              task->prev_child = task;
+              parent->children = task;
+            }
+          if (parent->taskwait)
+            {
+              if (parent->taskwait->in_taskwait)
+                {
+                  parent->taskwait->in_taskwait = false;
+                  gomp_sem_post (&parent->taskwait->taskwait_sem);
+                }
+              else if (parent->taskwait->in_depend_wait)
+                {
+                  parent->taskwait->in_depend_wait = false;
+                  gomp_sem_post (&parent->taskwait->taskwait_sem);
+                }
+              if (parent->taskwait->last_parent_depends_on == NULL
+                  && task->parent_depends_on)
+                parent->taskwait->last_parent_depends_on = task;
+            }
+        }
+      if (taskgroup)
+        {
+          if (taskgroup->children)
+            {
+              task->next_taskgroup = taskgroup->children;
+              task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+              task->next_taskgroup->prev_taskgroup = task;
+              task->prev_taskgroup->next_taskgroup = task;
+            }
+          else
+            {
+              task->next_taskgroup = task;
+              task->prev_taskgroup = task;
+            }
+          taskgroup->children = task;
+          if (taskgroup->in_taskgroup_wait)
+            {
+              taskgroup->in_taskgroup_wait = false;
+              gomp_sem_post (&taskgroup->taskgroup_sem);
+            }
+        }
+      if (team->task_queue)
+        {
+          task->next_queue = team->task_queue;
+          task->prev_queue = team->task_queue->prev_queue;
+          task->next_queue->prev_queue = task;
+          task->prev_queue->next_queue = task;
+        }
+      else
+        {
+          task->next_queue = task;
+          task->prev_queue = task;
+          team->task_queue = task;
+        }
+      ++team->task_count;
+      ++team->task_queued_count;
+      ++ret;
+    }
+  free (child_task->dependers);
+  child_task->dependers = NULL;
+  if (ret > 1)
+    gomp_team_barrier_set_task_pending (&team->barrier);
+  return ret;
+}
+
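The dependers vector grown above is a flexible-array-member vector with
geometric growth (allocated * 2 + 2, so 6 -> 14 -> 30 -> ...), amortizing
reallocation over many appends.  A stand-alone sketch of the same policy;
allocation failure checks are elided here because gomp_malloc/gomp_realloc
abort on failure:

    #include <stdlib.h>

    struct vec { size_t n_elem, allocated; void *elem[]; };

    static struct vec *
    vec_push (struct vec *v, void *x)
    {
      if (v == NULL)
        {
          /* First use: start with room for 6 elements.  */
          v = malloc (sizeof (struct vec) + 6 * sizeof (void *));
          v->n_elem = 0;
          v->allocated = 6;
        }
      else if (v->n_elem == v->allocated)
        {
          /* Full: grow geometrically, new = old * 2 + 2.  */
          v->allocated = v->allocated * 2 + 2;
          v = realloc (v, sizeof (struct vec)
                          + v->allocated * sizeof (void *));
        }
      v->elem[v->n_elem++] = x;
      return v;
    }
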
+static inline size_t
+gomp_task_run_post_handle_depend (struct gomp_task *child_task,
+                                  struct gomp_team *team)
+{
+  if (child_task->depend_count == 0)
+    return 0;
+
+  /* If parent is gone already, the hash table is freed and nothing
+     will use the hash table anymore, no need to remove anything from it.  */
+  if (child_task->parent != NULL)
+    gomp_task_run_post_handle_depend_hash (child_task);
+
+  if (child_task->dependers == NULL)
+    return 0;
+
+  return gomp_task_run_post_handle_dependers (child_task, team);
+}
+
+static inline void
+gomp_task_run_post_remove_parent (struct gomp_task *child_task)
+{
+  struct gomp_task *parent = child_task->parent;
+  if (parent == NULL)
+    return;
+  if (__builtin_expect (child_task->parent_depends_on, 0)
+      && --parent->taskwait->n_depend == 0
+      && parent->taskwait->in_depend_wait)
+    {
+      parent->taskwait->in_depend_wait = false;
+      gomp_sem_post (&parent->taskwait->taskwait_sem);
+    }
+  child_task->prev_child->next_child = child_task->next_child;
+  child_task->next_child->prev_child = child_task->prev_child;
+  if (parent->children != child_task)
+    return;
+  if (child_task->next_child != child_task)
+    parent->children = child_task->next_child;
+  else
+    {
+      parent->children = NULL;
+      if (parent->taskwait && parent->taskwait->in_taskwait)
+        {
+          parent->taskwait->in_taskwait = false;
+          gomp_sem_post (&parent->taskwait->taskwait_sem);
+        }
+    }
+}
+
+static inline void
+gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
+{
+  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
+  if (taskgroup == NULL)
+    return;
+  child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
+  child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
+  if (taskgroup->num_children > 1)
+    --taskgroup->num_children;
+  else
+    {
+      taskgroup->num_children = 0;
+    }
+  if (taskgroup->children != child_task)
+    return;
+  if (child_task->next_taskgroup != child_task)
+    taskgroup->children = child_task->next_taskgroup;
+  else
+    {
+      taskgroup->children = NULL;
+      if (taskgroup->in_taskgroup_wait)
+        {
+          taskgroup->in_taskgroup_wait = false;
+          gomp_sem_post (&taskgroup->taskgroup_sem);
+        }
+    }
+}
+
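The two helpers above are the waker half of the sleep/wake handshake this
patch uses throughout: a sleeper publishes a flag under task_lock and blocks
on a semaphore, and whoever completes the awaited work clears the flag and
posts while still holding the lock, so each post pairs with exactly one wait.
A self-contained model, using POSIX primitives as stand-ins for
gomp_mutex_t/gomp_sem_t (initialize the semaphore once with
sem_init (&taskwait_sem, 0, 0)):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdbool.h>

    static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
    static sem_t taskwait_sem;
    static bool in_taskwait;

    static void
    waiter (void)
    {
      pthread_mutex_lock (&task_lock);
      in_taskwait = true;            /* publish intent under the lock */
      pthread_mutex_unlock (&task_lock);
      sem_wait (&taskwait_sem);      /* sleep until a completer posts */
    }

    static void
    completer (void)                 /* called with task_lock held */
    {
      if (in_taskwait)
        {
          in_taskwait = false;       /* consume the flag ...  */
          sem_post (&taskwait_sem);  /* ... and wake exactly once */
        }
    }
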
 void
 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
 {
@@ -193,6 +659,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  int do_wake = 0;
 
   gomp_mutex_lock (&team->task_lock);
   if (gomp_barrier_last_thread (state))
@@ -209,26 +676,31 @@ gomp_barrier_handle_tasks (gomp_barrier_
 
   while (1)
     {
+      bool cancelled = false;
       if (team->task_queue != NULL)
         {
-          struct gomp_task *parent;
-
           child_task = team->task_queue;
-          parent = child_task->parent;
-          if (parent && parent->children == child_task)
-            parent->children = child_task->next_child;
-          child_task->prev_queue->next_queue = child_task->next_queue;
-          child_task->next_queue->prev_queue = child_task->prev_queue;
-          if (child_task->next_queue != child_task)
-            team->task_queue = child_task->next_queue;
-          else
-            team->task_queue = NULL;
-          child_task->kind = GOMP_TASK_TIED;
+          cancelled = gomp_task_run_pre (child_task, child_task->parent,
+                                         child_task->taskgroup, team);
+          if (__builtin_expect (cancelled, 0))
+            {
+              if (to_free)
+                {
+                  gomp_finish_task (to_free);
+                  free (to_free);
+                  to_free = NULL;
+                }
+              goto finish_cancelled;
+            }
          team->task_running_count++;
-          if (team->task_count == team->task_running_count)
-            gomp_team_barrier_clear_task_pending (&team->barrier);
+          child_task->in_tied_task = true;
         }
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+        {
+          gomp_team_barrier_wake (&team->barrier, do_wake);
+          do_wake = 0;
+        }
       if (to_free)
         {
           gomp_finish_task (to_free);
@@ -246,33 +718,29 @@ gomp_barrier_handle_tasks (gomp_barrier_
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
         {
-          struct gomp_task *parent = child_task->parent;
-          if (parent)
-            {
-              child_task->prev_child->next_child = child_task->next_child;
-              child_task->next_child->prev_child = child_task->prev_child;
-              if (parent->children == child_task)
-                {
-                  if (child_task->next_child != child_task)
-                    parent->children = child_task->next_child;
-                  else
-                    {
-                      parent->children = NULL;
-                      if (parent->in_taskwait)
-                        gomp_sem_post (&parent->taskwait_sem);
-                    }
-                }
-            }
+         finish_cancelled:;
+          size_t new_tasks
+            = gomp_task_run_post_handle_depend (child_task, team);
+          gomp_task_run_post_remove_parent (child_task);
           gomp_clear_parent (child_task->children);
+          gomp_task_run_post_remove_taskgroup (child_task);
           to_free = child_task;
           child_task = NULL;
-          team->task_running_count--;
+          if (!cancelled)
+            team->task_running_count--;
+          if (new_tasks > 1)
+            {
+              do_wake = team->nthreads - team->task_running_count;
+              if (do_wake > new_tasks)
+                do_wake = new_tasks;
+            }
           if (--team->task_count == 0
               && gomp_team_barrier_waiting_for_tasks (&team->barrier))
             {
               gomp_team_barrier_done (&team->barrier, state);
               gomp_mutex_unlock (&team->task_lock);
               gomp_team_barrier_wake (&team->barrier, 0);
+              gomp_mutex_lock (&team->task_lock);
             }
         }
     }
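One observation on the wake accounting above (annotation, not patch content):
do_wake is simply the smaller of the number of threads that could pick up
work and the number of tasks that just became runnable.  Equivalently:

    /* min (idle threads, newly runnable tasks).  */
    static int
    wake_count (int nthreads, int task_running_count, int new_tasks)
    {
      int do_wake = nthreads - task_running_count;
      if (do_wake > new_tasks)
        do_wake = new_tasks;
      return do_wake;
    }
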
@@ -288,45 +756,65 @@ GOMP_taskwait (void)
   struct gomp_task *task = thr->task;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
+  struct gomp_taskwait taskwait;
+  int do_wake = 0;
 
   if (task == NULL || task->children == NULL)
     return;
+  memset (&taskwait, 0, sizeof (taskwait));
   gomp_mutex_lock (&team->task_lock);
   while (1)
     {
+      bool cancelled = false;
       if (task->children == NULL)
         {
+          bool destroy_taskwait = task->taskwait != NULL;
+          task->taskwait = NULL;
           gomp_mutex_unlock (&team->task_lock);
           if (to_free)
             {
              gomp_finish_task (to_free);
              free (to_free);
             }
+          if (destroy_taskwait)
+            gomp_sem_destroy (&taskwait.taskwait_sem);
           return;
         }
       if (task->children->kind == GOMP_TASK_WAITING)
         {
           child_task = task->children;
-          task->children = child_task->next_child;
-          child_task->prev_queue->next_queue = child_task->next_queue;
-          child_task->next_queue->prev_queue = child_task->prev_queue;
-          if (team->task_queue == child_task)
+          cancelled
+            = gomp_task_run_pre (child_task, task, child_task->taskgroup,
+                                 team);
+          if (__builtin_expect (cancelled, 0))
             {
-              if (child_task->next_queue != child_task)
-                team->task_queue = child_task->next_queue;
-              else
-                team->task_queue = NULL;
+              if (to_free)
+                {
+                  gomp_finish_task (to_free);
+                  free (to_free);
+                  to_free = NULL;
+                }
+              goto finish_cancelled;
             }
-          child_task->kind = GOMP_TASK_TIED;
-          team->task_running_count++;
-          if (team->task_count == team->task_running_count)
-            gomp_team_barrier_clear_task_pending (&team->barrier);
         }
       else
-        /* All tasks we are waiting for are already running
-           in other threads.  Wait for them.  */
-        task->in_taskwait = true;
+        {
+          /* All tasks we are waiting for are already running
+             in other threads.  Wait for them.  */
+          if (task->taskwait == NULL)
+            {
+              taskwait.in_depend_wait = false;
+              gomp_sem_init (&taskwait.taskwait_sem, 0);
+              task->taskwait = &taskwait;
+            }
+          taskwait.in_taskwait = true;
+        }
       gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+        {
+          gomp_team_barrier_wake (&team->barrier, do_wake);
+          do_wake = 0;
+        }
       if (to_free)
         {
           gomp_finish_task (to_free);
@@ -340,14 +828,178 @@ GOMP_taskwait (void)
           thr->task = task;
         }
       else
+        gomp_sem_wait (&taskwait.taskwait_sem);
+      gomp_mutex_lock (&team->task_lock);
+      if (child_task)
         {
-          gomp_sem_wait (&task->taskwait_sem);
-          task->in_taskwait = false;
+         finish_cancelled:;
+          size_t new_tasks
+            = gomp_task_run_post_handle_depend (child_task, team);
+          child_task->prev_child->next_child = child_task->next_child;
+          child_task->next_child->prev_child = child_task->prev_child;
+          if (task->children == child_task)
+            {
+              if (child_task->next_child != child_task)
+                task->children = child_task->next_child;
+              else
+                task->children = NULL;
+            }
+          gomp_clear_parent (child_task->children);
+          gomp_task_run_post_remove_taskgroup (child_task);
+          to_free = child_task;
+          child_task = NULL;
+          team->task_count--;
+          if (new_tasks > 1)
+            {
+              do_wake = team->nthreads - team->task_running_count
+                        - !task->in_tied_task;
+              if (do_wake > new_tasks)
+                do_wake = new_tasks;
+            }
+        }
+    }
+}
+
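Design note on the hunks above: the gomp_taskwait bookkeeping is a stack
object in the waiting thread's frame, published through task->taskwait only
while a wait is in progress, so the common case where all children already
finished allocates nothing.  A minimal stand-alone model of that lifetime
(names are illustrative, not libgomp's):

    #include <semaphore.h>
    #include <string.h>

    struct waitinfo { sem_t sem; int in_wait; };
    struct tasklike { struct waitinfo *waitinfo; };

    static void
    wait_for_children (struct tasklike *t)
    {
      struct waitinfo w;             /* lives on the waiter's stack */
      memset (&w, 0, sizeof (w));
      sem_init (&w.sem, 0, 0);
      t->waitinfo = &w;              /* publish (under the task lock) */
      /* ... run children inline or block on w.sem until done ... */
      t->waitinfo = NULL;            /* unpublish before the frame dies */
      sem_destroy (&w.sem);
    }
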
+/* This is like GOMP_taskwait, but we only wait for tasks that the
+   upcoming task depends on.  */
+
+static void
+gomp_task_maybe_wait_for_dependencies (void **depend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task *task = thr->task;
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task_depend_entry elem, *ent = NULL;
+  struct gomp_taskwait taskwait;
+  struct gomp_task *last_parent_depends_on = NULL;
+  size_t ndepend = (uintptr_t) depend[0];
+  size_t nout = (uintptr_t) depend[1];
+  size_t i;
+  size_t num_awaited = 0;
+  struct gomp_task *child_task = NULL;
+  struct gomp_task *to_free = NULL;
+  int do_wake = 0;
+
+  gomp_mutex_lock (&team->task_lock);
+  for (i = 0; i < ndepend; i++)
+    {
+      elem.addr = depend[i + 2];
+      ent = htab_find (task->depend_hash, &elem);
+      for (; ent; ent = ent->next)
+        if (i >= nout && ent->is_in)
+          continue;
+        else
+          {
+            struct gomp_task *tsk = ent->task;
+            if (!tsk->parent_depends_on)
+              {
+                tsk->parent_depends_on = true;
+                ++num_awaited;
+                if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
+                  {
+                    /* If a task we need to wait for is not already
+                       running and is ready to be scheduled, move it
+                       to front, so that we run it as soon as possible.  */
+                    if (last_parent_depends_on)
+                      {
+                        tsk->prev_child->next_child = tsk->next_child;
+                        tsk->next_child->prev_child = tsk->prev_child;
+                        tsk->prev_child = last_parent_depends_on;
+                        tsk->next_child = last_parent_depends_on->next_child;
+                        tsk->prev_child->next_child = tsk;
+                        tsk->next_child->prev_child = tsk;
+                      }
+                    else if (tsk != task->children)
+                      {
+                        tsk->prev_child->next_child = tsk->next_child;
+                        tsk->next_child->prev_child = tsk->prev_child;
+                        tsk->prev_child = task->children;
+                        tsk->next_child = task->children->next_child;
+                        task->children = tsk;
+                        tsk->prev_child->next_child = tsk;
+                        tsk->next_child->prev_child = tsk;
+                      }
+                    last_parent_depends_on = tsk;
+                  }
+              }
+          }
+    }
+  if (num_awaited == 0)
+    {
+      gomp_mutex_unlock (&team->task_lock);
+      return;
+    }
+
+  memset (&taskwait, 0, sizeof (taskwait));
+  taskwait.n_depend = num_awaited;
+  taskwait.last_parent_depends_on = last_parent_depends_on;
+  gomp_sem_init (&taskwait.taskwait_sem, 0);
+  task->taskwait = &taskwait;
+
+  while (1)
+    {
+      bool cancelled = false;
+      if (taskwait.n_depend == 0)
+        {
+          task->taskwait = NULL;
+          gomp_mutex_unlock (&team->task_lock);
+          if (to_free)
+            {
+              gomp_finish_task (to_free);
+              free (to_free);
+            }
+          gomp_sem_destroy (&taskwait.taskwait_sem);
           return;
         }
+      if (task->children->kind == GOMP_TASK_WAITING)
+        {
+          child_task = task->children;
+          cancelled
+            = gomp_task_run_pre (child_task, task, child_task->taskgroup,
+                                 team);
+          if (__builtin_expect (cancelled, 0))
+            {
+              if (to_free)
+                {
+                  gomp_finish_task (to_free);
+                  free (to_free);
+                  to_free = NULL;
+                }
+              goto finish_cancelled;
+            }
+        }
+      else
+        /* All tasks we are waiting for are already running
+           in other threads.  Wait for them.  */
+        taskwait.in_depend_wait = true;
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+        {
+          gomp_team_barrier_wake (&team->barrier, do_wake);
+          do_wake = 0;
+        }
+      if (to_free)
+        {
+          gomp_finish_task (to_free);
+          free (to_free);
+          to_free = NULL;
+        }
+      if (child_task)
+        {
+          thr->task = child_task;
+          child_task->fn (child_task->fn_data);
+          thr->task = task;
+        }
+      else
+        gomp_sem_wait (&taskwait.taskwait_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
         {
+         finish_cancelled:;
+          size_t new_tasks
+            = gomp_task_run_post_handle_depend (child_task, team);
+          if (child_task->parent_depends_on)
+            --taskwait.n_depend;
           child_task->prev_child->next_child = child_task->next_child;
           child_task->next_child->prev_child = child_task->prev_child;
           if (task->children == child_task)
@@ -358,10 +1010,17 @@ GOMP_taskwait (void)
               task->children = NULL;
             }
           gomp_clear_parent (child_task->children);
+          gomp_task_run_post_remove_taskgroup (child_task);
           to_free = child_task;
           child_task = NULL;
           team->task_count--;
-          team->task_running_count--;
+          if (new_tasks > 1)
+            {
+              do_wake = team->nthreads - team->task_running_count
+                        - !task->in_tied_task;
+              if (do_wake > new_tasks)
+                do_wake = new_tasks;
+            }
         }
     }
 }
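For illustration (not patch content), this is the situation the new function
serves: an undeferred task with depend clauses must still honor ordering
against earlier sibling tasks, so the encountering thread first waits for
exactly the siblings the new task depends on.  For example:

    #include <stdio.h>

    int
    main (void)
    {
      int x = 0;
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp task depend(out: x) shared(x)
        x = 42;
        /* if(0) makes the task undeferred; depend(in: x) forces the
           encountering thread to wait for the task above first.  */
    #pragma omp task if(0) depend(in: x) shared(x)
        printf ("%d\n", x);   /* prints 42 */
      }
      return 0;
    }
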
@@ -374,6 +1033,149 @@ GOMP_taskyield (void)
   /* Nothing at the moment.  */
 }
 
+void
+GOMP_taskgroup_start (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task = thr->task;
+  struct gomp_taskgroup *taskgroup;
+
+  /* If team is NULL, all tasks are executed as
+     GOMP_TASK_IFFALSE tasks and thus all children tasks of
+     taskgroup and their descendant tasks will be finished
+     by the time GOMP_taskgroup_end is called.  */
+  if (team == NULL)
+    return;
+  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
+  taskgroup->prev = task->taskgroup;
+  taskgroup->children = NULL;
+  taskgroup->in_taskgroup_wait = false;
+  taskgroup->cancelled = false;
+  taskgroup->num_children = 0;
+  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+  task->taskgroup = taskgroup;
+}
+
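For illustration (not patch content): the compiler brackets a taskgroup
region with these two entry points, and GOMP_taskgroup_end returns only after
every task generated in the region, including descendants, has completed.
For example:

    #include <stdio.h>

    int
    main (void)
    {
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp taskgroup   /* lowered to GOMP_taskgroup_start () */
        {
    #pragma omp task
          printf ("child\n");
        }                   /* lowered to GOMP_taskgroup_end () */
        printf ("after taskgroup: child is done\n");
      }
      return 0;
    }
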
+void
+GOMP_taskgroup_end (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task = thr->task;
+  struct gomp_taskgroup *taskgroup;
+  struct gomp_task *child_task = NULL;
+  struct gomp_task *to_free = NULL;
+  int do_wake = 0;
+
+  if (team == NULL)
+    return;
+  taskgroup = task->taskgroup;
+
+  gomp_mutex_lock (&team->task_lock);
+
+  if (taskgroup->num_children == 0)
+    {
+      gomp_mutex_unlock (&team->task_lock);
+      goto finish;
+    }
+
+  while (1)
+    {
+      bool cancelled = false;
+      if (taskgroup->children == NULL)
+        {
+          if (taskgroup->num_children)
+            {
+              if (task->children == NULL)
+                goto do_wait;
+              child_task = task->children;
+            }
+          else
+            {
+              gomp_mutex_unlock (&team->task_lock);
+              if (to_free)
+                {
+                  gomp_finish_task (to_free);
+                  free (to_free);
+                }
+              goto finish;
+            }
+        }
+      else
+        child_task = taskgroup->children;
+      if (child_task->kind == GOMP_TASK_WAITING)
+        {
+          cancelled
+            = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
+                                 team);
+          if (__builtin_expect (cancelled, 0))
+            {
+              if (to_free)
+                {
+                  gomp_finish_task (to_free);
+                  free (to_free);
+                  to_free = NULL;
+                }
+              goto finish_cancelled;
+            }
+        }
+      else
+        {
+          child_task = NULL;
+         do_wait:
+          /* All tasks we are waiting for are already running
+             in other threads.  Wait for them.  */
+          taskgroup->in_taskgroup_wait = true;
+        }
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+        {
+          gomp_team_barrier_wake (&team->barrier, do_wake);
+          do_wake = 0;
+        }
+      if (to_free)
+        {
+          gomp_finish_task (to_free);
+          free (to_free);
+          to_free = NULL;
+        }
+      if (child_task)
+        {
+          thr->task = child_task;
+          child_task->fn (child_task->fn_data);
+          thr->task = task;
+        }
+      else
+        gomp_sem_wait (&taskgroup->taskgroup_sem);
+      gomp_mutex_lock (&team->task_lock);
+      if (child_task)
+        {
+         finish_cancelled:;
+          size_t new_tasks
+            = gomp_task_run_post_handle_depend (child_task, team);
+          gomp_task_run_post_remove_parent (child_task);
+          gomp_clear_parent (child_task->children);
+          gomp_task_run_post_remove_taskgroup (child_task);
+          to_free = child_task;
+          child_task = NULL;
+          team->task_count--;
+          if (new_tasks > 1)
+            {
+              do_wake = team->nthreads - team->task_running_count
+                        - !task->in_tied_task;
+              if (do_wake > new_tasks)
+                do_wake = new_tasks;
+            }
+        }
+    }
+
+ finish:
+  task->taskgroup = taskgroup->prev;
+  gomp_sem_destroy (&taskgroup->taskgroup_sem);
+  free (taskgroup);
+}
+
 int
 omp_in_final (void)
 {