Mercurial > hg > CbC > CbC_llvm
view openmp/tools/archer/ompt-tsan.cpp @ 171:66f3bfe93da9
git version 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 11:07:02 +0900 |
parents | 1d019706d866 |
children | 0572611fdcc8 |
line wrap: on
line source
/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <atomic>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <inttypes.h>
#include <iostream>
#include <mutex>
#include <sstream>
#include <stack>
#include <list>
#include <string>
#include <iostream>
#include <unordered_map>
#include <vector>

#if (defined __APPLE__ && defined __MACH__)
#include <dlfcn.h>
#endif

#include <sys/resource.h>
#include "omp-tools.h"

// Set by RunningOnValgrind(): nonzero iff the process is actually running
// under ThreadSanitizer (see ompt_start_tool below).
static int runOnTsan;
// Result of registering the optional reduction callback; used to decide
// whether writes during barriers/reductions must be ignored manually.
static int hasReductionCallback;

/// Parses the ARCHER_OPTIONS environment variable (space-separated
/// "key=value" tokens) into plain int flags.
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow;
#endif
  int print_max_rss;
  int verbose;
  int enabled;

  ArcherFlags(const char *env)
      :
#if (LLVM_VERSION) >= 40
        flush_shadow(0),
#endif
        print_max_rss(0), verbose(0), enabled(1) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      // Tokenize on single spaces; each token is matched against the known
      // options, unknown tokens produce a diagnostic but are otherwise ignored.
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
                  << std::endl;
      }
    }
  }
};

/// Parses the subset of TSAN_OPTIONS this tool cares about.
class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
        // we are interested in ignore_noninstrumented_modules to print a
        // warning
        if (sscanf(it->c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};

#if (LLVM_VERSION) >= 40
extern "C" {
int __attribute__((weak)) __archer_get_omp_status();
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
ArcherFlags *archer_flags;

// The following definitions are pasted from "llvm/Support/Compiler.h" to allow
// the code
// to be compiled with other compilers like gcc:

#ifndef TsanHappensBefore
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// tsan detects these exact functions by name.
extern "C" {
#if (defined __APPLE__ && defined __MACH__)
// On macOS weak symbols do not interpose reliably, so the TSan annotation
// entry points are looked up dynamically at each call.
// NOTE(review): the dlsym results are not checked for nullptr before the
// call — presumably TSan is guaranteed to be loaded here; confirm.
static void AnnotateHappensAfter(const char *file, int line,
                                 const volatile void *cv) {
  void (*fptr)(const char *, int, const volatile void *);

  fptr = (void (*)(const char *, int, const volatile void *))dlsym(
      RTLD_DEFAULT, "AnnotateHappensAfter");
  (*fptr)(file, line, cv);
}
static void AnnotateHappensBefore(const char *file, int line,
                                  const volatile void *cv) {
  void (*fptr)(const char *, int, const volatile void *);

  fptr = (void (*)(const char *, int, const volatile void *))dlsym(
      RTLD_DEFAULT, "AnnotateHappensBefore");
  (*fptr)(file, line, cv);
}
static void AnnotateIgnoreWritesBegin(const char *file, int line) {
  void (*fptr)(const char *, int);

  fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
                                            "AnnotateIgnoreWritesBegin");
  (*fptr)(file, line);
}
static void AnnotateIgnoreWritesEnd(const char *file, int line) {
  void (*fptr)(const char *, int);

  fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
                                            "AnnotateIgnoreWritesEnd");
  (*fptr)(file, line);
}
static void AnnotateNewMemory(const char *file, int line,
                              const volatile void *cv, size_t size) {
  void (*fptr)(const char *, int, const volatile void *, size_t);

  fptr = (void (*)(const char *, int, const volatile void *, size_t))dlsym(
      RTLD_DEFAULT, "AnnotateNewMemory");
  (*fptr)(file, line, cv, size);
}
static int RunningOnValgrind() {
  int (*fptr)();

  fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind");
  // If the symbol resolves to something other than this fallback, TSan (which
  // provides its own RunningOnValgrind) is not the one answering — so we are
  // NOT running under TSan.
  if (fptr && fptr != RunningOnValgrind)
    runOnTsan = 0;
  return 0;
}
#else
// On ELF platforms these weak definitions are interposed by the real TSan
// runtime when it is loaded; otherwise they are harmless no-ops.
void __attribute__((weak))
AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateIgnoreWritesBegin(const char *file, int line) {}
void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) {
}
void __attribute__((weak))
AnnotateNewMemory(const char *file, int line, const volatile void *cv,
                  size_t size) {}
int __attribute__((weak)) RunningOnValgrind() {
  // The weak fallback was not interposed by TSan: record that we do not run
  // under TSan.
  runOnTsan = 0;
  return 0;
}
void __attribute__((weak)) __tsan_func_entry(const void *call_pc) {}
void __attribute__((weak)) __tsan_func_exit(void) {}
#endif
}

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) // We don't really delete the clock for now #define TsanDeleteClock(cv) // newMemory #define TsanNewMemory(addr, size) \ AnnotateNewMemory(__FILE__, __LINE__, addr, size) #define TsanFreeMemory(addr, size) \ AnnotateNewMemory(__FILE__, __LINE__, addr, size) #endif // Function entry/exit #define TsanFuncEntry(pc) __tsan_func_entry(pc) #define TsanFuncExit() __tsan_func_exit() /// Required OMPT inquiry functions. static ompt_get_parallel_info_t ompt_get_parallel_info; static ompt_get_thread_data_t ompt_get_thread_data; typedef uint64_t ompt_tsan_clockid; static uint64_t my_next_id() { static uint64_t ID = 0; uint64_t ret = __sync_fetch_and_add(&ID, 1); return ret; } // Data structure to provide a threadsafe pool of reusable objects. // DataPool<Type of objects, Size of blockalloc> template <typename T, int N> struct DataPool { std::mutex DPMutex; std::stack<T *> DataPointer; std::list<void *> memory; int total; void newDatas() { // prefix the Data with a pointer to 'this', allows to return memory to // 'this', // without explicitly knowing the source. // // To reduce lock contention, we use thread local DataPools, but Data // objects move to other threads. // The strategy is to get objects from local pool. Only if the object moved // to another // thread, we might see a penalty on release (returnData). // For "single producer" pattern, a single thread creates tasks, these are // executed by other threads. // The master will have a high demand on TaskData, so return after use. struct pooldata { DataPool<T, N> *dp; T data; }; // We alloc without initialize the memory. We cannot call constructors. // Therfore use malloc! 
pooldata *datas = (pooldata *)malloc(sizeof(pooldata) * N); memory.push_back(datas); for (int i = 0; i < N; i++) { datas[i].dp = this; DataPointer.push(&(datas[i].data)); } total += N; } T *getData() { T *ret; DPMutex.lock(); if (DataPointer.empty()) newDatas(); ret = DataPointer.top(); DataPointer.pop(); DPMutex.unlock(); return ret; } void returnData(T *data) { DPMutex.lock(); DataPointer.push(data); DPMutex.unlock(); } void getDatas(int n, T **datas) { DPMutex.lock(); for (int i = 0; i < n; i++) { if (DataPointer.empty()) newDatas(); datas[i] = DataPointer.top(); DataPointer.pop(); } DPMutex.unlock(); } void returnDatas(int n, T **datas) { DPMutex.lock(); for (int i = 0; i < n; i++) { DataPointer.push(datas[i]); } DPMutex.unlock(); } DataPool() : DPMutex(), DataPointer(), total(0) {} ~DataPool() { // we assume all memory is returned when the thread finished / destructor is // called for (auto i : memory) if (i) free(i); } }; // This function takes care to return the data to the originating DataPool // A pointer to the originating DataPool is stored just before the actual data. template <typename T, int N> static void retData(void *data) { ((DataPool<T, N> **)data)[-1]->returnData((T *)data); } struct ParallelData; __thread DataPool<ParallelData, 4> *pdp; /// Data structure to store additional information for parallel regions. struct ParallelData { // Parallel fork is just another barrier, use Barrier[1] /// Two addresses for relationships with barriers. ompt_tsan_clockid Barrier[2]; const void *codePtr; void *GetParallelPtr() { return &(Barrier[1]); } void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); } ParallelData(const void *codeptr) : codePtr(codeptr) {} ~ParallelData() { TsanDeleteClock(&(Barrier[0])); TsanDeleteClock(&(Barrier[1])); } // overload new/delete to use DataPool for memory management. 
void *operator new(size_t size) { return pdp->getData(); } void operator delete(void *p, size_t) { retData<ParallelData, 4>(p); } }; static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) { return reinterpret_cast<ParallelData *>(parallel_data->ptr); } struct Taskgroup; __thread DataPool<Taskgroup, 4> *tgp; /// Data structure to support stacking of taskgroups and allow synchronization. struct Taskgroup { /// Its address is used for relationships of the taskgroup's task set. ompt_tsan_clockid Ptr; /// Reference to the parent taskgroup. Taskgroup *Parent; Taskgroup(Taskgroup *Parent) : Parent(Parent) {} ~Taskgroup() { TsanDeleteClock(&Ptr); } void *GetPtr() { return &Ptr; } // overload new/delete to use DataPool for memory management. void *operator new(size_t size) { return tgp->getData(); } void operator delete(void *p, size_t) { retData<Taskgroup, 4>(p); } }; struct TaskData; __thread DataPool<TaskData, 4> *tdp; /// Data structure to store additional information for tasks. struct TaskData { /// Its address is used for relationships of this task. ompt_tsan_clockid Task; /// Child tasks use its address to declare a relationship to a taskwait in /// this task. ompt_tsan_clockid Taskwait; /// Whether this task is currently executing a barrier. bool InBarrier; /// Whether this task is an included task. bool Included; /// Index of which barrier to use next. char BarrierIndex; /// Count how often this structure has been put into child tasks + 1. std::atomic_int RefCount; /// Reference to the parent that created this task. TaskData *Parent; /// Reference to the implicit task in the stack above this task. TaskData *ImplicitTask; /// Reference to the team of this task. ParallelData *Team; /// Reference to the current taskgroup that this task either belongs to or /// that it just created. Taskgroup *TaskGroup; /// Dependency information for this task. ompt_dependence_t *Dependencies; /// Number of dependency entries. 
unsigned DependencyCount; void *PrivateData; size_t PrivateDataSize; int execution; int freed; TaskData(TaskData *Parent) : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1), Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team), TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) { if (Parent != nullptr) { Parent->RefCount++; // Copy over pointer to taskgroup. This task may set up its own stack // but for now belongs to its parent's taskgroup. TaskGroup = Parent->TaskGroup; } } TaskData(ParallelData *Team = nullptr) : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1), Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr), DependencyCount(0), execution(1), freed(0) {} ~TaskData() { TsanDeleteClock(&Task); TsanDeleteClock(&Taskwait); } void *GetTaskPtr() { return &Task; } void *GetTaskwaitPtr() { return &Taskwait; } // overload new/delete to use DataPool for memory management. void *operator new(size_t size) { return tdp->getData(); } void operator delete(void *p, size_t) { retData<TaskData, 4>(p); } }; static inline TaskData *ToTaskData(ompt_data_t *task_data) { return reinterpret_cast<TaskData *>(task_data->ptr); } static inline void *ToInAddr(void *OutAddr) { // FIXME: This will give false negatives when a second variable lays directly // behind a variable that only has a width of 1 byte. // Another approach would be to "negate" the address or to flip the // first bit... return reinterpret_cast<char *>(OutAddr) + 1; } /// Store a mutex for each wait_id to resolve race condition with callbacks. 
/// Store a mutex for each wait_id to resolve race condition with callbacks.
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
std::mutex LocksMutex;

// Allocate the per-thread object pools and assign the OMPT thread id.
static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
                                   ompt_data_t *thread_data) {
  pdp = new DataPool<ParallelData, 4>;
  // NOTE(review): sizeof(pdp) is the size of the pointer, not of the pool
  // object — presumably intentional (annotate the fresh allocation site);
  // confirm whether sizeof(*pdp) was meant.
  TsanNewMemory(pdp, sizeof(pdp));
  tgp = new DataPool<Taskgroup, 4>;
  TsanNewMemory(tgp, sizeof(tgp));
  tdp = new DataPool<TaskData, 4>;
  TsanNewMemory(tdp, sizeof(tdp));
  thread_data->value = my_next_id();
}

// Release the per-thread object pools.
static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  delete pdp;
  delete tgp;
  delete tdp;
}

/// OMPT event callbacks for handling parallel regions.

static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
                                     const ompt_frame_t *parent_task_frame,
                                     ompt_data_t *parallel_data,
                                     uint32_t requested_team_size, int flag,
                                     const void *codeptr_ra) {
  ParallelData *Data = new ParallelData(codeptr_ra);
  parallel_data->ptr = Data;

  // The fork synchronizes with each implicit task begin (see
  // ompt_tsan_implicit_task, which does TsanHappensAfter on the same address).
  TsanHappensBefore(Data->GetParallelPtr());
}

static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
                                   ompt_data_t *task_data, int flag,
                                   const void *codeptr_ra) {
  ParallelData *Data = ToParallelData(parallel_data);
  // The join synchronizes with the last barrier entry of every team member
  // (both barrier addresses, since the team alternates between them).
  TsanHappensAfter(Data->GetBarrierPtr(0));
  TsanHappensAfter(Data->GetBarrierPtr(1));

  delete Data;

#if (LLVM_VERSION >= 40)
  // Optionally flush TSan's shadow memory when the runtime reports being
  // outside any parallel region (reduces memory footprint).
  if (&__archer_get_omp_status) {
    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
      __tsan_flush_memory();
  }
#endif
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    // The initial task has no enclosing parallel_begin; create its
    // ParallelData here.
    if (type & ompt_task_initial) {
      parallel_data->ptr = new ParallelData(nullptr);
    }
    task_data->ptr = new TaskData(ToParallelData(parallel_data));
    // Synchronize with the fork in ompt_tsan_parallel_begin.
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end:
    TaskData *Data = ToTaskData(task_data);
    assert(Data->freed == 0 && "Implicit task end should only be called once!");
    Data->freed = 1;
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    delete Data;
    TsanFuncExit();
    break;
  }
}

static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier: {
      // Entering the barrier: release to the team's current barrier clock.
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would either occur during
        // 1. reductions performed by the runtime which are guaranteed to be
        // race-free.
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      // Push a new taskgroup onto this task's taskgroup stack.
      Data->TaskGroup = new Taskgroup(Data->TaskGroup);
      break;

    default:
      break;
    }
    break;
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // Barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      // Only synchronize if this task actually created child tasks
      // (execution > 1; creation increments it in task_create).
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      // Acquire from all tasks of this taskgroup (they release to this
      // address on completion in task_schedule).
      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup, all descendent task are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      delete Data->TaskGroup;
      Data->TaskGroup = Parent;
      break;
    }

    default:
      break;
    }
    break;
  }
}

// If the runtime supports the reduction callback, suppress write tracking for
// the duration of each runtime-performed (race-free) reduction.
static void ompt_tsan_reduction(ompt_sync_region_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  switch (endpoint) {
  case ompt_scope_begin:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesBegin();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_end:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesEnd();
      break;
    default:
      break;
    }
    break;
  }
}

/// OMPT event callbacks for handling tasks.
static void ompt_tsan_task_create( ompt_data_t *parent_task_data, /* id of parent task */ const ompt_frame_t *parent_frame, /* frame data for parent task */ ompt_data_t *new_task_data, /* id of created task */ int type, int has_dependences, const void *codeptr_ra) /* pointer to outlined function */ { TaskData *Data; assert(new_task_data->ptr == NULL && "Task data should be initialized to NULL"); if (type & ompt_task_initial) { ompt_data_t *parallel_data; int team_size = 1; ompt_get_parallel_info(0, ¶llel_data, &team_size); ParallelData *PData = new ParallelData(nullptr); parallel_data->ptr = PData; Data = new TaskData(PData); new_task_data->ptr = Data; } else if (type & ompt_task_undeferred) { Data = new TaskData(ToTaskData(parent_task_data)); new_task_data->ptr = Data; Data->Included = true; } else if (type & ompt_task_explicit || type & ompt_task_target) { Data = new TaskData(ToTaskData(parent_task_data)); new_task_data->ptr = Data; // Use the newly created address. We cannot use a single address from the // parent because that would declare wrong relationships with other // sibling tasks that may be created before this task is started! TsanHappensBefore(Data->GetTaskPtr()); ToTaskData(parent_task_data)->execution++; } } static void ompt_tsan_task_schedule(ompt_data_t *first_task_data, ompt_task_status_t prior_task_status, ompt_data_t *second_task_data) { TaskData *FromTask = ToTaskData(first_task_data); TaskData *ToTask = ToTaskData(second_task_data); if (ToTask->Included && prior_task_status != ompt_task_complete) return; // No further synchronization for begin included tasks if (FromTask->Included && prior_task_status == ompt_task_complete) { // Just delete the task: while (FromTask != nullptr && --FromTask->RefCount == 0) { TaskData *Parent = FromTask->Parent; if (FromTask->DependencyCount > 0) { delete[] FromTask->Dependencies; } delete FromTask; FromTask = Parent; } return; } if (ToTask->execution == 0) { ToTask->execution++; // 1. 
Task will begin execution after it has been created. TsanHappensAfter(ToTask->GetTaskPtr()); for (unsigned i = 0; i < ToTask->DependencyCount; i++) { ompt_dependence_t *Dependency = &ToTask->Dependencies[i]; TsanHappensAfter(Dependency->variable.ptr); // in and inout dependencies are also blocked by prior in dependencies! if (Dependency->dependence_type == ompt_dependence_type_out || Dependency->dependence_type == ompt_dependence_type_inout) { TsanHappensAfter(ToInAddr(Dependency->variable.ptr)); } } } else { // 2. Task will resume after it has been switched away. TsanHappensAfter(ToTask->GetTaskPtr()); } if (prior_task_status != ompt_task_complete) { ToTask->ImplicitTask = FromTask->ImplicitTask; assert(ToTask->ImplicitTask != NULL && "A task belongs to a team and has an implicit task on the stack"); } // Task may be resumed at a later point in time. TsanHappensBefore(FromTask->GetTaskPtr()); if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) { // We want to ignore writes in the runtime code during barriers, // but not when executing tasks with user code! TsanIgnoreWritesEnd(); } if (prior_task_status == ompt_task_complete) { // task finished // Task will finish before a barrier in the surrounding parallel region ... ParallelData *PData = FromTask->Team; TsanHappensBefore( PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex)); // ... and before an eventual taskwait by the parent thread. TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr()); if (FromTask->TaskGroup != nullptr) { // This task is part of a taskgroup, so it will finish before the // corresponding taskgroup_end. TsanHappensBefore(FromTask->TaskGroup->GetPtr()); } for (unsigned i = 0; i < FromTask->DependencyCount; i++) { ompt_dependence_t *Dependency = &FromTask->Dependencies[i]; // in dependencies block following inout and out dependencies! 
TsanHappensBefore(ToInAddr(Dependency->variable.ptr)); if (Dependency->dependence_type == ompt_dependence_type_out || Dependency->dependence_type == ompt_dependence_type_inout) { TsanHappensBefore(Dependency->variable.ptr); } } while (FromTask != nullptr && --FromTask->RefCount == 0) { TaskData *Parent = FromTask->Parent; if (FromTask->DependencyCount > 0) { delete[] FromTask->Dependencies; } delete FromTask; FromTask = Parent; } } if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) { // We re-enter runtime code which currently performs a barrier. TsanIgnoreWritesBegin(); } } static void ompt_tsan_dependences(ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps) { if (ndeps > 0) { // Copy the data to use it in task_switch and task_end. TaskData *Data = ToTaskData(task_data); Data->Dependencies = new ompt_dependence_t[ndeps]; std::memcpy(Data->Dependencies, deps, sizeof(ompt_dependence_t) * ndeps); Data->DependencyCount = ndeps; // This callback is executed before this task is first started. TsanHappensBefore(Data->GetTaskPtr()); } } /// OMPT event callbacks for handling locking. static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra) { // Acquire our own lock to make sure that // 1. the previous release has finished. // 2. the next acquire doesn't start before we have finished our release. 
LocksMutex.lock(); std::mutex &Lock = Locks[wait_id]; LocksMutex.unlock(); Lock.lock(); TsanHappensAfter(&Lock); } static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra) { LocksMutex.lock(); std::mutex &Lock = Locks[wait_id]; LocksMutex.unlock(); TsanHappensBefore(&Lock); Lock.unlock(); } // callback , signature , variable to store result , required support level #define SET_OPTIONAL_CALLBACK_T(event, type, result, level) \ do { \ ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event; \ result = ompt_set_callback(ompt_callback_##event, \ (ompt_callback_t)tsan_##event); \ if (result < level) \ printf("Registered callback '" #event "' is not supported at " #level " (%i)\n", \ result); \ } while (0) #define SET_CALLBACK_T(event, type) \ do { \ int res; \ SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always); \ } while (0) #define SET_CALLBACK(event) SET_CALLBACK_T(event, event) static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num, ompt_data_t *tool_data) { const char *options = getenv("TSAN_OPTIONS"); TsanFlags tsan_flags(options); ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); if (ompt_set_callback == NULL) { std::cerr << "Could not set callback, exiting..." 
<< std::endl; std::exit(1); } ompt_get_parallel_info = (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info"); ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data"); if (ompt_get_parallel_info == NULL) { fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', " "exiting...\n"); exit(1); } SET_CALLBACK(thread_begin); SET_CALLBACK(thread_end); SET_CALLBACK(parallel_begin); SET_CALLBACK(implicit_task); SET_CALLBACK(sync_region); SET_CALLBACK(parallel_end); SET_CALLBACK(task_create); SET_CALLBACK(task_schedule); SET_CALLBACK(dependences); SET_CALLBACK_T(mutex_acquired, mutex); SET_CALLBACK_T(mutex_released, mutex); SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback, ompt_set_never); if (!tsan_flags.ignore_noninstrumented_modules) fprintf( stderr, "Warning: please export TSAN_OPTIONS='ignore_noninstrumented_modules=1' " "to avoid false positive reports from the OpenMP runtime.!\n"); return 1; // success } static void ompt_tsan_finalize(ompt_data_t *tool_data) { if (archer_flags->print_max_rss) { struct rusage end; getrusage(RUSAGE_SELF, &end); printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss); } if (archer_flags) delete archer_flags; } extern "C" ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, const char *runtime_version) { const char *options = getenv("ARCHER_OPTIONS"); archer_flags = new ArcherFlags(options); if (!archer_flags->enabled) { if (archer_flags->verbose) std::cout << "Archer disabled, stopping operation" << std::endl; delete archer_flags; return NULL; } static ompt_start_tool_result_t ompt_start_tool_result = { &ompt_tsan_initialize, &ompt_tsan_finalize, {0}}; runOnTsan=1; RunningOnValgrind(); if (!runOnTsan) // if we are not running on TSAN, give a different tool the // chance to be loaded { if (archer_flags->verbose) std::cout << "Archer detected OpenMP application without TSan " "stopping operation" << std::endl; delete archer_flags; return NULL; } if 
(archer_flags->verbose) std::cout << "Archer detected OpenMP application with TSan, supplying " "OpenMP synchronization semantics" << std::endl; return &ompt_start_tool_result; }