Mercurial > hg > GearsTemplate
changeset 589:a4cab67624f7
remove old file
author | anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 16 Jan 2020 15:11:11 +0900 |
parents | 78e10562b210 |
children | 9146d6017f18 |
files | src/CMakeLists.txt src/include/allocate.h src/include/context.h src/include/origin_cs.h src/llrb/CMakeLists.txt src/llrb/allocate.c src/llrb/compare.c src/llrb/include/llrbContext.h src/llrb/include/stack.h src/llrb/llrb.c src/llrb/llrbContext.c src/llrb/main.c src/llrb/origin_cs.c src/llrb/stack.c src/test/CMakeLists.txt src/test/Cudasample_cpu.cu src/test/Cudasample_gpu.cu src/test/OpenCL_gpu.c src/test/helper_cuda.h src/test/helper_string.h src/test/main.cu src/test/multiply.cu src/test/test.c src/test/twice.cc src/test/vectorAddDrv.cc src/test/vectorAdd_kernel.cu |
diffstat | 26 files changed, 0 insertions(+), 4301 deletions(-) [+] |
line wrap: on
line diff
--- a/src/CMakeLists.txt Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 3.3) - -# output compile log -set(CMAKE_VERBOSE_MAKEFILE 1) - -# set compiler -set(CMAKE_C_COMPILER $ENV{CBC_COMPILER}) - -# compile option -add_definitions("-Wall -g -O0") - -include_directories(include) -include_directories($ENV{CUDA_PATH}) -add_subdirectory(allocate) -add_subdirectory(list) -add_subdirectory(llrb) -add_subdirectory(synchronizedQueue) -add_subdirectory(parallel_execution)
--- a/src/include/allocate.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -__code allocate(); -__code meta_allocate(); - -__code allocate(struct Context* context) { - goto meta_allocate(context); -} - -__code meta_allocate(struct Context* context) { - context->data[++context->dataNum] = context->heap; - context->heap += context->data[0]->allocate.size; - - goto meta(context, context->next[--context->current]); -}
--- a/src/include/context.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -typedef struct Context_st { - void* ds; - void* mds; - void* ds_heap; - void* mds_heap; - __code (*next)(); - void* head; -} Context;
--- a/src/include/origin_cs.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -extern __code start_code(struct Context* context, enum Code next); -extern __code exit_code(struct Context* context); -extern __code meta(struct Context* context, enum Code next);
--- a/src/llrb/CMakeLists.txt Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -add_definitions("-Wall -g -O0") - -set(CMAKE_C_COMPILER $ENV{CbC_Clang}/clang) - -include_directories(include) -add_executable(llrb - main.c - llrb.c - llrbContext.c - allocate.c - compare.c - stack.c - origin_cs.c -) - - -add_executable(llrb_with_put_verify - main.c - llrb.c - llrbContext.c - allocate.c - compare.c - stack.c - verifier/llrbContextWithVerifier.c - verifier/verify_put_cs.c -)
--- a/src/llrb/allocate.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -#include "llrbContext.h" - -void allocator(struct Context* context) { - context->data[++context->dataNum] = context->heap; - context->heap += context->data[Allocate]->allocate.size; -}
--- a/src/llrb/compare.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#include "llrbContext.h" - -void compare(struct Context* context, struct Tree* tree, int key1, int key2) { - if (key1 == key2) { - tree->result = EQ; - } else if (key1 < key2) { - tree->result = GT; - } else { - tree->result = LT; - } -}
--- a/src/llrb/include/llrbContext.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -/* Context definition for llrb example */ -#include "stack.h" - -#define ALLOCATE_SIZE 1000 - -enum Code { - Code1, - Code2, - Code3, - Code4, - Code5, - Find, - Not_find, - Code6, - Allocator, - Put, - Replace, - Insert, - Compare, - RotateL, - RotateR, - SetTree, - InsertCase1, - InsertCase2, - InsertCase3, - InsertCase4, - InsertCase4_1, - InsertCase4_2, - InsertCase5, - StackClear, - Get, - Search, - Delete, - Delete1, - Delete2, - Delete3, - Replace_d1, - Replace_d2, - FindMax1, - FindMax2, - DeleteCase1, - DeleteCase2, - DeleteCase3, - DeleteCase4, - DeleteCase5, - DeleteCase6, - Exit, -}; - -enum Relational { - EQ, - GT, - LT, -}; - -enum UniqueData { - Allocate, - Tree, - Node, -}; - -struct Context { - enum Code next; - int codeNum; - __code (**code) (struct Context*); - void* heapStart; - void* heap; - long heapLimit; - int dataNum; - stack_ptr code_stack; - stack_ptr node_stack; - union Data **data; -}; - -union Data { - struct Comparable { // inteface - enum Code compare; - union Data* data; - } compare; - struct Count { - enum Code next; - long i; - } count; - struct Tree { - enum Code next; - struct Node* root; - struct Node* current; - struct Node* deleted; - int result; - } tree; - struct Node { - // need to tree - enum Code next; - int key; // comparable data segment - int value; - struct Node* left; - struct Node* right; - // need to balancing - enum Color { - Red, - Black, - } color; - } node; - struct Allocate { - enum Code next; - long size; - } allocate; -};
--- a/src/llrb/include/stack.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -#include <stdlib.h> - -typedef struct { - size_t size; - int max; - int num; - void* data; -} stack, *stack_ptr; - -extern stack_ptr stack_init(); -extern stack_ptr stack_realloc(); -extern void stack_free(); -extern int stack_push(); -extern int stack_pop(); -extern int isMax(); -extern int isEmpty();
--- a/src/llrb/llrb.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,669 +0,0 @@ -#include <stdio.h> - -#include "llrbContext.h" -#include "origin_cs.h" - -extern void allocator(struct Context* context); -extern void compare(struct Context* context, struct Tree* tree, int key1, int key2); - -extern int num; - -__code put(struct Context* context, struct Tree* tree, struct Node* root, struct Allocate* allocate) { - allocate->size = sizeof(struct Node); - allocator(context); - - stack_push(context->code_stack, &context->next); - - context->next = StackClear; - stack_push(context->code_stack, &context->next); - - tree->root = &context->data[context->dataNum]->node; - - if (root) { - tree->current = root; - compare(context, tree, tree->current->key, context->data[Node]->node.key); - - goto meta(context, Replace); - } - - goto meta(context, Insert); -} - -__code put_stub(struct Context* context) { - goto put(context, &context->data[Tree]->tree, context->data[Tree]->tree.root, &context->data[Allocate]->allocate); -} - -__code replaceNode(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode, int result) { - *newNode = *oldNode; - stack_push(context->node_stack, &newNode); - - if (result == EQ) { - newNode->value = context->data[Node]->node.value; - - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); - } else if (result == GT) { - tree->current = oldNode->right; - newNode->right = context->heap; - } else { - tree->current = oldNode->left; - newNode->left = context->heap; - } - - context->data[Allocate]->allocate.size = sizeof(struct Node); - allocator(context); - - if (tree->current) { - compare(context, tree, tree->current->key, context->data[Node]->node.key); - goto meta(context, Replace); - } - - goto meta(context, Insert); -} - -__code replaceNode_stub(struct Context* context) { - goto replaceNode(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node, context->data[Tree]->tree.result); -} - -__code insertNode(struct Context* context, struct Tree* tree, struct Node* node, struct Node* newNode) { - node->color = Red; - *newNode = *node; - - tree->current = newNode; - - goto meta(context, InsertCase1); -} - -__code insertNode_stub(struct Context* context) { - goto insertNode(context, &context->data[Tree]->tree, &context->data[Node]->node, &context->data[context->dataNum]->node); -} - -__code insertCase1(struct Context* context, struct Tree* tree, struct Node* current) { - if (!isEmpty(context->node_stack)) - goto meta(context, InsertCase2); - - tree->root->color = Black; - - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); -} - -__code insert1_stub(struct Context* context) { - goto insertCase1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); -} - -__code insertCase2(struct Context* context, struct Node* current) { - struct Node* parent; - stack_pop(context->node_stack, &parent); - - if (parent->color == Black) { - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); - } - - stack_push(context->node_stack, &parent); - goto meta(context, InsertCase3); -} - -__code insert2_stub(struct Context* context) { - goto insertCase2(context, context->data[Tree]->tree.current); -} - -__code insertCase3(struct Context* context, struct Tree* tree, struct Node* current) { - struct Node* parent; - struct Node* uncle; - struct Node* grandparent; - - stack_pop(context->node_stack, &parent); - stack_pop(context->node_stack, &grandparent); - - if (grandparent->left == parent) - uncle = grandparent->right; - else - uncle = grandparent->left; - - if (uncle && (uncle->color == Red)) { - parent->color = Black; - uncle->color = Black; - grandparent->color = Red; - tree->current = grandparent; - goto meta(context, InsertCase1); - } - - stack_push(context->node_stack, &grandparent); - stack_push(context->node_stack, &parent); - - goto meta(context, InsertCase4); -} - -__code insert3_stub(struct Context* context) { - goto insertCase3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); -} - -__code insertCase4(struct Context* context, struct Tree* tree, struct Node* current) { - struct Node* parent; - struct Node* grandparent; - - stack_pop(context->node_stack, &parent); - stack_pop(context->node_stack, &grandparent); - - stack_push(context->node_stack, &grandparent); - - tree->current = parent; - - if ((current == parent->right) && (parent == grandparent->left)) { - context->next = InsertCase4_1; - - stack_push(context->code_stack, &context->next); - goto meta(context, RotateL); - } else if ((current == parent->left) && (parent == grandparent->right)) { - context->next = InsertCase4_2; - - stack_push(context->code_stack, &context->next); - goto meta(context, RotateR); - } - - stack_push(context->node_stack, &parent); - tree->current = current; - goto meta(context, InsertCase5); -} - -__code insert4_stub(struct Context* context) { - goto insertCase4(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); -} - -__code insertCase4_1(struct Context* context, struct Tree* tree) { - stack_push(context->node_stack, &tree->current); - tree->current = tree->current->left; - goto meta(context, InsertCase5); -} - -__code insert4_1_stub(struct Context* context) { - goto insertCase4_1(context, &context->data[Tree]->tree); -} - -__code insertCase4_2(struct Context* context, struct Tree* tree) { - stack_push(context->node_stack, &tree->current); - tree->current = tree->current->right; - goto meta(context, InsertCase5); -} - -__code insert4_2_stub(struct Context* context) { - goto insertCase4_2(context, &context->data[Tree]->tree); -} - -__code insertCase5(struct Context* context, struct Tree* tree, struct Node* current) { - struct Node* parent; - struct Node* grandparent; - - stack_pop(context->node_stack, &parent); - stack_pop(context->node_stack, &grandparent); - - parent->color = Black; - grandparent->color = Red; - - tree->current = grandparent; - - if ((current == parent->left) && (parent == grandparent->left)) - goto meta(context, RotateR); - else - goto meta(context, RotateL); -} - -__code insert5_stub(struct Context* context) { - goto insertCase5(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); -} - -__code rotateLeft(struct Context* context, struct Node* node, struct Tree* tree) { - struct Node* tmp = node->right; - struct Node* parent = 0; - - stack_pop(context->node_stack, &parent); - - if (parent) { - if (node == parent->left) - parent->left = tmp; - else - parent->right = tmp; - } else { - tree->root = tmp; - } - - stack_push(context->node_stack, &parent); - - node->right = tmp->left; - tmp->left = node; - tree->current = tmp; - - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); -} - -__code rotateLeft_stub(struct Context* context) { - goto rotateLeft(context, context->data[Tree]->tree.current, &context->data[Tree]->tree); -} - -__code rotateRight(struct Context* context, struct Node* node, struct Tree* tree) { - struct Node* tmp = node->left; - struct Node* parent = 0; - - stack_pop(context->node_stack, &parent); - - if (parent) { - if (node == parent->left) - parent->left = tmp; - else - parent->right = tmp; - } else { - tree->root = tmp; - } - - stack_push(context->node_stack, &parent); - - node->left = tmp->right; - tmp->right = node; - tree->current = tmp; - - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); -} - -__code rotateRight_stub(struct Context* context) { - goto rotateRight(context, context->data[Tree]->tree.current, &context->data[Tree]->tree); -} - -__code stackClear(struct Context* context, stack_ptr node_stack, struct Tree* tree) { - if (stack_pop(node_stack, &tree->current) == 0) - goto meta(context, StackClear); - - tree->current = 0; - - stack_pop(context->code_stack, &context->next); - goto meta(context, context->next); -} - -__code stackClear_stub(struct Context* context) { - goto stackClear(context, context->node_stack, &context->data[Tree]->tree); -} - - -/* /\* __code get(struct Context* context, struct Tree* tree) { *\/ */ -/* /\* if (tree->root) { *\/ */ -/* /\* tree->current = tree->root; *\/ */ - -/* /\* goto meta(context, Search); *\/ */ -/* /\* } *\/ */ - -/* /\* stack_pop(context->code_stack, &context->next); *\/ */ -/* /\* goto meta(context, context->next); *\/ */ -/* /\* } *\/ */ - -/* /\* __code get_stub(struct Context* context) { *\/ */ -/* /\* goto get(context, &context->data[Tree]->tree); *\/ */ -/* /\* } *\/ */ - -/* /\* __code search(struct Context* context, struct Tree* tree, struct Node* node) { *\/ */ -/* /\* compare(context, tree, tree->current->key, node->key); *\/ */ - -/* /\* if (tree->result == EQ) { *\/ */ -/* /\* *node = *tree->current; *\/ */ - -/* /\* goto meta(context, context->next); *\/ */ -/* /\* } else if (tree->result == GT) { *\/ */ -/* /\* tree->current = tree->current->right; *\/ */ -/* /\* } else { *\/ */ -/* /\* tree->current = tree->current->left; *\/ */ -/* /\* } *\/ */ - -/* /\* if (tree->current) *\/ */ -/* /\* goto meta(context, Search); *\/ */ - -/* /\* stack_pop(context->code_stack, &context->next); *\/ */ -/* /\* goto meta(context, context->next); *\/ */ -/* /\* } *\/ */ - -/* /\* __code search_stub(struct Context* context) { *\/ */ -/* /\* goto search(context, &context->data[Tree]->tree, &context->data[Node]->node); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete(struct Context* context, struct Tree* tree) { *\/ */ -/* /\* if (tree->root) { *\/ */ -/* /\* stack_push(context->code_stack, &context->next); *\/ */ -/* /\* context->next = Delete1; *\/ */ -/* /\* goto meta(context, Get); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, context->next); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete_stub(struct Context* context) { *\/ */ -/* /\* goto delete(context, &context->data[Tree]->tree); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete1(struct Context* context, struct Tree* tree, struct Allocate* allocate) { *\/ */ -/* /\* allocate->size = sizeof(struct Node); *\/ */ -/* /\* allocator(context); *\/ */ - -/* /\* struct Node* root = tree->root; *\/ */ - -/* /\* tree->root = &context->data[context->dataNum]->node; *\/ */ -/* /\* tree->current = root; *\/ */ - -/* /\* compare(context, tree, tree->current->key, context->data[Node]->node.key); *\/ */ - -/* /\* goto meta(context, Replace_d1); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete1_stub(struct Context* context) { *\/ */ -/* /\* goto delete1(context, &context->data[Tree]->tree, &context->data[Allocate]->allocate); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete2(struct Context* context, struct Node* current) { *\/ */ -/* /\* if (current->color == Black) { *\/ */ -/* /\* struct Node* child = current->right == NULL ? current->left : current->right; *\/ */ -/* /\* current->color = child == NULL ? Black : child->color; *\/ */ - -/* /\* goto meta(context, DeleteCase1); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, Delete3); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete2_stub(struct Context* context) { *\/ */ -/* /\* goto delete2(context, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete3(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */ -/* /\* struct Node* tmp = current->right == NULL ? current->left : current->right; *\/ */ - -/* /\* if (current->parent) { *\/ */ -/* /\* if (current == current->parent->left) *\/ */ -/* /\* current->parent->left = tmp; *\/ */ -/* /\* else *\/ */ -/* /\* current->parent->right = tmp; *\/ */ -/* /\* } else { *\/ */ -/* /\* tree->root = tmp; *\/ */ -/* /\* } *\/ */ - -/* /\* if (tmp) *\/ */ -/* /\* tmp->parent = current->parent; *\/ */ - -/* /\* if (current->parent == NULL && tmp) *\/ */ -/* /\* tmp->color = Black; *\/ */ - -/* /\* current == current->parent->left ? (current->parent->left = NULL) : (current->parent->right = NULL); *\/ */ - -/* /\* stack_pop(context->code_stack, &context->next); *\/ */ -/* /\* goto meta(context, context->next); *\/ */ -/* /\* } *\/ */ - -/* /\* __code delete3_stub(struct Context* context) { *\/ */ -/* /\* goto delete3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code replaceNodeForDelete1(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode, int result) { *\/ */ -/* /\* *newNode = *oldNode; *\/ */ - -/* /\* if (result == EQ) *\/ */ -/* /\* goto meta(context, Replace_d2); *\/ */ -/* /\* else if (result == GT) *\/ */ -/* /\* tree->current = newNode->right; *\/ */ -/* /\* else *\/ */ -/* /\* tree->current = newNode->left; *\/ */ - -/* /\* tree->current->parent = newNode; *\/ */ - -/* /\* if (tree->current->left == NULL && tree->current->right == NULL) *\/ */ -/* /\* goto meta(context, Delete2); *\/ */ - -/* /\* if (result == GT) *\/ */ -/* /\* newNode->right = context->heap; *\/ */ -/* /\* else if (result == LT) *\/ */ -/* /\* newNode->left = context->heap; *\/ */ - -/* /\* allocator(context); *\/ */ - -/* /\* compare(context, tree, tree->current->key, context->data[Node]->node.key); *\/ */ - -/* /\* goto meta(context, Replace_d1); *\/ */ -/* /\* } *\/ */ - -/* /\* __code replaceNodeForDelete1_stub(struct Context* context) { *\/ */ -/* /\* goto replaceNodeForDelete1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node, context->data[Tree]->tree.result); *\/ */ -/* /\* } *\/ */ - -/* /\* __code replaceNodeForDelete2(struct Context* context, struct Tree* tree, struct Node* newNode) { *\/ */ -/* /\* if (tree->current->left && tree->current->right) { *\/ */ -/* /\* newNode->left->parent = newNode; *\/ */ -/* /\* tree->current = newNode->left; *\/ */ -/* /\* newNode->left = context->heap; *\/ */ -/* /\* tree->deleted = newNode; *\/ */ - -/* /\* allocator(context); *\/ */ -/* /\* tree->current->parent = newNode; *\/ */ - -/* /\* goto meta(context, FindMax1); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, Delete2); *\/ */ -/* /\* } *\/ */ - -/* /\* __code replaceNodeForDelete2_stub(struct Context* context) { *\/ */ -/* /\* goto replaceNodeForDelete2(context, &context->data[Tree]->tree, &context->data[context->dataNum]->node); *\/ */ -/* /\* } *\/ */ - -/* /\* __code findMax1(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode) { *\/ */ -/* /\* *newNode = *oldNode; *\/ */ - -/* /\* if (newNode->right) *\/ */ -/* /\* goto meta(context, FindMax2); *\/ */ - -/* /\* tree->deleted->key = newNode->key; *\/ */ -/* /\* tree->deleted->value = newNode->value; *\/ */ - -/* /\* tree->current = newNode; *\/ */ - -/* /\* goto meta(context, Delete2); *\/ */ -/* /\* } *\/ */ - -/* /\* __code findMax1_stub(struct Context* context) { *\/ */ -/* /\* goto findMax1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node); *\/ */ -/* /\* } *\/ */ - - -/* /\* __code findMax2(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode) { *\/ */ -/* /\* *newNode = *oldNode; *\/ */ - -/* /\* if (newNode->right->right) { *\/ */ -/* /\* tree->current = newNode->right; *\/ */ -/* /\* newNode->right = context->heap; *\/ */ - -/* /\* allocator(context); *\/ */ -/* /\* tree->current->parent = newNode; *\/ */ - -/* /\* goto meta(context, FindMax2); *\/ */ -/* /\* } *\/ */ - -/* /\* tree->deleted->key = newNode->right->key; *\/ */ -/* /\* tree->deleted->value = newNode->right->value; *\/ */ - -/* /\* tree->current = newNode; *\/ */ - -/* /\* goto meta(context, Delete2); *\/ */ -/* /\* } *\/ */ - -/* /\* __code findMax2_stub(struct Context* context) { *\/ */ -/* /\* goto findMax2(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase1(struct Context* context, struct Node* current) { *\/ */ -/* /\* if (current->parent) *\/ */ -/* /\* goto meta(context, DeleteCase2); *\/ */ - -/* /\* goto meta(context, Delete3); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase1_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase1(context, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase2(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */ -/* /\* struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */ - -/* /\* if ((sibling == NULL ? Black : sibling->color) == Red) { *\/ */ -/* /\* current->parent->color = Red; *\/ */ -/* /\* sibling->color = Black; *\/ */ - -/* /\* current == current->parent->left ? (current->parent->left = context->heap) : (current->parent->right = context->heap); *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* context->data[context->dataNum]->node = *sibling; *\/ */ - -/* /\* tree->current = current->parent; *\/ */ - -/* /\* context->next = DeleteCase3; *\/ */ -/* /\* stack_push(context->code_stack, &context->next); *\/ */ - -/* /\* if (current == current->parent->left) *\/ */ -/* /\* goto meta(context, RotateL); *\/ */ -/* /\* else *\/ */ -/* /\* goto meta(context, RotateR); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, DeleteCase3); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase2_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase2(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase3(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */ -/* /\* struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */ - -/* /\* if (current->parent->color == Black && *\/ */ -/* /\* (sibling == NULL ? Black : sibling->color) == Black && *\/ */ -/* /\* (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */ -/* /\* (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */ -/* /\* sibling->color = Red; *\/ */ - -/* /\* tree->current = current->parent; *\/ */ -/* /\* goto meta(context, DeleteCase1); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, DeleteCase4); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase3_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase4(struct Context* context, struct Node* current) { *\/ */ -/* /\* struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */ - -/* /\* if (current->parent->color == Red && *\/ */ -/* /\* (sibling == NULL ? Black : sibling->color) == Black && *\/ */ -/* /\* (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */ -/* /\* (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */ -/* /\* sibling->color = Red; *\/ */ -/* /\* current->parent->color = Black; *\/ */ - -/* /\* goto meta(context, Delete3); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, DeleteCase5); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase4_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase4(context, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase5(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */ -/* /\* struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */ -/* /\* sibling->parent = current->parent; *\/ */ - -/* /\* if (current == current->parent->left && *\/ */ -/* /\* (sibling == NULL ? Black : sibling->color) == Black && *\/ */ -/* /\* (sibling->left == NULL ? Black : sibling->left->color) == Red && *\/ */ -/* /\* (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */ -/* /\* sibling->color = Red; *\/ */ -/* /\* sibling->left->color = Black; *\/ */ - -/* /\* sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* struct Node* tmp = &context->data[context->dataNum]->node; *\/ */ -/* /\* *tmp = *sibling; *\/ */ -/* /\* tmp->parent = current; *\/ */ - -/* /\* tmp->left = context->heap; *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* context->data[context->dataNum]->node = *sibling->left; *\/ */ -/* /\* context->data[context->dataNum]->node.parent = tmp; *\/ */ - -/* /\* tree->current = tmp; *\/ */ - -/* /\* context->next = DeleteCase6; *\/ */ -/* /\* stack_push(context->code_stack, &context->next); *\/ */ - -/* /\* goto meta(context, RotateR); *\/ */ -/* /\* } else if (current == current->parent->right && *\/ */ -/* /\* (sibling == NULL ? Black : sibling->color) == Black && *\/ */ -/* /\* (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */ -/* /\* (sibling->right == NULL ? Black : sibling->right->color) == Red) { *\/ */ -/* /\* sibling->color = Red; *\/ */ -/* /\* sibling->right->color = Black; *\/ */ - -/* /\* sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* struct Node* tmp = &context->data[context->dataNum]->node; *\/ */ -/* /\* *tmp = *sibling; *\/ */ -/* /\* tmp->parent = current; *\/ */ - -/* /\* tmp->right = context->heap; *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* context->data[context->dataNum]->node = *sibling->right; *\/ */ -/* /\* context->data[context->dataNum]->node.parent = tmp; *\/ */ - -/* /\* tree->current = tmp; *\/ */ - -/* /\* context->next = DeleteCase6; *\/ */ -/* /\* stack_push(context->code_stack, &context->next); *\/ */ -/* /\* goto meta(context, RotateL); *\/ */ -/* /\* } *\/ */ - -/* /\* goto meta(context, DeleteCase6); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase5_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase5(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase6(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */ -/* /\* struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */ - -/* /\* sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */ -/* /\* allocator(context); *\/ */ -/* /\* struct Node* tmp = &context->data[context->dataNum]->node; *\/ */ -/* /\* *tmp = *sibling; *\/ */ -/* /\* tmp->parent = current; *\/ */ - -/* /\* tmp->color = current->parent->color; *\/ */ -/* /\* current->parent->color = Black; *\/ */ - -/* /\* context->next = Delete3; *\/ */ -/* /\* stack_push(context->code_stack, &context->next); *\/ */ - -/* /\* if (current == current->parent->left) { *\/ */ -/* /\* tmp->right->color = Black; *\/ */ -/* /\* tree->current = current->parent; *\/ */ - -/* /\* goto meta(context, RotateL); *\/ */ -/* /\* } else { *\/ */ -/* /\* tmp->left->color = Black; *\/ */ -/* /\* tree->current = current->parent; *\/ */ - -/* /\* goto meta(context, RotateR); *\/ */ -/* /\* } *\/ */ -/* /\* } *\/ */ - -/* /\* __code deleteCase6_stub(struct Context* context) { *\/ */ -/* /\* goto deleteCase6(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */ -/* /\* } *\/ */
--- a/src/llrb/llrbContext.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -#include <stdlib.h> - -#include "llrbContext.h" - -extern __code code1_stub(struct Context*); -extern __code code2_stub(struct Context*); -extern __code code3_stub(struct Context*); -extern __code code4(struct Context*); -extern __code code5(struct Context*); -extern __code find(struct Context*); -extern __code not_find(struct Context*); -extern __code code6(struct Context*); -extern __code meta(struct Context*); -extern __code put_stub(struct Context*); -extern __code replaceNode_stub(struct Context*); -extern __code insertNode_stub(struct Context*); -extern __code rotateLeft_stub(struct Context*); -extern __code rotateRight_stub(struct Context*); -extern __code colorFlip_stub(struct Context*); -extern __code fixUp_stub(struct Context*); -extern __code changeReference_stub(struct Context*); -extern __code insert1_stub(struct Context*); -extern __code insert2_stub(struct Context*); -extern __code insert3_stub(struct Context*); -extern __code insert4_stub(struct Context*); -extern __code insert4_1_stub(struct Context*); -extern __code insert4_2_stub(struct Context*); -extern __code insert5_stub(struct Context*); -extern __code stackClear_stub(struct Context*); -extern __code get_stub(struct Context*); -extern __code search_stub(struct Context*); -extern __code delete_stub(struct Context*); -extern __code delete1_stub(struct Context*); -extern __code delete2_stub(struct Context*); -extern __code delete3_stub(struct Context*); -extern __code replaceNodeForDelete1_stub(struct Context*); -extern __code replaceNodeForDelete2_stub(struct Context*); -extern __code findMax1_stub(struct Context*); -extern __code findMax2_stub(struct Context*); -extern __code deleteCase1_stub(struct Context*); -extern __code deleteCase2_stub(struct Context*); -extern __code deleteCase3_stub(struct Context*); -extern __code deleteCase4_stub(struct Context*); -extern __code deleteCase5_stub(struct Context*); -extern __code deleteCase6_stub(struct Context*); -extern __code exit_code(struct Context*); - -__code initLLRBContext(struct Context* context, int num) { - context->heapLimit = sizeof(union Data)*ALLOCATE_SIZE; - context->code = malloc(sizeof(__code*)*ALLOCATE_SIZE); - context->data = malloc(sizeof(union Data*)*ALLOCATE_SIZE); - context->heapStart = malloc(context->heapLimit); - - context->codeNum = Exit; - - context->code[Code1] = code1_stub; - context->code[Code2] = code2_stub; - context->code[Code3] = code3_stub; - context->code[Code4] = code4; - context->code[Code5] = code5; - context->code[Find] = find; - context->code[Not_find] = not_find; - context->code[Code6] = code6; - context->code[Put] = put_stub; - context->code[Replace] = replaceNode_stub; - context->code[Insert] = insertNode_stub; - context->code[RotateL] = rotateLeft_stub; - context->code[RotateR] = rotateRight_stub; - context->code[InsertCase1] = insert1_stub; - context->code[InsertCase2] = insert2_stub; - context->code[InsertCase3] = insert3_stub; - context->code[InsertCase4] = insert4_stub; - context->code[InsertCase4_1] = insert4_1_stub; - context->code[InsertCase4_2] = insert4_2_stub; - context->code[InsertCase5] = insert5_stub; - context->code[StackClear] = stackClear_stub; - /* context->code[Get] = get_stub; */ - /* context->code[Search] = search_stub; */ - /* context->code[Delete] = delete_stub; */ - /* context->code[Delete1] = delete1_stub; */ - /* context->code[Delete2] = delete2_stub; */ - /* context->code[Delete3] = delete3_stub; */ - /* context->code[Replace_d1] = replaceNodeForDelete1_stub; */ - /* context->code[Replace_d2] = replaceNodeForDelete2_stub; */ - /* context->code[FindMax1] = findMax1_stub; */ - /* context->code[FindMax2] = findMax2_stub; */ - /* context->code[DeleteCase1] = deleteCase1_stub; */ - /* context->code[DeleteCase2] = deleteCase2_stub; */ - /* context->code[DeleteCase3] = deleteCase3_stub; */ - /* context->code[DeleteCase4] = deleteCase4_stub; */ - /* context->code[DeleteCase5] = deleteCase5_stub; */ - /* context->code[DeleteCase6] = deleteCase6_stub; */ - context->code[Exit] = exit_code; - - context->heap = context->heapStart; - - context->data[Allocate] = context->heap; - context->heap += sizeof(struct Allocate); - - context->data[Tree] = context->heap; - context->heap += sizeof(struct Tree); - - context->data[Node] = context->heap; - context->heap += sizeof(struct Node); - - context->dataNum = Node; - - struct Tree* tree = &context->data[Tree]->tree; - tree->root = 0; - tree->current = 0; - tree->deleted = 0; - - context->node_stack = stack_init(sizeof(struct Node*), 100); - context->code_stack = stack_init(sizeof(enum Code), 100); -}
--- a/src/llrb/main.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/time.h> - -#include "llrbContext.h" -#include "origin_cs.h" - -static double st_time; -static double ed_time; -static clock_t c1,c2; - -int num; - -extern __code initLLRBContext(struct Context* context, int); -extern void allocator(struct Context* context); - -static double getTime() { - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec + (double)tv.tv_usec*1e-6; -} - -void print_tree(struct Node* node, int n) { - if (node != 0) { - print_tree(node->left, n+1); - for (int i=0;i<n;i++) - printf(" "); - printf("key=%d value=%d color=%s\t%p\n", node->key, node->value,/* n, */node->color==0? "R":"B", node); - print_tree(node->right, n+1); - } -} - -/* -__code code1(Allocate allocate) { - allocate.size = sizeof(long); - allocate.next = Code2; - goto Allocate(allocate); -} -*/ - -__code code1(struct Context* context, struct Allocate *allocate) { - allocate->size = sizeof(struct Count); - allocator(context); - goto meta(context, Code2); -} - -__code code1_stub(struct Context* context) { - goto code1(context, &context->data[Allocate]->allocate); -} - -/* -__code code2(Allocate allocate, Count count) { - count.count = 0; - goto code3(count); -} -*/ - -__code code2(struct Context* context, struct Count* count) { - count->i = num; - goto meta(context, Code3); -} - -__code code2_stub(struct Context* context) { - goto code2(context, &context->data[context->dataNum]->count); -} - -__code code3(struct Context* context, struct Node* node, struct Count* count) { - if (count->i == 0) { - goto meta(context, Code4); - } - - print_tree(context->data[Tree]->tree.root, 0); - puts(""); - context->next = Code3; - node->key = rand()%100+1; - node->value = count->i; - - count->i--; - goto meta(context, Put); -} - -__code code3_stub(struct Context* context) { - goto code3(context, &context->data[Node]->node, &context->data[3]->count); -} - -__code code4(struct Context* context) { - puts("---before---"); - print_tree(context->data[Tree]->tree.root, 0); - - struct Node* node = &context->data[Node]->node; - node->key = 4; - - context->next = Code5; - - goto meta(context, Exit); -} - -__code code5(struct Context* context) { - puts("---after---"); - print_tree(context->data[Tree]->tree.root, 0); - puts("--Number of Data--"); - printf("%d\n", context->dataNum); - - goto meta(context, Exit); -} - -__code find(struct Context* context) { - context->data[Node]->node.key = 2; - context->next = Not_find; - - goto meta(context, Get); -} - -__code not_find(struct Context* context) { - context->data[Node]->node.key = 10; - context->next = Code6; - - printf("%p\n", context->data[Tree]->tree.current); - context->data[Tree]->tree.current = 0; - goto meta(context, Get); -} - -__code code6(struct Context* context) { - printf("%p\n", context->data[Tree]->tree.current); - - stack_free(context->node_stack); - - goto meta(context, Exit); -} - -int main(int argc, char** argv) { - num = (int)atoi(argv[1]); - struct Context* context = (struct Context*)malloc(sizeof(struct Context)); - initLLRBContext(context, num); - goto start_code(context, Code1); -}
--- a/src/llrb/origin_cs.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -#include <stdlib.h> -#include "llrbContext.h" - -__code meta(struct Context* context, enum Code next) { - goto (context->code[next])(context); -} - -__code start_code(struct Context* context, enum Code next) { - goto meta(context, next); -} - -__code exit_code(struct Context* context) { - free(context->code); - free(context->data); - free(context->heapStart); - goto exit(0); -}
--- a/src/llrb/stack.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -#include <string.h> -#include "stack.h" - -stack_ptr stack_init(size_t size, int max) { - stack_ptr stack_ptr; - - if ((stack_ptr = calloc(1, sizeof(stack))) == NULL) - return NULL; - - if ((stack_ptr->data = calloc(max, size)) == NULL) { - free(stack_ptr); - return NULL; - } - - stack_ptr->size = size; - stack_ptr->max = max; - stack_ptr->num = 0; - - return stack_ptr; -} - -stack_ptr stack_realloc(stack_ptr stack_ptr, int max) { - if (stack_ptr == NULL) - return NULL; - - if ((stack_ptr->data = realloc(stack_ptr->data, stack_ptr->size*max)) == NULL) - return NULL; - - stack_ptr->max = max; - - return stack_ptr; -} - -void stack_free(stack_ptr stack_ptr) { - if (stack_ptr != NULL && stack_ptr->data != NULL) { - free(stack_ptr->data); - free(stack_ptr); - } -} - -int stack_push(stack_ptr stack_ptr, void* data) { - if (stack_ptr->max <= stack_ptr->num) - return -1; - - memcpy((char*)stack_ptr->data+stack_ptr->num*stack_ptr->size, data, stack_ptr->size); - stack_ptr->num++; - - return 0; -} - -int stack_pop(stack_ptr stack_ptr, void* data) { - if (stack_ptr->num == 0) - return -1; - - stack_ptr->num--; - - memcpy(data, (char*)stack_ptr->data+stack_ptr->num*stack_ptr->size, stack_ptr->size); - - return 0; -} - -int isMax(const stack_ptr stack_ptr) { - return stack_ptr->max<=stack_ptr->num; -} - -int isEmpty(const stack_ptr stack_ptr) { - return stack_ptr->num<=0; -}
--- a/src/test/CMakeLists.txt Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -add_definitions("-Wall -g -O0") - -set(NVCCFLAG "-std=c++11" "-g" "-O0" ) - -include_directories("/usr/local/cuda/include") - -set(CMAKE_C_COMPILER $ENV{CBC_COMPILER}) - -set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib") -# for linux use -lcuda - -SET( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CUDA_LINK_FLAGS}" ) - -cmake_minimum_required(VERSION 2.8) -find_package(CUDA REQUIRED) - -add_custom_command(OUTPUT main.o - DEPENDS main.cu - COMMAND nvcc ${NVCCFLAG} -c main.cu -) - -add_executable(cudaExmple main.o test.c) - -add_custom_command(OUTPUT multiply.ptx - DEPENDS multiply.cu - COMMAND nvcc ${NVCCFLAG} -c multiply.cu -ptx -) - -add_executable(twiceExample twice.cc multiply.ptx ) - -add_custom_command(OUTPUT vectorAdd_kernel.ptx - DEPENDS vectorAdd_kernel.cu - COMMAND nvcc ${NVCCFLAG} -c vectorAdd_kernel.cu -ptx -) - -add_executable(vectorExample vectorAddDrv.cc vectorAdd_kernel.ptx) - -# to compile these, comment out CMAKE_C_COMPILER -# cuda_add_executable(Cudasample_gpu Cudasample_gpu.cu) -# cuda_add_executable(Cudasample_cpu Cudasample_cpu.cu) - -# target_link_libraries(twiceExample ${CUDA_LIBRARIES} ${MPI_LIBRARIES} ${OPENGL_LIBRARIES}) -
--- a/src/test/Cudasample_cpu.cu Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -#include <stdio.h> - -int main(void) -{ - int b; - - for (b = 99; b >= 0; b--) { - switch (b) { - case 0: - printf("No more bottles of beer on the wall, no more bottles of beer.\n"); - printf("Go to the store and buy some more, 99 bottles of beer on the wall.\n"); - break; - case 1: - printf("1 bottle of beer on the wall, 1 bottle of beer.\n"); - printf("Take one down and pass it around, no more bottles of beer on the wall\n"); - break; - default: - printf("%d bottles of beer on the wall, %d bottles of beer.\n", b, b); - printf("Take one down and pass it around, %d %s of beer on the wall.\n" - ,b - 1 - ,((b - 1) > 1)? "bottles" : "bottle"); - break; - } - } - return 0; -} \ No newline at end of file
--- a/src/test/Cudasample_gpu.cu Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -#include <stdio.h> - -#define SIZE_TEXT (sizeof(text)-1) -#define SIZE_END (sizeof(end)-1) - -__device__ char text[] = -"__ bottles of beer on the wall, __ bottles of beer!\n" -"Take one down, and pass it around, ## bottles of beer on the wall!\n\n"; - -__device__ char end[] = -"01 bottle of beer on the wall, 01 bottle of beer.\n" -"Take one down and pass it around, no more bottles of beer on the wall.\n" -"\n" -"No more bottles of beer on the wall, no more bottles of beer.\n" -"Go to the store and buy some more, 99 bottles of beer on the wall."; - - -__global__ -void bottle99(char *addr){ - int x = threadIdx.x; - addr += x * SIZE_TEXT; - int bottle = 99 - x; - if (bottle == 1) { - for (int i=0; i<SIZE_END; i++) { - addr[i] = end[i]; - } - addr[SIZE_END] = '\0'; - } else { - char c1 = (bottle/10) + '0'; - char c2 = (bottle%10) + '0'; - - char d1 = ((bottle-1)/10) + '0'; - char d2 = ((bottle-1)%10) + '0'; - - for (int i=0; i<SIZE_TEXT; i++) { - int c = text[i]; - if (c == '_') { - addr[i] = c1; - addr[i+1] = c2; - i++; - } else if (c == '#') { - - addr[i] = d1; - addr[i+1] = d2; - i++; - } else { - - addr[i] = text[i]; - } - } - } -} - -int main() -{ - char *buffer; - char *d_buffer; - - int size = SIZE_TEXT * 98 + SIZE_END + 1; - - buffer = new char[size]; - cudaMalloc((void**)&d_buffer, size); - - bottle99<<<1, 99>>>(d_buffer); - - cudaMemcpy(buffer, d_buffer, size, cudaMemcpyDeviceToHost); - cudaFree(d_buffer); - - puts(buffer); - free(buffer); - } - \ No newline at end of file
--- a/src/test/OpenCL_gpu.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> - -#ifdef __APPLE__ -#include <OpenCL/opencl.h> -#else -#include <CL/cl.h> -#endif - -#define MEM_SIZE (128) -#define MAX_SOURCE_SIZE (0x100000) - -int main() -{ - cl_device_id device_id = NULL; - cl_context context = NULL; - cl_command_queue command_queue = NULL; - cl_mem memobj = NULL; - cl_program program = NULL; - cl_kernel kernel = NULL; - cl_platform_id platform_id = NULL; - cl_uint ret_num_devices; - cl_uint ret_num_platforms; - cl_int ret; - - char string[MEM_SIZE]; - - FILE *fp; - char fileName[] = "./hello.cl"; - char *source_str; - size_t source_size; - -/* Load the source code containing the kernel*/ - fp = fopen(fileName, "r"); - if (!fp) { - fprintf(stderr, "Failed to load kernel.\n"); - exit(1); - } - source_str = (char*)malloc(MAX_SOURCE_SIZE); - source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); - fclose(fp); - -/* Get Platform and Device Info */ - ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); - ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); - -/* Create OpenCL context */ - context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); - -/* Create Command Queue */ - command_queue = clCreateCommandQueue(context, device_id, 0, &ret); - -/* Create Memory Buffer */ - memobj = clCreateBuffer(context, CL_MEM_READ_WRITE,MEM_SIZE * sizeof(char), NULL, &ret); - -/* Create Kernel Program from the source */ - program = clCreateProgramWithSource(context, 1, (const char **)&source_str, - (const size_t *)&source_size, &ret); - -/* Build Kernel Program */ - ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); - -/* Create OpenCL Kernel */ - kernel = clCreateKernel(program, "hello", &ret); - -/* Set OpenCL Kernel Parameters */ - ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj); - -/* Execute OpenCL Kernel */ - ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL); - -/* Copy results from the memory buffer */ - ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, - MEM_SIZE * sizeof(char),string, 0, NULL, NULL); - -/* Display Result */ - puts(string); - -/* Finalization */ - ret = clFlush(command_queue); - ret = clFinish(command_queue); - ret = clReleaseKernel(kernel); - ret = clReleaseProgram(program); - ret = clReleaseMemObject(memobj); - ret = clReleaseCommandQueue(command_queue); - ret = clReleaseContext(context); - - free(source_str); - - return 0; -}
--- a/src/test/helper_cuda.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1283 +0,0 @@ -/** - * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -//////////////////////////////////////////////////////////////////////////////// -// These are CUDA Helper functions for initialization and error checking - -#ifndef HELPER_CUDA_H -#define HELPER_CUDA_H - -#pragma once - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include "helper_string.h" - -#ifndef EXIT_WAIVED -#define EXIT_WAIVED 2 -#endif - -// Note, it is required that your SDK sample to include the proper header files, please -// refer the CUDA examples for examples of the needed CUDA headers, which may change depending -// on which CUDA functions are used. - -// CUDA Runtime error messages -#ifdef __DRIVER_TYPES_H__ -static const char *_cudaGetErrorEnum(cudaError_t error) -{ - switch (error) - { - case cudaSuccess: - return "cudaSuccess"; - - case cudaErrorMissingConfiguration: - return "cudaErrorMissingConfiguration"; - - case cudaErrorMemoryAllocation: - return "cudaErrorMemoryAllocation"; - - case cudaErrorInitializationError: - return "cudaErrorInitializationError"; - - case cudaErrorLaunchFailure: - return "cudaErrorLaunchFailure"; - - case cudaErrorPriorLaunchFailure: - return "cudaErrorPriorLaunchFailure"; - - case cudaErrorLaunchTimeout: - return "cudaErrorLaunchTimeout"; - - case cudaErrorLaunchOutOfResources: - return "cudaErrorLaunchOutOfResources"; - - case cudaErrorInvalidDeviceFunction: - return "cudaErrorInvalidDeviceFunction"; - - case cudaErrorInvalidConfiguration: - return "cudaErrorInvalidConfiguration"; - - case cudaErrorInvalidDevice: - return "cudaErrorInvalidDevice"; - - case cudaErrorInvalidValue: - return "cudaErrorInvalidValue"; - - case cudaErrorInvalidPitchValue: - return "cudaErrorInvalidPitchValue"; - - case cudaErrorInvalidSymbol: - return "cudaErrorInvalidSymbol"; - - case cudaErrorMapBufferObjectFailed: - return "cudaErrorMapBufferObjectFailed"; - - case cudaErrorUnmapBufferObjectFailed: - return "cudaErrorUnmapBufferObjectFailed"; - - case cudaErrorInvalidHostPointer: - return "cudaErrorInvalidHostPointer"; - - case cudaErrorInvalidDevicePointer: - return "cudaErrorInvalidDevicePointer"; - - case cudaErrorInvalidTexture: - return "cudaErrorInvalidTexture"; - - case cudaErrorInvalidTextureBinding: - return "cudaErrorInvalidTextureBinding"; - - case cudaErrorInvalidChannelDescriptor: - return "cudaErrorInvalidChannelDescriptor"; - - case cudaErrorInvalidMemcpyDirection: - return "cudaErrorInvalidMemcpyDirection"; - - case cudaErrorAddressOfConstant: - return "cudaErrorAddressOfConstant"; - - case cudaErrorTextureFetchFailed: - return "cudaErrorTextureFetchFailed"; - - case cudaErrorTextureNotBound: - return "cudaErrorTextureNotBound"; - - case cudaErrorSynchronizationError: - return "cudaErrorSynchronizationError"; - - case cudaErrorInvalidFilterSetting: - return "cudaErrorInvalidFilterSetting"; - - case cudaErrorInvalidNormSetting: - return "cudaErrorInvalidNormSetting"; - - case cudaErrorMixedDeviceExecution: - return "cudaErrorMixedDeviceExecution"; - - case cudaErrorCudartUnloading: - return "cudaErrorCudartUnloading"; - - case cudaErrorUnknown: - return "cudaErrorUnknown"; - - case cudaErrorNotYetImplemented: - return "cudaErrorNotYetImplemented"; - - case cudaErrorMemoryValueTooLarge: - return "cudaErrorMemoryValueTooLarge"; - - case cudaErrorInvalidResourceHandle: - return "cudaErrorInvalidResourceHandle"; - - case cudaErrorNotReady: - return "cudaErrorNotReady"; - - case cudaErrorInsufficientDriver: - return "cudaErrorInsufficientDriver"; - - case cudaErrorSetOnActiveProcess: - return "cudaErrorSetOnActiveProcess"; - - case cudaErrorInvalidSurface: - return "cudaErrorInvalidSurface"; - - case cudaErrorNoDevice: - return "cudaErrorNoDevice"; - - case cudaErrorECCUncorrectable: - return "cudaErrorECCUncorrectable"; - - case cudaErrorSharedObjectSymbolNotFound: - return "cudaErrorSharedObjectSymbolNotFound"; - - case cudaErrorSharedObjectInitFailed: - return "cudaErrorSharedObjectInitFailed"; - - case cudaErrorUnsupportedLimit: - return "cudaErrorUnsupportedLimit"; - - case cudaErrorDuplicateVariableName: - return "cudaErrorDuplicateVariableName"; - - case cudaErrorDuplicateTextureName: - return "cudaErrorDuplicateTextureName"; - - case cudaErrorDuplicateSurfaceName: - return "cudaErrorDuplicateSurfaceName"; - - case cudaErrorDevicesUnavailable: - return "cudaErrorDevicesUnavailable"; - - case cudaErrorInvalidKernelImage: - return "cudaErrorInvalidKernelImage"; - - case cudaErrorNoKernelImageForDevice: - return "cudaErrorNoKernelImageForDevice"; - - case cudaErrorIncompatibleDriverContext: - return "cudaErrorIncompatibleDriverContext"; - - case cudaErrorPeerAccessAlreadyEnabled: - return "cudaErrorPeerAccessAlreadyEnabled"; - - case cudaErrorPeerAccessNotEnabled: - return "cudaErrorPeerAccessNotEnabled"; - - case cudaErrorDeviceAlreadyInUse: - return "cudaErrorDeviceAlreadyInUse"; - - case cudaErrorProfilerDisabled: - return "cudaErrorProfilerDisabled"; - - case cudaErrorProfilerNotInitialized: - return "cudaErrorProfilerNotInitialized"; - - case cudaErrorProfilerAlreadyStarted: - return "cudaErrorProfilerAlreadyStarted"; - - case cudaErrorProfilerAlreadyStopped: - return "cudaErrorProfilerAlreadyStopped"; - - /* Since CUDA 4.0*/ - case cudaErrorAssert: - return "cudaErrorAssert"; - - case cudaErrorTooManyPeers: - return "cudaErrorTooManyPeers"; - - case cudaErrorHostMemoryAlreadyRegistered: - return "cudaErrorHostMemoryAlreadyRegistered"; - - case cudaErrorHostMemoryNotRegistered: - return "cudaErrorHostMemoryNotRegistered"; - - /* Since CUDA 5.0 */ - case cudaErrorOperatingSystem: - return "cudaErrorOperatingSystem"; - - case cudaErrorPeerAccessUnsupported: - return "cudaErrorPeerAccessUnsupported"; - - case cudaErrorLaunchMaxDepthExceeded: - return "cudaErrorLaunchMaxDepthExceeded"; - - case cudaErrorLaunchFileScopedTex: - return "cudaErrorLaunchFileScopedTex"; - - case cudaErrorLaunchFileScopedSurf: - return "cudaErrorLaunchFileScopedSurf"; - - case cudaErrorSyncDepthExceeded: - return "cudaErrorSyncDepthExceeded"; - - case cudaErrorLaunchPendingCountExceeded: - return "cudaErrorLaunchPendingCountExceeded"; - - case cudaErrorNotPermitted: - return "cudaErrorNotPermitted"; - - case cudaErrorNotSupported: - return "cudaErrorNotSupported"; - - /* Since CUDA 6.0 */ - case cudaErrorHardwareStackError: - return "cudaErrorHardwareStackError"; - - case cudaErrorIllegalInstruction: - return "cudaErrorIllegalInstruction"; - - case cudaErrorMisalignedAddress: - return "cudaErrorMisalignedAddress"; - - case cudaErrorInvalidAddressSpace: - return "cudaErrorInvalidAddressSpace"; - - case cudaErrorInvalidPc: - return "cudaErrorInvalidPc"; - - case cudaErrorIllegalAddress: - return "cudaErrorIllegalAddress"; - - /* Since CUDA 6.5*/ - case cudaErrorInvalidPtx: - return "cudaErrorInvalidPtx"; - - case cudaErrorInvalidGraphicsContext: - return "cudaErrorInvalidGraphicsContext"; - - case cudaErrorStartupFailure: - return "cudaErrorStartupFailure"; - - case cudaErrorApiFailureBase: - return "cudaErrorApiFailureBase"; - - /* Since CUDA 8.0*/ - case cudaErrorNvlinkUncorrectable : - return "cudaErrorNvlinkUncorrectable"; - } - - return "<unknown>"; -} -#endif - -#ifdef __cuda_cuda_h__ -// CUDA Driver API errors -const char *_cudaGetErrorEnum(CUresult error) -{ - switch (error) - { - case CUDA_SUCCESS: - return "CUDA_SUCCESS"; - - case CUDA_ERROR_INVALID_VALUE: - return "CUDA_ERROR_INVALID_VALUE"; - - case CUDA_ERROR_OUT_OF_MEMORY: - return "CUDA_ERROR_OUT_OF_MEMORY"; - - case CUDA_ERROR_NOT_INITIALIZED: - return "CUDA_ERROR_NOT_INITIALIZED"; - - case CUDA_ERROR_DEINITIALIZED: - return "CUDA_ERROR_DEINITIALIZED"; - - case CUDA_ERROR_PROFILER_DISABLED: - return "CUDA_ERROR_PROFILER_DISABLED"; - - case CUDA_ERROR_PROFILER_NOT_INITIALIZED: - return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; - - case CUDA_ERROR_PROFILER_ALREADY_STARTED: - return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; - - case CUDA_ERROR_PROFILER_ALREADY_STOPPED: - return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; - - case CUDA_ERROR_NO_DEVICE: - return "CUDA_ERROR_NO_DEVICE"; - - case CUDA_ERROR_INVALID_DEVICE: - return "CUDA_ERROR_INVALID_DEVICE"; - - case CUDA_ERROR_INVALID_IMAGE: - return "CUDA_ERROR_INVALID_IMAGE"; - - case CUDA_ERROR_INVALID_CONTEXT: - return "CUDA_ERROR_INVALID_CONTEXT"; - - case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: - return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; - - case CUDA_ERROR_MAP_FAILED: - return "CUDA_ERROR_MAP_FAILED"; - - case CUDA_ERROR_UNMAP_FAILED: - return "CUDA_ERROR_UNMAP_FAILED"; - - case CUDA_ERROR_ARRAY_IS_MAPPED: - return "CUDA_ERROR_ARRAY_IS_MAPPED"; - - case CUDA_ERROR_ALREADY_MAPPED: - return "CUDA_ERROR_ALREADY_MAPPED"; - - case CUDA_ERROR_NO_BINARY_FOR_GPU: - return "CUDA_ERROR_NO_BINARY_FOR_GPU"; - - case CUDA_ERROR_ALREADY_ACQUIRED: - return "CUDA_ERROR_ALREADY_ACQUIRED"; - - case CUDA_ERROR_NOT_MAPPED: - return "CUDA_ERROR_NOT_MAPPED"; - - case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: - return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; - - case CUDA_ERROR_NOT_MAPPED_AS_POINTER: - return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; - - case CUDA_ERROR_ECC_UNCORRECTABLE: - return "CUDA_ERROR_ECC_UNCORRECTABLE"; - - case CUDA_ERROR_UNSUPPORTED_LIMIT: - return "CUDA_ERROR_UNSUPPORTED_LIMIT"; - - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: - return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; - - case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: - return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; - - case CUDA_ERROR_INVALID_PTX: - return "CUDA_ERROR_INVALID_PTX"; - - case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: - return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; - - case CUDA_ERROR_NVLINK_UNCORRECTABLE: - return "CUDA_ERROR_NVLINK_UNCORRECTABLE"; - - case CUDA_ERROR_INVALID_SOURCE: - return "CUDA_ERROR_INVALID_SOURCE"; - - case CUDA_ERROR_FILE_NOT_FOUND: - return "CUDA_ERROR_FILE_NOT_FOUND"; - - case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: - return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; - - case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: - return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; - - case CUDA_ERROR_OPERATING_SYSTEM: - return "CUDA_ERROR_OPERATING_SYSTEM"; - - case CUDA_ERROR_INVALID_HANDLE: - return "CUDA_ERROR_INVALID_HANDLE"; - - case CUDA_ERROR_NOT_FOUND: - return "CUDA_ERROR_NOT_FOUND"; - - case CUDA_ERROR_NOT_READY: - return "CUDA_ERROR_NOT_READY"; - - case CUDA_ERROR_ILLEGAL_ADDRESS: - return "CUDA_ERROR_ILLEGAL_ADDRESS"; - - case CUDA_ERROR_LAUNCH_FAILED: - return "CUDA_ERROR_LAUNCH_FAILED"; - - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: - return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; - - case CUDA_ERROR_LAUNCH_TIMEOUT: - return "CUDA_ERROR_LAUNCH_TIMEOUT"; - - case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: - return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; - - case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: - return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; - - case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: - return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; - - case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: - return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; - - case CUDA_ERROR_CONTEXT_IS_DESTROYED: - return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; - - case CUDA_ERROR_ASSERT: - return "CUDA_ERROR_ASSERT"; - - case CUDA_ERROR_TOO_MANY_PEERS: - return "CUDA_ERROR_TOO_MANY_PEERS"; - - case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: - return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; - - case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: - return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; - - case CUDA_ERROR_HARDWARE_STACK_ERROR: - return "CUDA_ERROR_HARDWARE_STACK_ERROR"; - - case CUDA_ERROR_ILLEGAL_INSTRUCTION: - return "CUDA_ERROR_ILLEGAL_INSTRUCTION"; - - case CUDA_ERROR_MISALIGNED_ADDRESS: - return "CUDA_ERROR_MISALIGNED_ADDRESS"; - - case CUDA_ERROR_INVALID_ADDRESS_SPACE: - return "CUDA_ERROR_INVALID_ADDRESS_SPACE"; - - case CUDA_ERROR_INVALID_PC: - return "CUDA_ERROR_INVALID_PC"; - - case CUDA_ERROR_NOT_PERMITTED: - return "CUDA_ERROR_NOT_PERMITTED"; - - case CUDA_ERROR_NOT_SUPPORTED: - return "CUDA_ERROR_NOT_SUPPORTED"; - - case CUDA_ERROR_UNKNOWN: - return "CUDA_ERROR_UNKNOWN"; - } - - return "<unknown>"; -} -#endif - -#ifdef CUBLAS_API_H_ -// cuBLAS API errors -static const char *_cudaGetErrorEnum(cublasStatus_t error) -{ - switch (error) - { - case CUBLAS_STATUS_SUCCESS: - return "CUBLAS_STATUS_SUCCESS"; - - case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; - - case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; - - case CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; - - case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; - - case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; - - case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; - - case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; - - case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; - - case CUBLAS_STATUS_LICENSE_ERROR: - return "CUBLAS_STATUS_LICENSE_ERROR"; - } - - return "<unknown>"; -} -#endif - -#ifdef _CUFFT_H_ -// cuFFT API errors -static const char *_cudaGetErrorEnum(cufftResult error) -{ - switch (error) - { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; - - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; - - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; - - case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; - - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; - - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; - - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; - - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; - - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; - - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - - case CUFFT_INVALID_DEVICE: - return "CUFFT_INVALID_DEVICE"; - - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - - case CUFFT_NO_WORKSPACE: - return "CUFFT_NO_WORKSPACE"; - - case CUFFT_NOT_IMPLEMENTED: - return "CUFFT_NOT_IMPLEMENTED"; - - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; - - case CUFFT_NOT_SUPPORTED: - return "CUFFT_NOT_SUPPORTED"; - } - - return "<unknown>"; -} -#endif - - -#ifdef CUSPARSEAPI -// cuSPARSE API errors -static const char *_cudaGetErrorEnum(cusparseStatus_t error) -{ - switch (error) - { - case CUSPARSE_STATUS_SUCCESS: - return "CUSPARSE_STATUS_SUCCESS"; - - case CUSPARSE_STATUS_NOT_INITIALIZED: - return "CUSPARSE_STATUS_NOT_INITIALIZED"; - - case CUSPARSE_STATUS_ALLOC_FAILED: - return "CUSPARSE_STATUS_ALLOC_FAILED"; - - case CUSPARSE_STATUS_INVALID_VALUE: - return "CUSPARSE_STATUS_INVALID_VALUE"; - - case CUSPARSE_STATUS_ARCH_MISMATCH: - return "CUSPARSE_STATUS_ARCH_MISMATCH"; - - case CUSPARSE_STATUS_MAPPING_ERROR: - return "CUSPARSE_STATUS_MAPPING_ERROR"; - - case CUSPARSE_STATUS_EXECUTION_FAILED: - return "CUSPARSE_STATUS_EXECUTION_FAILED"; - - case CUSPARSE_STATUS_INTERNAL_ERROR: - return "CUSPARSE_STATUS_INTERNAL_ERROR"; - - case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; - } - - return "<unknown>"; -} -#endif - -#ifdef CUSOLVER_COMMON_H_ -//cuSOLVER API errors -static const char *_cudaGetErrorEnum(cusolverStatus_t error) -{ - switch(error) - { - case CUSOLVER_STATUS_SUCCESS: - return "CUSOLVER_STATUS_SUCCESS"; - case CUSOLVER_STATUS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_NOT_INITIALIZED"; - case CUSOLVER_STATUS_ALLOC_FAILED: - return "CUSOLVER_STATUS_ALLOC_FAILED"; - case CUSOLVER_STATUS_INVALID_VALUE: - return "CUSOLVER_STATUS_INVALID_VALUE"; - case CUSOLVER_STATUS_ARCH_MISMATCH: - return "CUSOLVER_STATUS_ARCH_MISMATCH"; - case CUSOLVER_STATUS_MAPPING_ERROR: - return "CUSOLVER_STATUS_MAPPING_ERROR"; - case CUSOLVER_STATUS_EXECUTION_FAILED: - return "CUSOLVER_STATUS_EXECUTION_FAILED"; - case CUSOLVER_STATUS_INTERNAL_ERROR: - return "CUSOLVER_STATUS_INTERNAL_ERROR"; - case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; - case CUSOLVER_STATUS_NOT_SUPPORTED : - return "CUSOLVER_STATUS_NOT_SUPPORTED "; - case CUSOLVER_STATUS_ZERO_PIVOT: - return "CUSOLVER_STATUS_ZERO_PIVOT"; - case CUSOLVER_STATUS_INVALID_LICENSE: - return "CUSOLVER_STATUS_INVALID_LICENSE"; - } - - return "<unknown>"; - -} -#endif - -#ifdef CURAND_H_ -// cuRAND API errors -static const char *_cudaGetErrorEnum(curandStatus_t error) -{ - switch (error) - { - case CURAND_STATUS_SUCCESS: - return "CURAND_STATUS_SUCCESS"; - - case CURAND_STATUS_VERSION_MISMATCH: - return "CURAND_STATUS_VERSION_MISMATCH"; - - case CURAND_STATUS_NOT_INITIALIZED: - return "CURAND_STATUS_NOT_INITIALIZED"; - - case CURAND_STATUS_ALLOCATION_FAILED: - return "CURAND_STATUS_ALLOCATION_FAILED"; - - case CURAND_STATUS_TYPE_ERROR: - return "CURAND_STATUS_TYPE_ERROR"; - - case CURAND_STATUS_OUT_OF_RANGE: - return "CURAND_STATUS_OUT_OF_RANGE"; - - case CURAND_STATUS_LENGTH_NOT_MULTIPLE: - return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; - - case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; - - case CURAND_STATUS_LAUNCH_FAILURE: - return "CURAND_STATUS_LAUNCH_FAILURE"; - - case CURAND_STATUS_PREEXISTING_FAILURE: - return "CURAND_STATUS_PREEXISTING_FAILURE"; - - case CURAND_STATUS_INITIALIZATION_FAILED: - return "CURAND_STATUS_INITIALIZATION_FAILED"; - - case CURAND_STATUS_ARCH_MISMATCH: - return "CURAND_STATUS_ARCH_MISMATCH"; - - case CURAND_STATUS_INTERNAL_ERROR: - return "CURAND_STATUS_INTERNAL_ERROR"; - } - - return "<unknown>"; -} -#endif - -#ifdef NV_NPPIDEFS_H -// NPP API errors -static const char *_cudaGetErrorEnum(NppStatus error) -{ - switch (error) - { - case NPP_NOT_SUPPORTED_MODE_ERROR: - return "NPP_NOT_SUPPORTED_MODE_ERROR"; - - case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: - return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; - - case NPP_RESIZE_NO_OPERATION_ERROR: - return "NPP_RESIZE_NO_OPERATION_ERROR"; - - case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: - return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 - - case NPP_BAD_ARG_ERROR: - return "NPP_BAD_ARGUMENT_ERROR"; - - case NPP_COEFF_ERROR: - return "NPP_COEFFICIENT_ERROR"; - - case NPP_RECT_ERROR: - return "NPP_RECTANGLE_ERROR"; - - case NPP_QUAD_ERROR: - return "NPP_QUADRANGLE_ERROR"; - - case NPP_MEM_ALLOC_ERR: - return "NPP_MEMORY_ALLOCATION_ERROR"; - - case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: - return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; - - case NPP_INVALID_INPUT: - return "NPP_INVALID_INPUT"; - - case NPP_POINTER_ERROR: - return "NPP_POINTER_ERROR"; - - case NPP_WARNING: - return "NPP_WARNING"; - - case NPP_ODD_ROI_WARNING: - return "NPP_ODD_ROI_WARNING"; -#else - - // These are for CUDA 5.5 or higher - case NPP_BAD_ARGUMENT_ERROR: - return "NPP_BAD_ARGUMENT_ERROR"; - - case NPP_COEFFICIENT_ERROR: - return "NPP_COEFFICIENT_ERROR"; - - case NPP_RECTANGLE_ERROR: - return "NPP_RECTANGLE_ERROR"; - - case NPP_QUADRANGLE_ERROR: - return "NPP_QUADRANGLE_ERROR"; - - case NPP_MEMORY_ALLOCATION_ERR: - return "NPP_MEMORY_ALLOCATION_ERROR"; - - case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: - return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; - - case NPP_INVALID_HOST_POINTER_ERROR: - return "NPP_INVALID_HOST_POINTER_ERROR"; - - case NPP_INVALID_DEVICE_POINTER_ERROR: - return "NPP_INVALID_DEVICE_POINTER_ERROR"; -#endif - - case NPP_LUT_NUMBER_OF_LEVELS_ERROR: - return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; - - case NPP_TEXTURE_BIND_ERROR: - return "NPP_TEXTURE_BIND_ERROR"; - - case NPP_WRONG_INTERSECTION_ROI_ERROR: - return "NPP_WRONG_INTERSECTION_ROI_ERROR"; - - case NPP_NOT_EVEN_STEP_ERROR: - return "NPP_NOT_EVEN_STEP_ERROR"; - - case NPP_INTERPOLATION_ERROR: - return "NPP_INTERPOLATION_ERROR"; - - case NPP_RESIZE_FACTOR_ERROR: - return "NPP_RESIZE_FACTOR_ERROR"; - - case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: - return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; - - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 - - case NPP_MEMFREE_ERR: - return "NPP_MEMFREE_ERR"; - - case NPP_MEMSET_ERR: - return "NPP_MEMSET_ERR"; - - case NPP_MEMCPY_ERR: - return "NPP_MEMCPY_ERROR"; - - case NPP_MIRROR_FLIP_ERR: - return "NPP_MIRROR_FLIP_ERR"; -#else - - case NPP_MEMFREE_ERROR: - return "NPP_MEMFREE_ERROR"; - - case NPP_MEMSET_ERROR: - return "NPP_MEMSET_ERROR"; - - case NPP_MEMCPY_ERROR: - return "NPP_MEMCPY_ERROR"; - - case NPP_MIRROR_FLIP_ERROR: - return "NPP_MIRROR_FLIP_ERROR"; -#endif - - case NPP_ALIGNMENT_ERROR: - return "NPP_ALIGNMENT_ERROR"; - - case NPP_STEP_ERROR: - return "NPP_STEP_ERROR"; - - case NPP_SIZE_ERROR: - return "NPP_SIZE_ERROR"; - - case NPP_NULL_POINTER_ERROR: - return "NPP_NULL_POINTER_ERROR"; - - case NPP_CUDA_KERNEL_EXECUTION_ERROR: - return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; - - case NPP_NOT_IMPLEMENTED_ERROR: - return "NPP_NOT_IMPLEMENTED_ERROR"; - - case NPP_ERROR: - return "NPP_ERROR"; - - case NPP_SUCCESS: - return "NPP_SUCCESS"; - - case NPP_WRONG_INTERSECTION_QUAD_WARNING: - return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; - - case NPP_MISALIGNED_DST_ROI_WARNING: - return "NPP_MISALIGNED_DST_ROI_WARNING"; - - case NPP_AFFINE_QUAD_INCORRECT_WARNING: - return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; - - case NPP_DOUBLE_SIZE_WARNING: - return "NPP_DOUBLE_SIZE_WARNING"; - - case NPP_WRONG_INTERSECTION_ROI_WARNING: - return "NPP_WRONG_INTERSECTION_ROI_WARNING"; - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 - /* These are 6.0 or higher */ - case NPP_LUT_PALETTE_BITSIZE_ERROR: - return "NPP_LUT_PALETTE_BITSIZE_ERROR"; - - case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: - return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; - - case NPP_QUALITY_INDEX_ERROR: - return "NPP_QUALITY_INDEX_ERROR"; - - case NPP_CHANNEL_ORDER_ERROR: - return "NPP_CHANNEL_ORDER_ERROR"; - - case NPP_ZERO_MASK_VALUE_ERROR: - return "NPP_ZERO_MASK_VALUE_ERROR"; - - case NPP_NUMBER_OF_CHANNELS_ERROR: - return "NPP_NUMBER_OF_CHANNELS_ERROR"; - - case NPP_COI_ERROR: - return "NPP_COI_ERROR"; - - case NPP_DIVISOR_ERROR: - return "NPP_DIVISOR_ERROR"; - - case NPP_CHANNEL_ERROR: - return "NPP_CHANNEL_ERROR"; - - case NPP_STRIDE_ERROR: - return "NPP_STRIDE_ERROR"; - - case NPP_ANCHOR_ERROR: - return "NPP_ANCHOR_ERROR"; - - case NPP_MASK_SIZE_ERROR: - return "NPP_MASK_SIZE_ERROR"; - - case NPP_MOMENT_00_ZERO_ERROR: - return "NPP_MOMENT_00_ZERO_ERROR"; - - case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: - return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; - - case NPP_THRESHOLD_ERROR: - return "NPP_THRESHOLD_ERROR"; - - case NPP_CONTEXT_MATCH_ERROR: - return "NPP_CONTEXT_MATCH_ERROR"; - - case NPP_FFT_FLAG_ERROR: - return "NPP_FFT_FLAG_ERROR"; - - case NPP_FFT_ORDER_ERROR: - return "NPP_FFT_ORDER_ERROR"; - - case NPP_SCALE_RANGE_ERROR: - return "NPP_SCALE_RANGE_ERROR"; - - case NPP_DATA_TYPE_ERROR: - return "NPP_DATA_TYPE_ERROR"; - - case NPP_OUT_OFF_RANGE_ERROR: - return "NPP_OUT_OFF_RANGE_ERROR"; - - case NPP_DIVIDE_BY_ZERO_ERROR: - return "NPP_DIVIDE_BY_ZERO_ERROR"; - - case NPP_RANGE_ERROR: - return "NPP_RANGE_ERROR"; - - case NPP_NO_MEMORY_ERROR: - return "NPP_NO_MEMORY_ERROR"; - - case NPP_ERROR_RESERVED: - return "NPP_ERROR_RESERVED"; - - case NPP_NO_OPERATION_WARNING: - return "NPP_NO_OPERATION_WARNING"; - - case NPP_DIVIDE_BY_ZERO_WARNING: - return "NPP_DIVIDE_BY_ZERO_WARNING"; -#endif - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000 - /* These are 7.0 or higher */ - case NPP_OVERFLOW_ERROR: - return "NPP_OVERFLOW_ERROR"; - - case NPP_CORRUPTED_DATA_ERROR: - return "NPP_CORRUPTED_DATA_ERROR"; -#endif - } - - return "<unknown>"; -} -#endif - -#ifdef __DRIVER_TYPES_H__ -#ifndef DEVICE_RESET -#define DEVICE_RESET cudaDeviceReset(); -#endif -#else -#ifndef DEVICE_RESET -#define DEVICE_RESET -#endif -#endif - -template< typename T > -void check(T result, char const *const func, const char *const file, int const line) -{ - if (result) - { - fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", - file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); - DEVICE_RESET - // Make sure we call CUDA Device Reset before exiting - exit(EXIT_FAILURE); - } -} - -#ifdef __DRIVER_TYPES_H__ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) - -// This will output the proper error string when calling cudaGetLastError -#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__) - -inline void __getLastCudaError(const char *errorMessage, const char *file, const int line) -{ - cudaError_t err = cudaGetLastError(); - - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", - file, line, errorMessage, (int)err, cudaGetErrorString(err)); - DEVICE_RESET - exit(EXIT_FAILURE); - } -} -#endif - -#ifndef MAX -#define MAX(a,b) (a > b ? a : b) -#endif - -// Float To Int conversion -inline int ftoi(float value) -{ - return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5)); -} - -// Beginning of GPU Architecture definitions -inline int _ConvertSMVer2Cores(int major, int minor) -{ - // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM - typedef struct - { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version - int Cores; - } sSMtoCores; - - sSMtoCores nGpuArchCoresPerSM[] = - { - { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class - { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class - { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class - { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class - { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class - { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class - { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class - { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class - { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class - { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class - { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class - { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class - { -1, -1 } - }; - - int index = 0; - - while (nGpuArchCoresPerSM[index].SM != -1) - { - if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) - { - return nGpuArchCoresPerSM[index].Cores; - } - - index++; - } - - // If we don't find the values, we default use the previous one to run properly - printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); - return nGpuArchCoresPerSM[index-1].Cores; -} -// end of GPU Architecture definitions - -#ifdef __CUDA_RUNTIME_H__ -// General GPU Device CUDA Initialization -inline int gpuDeviceInit(int devID) -{ - int device_count; - checkCudaErrors(cudaGetDeviceCount(&device_count)); - - if (device_count == 0) - { - fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); - exit(EXIT_FAILURE); - } - - if (devID < 0) - { - devID = 0; - } - - if (devID > device_count-1) - { - fprintf(stderr, "\n"); - fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); - fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); - fprintf(stderr, "\n"); - return -devID; - } - - cudaDeviceProp deviceProp; - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); - - if (deviceProp.computeMode == cudaComputeModeProhibited) - { - fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n"); - return -1; - } - - if (deviceProp.major < 1) - { - fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); - exit(EXIT_FAILURE); - } - - checkCudaErrors(cudaSetDevice(devID)); - printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); - - return devID; -} - -// This function returns the best GPU (with maximum GFLOPS) -inline int gpuGetMaxGflopsDeviceId() -{ - int current_device = 0, sm_per_multiproc = 0; - int max_perf_device = 0; - int device_count = 0, best_SM_arch = 0; - int devices_prohibited = 0; - - unsigned long long max_compute_perf = 0; - cudaDeviceProp deviceProp; - cudaGetDeviceCount(&device_count); - - checkCudaErrors(cudaGetDeviceCount(&device_count)); - - if (device_count == 0) - { - fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); - exit(EXIT_FAILURE); - } - - // Find the best major SM Architecture GPU device - while (current_device < device_count) - { - cudaGetDeviceProperties(&deviceProp, current_device); - - // If this GPU is not running on Compute Mode prohibited, then we can add it to the list - if (deviceProp.computeMode != cudaComputeModeProhibited) - { - if (deviceProp.major > 0 && deviceProp.major < 9999) - { - best_SM_arch = MAX(best_SM_arch, deviceProp.major); - } - } - else - { - devices_prohibited++; - } - - current_device++; - } - - if (devices_prohibited == device_count) - { - fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); - exit(EXIT_FAILURE); - } - - // Find the best CUDA capable GPU device - current_device = 0; - - while (current_device < device_count) - { - cudaGetDeviceProperties(&deviceProp, current_device); - - // If this GPU is not running on Compute Mode prohibited, then we can add it to the list - if (deviceProp.computeMode != cudaComputeModeProhibited) - { - if (deviceProp.major == 9999 && deviceProp.minor == 9999) - { - sm_per_multiproc = 1; - } - else - { - sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); - } - - unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; - - if (compute_perf > max_compute_perf) - { - // If we find GPU with SM major > 2, search only these - if (best_SM_arch > 2) - { - // If our device==dest_SM_arch, choose this, or else pass - if (deviceProp.major == best_SM_arch) - { - max_compute_perf = compute_perf; - max_perf_device = current_device; - } - } - else - { - max_compute_perf = compute_perf; - max_perf_device = current_device; - } - } - } - - ++current_device; - } - - return max_perf_device; -} - - -// Initialization code to find the best CUDA Device -inline int findCudaDevice(int argc, const char **argv) -{ - cudaDeviceProp deviceProp; - int devID = 0; - - // If the command-line has a device number specified, use it - if (checkCmdLineFlag(argc, argv, "device")) - { - devID = getCmdLineArgumentInt(argc, argv, "device="); - - if (devID < 0) - { - printf("Invalid command line parameter\n "); - exit(EXIT_FAILURE); - } - else - { - devID = gpuDeviceInit(devID); - - if (devID < 0) - { - printf("exiting...\n"); - exit(EXIT_FAILURE); - } - } - } - else - { - // Otherwise pick the device with highest Gflops/s - devID = gpuGetMaxGflopsDeviceId(); - checkCudaErrors(cudaSetDevice(devID)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); - printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); - } - - return devID; -} - -// General check for CUDA GPU SM Capabilities -inline bool checkCudaCapabilities(int major_version, int minor_version) -{ - cudaDeviceProp deviceProp; - deviceProp.major = 0; - deviceProp.minor = 0; - int dev; - - checkCudaErrors(cudaGetDevice(&dev)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); - - if ((deviceProp.major > major_version) || - (deviceProp.major == major_version && deviceProp.minor >= minor_version)) - { - printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); - return true; - } - else - { - printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); - return false; - } -} -#endif - -// end of CUDA Helper Functions - - -#endif
--- a/src/test/helper_string.h Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,527 +0,0 @@ -/** - * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -// These are helper functions for the SDK samples (string parsing, timers, etc) -#ifndef STRING_HELPER_H -#define STRING_HELPER_H - -#include <stdio.h> -#include <stdlib.h> -#include <fstream> -#include <string> - -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -#ifndef _CRT_SECURE_NO_DEPRECATE -#define _CRT_SECURE_NO_DEPRECATE -#endif -#ifndef STRCASECMP -#define STRCASECMP _stricmp -#endif -#ifndef STRNCASECMP -#define STRNCASECMP _strnicmp -#endif -#ifndef STRCPY -#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath) -#endif - -#ifndef FOPEN -#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode) -#endif -#ifndef FOPEN_FAIL -#define FOPEN_FAIL(result) (result != 0) -#endif -#ifndef SSCANF -#define SSCANF sscanf_s -#endif -#ifndef SPRINTF -#define SPRINTF sprintf_s -#endif -#else // Linux Includes -#include <string.h> -#include <strings.h> - -#ifndef STRCASECMP -#define STRCASECMP strcasecmp -#endif -#ifndef STRNCASECMP -#define STRNCASECMP strncasecmp -#endif -#ifndef STRCPY -#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath) -#endif - -#ifndef FOPEN -#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode)) -#endif -#ifndef FOPEN_FAIL -#define FOPEN_FAIL(result) (result == NULL) -#endif -#ifndef SSCANF -#define SSCANF sscanf -#endif -#ifndef SPRINTF -#define SPRINTF sprintf -#endif -#endif - -#ifndef EXIT_WAIVED -#define EXIT_WAIVED 2 -#endif - -// CUDA Utility Helper Functions -inline int stringRemoveDelimiter(char delimiter, const char *string) -{ - int string_start = 0; - - while (string[string_start] == delimiter) - { - string_start++; - } - - if (string_start >= (int)strlen(string)-1) - { - return 0; - } - - return string_start; -} - -inline int getFileExtension(char *filename, char **extension) -{ - int string_length = (int)strlen(filename); - - while (filename[string_length--] != '.') - { - if (string_length == 0) - break; - } - - if (string_length > 0) string_length += 2; - - if (string_length == 0) - *extension = NULL; - else - *extension = &filename[string_length]; - - return string_length; -} - - -inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - - const char *equal_pos = strchr(string_argv, '='); - int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); - - int length = (int)strlen(string_ref); - - if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length)) - { - bFound = true; - continue; - } - } - } - - return bFound; -} - -// This function wraps the CUDA Driver API into a template function -template <class T> -inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - *value = (T)atoi(&string_argv[length + auto_inc]); - } - - bFound = true; - i=argc; - } - } - } - - return bFound; -} - -inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - int value = -1; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - value = atoi(&string_argv[length + auto_inc]); - } - else - { - value = 0; - } - - bFound = true; - continue; - } - } - } - - if (bFound) - { - return value; - } - else - { - return 0; - } -} - -inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - float value = -1; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - value = (float)atof(&string_argv[length + auto_inc]); - } - else - { - value = 0.f; - } - - bFound = true; - continue; - } - } - } - - if (bFound) - { - return value; - } - else - { - return 0; - } -} - -inline bool getCmdLineArgumentString(const int argc, const char **argv, - const char *string_ref, char **string_retval) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - char *string_argv = (char *)&argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - *string_retval = &string_argv[length+1]; - bFound = true; - continue; - } - } - } - - if (!bFound) - { - *string_retval = NULL; - } - - return bFound; -} - -////////////////////////////////////////////////////////////////////////////// -//! Find the path for a file assuming that -//! files are found in the searchPath. -//! -//! @return the path if succeeded, otherwise 0 -//! @param filename name of the file -//! @param executable_path optional absolute path of the executable -////////////////////////////////////////////////////////////////////////////// -inline char *sdkFindFilePath(const char *filename, const char *executable_path) -{ - // <executable_name> defines a variable that is replaced with the name of the executable - - // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) - // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc - const char *searchPath[] = - { - "./", // same dir - "./<executable_name>_data_files/", - "./common/", // "/common/" subdir - "./common/data/", // "/common/data/" subdir - "./data/", // "/data/" subdir - "./src/", // "/src/" subdir - "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir - "./inc/", // "/inc/" subdir - "./0_Simple/", // "/0_Simple/" subdir - "./1_Utilities/", // "/1_Utilities/" subdir - "./2_Graphics/", // "/2_Graphics/" subdir - "./3_Imaging/", // "/3_Imaging/" subdir - "./4_Finance/", // "/4_Finance/" subdir - "./5_Simulations/", // "/5_Simulations/" subdir - "./6_Advanced/", // "/6_Advanced/" subdir - "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir - "./8_Android/", // "/8_Android/" subdir - "./samples/", // "/samples/" subdir - - "./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/" subdir - "./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/" subdir - "./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/" subdir - "./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/" subdir - "./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/" subdir - "./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/" subdir - "./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/" subdir - "./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/" subdir - "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir - - "../", // up 1 in tree - "../common/", // up 1 in tree, "/common/" subdir - "../common/data/", // up 1 in tree, "/common/data/" subdir - "../data/", // up 1 in tree, "/data/" subdir - "../src/", // up 1 in tree, "/src/" subdir - "../inc/", // up 1 in tree, "/inc/" subdir - - "../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir - "../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir - "../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir - "../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir - "../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir - "../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir - "../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir - "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir - "../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir - "../../", // up 2 in tree - "../../common/", // up 2 in tree, "/common/" subdir - "../../common/data/", // up 2 in tree, "/common/data/" subdir - "../../data/", // up 2 in tree, "/data/" subdir - "../../src/", // up 2 in tree, "/src/" subdir - "../../inc/", // up 2 in tree, "/inc/" subdir - "../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir - "../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir - "../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir - "../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir - "../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir - "../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir - "../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir - "../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir - "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir - "../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir - "../../../", // up 3 in tree - "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir - "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir - "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir - "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir - "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir - "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir - "../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir - "../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir - "../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir - "../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir - "../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir - "../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir - "../../../common/", // up 3 in tree, "../../../common/" subdir - "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir - "../../../data/", // up 3 in tree, "../../../data/" subdir - "../../../../", // up 4 in tree - "../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir - "../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir - "../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir - "../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir - "../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir - "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir - "../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir - "../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir - "../../../../common/", // up 4 in tree, "../../../common/" subdir - "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir - "../../../../data/", // up 4 in tree, "../../../data/" subdir - "../../../../../", // up 5 in tree - "../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir - "../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir - "../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir - "../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir - "../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir - "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir - "../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir - "../../../../../common/", // up 5 in tree, "../../../common/" subdir - "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir - }; - - // Extract the executable name - std::string executable_name; - - if (executable_path != 0) - { - executable_name = std::string(executable_path); - -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) - // Windows path delimiter - size_t delimiter_pos = executable_name.find_last_of('\\'); - executable_name.erase(0, delimiter_pos + 1); - - if (executable_name.rfind(".exe") != std::string::npos) - { - // we strip .exe, only if the .exe is found - executable_name.resize(executable_name.size() - 4); - } - -#else - // Linux & OSX path delimiter - size_t delimiter_pos = executable_name.find_last_of('/'); - executable_name.erase(0,delimiter_pos+1); -#endif - } - - // Loop over all search paths and return the first hit - for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i) - { - std::string path(searchPath[i]); - size_t executable_name_pos = path.find("<executable_name>"); - - // If there is executable_name variable in the searchPath - // replace it with the value - if (executable_name_pos != std::string::npos) - { - if (executable_path != 0) - { - path.replace(executable_name_pos, strlen("<executable_name>"), executable_name); - } - else - { - // Skip this path entry if no executable argument is given - continue; - } - } - -#ifdef _DEBUG - printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); -#endif - - // Test if the file exists - path.append(filename); - FILE *fp; - FOPEN(fp, path.c_str(), "rb"); - - if (fp != NULL) - { - fclose(fp); - // File found - // returning an allocated array here for backwards compatibility reasons - char *file_path = (char *) malloc(path.length() + 1); - STRCPY(file_path, path.length() + 1, path.c_str()); - return file_path; - } - - if (fp) - { - fclose(fp); - } - } - - // File not found - return 0; -} - -#endif
--- a/src/test/main.cu Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,368 +0,0 @@ -/* - * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* - * Quadro and Tesla GPUs with compute capability >= 2.0 can overlap two memcopies - * with kernel execution. This sample illustrates the usage of CUDA streams to - * achieve overlapping of kernel execution with copying data to and from the device. - * - * Additionally, this sample uses CUDA events to measure elapsed time for - * CUDA calls. Events are a part of CUDA API and provide a system independent - * way to measure execution times on CUDA devices with approximately 0.5 - * microsecond precision. - * - * Elapsed times are averaged over nreps repetitions (10 by default). - * -*/ - -const char *sSDKname = "simpleMultiCopy"; - -// includes, system -#include <stdio.h> - -extern "C" { -extern void test1(); -} -// include CUDA -#include <cuda.h> -#include <cuda_runtime.h> - -// includes, project -//#include <helper_cuda.h> -//#include <helper_functions.h> // helper for shared that are common to CUDA Samples - -#include "helper_cuda.h" - -// includes, kernels -// Declare the CUDA kernels here and main() code that is needed to launch -// Compute workload on the system -__global__ void incKernel(int *g_out, int *g_in, int N, int inner_reps) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < N) - { - for (int i=0; i<inner_reps; ++i) - { - g_out[idx] = g_in[idx] + 1; - } - } -} - -#define STREAM_COUNT 4 - -// Uncomment to simulate data source/sink IO times -//#define SIMULATE_IO - -int *h_data_source; -int *h_data_sink; - -int *h_data_in[STREAM_COUNT]; -int *d_data_in[STREAM_COUNT]; - -int *h_data_out[STREAM_COUNT]; -int *d_data_out[STREAM_COUNT]; - - -cudaEvent_t cycleDone[STREAM_COUNT]; -cudaStream_t stream[STREAM_COUNT]; - -cudaEvent_t start, stop; - -int N = 1 << 22; -int nreps = 10; // number of times each experiment is repeated -int inner_reps = 5; - -int memsize; - -dim3 block(512); -dim3 grid; - -int thread_blocks; - -float processWithStreams(int streams_used); -void init(); -bool test(); - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char *argv[]) -{ - int cuda_device = 0; - float scale_factor; - cudaDeviceProp deviceProp; - - test1(); - printf("[%s] - Starting...\n", sSDKname); - - // Otherwise pick the device with the highest Gflops/s - cuda_device = 0; - checkCudaErrors(cudaSetDevice(cuda_device)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device)); - printf("> Using CUDA device [%d]: %s\n", cuda_device, deviceProp.name); - - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device)); - printf("[%s] has %d MP(s) x %d (Cores/MP) = %d (Cores)\n", - deviceProp.name, deviceProp.multiProcessorCount, - _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), - _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); - - // Anything that is less than 32 Cores will have scaled down workload - scale_factor = max((32.0f / (_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * (float)deviceProp.multiProcessorCount)), 1.0f); - N = (int)((float)N / scale_factor); - - printf("> Device name: %s\n", deviceProp.name); - printf("> CUDA Capability %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, - deviceProp.multiProcessorCount); - printf("> scale_factor = %.2f\n", 1.0f/scale_factor); - printf("> array_size = %d\n\n", N); - - memsize = N * sizeof(int); - - thread_blocks = N / block.x; - - grid.x = thread_blocks % 65535; - grid.y = (thread_blocks / 65535 + 1); - - - // Allocate resources - - h_data_source = (int *) malloc(memsize); - h_data_sink = (int *) malloc(memsize); - - for (int i =0; i<STREAM_COUNT; ++i) - { - - checkCudaErrors(cudaHostAlloc(&h_data_in[i], memsize, - cudaHostAllocDefault)); - checkCudaErrors(cudaMalloc(&d_data_in[i], memsize)); - - checkCudaErrors(cudaHostAlloc(&h_data_out[i], memsize, - cudaHostAllocDefault)); - checkCudaErrors(cudaMalloc(&d_data_out[i], memsize)); - - checkCudaErrors(cudaStreamCreate(&stream[i])); - checkCudaErrors(cudaEventCreate(&cycleDone[i])); - - cudaEventRecord(cycleDone[i], stream[i]); - } - - cudaEventCreate(&start); - cudaEventCreate(&stop); - - init(); - - // Kernel warmup - incKernel<<<grid, block>>>(d_data_out[0], d_data_in[0], N, inner_reps); - - - // Time copies and kernel - cudaEventRecord(start,0); - checkCudaErrors(cudaMemcpyAsync(d_data_in[0], h_data_in[0], memsize, - cudaMemcpyHostToDevice,0)); - cudaEventRecord(stop,0); - cudaEventSynchronize(stop); - - float memcpy_h2d_time; - cudaEventElapsedTime(&memcpy_h2d_time, start, stop); - - cudaEventRecord(start,0); - checkCudaErrors(cudaMemcpyAsync(h_data_out[0], d_data_out[0], memsize, - cudaMemcpyDeviceToHost, 0)); - cudaEventRecord(stop,0); - cudaEventSynchronize(stop); - - float memcpy_d2h_time; - cudaEventElapsedTime(&memcpy_d2h_time, start, stop); - - cudaEventRecord(start,0); - incKernel<<<grid, block,0,0>>>(d_data_out[0], d_data_in[0], N, inner_reps); - cudaEventRecord(stop,0); - cudaEventSynchronize(stop); - - float kernel_time; - cudaEventElapsedTime(&kernel_time, start, stop); - - printf("\n"); - printf("Relevant properties of this CUDA device\n"); - printf("(%s) Can overlap one CPU<>GPU data transfer with GPU kernel execution (device property \"deviceOverlap\")\n", deviceProp.deviceOverlap ? "X" : " "); - //printf("(%s) Can execute several GPU kernels simultaneously (compute capability >= 2.0)\n", deviceProp.major >= 2 ? "X": " "); - printf("(%s) Can overlap two CPU<>GPU data transfers with GPU kernel execution\n" - " (Compute Capability >= 2.0 AND (Tesla product OR Quadro 4000/5000/6000/K5000)\n", - (deviceProp.major >= 2 && deviceProp.asyncEngineCount > 1) - ? "X" : " "); - - printf("\n"); - printf("Measured timings (throughput):\n"); - printf(" Memcpy host to device\t: %f ms (%f GB/s)\n", - memcpy_h2d_time, (memsize * 1e-6)/ memcpy_h2d_time); - printf(" Memcpy device to host\t: %f ms (%f GB/s)\n", - memcpy_d2h_time, (memsize * 1e-6)/ memcpy_d2h_time); - printf(" Kernel\t\t\t: %f ms (%f GB/s)\n", - kernel_time, (inner_reps *memsize * 2e-6)/ kernel_time); - - printf("\n"); - printf("Theoretical limits for speedup gained from overlapped data transfers:\n"); - printf("No overlap at all (transfer-kernel-transfer): %f ms \n", - memcpy_h2d_time + memcpy_d2h_time + kernel_time); - printf("Compute can overlap with one transfer: %f ms\n", - max((memcpy_h2d_time + memcpy_d2h_time), kernel_time)); - printf("Compute can overlap with both data transfers: %f ms\n", - max(max(memcpy_h2d_time,memcpy_d2h_time), kernel_time)); - - // Process pipelined work - float serial_time = processWithStreams(1); - float overlap_time = processWithStreams(STREAM_COUNT); - - printf("\nAverage measured timings over %d repetitions:\n", nreps); - printf(" Avg. time when execution fully serialized\t: %f ms\n", - serial_time / nreps); - printf(" Avg. time when overlapped using %d streams\t: %f ms\n", - STREAM_COUNT, overlap_time / nreps); - printf(" Avg. speedup gained (serialized - overlapped)\t: %f ms\n", - (serial_time - overlap_time) / nreps); - - printf("\nMeasured throughput:\n"); - printf(" Fully serialized execution\t\t: %f GB/s\n", - (nreps * (memsize * 2e-6))/ serial_time); - printf(" Overlapped using %d streams\t\t: %f GB/s\n", - STREAM_COUNT, (nreps * (memsize * 2e-6))/ overlap_time); - - // Verify the results, we will use the results for final output - bool bResults = test(); - - // Free resources - - free(h_data_source); - free(h_data_sink); - - for (int i =0; i<STREAM_COUNT; ++i) - { - - cudaFreeHost(h_data_in[i]); - cudaFree(d_data_in[i]); - - cudaFreeHost(h_data_out[i]); - cudaFree(d_data_out[i]); - - cudaStreamDestroy(stream[i]); - cudaEventDestroy(cycleDone[i]); - } - - cudaEventDestroy(start); - cudaEventDestroy(stop); - - // Test result - exit(bResults ? EXIT_SUCCESS : EXIT_FAILURE); -} - -float processWithStreams(int streams_used) -{ - - int current_stream = 0; - - float time; - - // Do processing in a loop - // - // Note: All memory commands are processed in the order they are issued, - // independent of the stream they are enqueued in. Hence the pattern by - // which the copy and kernel commands are enqueued in the stream - // has an influence on the achieved overlap. - - cudaEventRecord(start, 0); - - for (int i=0; i<nreps; ++i) - { - int next_stream = (current_stream + 1) % streams_used; - -#ifdef SIMULATE_IO - // Store the result - memcpy(h_data_sink, h_data_out[current_stream],memsize); - - // Read new input - memcpy(h_data_in[next_stream], h_data_source, memsize); -#endif - - // Ensure that processing and copying of the last cycle has finished - cudaEventSynchronize(cycleDone[next_stream]); - - // Process current frame - incKernel<<<grid, block, 0, stream[current_stream]>>>( - d_data_out[current_stream], - d_data_in[current_stream], - N, - inner_reps); - - // Upload next frame - checkCudaErrors(cudaMemcpyAsync( - d_data_in[next_stream], - h_data_in[next_stream], - memsize, - cudaMemcpyHostToDevice, - stream[next_stream])); - - // Download current frame - checkCudaErrors(cudaMemcpyAsync( - h_data_out[current_stream], - d_data_out[current_stream], - memsize, - cudaMemcpyDeviceToHost, - stream[current_stream])); - - checkCudaErrors(cudaEventRecord( - cycleDone[current_stream], - stream[current_stream])); - - current_stream = next_stream; - } - - cudaEventRecord(stop, 0); - - cudaDeviceSynchronize(); - - cudaEventElapsedTime(&time, start, stop); - - return time; - -} - -void init() -{ - for (int i=0; i<N; ++i) - { - h_data_source[i] = 0; - } - - for (int i =0; i<STREAM_COUNT; ++i) - { - memcpy(h_data_in[i], h_data_source, memsize); - } -} - - -bool test() -{ - - bool passed = true; - - for (int j =0; j<STREAM_COUNT; ++j) - { - for (int i =0; i<N; ++i) - { - passed &= (h_data_out[j][i] == 1); - } - } - - return passed; -}
--- a/src/test/multiply.cu Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -extern "C" { - __global__ void multiply(float* A, float* B, float* C) { -// printf("%d %d\n",i[0],i[1]); - int index = blockIdx.x * blockDim.x + threadIdx.x; - C[index] = A[index] * B[0]; - } - -}
--- a/src/test/test.c Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#include <stdio.h> - -void test1() { - printf("test\n"); -}
--- a/src/test/twice.cc Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,182 +0,0 @@ -#include <stdio.h> -#include <sys/time.h> -#include <string.h> -#include <stdlib.h> -extern "C" { -#include <pthread.h> -} - -#include <cuda.h> - -#include <cuda_runtime.h> -#include "helper_cuda.h" - -#define LENGTH (10) -#define THREAD (10) - -double -getTime() { - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec + (double)tv.tv_usec*1e-6; -} - -void -check_data(float* A, float B, float* C) { - for (int i=0; i<LENGTH*THREAD; i++) { - if (A[i]*B!=C[i]) { - puts("multiply failure."); - return; - } - } - puts("success."); -} - -void print_result(float* C) { - for (int i=0; i<LENGTH*THREAD; i++) { - printf("%f\n",C[i]); - } -} - -int num_stream = 1; // number of stream -int num_exec = 16; // number of executed kernel - -static void *start_cuda(void *) ; - -int main(int args, char* argv[]) { - - for (int i=1;argv[i];i++) { - if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { - num_stream = atoi(argv[++i]); - } - if (strcmp(argv[i], "--numExec") == 0 || strcmp(argv[i], "-e") == 0) { - num_exec = atoi(argv[++i]); - } - } -#if 0 - start_cuda(NULL); -#else - pthread_t thread; - pthread_create(&thread, NULL, start_cuda, NULL); - pthread_join(thread,NULL); -#endif - return 0; -} - -static void *start_cuda(void *args) { - // initialize and load kernel - CUdevice device; - CUcontext context; - CUmodule module; - CUfunction function; - CUstream stream[num_stream]; - - checkCudaErrors(cuInit(0)); - checkCudaErrors(cuDeviceGet(&device, 0)); - checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device)); - checkCudaErrors(cuModuleLoad(&module, "multiply.ptx")); - checkCudaErrors(cuModuleGetFunction(&function, module, "multiply")); - if (num_stream) { - for (int i=0;i<num_stream;i++) - checkCudaErrors(cuStreamCreate(&stream[i],0)); - } - - // memory allocate - CUdeviceptr devA; - CUdeviceptr devB[num_exec]; - CUdeviceptr devOut[num_exec]; - - checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float))); - for (int i=0;i<num_exec;i++) { - checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float))); - checkCudaErrors(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float))); - } - - // input buffer - float* A = new float[LENGTH*THREAD]; - float* B = new float[num_exec]; - - for (int i=0; i<LENGTH*THREAD; i++) - A[i] = (float)(i+1000); - - // output buffer - float** result = new float* [num_exec]; - - for (int i=0;i<num_exec;i++) - result[i] = new float[LENGTH*THREAD]; - - // Synchronous data transfer(host to device) - checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float))); - - // Asynchronous data transfer(host to device) - int cur = 0; - - for (int i=0;i<num_exec;i++,cur++) { - if (num_stream <= cur) - cur = 0; - B[i] = (float)(i+1); - if (num_stream) { - checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur])); - } else { - checkCudaErrors(cuMemcpyHtoD(devB[i], &B[i], sizeof(float))); - } - } - - cur = 0; - - // Asynchronous launch kernel - for (int i=0;i<num_exec;i++,cur++) { - if (num_stream <= cur) - cur=0; - //B[i] = (float)(i+1); - //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); - void* args[] = {&devA, &devB[i], &devOut[i]}; - checkCudaErrors(cuLaunchKernel(function, - LENGTH, 1, 1, - THREAD, 1, 1, - 0, num_stream ? stream[cur] : NULL , args, NULL)); - //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); - } - - cur = 0; - - - // Asynchronous data transfer(device to host) - for (int i=0;i<num_exec;i++,cur++) { - if (num_stream <= cur) - cur = 0; - if (num_stream) { - checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur])); - } else { - checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float))); - } - } - - // wait for stream - for (int i=0;i<num_stream;i++) - checkCudaErrors(cuStreamSynchronize(stream[i])); - - //printf("%0.6f\n",getTime()-start); - - for (int i=0;i<num_exec;i++) - check_data(A,(float)(i+1),result[i]); - - // memory release - checkCudaErrors(cuMemFree(devA)); - for (int i=0;i<num_exec;i++) { - checkCudaErrors(cuMemFree(devB[i])); - checkCudaErrors(cuMemFree(devOut[i])); - } - for (int i=0;i<num_stream;i++) - checkCudaErrors(cuStreamDestroy(stream[i])); - checkCudaErrors(cuModuleUnload(module)); - checkCudaErrors(cuCtxDestroy(context)); - - delete[] A; - delete[] B; - for (int i=0;i<num_exec;i++) - delete[] result[i]; - delete[] result; - return 0; -} -
--- a/src/test/vectorAddDrv.cc Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,451 +0,0 @@ -/* - * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* Vector addition: C = A + B. - * - * This sample is a very basic sample that implements element by element - * vector addition. It is the same as the sample illustrating Chapter 3 - * of the programming guide with some additions like error checking. - * - */ - -// Includes -#include <stdio.h> -#include <string.h> -#include <iostream> -#include <cstring> -#include <math.h> - -// includes, project -#include <driver_types.h> -#include <cuda_runtime.h> -#include <cuda.h> -#include "helper_cuda.h" - -// includes, CUDA -#include <builtin_types.h> - -#define PTX_FILE "vectorAdd_kernel.ptx" - - -using namespace std; - -// Variables -CUdevice cuDevice; -CUcontext cuContext; -CUmodule cuModule; -CUfunction vecAdd_kernel; -float *h_A; -float *h_B; -float *h_C; -CUdeviceptr d_A; -CUdeviceptr d_B; -CUdeviceptr d_C; -bool noprompt = false; - -// Functions -void Cleanup(bool); -CUresult CleanupNoFailure(); -void RandomInit(float *, int); -bool findModulePath(const char *, string &, char **, string &); -void ParseArguments(int, char **); - -int *pArgc = NULL; -char **pArgv = NULL; - - -// Host code -int main(int argc, char **argv) -{ - pArgc = &argc; - pArgv = argv; - - printf("Vector Addition (Driver API)\n"); - int N = 50000, devID = 0; - size_t size = N * sizeof(float); - - ParseArguments(argc, argv); - - // Initialize - checkCudaErrors(cuInit(0)); - - // This assumes that the user is attempting to specify a explicit device -device=n - if (argc > 1) - { - bool bFound = false; - - for (int param=0; param < argc; param++) - { - int string_start = 0; - - while (argv[param][string_start] == '-') - { - string_start++; - } - - char *string_argv = &argv[param][string_start]; - - if (!strncmp(string_argv, "device", 6)) - { - int len=(int)strlen(string_argv); - - while (string_argv[len] != '=') - { - len--; - } - - devID = atoi(&string_argv[++len]); - bFound = true; - } - - if (bFound) - { - break; - } - } - } - - // Get number of devices supporting CUDA - int deviceCount = 0; - checkCudaErrors(cuDeviceGetCount(&deviceCount)); - if (deviceCount == 0) - { - printf("There is no device supporting CUDA.\n"); - Cleanup(false); - } - - if (devID < 0) - { - devID = 0; - } - - if (devID > deviceCount-1) - { - fprintf(stderr, "(Device=%d) invalid GPU device. %d GPU device(s) detected.\nexiting...\n", devID, deviceCount); - CleanupNoFailure(); - exit(EXIT_SUCCESS); - } - else - { - int major, minor; - char deviceName[100]; - checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID)); - checkCudaErrors(cuDeviceGetName(deviceName, 256, devID)); - printf("> Using Device %d: \"%s\" with Compute %d.%d capability\n", devID, deviceName, major, minor); - } - - // pick up device with zero ordinal (default, or devID) - checkCudaErrors(cuDeviceGet(&cuDevice, devID)); - // Create context - checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); - // first search for the module path before we load the results - string module_path, ptx_source; - - if (!findModulePath(PTX_FILE, module_path, argv, ptx_source)) - { - if (!findModulePath("vectorAdd_kernel.cubin", module_path, argv, ptx_source)) - { - printf("> findModulePath could not find <vectorAdd> ptx or cubin\n"); - Cleanup(false); - } - } - else - { - printf("> initCUDA loading module: <%s>\n", module_path.c_str()); - } - - // Create module from binary file (PTX or CUBIN) - if (module_path.rfind("ptx") != string::npos) - { - // in this branch we use compilation with parameters - const unsigned int jitNumOptions = 3; - CUjit_option *jitOptions = new CUjit_option[jitNumOptions]; - void **jitOptVals = new void *[jitNumOptions]; - - // set up size of compilation log buffer - jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - int jitLogBufferSize = 1024; - jitOptVals[0] = (void *)(size_t)jitLogBufferSize; - - // set up pointer to the compilation log buffer - jitOptions[1] = CU_JIT_INFO_LOG_BUFFER; - char *jitLogBuffer = new char[jitLogBufferSize]; - jitOptVals[1] = jitLogBuffer; - - // set up pointer to set the Maximum # of registers for a particular kernel - jitOptions[2] = CU_JIT_MAX_REGISTERS; - int jitRegCount = 32; - jitOptVals[2] = (void *)(size_t)jitRegCount; - - checkCudaErrors(cuModuleLoadDataEx(&cuModule, ptx_source.c_str(), jitNumOptions, jitOptions, (void **)jitOptVals)); - - printf("> PTX JIT log:\n%s\n", jitLogBuffer); - } - else - { - checkCudaErrors(cuModuleLoad(&cuModule, module_path.c_str())); - } - - // Get function handle from module - checkCudaErrors(cuModuleGetFunction(&vecAdd_kernel, cuModule, "VecAdd_kernel")); - - // Allocate input vectors h_A and h_B in host memory - h_A = (float *)malloc(size); - if (h_A == 0) { Cleanup(false); } - - h_B = (float *)malloc(size); - if (h_B == 0) { Cleanup(false); } - - h_C = (float *)malloc(size); - if (h_C == 0) { Cleanup(false); } - - // Initialize input vectors - RandomInit(h_A, N); - RandomInit(h_B, N); - - // Allocate vectors in device memory - checkCudaErrors(cuMemAlloc(&d_A, size)); - checkCudaErrors(cuMemAlloc(&d_B, size)); - checkCudaErrors(cuMemAlloc(&d_C, size)); - - // Copy vectors from host memory to device memory - checkCudaErrors(cuMemcpyHtoD(d_A, h_A, size)); - checkCudaErrors(cuMemcpyHtoD(d_B, h_B, size)); - -#if 1 - - if (1) - { - // This is the new CUDA 4.0 API for Kernel Parameter Passing and Kernel Launch (simpler method) - - // Grid/Block configuration - int threadsPerBlock = 256; - int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; - - void *args[] = { &d_A, &d_B, &d_C, &N }; - - // Launch the CUDA kernel - checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, - threadsPerBlock, 1, 1, - 0, - NULL, args, NULL)); - } - else - { - // This is the new CUDA 4.0 API for Kernel Parameter Passing and Kernel Launch (advanced method) - int offset = 0; - void *argBuffer[16]; - *((CUdeviceptr *)&argBuffer[offset]) = d_A; - offset += sizeof(d_A); - *((CUdeviceptr *)&argBuffer[offset]) = d_B; - offset += sizeof(d_B); - *((CUdeviceptr *)&argBuffer[offset]) = d_C; - offset += sizeof(d_C); - *((int *)&argBuffer[offset]) = N; - offset += sizeof(N); - - // Grid/Block configuration - int threadsPerBlock = 256; - int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; - - // Launch the CUDA kernel - checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, - threadsPerBlock, 1, 1, - 0, - NULL, NULL, argBuffer)); - } - -#else - { - char argBuffer[256]; - - // pass in launch parameters (not actually de-referencing CUdeviceptr). CUdeviceptr is - // storing the value of the parameters - *((CUdeviceptr *)&argBuffer[offset]) = d_A; - offset += sizeof(d_A); - *((CUdeviceptr *)&argBuffer[offset]) = d_B; - offset += sizeof(d_B); - *((CUdeviceptr *)&argBuffer[offset]) = d_C; - offset += sizeof(d_C); - *((int *)&argBuffer[offset]) = N; - offset += sizeof(N); - - void *kernel_launch_config[5] = - { - CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer, - CU_LAUNCH_PARAM_BUFFER_SIZE, &offset, - CU_LAUNCH_PARAM_END - }; - - // Grid/Block configuration - int threadsPerBlock = 256; - int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; - - // Launch the CUDA kernel - checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, - threadsPerBlock, 1, 1, - 0, 0, - NULL, (void **)&kernel_launch_config)); - } -#endif - -#ifdef _DEBUG - checkCudaErrors(cuCtxSynchronize()); -#endif - - // Copy result from device memory to host memory - // h_C contains the result in host memory - checkCudaErrors(cuMemcpyDtoH(h_C, d_C, size)); - - // Verify result - int i; - - for (i = 0; i < N; ++i) - { - float sum = h_A[i] + h_B[i]; - - if (fabs(h_C[i] - sum) > 1e-7f) - { - break; - } - } - - printf("%s\n", (i==N) ? "Result = PASS" : "Result = FAIL"); - - exit((i==N) ? EXIT_SUCCESS : EXIT_FAILURE); -} - -CUresult CleanupNoFailure() -{ - CUresult error; - - // Free device memory - if (d_A) - { - error = cuMemFree(d_A); - } - - if (d_B) - { - error = cuMemFree(d_B); - } - - if (d_C) - { - error = cuMemFree(d_C); - } - - // Free host memory - if (h_A) - { - free(h_A); - } - - if (h_B) - { - free(h_B); - } - - if (h_C) - { - free(h_C); - } - - error = cuCtxDestroy(cuContext); - - return error; -} - -void Cleanup(bool noError) -{ - CUresult error; - error = CleanupNoFailure(); - - if (!noError || error != CUDA_SUCCESS) - { - printf("Function call failed\nFAILED\n"); - exit(EXIT_FAILURE); - } - - if (!noprompt) - { - printf("\nPress ENTER to exit...\n"); - fflush(stdout); - fflush(stderr); - getchar(); - } -} - - -// Allocates an array with random float entries. -void RandomInit(float *data, int n) -{ - for (int i = 0; i < n; ++i) - { - data[i] = rand() / (float)RAND_MAX; - } -} - -bool inline -findModulePath(const char *module_file, string &module_path, char **argv, string &ptx_source) -{ - char *actual_path = sdkFindFilePath(module_file, argv[0]); - - if (actual_path) - { - module_path = actual_path; - } - else - { - printf("> findModulePath file not found: <%s> \n", module_file); - return false; - } - - if (module_path.empty()) - { - printf("> findModulePath could not find file: <%s> \n", module_file); - return false; - } - else - { - printf("> findModulePath found file at <%s>\n", module_path.c_str()); - - if (module_path.rfind(".ptx") != string::npos) - { - FILE *fp = fopen(module_path.c_str(), "rb"); - fseek(fp, 0, SEEK_END); - int file_size = ftell(fp); - char *buf = new char[file_size+1]; - fseek(fp, 0, SEEK_SET); - fread(buf, sizeof(char), file_size, fp); - fclose(fp); - buf[file_size] = '\0'; - ptx_source = buf; - delete[] buf; - } - - return true; - } -} - -// Parse program arguments -void ParseArguments(int argc, char **argv) -{ - for (int i = 0; i < argc; ++i) - { - if (strcmp(argv[i], "--noprompt") == 0 || - strcmp(argv[i], "-noprompt") == 0) - { - noprompt = true; - break; - } - } -}
--- a/src/test/vectorAdd_kernel.cu Thu Jan 16 14:44:03 2020 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -/* - * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* Vector addition: C = A + B. - * - * This sample is a very basic sample that implements element by element - * vector addition. It is the same as the sample illustrating Chapter 3 - * of the programming guide with some additions like error checking. - * - */ - -// Device code -extern "C" __global__ void VecAdd_kernel(const float *A, const float *B, float *C, int N) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - - if (i < N) - C[i] = A[i] + B[i]; -}