Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*.a
*.o
*.d
*.so
*.so.*
*.swp
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "NVTX"]
path = NVTX
url = https://github.com/NVIDIA/NVTX
branch = dev-mem-api
branch = release-v3
2 changes: 1 addition & 1 deletion NVTX
Submodule NVTX updated 715 files
2 changes: 1 addition & 1 deletion NvtxNaming/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ $ compute-sanitizer --nvtx=yes --leak-check=full --destroy-on-device-error=kerne
=========
========= LEAK SUMMARY: 1 bytes leaked in 1 allocations
========= ERROR SUMMARY: 1 error
$ compute-sanitizer --nvtx=yes --tool=initcheck --track-unused-memory=yes --destroy-on-device-error=kernel --show-backtrace=no ./NvtxNaming
$ compute-sanitizer --nvtx=yes --tool=initcheck --track-unused-memory --destroy-on-device-error=kernel --show-backtrace=no ./NvtxNaming
========= COMPUTE-SANITIZER
========= Unused memory in allocation 0x7efc4d000000 called My allocation of size 1
========= Not written any memory.
Expand Down
15 changes: 13 additions & 2 deletions Synccheck/divergent_threads.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ static constexpr int Size = (NumThreads * DataBlocks) - 16;

__shared__ int smem[NumThreads];

// Block-wide barrier wrapped in a standalone device function.
//
// Takes no arguments and returns nothing; its only effect is the
// __syncthreads() call. __noinline__ asks the compiler to keep this as a
// real function call instead of folding the barrier into each call site.
//
// NOTE(review): presumably outlined so that callers on different branches
// reach the barrier through a shared function — confirm intent with the
// author of this sample.
__device__ __noinline__
void outlineSync()
{
    __syncthreads();
}

__global__
void myKernel(int *data_in, int *sum_out)
{
Expand All @@ -51,11 +57,16 @@ void myKernel(int *data_in, int *sum_out)
if (offset < Size)
{
smem[tx] += data_in[offset];
__syncthreads();
outlineSync();
}
else
{
// We still need to wait before we continue
outlineSync();
}
}

if (tx == 0)
if (tx == blockDim.x - 1)
{
*sum_out = 0;
for (int i = 0; i < NumThreads; ++i)
Expand Down