Description
Consider this testcase:
#include <cstdlib>
#include <cstdio>
#include <cassert>
struct S {
int a;
int b;
int c;
};
struct T {
S *s0;
S *s1;
S *s2;
};
int main() {
T *v = (T *) malloc (sizeof(T));
v->s0 = (S *) malloc (sizeof(S));
v->s1 = (S *) malloc (sizeof(S));
v->s2 = (S *) malloc (sizeof(S));
v->s0->a = 10;
v->s0->b = 10;
v->s0->c = 10;
v->s1->a = 20;
v->s1->b = 20;
v->s1->c = 20;
v->s2->a = 30;
v->s2->b = 30;
v->s2->c = 30;
fprintf(stderr, "about to call target region:\n");
fprintf(stderr, "&v=%p, &v->s0=%p, &v->s1=%p, &v->s2=%p, v->s0=%p, v->s1=%p, v->s2=%p\n",
&v, &v->s0, &v->s1, &v->s2, (void*)v->s0, (void*)v->s1, (void*)v->s2);
fprintf(stderr, "&v->s0->c=%p, &v->s0->b=%p, &v->s0->c=%p\n",
&v->s0->a, &v->s0->b, &v->s0->c);
fprintf(stderr, "&v->s1->c=%p, &v->s1->b=%p, &v->s1->c=%p\n",
&v->s1->a, &v->s1->b, &v->s1->c);
fprintf(stderr, "&v->s2->c=%p, &v->s2->b=%p, &v->s2->c=%p\n",
&v->s2->a, &v->s2->b, &v->s2->c);
fprintf(stderr, "sizeof(S)=%d\n", (int)sizeof(S));
fprintf(stderr, "sizeof(T)=%d\n", (int)sizeof(T));
#pragma omp target map(tofrom: v->s1->b, v->s1->c, v->s2->b)
//#pragma omp target map(to: v[:1]) map(tofrom: v->s1->b, v->s1->c, v->s2->b)
{
v->s1->b += 3;
v->s1->c += 5;
v->s2->b += 7;
}
fprintf(stderr, "v->s0->a=%d, v->s0->b=%d, v->s0->c=%d\n",
v->s0->a, v->s0->b, v->s0->c);
fprintf(stderr, "v->s1->a=%d, v->s1->b=%d, v->s1->c=%d\n",
v->s1->a, v->s1->b, v->s1->c);
fprintf(stderr, "v->s2->a=%d, v->s2->b=%d, v->s2->c=%d\n",
v->s2->a, v->s2->b, v->s2->c);
assert (v->s0->a == 10);
assert (v->s0->b == 10);
assert (v->s0->c == 10);
assert (v->s1->a == 20);
assert (v->s1->b == 23);
assert (v->s1->c == 25);
assert (v->s2->a == 30);
assert (v->s2->b == 37);
assert (v->s2->c == 30);
free(v->s0);
free(v->s1);
free(v->s2);
free(v);
return 0;
}
Compiled with offloading to AMDGCN, this gives the following result:
about to call target region:
&v=0x7ffeb71ab720, &v->s0=0x56eb0cb17630, &v->s1=0x56eb0cb17638, &v->s2=0x56eb0cb17640, v->s0=0x56eb0cb17650, v->s1=0x56eb0cb17670, v->s2=0x56eb0cb17690
&v->s0->c=0x56eb0cb17650, &v->s0->b=0x56eb0cb17654, &v->s0->c=0x56eb0cb17658
&v->s1->c=0x56eb0cb17670, &v->s1->b=0x56eb0cb17674, &v->s1->c=0x56eb0cb17678
&v->s2->c=0x56eb0cb17690, &v->s2->b=0x56eb0cb17694, &v->s2->c=0x56eb0cb17698
sizeof(S)=12
sizeof(T)=24
v->s0->a=10, v->s0->b=10, v->s0->c=10
v->s1->a=20, v->s1->b=20, v->s1->c=25
v->s2->a=30, v->s2->b=37, v->s2->c=30
map-struct-6: map-struct-6.cc:63: int main(): Assertion `v->s1->b == 23' failed.
Aborted (core dumped)
The modification of v->s1->b
in the target region has been lost.
(I used options -fopenmp --offload-arch=gfx90a -fopenmp-version=60
)
If I set LIBOMPTARGET_DEBUG=5, we get a clue what's happening here:
omptarget --> Entry 0: Base=0x0000641caa1f8360, Begin=0x0000641caa1f8368, Size=16, Type=0x20, Name=unknown
(base is &v->s0, begin is &v->s1)
omptarget --> Entry 1: Base=0x0000641caa1f8368, Begin=0x0000641caa1f83a4, Size=4, Type=0x1000000000013, Name=unknown
(base is &v->s1, begin is &v->s1->b)
omptarget --> Entry 2: Base=0x0000641caa1f8368, Begin=0x0000641caa1f83a8, Size=4, Type=0x1000000000013, Name=unknown
(base is &v->s1, begin is &v->s1->c)
omptarget --> Entry 3: Base=0x0000641caa1f8370, Begin=0x0000641caa1f83c4, Size=4, Type=0x1000000000013, Name=unknown
(base is &v->s2, begin is &v->s2->b)
For entries 1 & 2, the containing structure of v->s1
has not been aggregated properly, i.e. according to the spec (6.0, 7.9.6 map Clause):
All map clause list items that share storage or have the same containing structure or containing array
result in a single mappable storage block that contains the storage of the list items, unless otherwise
specified.
There's no indication, AFAICT, that this does not apply if the containing structure is "in the middle" of map expressions -- so there should be just three mappings generated here, and the middle one should be a block of size 8 containing both the b
and c
elements of s1
.
I haven't examined the generated code for the offload region in detail -- I assume the v->s1->b
field is not modified because the data is accessed through the wrong (other) pointer.
I think both variants of the target directive in the above test should work, but definitely the commented-out version that maps the base v[:1]
explicitly should. Neither works at present.