Skip to content

[Clang][AMDGPU] Add builtins for intrinsic llvm.amdgcn.raw.buffer.store #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,19 @@ BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")

BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sii", "nc")
// Raw pointer buffer store builtins, one per stored data type. Each is called
// as (vdata, rsrc, offset, soffset, aux) and returns void; the trailing aux
// argument ("Ii" in the prototype string) must be an integer constant
// expression.
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_i8, "vcQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_i16, "vsQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_i32, "viQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_f16, "vhQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_f32, "vfQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v2i16, "vV2sQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v2i32, "vV2iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v2f16, "vV2hQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v2f32, "vV2fQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v4i16, "vV4sQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v4i32, "vV4iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v4f16, "vV4hQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_store_v4f32, "vV4fQbiiIi", "n")

//===----------------------------------------------------------------------===//
// Ballot builtins.
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,18 @@ static Value *emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3});
}

/// Emit a call to the intrinsic \p IntrinsicID using the first five arguments
/// of the call expression \p E.
///
/// The arguments are emitted as scalars in source order and forwarded
/// unchanged. The intrinsic declaration is requested with the type of the
/// first emitted operand as its overload type.
///
/// \returns the value produced by the created call instruction.
static Value *emitQuinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                 unsigned IntrinsicID) {
  constexpr unsigned NumOperands = 5;

  // Emit operands strictly in argument order (0, 1, 2, 3, 4).
  llvm::Value *Operands[NumOperands];
  for (unsigned Idx = 0; Idx != NumOperands; ++Idx)
    Operands[Idx] = CGF.EmitScalarExpr(E->getArg(Idx));

  // Operand 0 supplies the overload type for the intrinsic lookup.
  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, Operands[0]->getType());
  return CGF.Builder.CreateCall(Callee, Operands);
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
Expand Down Expand Up @@ -19095,6 +19107,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_make_buffer_rsrc);
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_i8:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_i16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_i32:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_f32:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_f16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v2i16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v2i32:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v2f16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v2f32:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v4i16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v4i32:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v4f16:
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_store_v4f32:
return emitQuinaryBuiltin(*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
default:
return nullptr;
}
Expand Down
131 changes: 131 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-raw-ptr-buffer-store.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef short v2i16 __attribute__((ext_vector_type(2)));
typedef int v2i32 __attribute__((ext_vector_type(2)));
typedef half v2f16 __attribute__((ext_vector_type(2)));
typedef float v2f32 __attribute__((ext_vector_type(2)));
typedef short v4i16 __attribute__((ext_vector_type(4)));
typedef int v4i32 __attribute__((ext_vector_type(4)));
typedef half v4f16 __attribute__((ext_vector_type(4)));
typedef float v4f32 __attribute__((ext_vector_type(4)));

// Codegen: a `char` store must select the .i8 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_i8(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_i8(char vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_i8(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a `short` store must select the .i16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_i16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_i16(short vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: an `int` store must select the .i32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_i32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_i32(int vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a `half` store must select the .f16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_f16(half vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a `float` store must select the .f32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_f32(float vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 2-element `short` vector store must select the .v2i16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v2i16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v2i16(v2i16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v2i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 2-element `int` vector store must select the .v2i32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v2i32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v2i32(v2i32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v2i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 2-element `half` vector store must select the .v2f16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v2f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v2f16(v2f16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v2f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 2-element `float` vector store must select the .v2f32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v2f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v2f32(v2f32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v2f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 4-element `short` vector store must select the .v4i16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v4i16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v4i16(v4i16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v4i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 4-element `int` vector store must select the .v4i32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v4i32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v4i32(v4i32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v4i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 4-element `half` vector store must select the .v4f16 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v4f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v4f16(v4f16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v4f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}

// Codegen: a 4-element `float` vector store must select the .v4f32 overload of
// llvm.amdgcn.raw.ptr.buffer.store; offset, soffset and aux are literal zeros.
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_store_v4f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[VDATA:%.*]], ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT: ret void
//
void test_amdgcn_raw_ptr_buffer_store_v4f32(v4f32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
__builtin_amdgcn_raw_ptr_buffer_store_v4f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -verify -o - %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
// REQUIRES: amdgpu-registered-target

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef short v2i16 __attribute__((ext_vector_type(2)));
typedef int v2i32 __attribute__((ext_vector_type(2)));
typedef half v2f16 __attribute__((ext_vector_type(2)));
typedef float v2f32 __attribute__((ext_vector_type(2)));
typedef short v4i16 __attribute__((ext_vector_type(4)));
typedef int v4i32 __attribute__((ext_vector_type(4)));
typedef half v4f16 __attribute__((ext_vector_type(4)));
typedef float v4f32 __attribute__((ext_vector_type(4)));

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_i8(char vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_i8(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_i8' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_i16(short vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_i16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_i32(int vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_i32' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_f16(half vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_f16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_f32(float vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_f32' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v2i16(v2i16 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v2i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v2i16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v2i32(v2i32 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v2i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v2i32' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v2f16(v2f16 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v2f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v2f16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v2f32(v2f32 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v2f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v2f32' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v4i16(v4i16 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v4i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v4i16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v4i32(v4i32 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v4i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v4i32' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v4f16(v4f16 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v4f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v4f16' must be a constant integer}}
}

// Negative test: a non-constant aux argument must be rejected. rsrc is declared
// as __amdgpu_buffer_rsrc_t (the "Qb" type in the builtin prototype) so that
// the non-constant aux is the only diagnostic on the call.
void test_amdgcn_raw_ptr_buffer_store_v4f32(v4f32 vdata, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
  __builtin_amdgcn_raw_ptr_buffer_store_v4f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_store_v4f32' must be a constant integer}}
}