Skip to content

Commit 928b06c

Browse files
Lancern authored and lanza committed
[CIR][CIRGen] Support wide string literals (#399)
This commit supports the codegen of wide string literals, including `wchar_t`, `char16_t`, and `char32_t` string literals. It does not follow the proposal in #374: for wide string literals the clang frontend doesn't record the original literal text, only the encoded code units. So I believe that a dedicated string attribute with an encoding tag, as described in #374, may not be as helpful as I thought.
1 parent 0dce533 commit 928b06c

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 31 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1104,8 +1104,37 @@ CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *E) {
11041104
return builder.getString(Str, eltTy, finalSize);
11051105
}
11061106

1107-
assert(0 && "not implemented");
1108-
return {};
1107+
auto arrayTy =
1108+
getTypes().ConvertType(E->getType()).dyn_cast<mlir::cir::ArrayType>();
1109+
assert(arrayTy && "string literals must be emitted as an array type");
1110+
1111+
auto arrayEltTy = arrayTy.getEltType().dyn_cast<mlir::cir::IntType>();
1112+
assert(arrayEltTy &&
1113+
"string literal elements must be emitted as integral type");
1114+
1115+
auto arraySize = arrayTy.getSize();
1116+
auto literalSize = E->getLength();
1117+
1118+
// Collect the code units.
1119+
SmallVector<uint32_t, 32> elementValues;
1120+
elementValues.reserve(arraySize);
1121+
for (unsigned i = 0; i < literalSize; ++i)
1122+
elementValues.push_back(E->getCodeUnit(i));
1123+
elementValues.resize(arraySize);
1124+
1125+
// If the string is full of null bytes, emit a #cir.zero instead.
1126+
if (std::all_of(elementValues.begin(), elementValues.end(),
1127+
[](uint32_t x) { return x == 0; }))
1128+
return builder.getZeroAttr(arrayTy);
1129+
1130+
// Otherwise emit a constant array holding the characters.
1131+
SmallVector<mlir::Attribute, 32> elements;
1132+
elements.reserve(arraySize);
1133+
for (uint64_t i = 0; i < arraySize; ++i)
1134+
elements.push_back(mlir::cir::IntAttr::get(arrayEltTy, elementValues[i]));
1135+
1136+
auto elementsAttr = mlir::ArrayAttr::get(builder.getContext(), elements);
1137+
return builder.getConstArray(elementsAttr, arrayTy);
11091138
}
11101139

11111140
// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --input-file=%t.cir %s
3+
4+
const char16_t *test_utf16() {
5+
return u"你好世界";
6+
}
7+
8+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i, #cir.int<0> : !u16i]> : !cir.array<!u16i x 5>
9+
10+
const char32_t *test_utf32() {
11+
return U"你好世界";
12+
}
13+
14+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i, #cir.int<0> : !u32i]> : !cir.array<!u32i x 5>
15+
16+
const char16_t *test_zero16() {
17+
return u"\0\0\0\0";
18+
}
19+
20+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.zero : !cir.array<!u16i x 5>
21+
22+
const char32_t *test_zero32() {
23+
return U"\0\0\0\0";
24+
}
25+
26+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.zero : !cir.array<!u32i x 5>

0 commit comments

Comments
 (0)