diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 08887abcd0f10..c65a27567d59d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -31,6 +31,7 @@
 // NOTE: Client code will need to include "mlir/ExecutionEngine/Float16bits.h"
 // if they want to use the `MLIR_SPARSETENSOR_FOREVERY_V` macro.
 
+#include <cassert>
 #include <cinttypes>
 #include <complex>
 #include <optional>
@@ -443,6 +444,56 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
                !isUniqueDLT(DimLevelType::LooseCompressedNuNo)),
               "isUniqueDLT definition is broken");
 
+/// Bit manipulations for affine encoding.
+///
+/// Note that because the indices in the mappings refer to dimensions
+/// and levels (and *not* the sizes of these dimensions and levels), the
+/// 64-bit encoding gives ample room for a compact encoding of affine
+/// operations in the higher bits. Pure permutations still allow for
+/// 60-bit indices. But non-permutations reserve 20-bits for the
+/// potential three components (index i, constant, index ii).
+///
+/// The compact encoding is as follows:
+///
+///  0xffffffffffffffff
+/// |0000      |                       60-bit idx| e.g. i
+/// |0001 floor|           20-bit const|20-bit idx| e.g. i floor c
+/// |0010 mod  |           20-bit const|20-bit idx| e.g. i mod c
+/// |0011 mul  |20-bit idx|20-bit const|20-bit idx| e.g. i + c * ii
+///
+/// This encoding provides sufficient generality for currently supported
+/// sparse tensor types. To generalize this more, we will need to provide
+/// a broader encoding scheme for affine functions. Also, the library
+/// encoding may be replaced with pure "direct-IR" code in the future.
+///
+constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
+  if (cf != 0) {
+    assert(cf <= 0xfffff && cm == 0 && i <= 0xfffff);
+    return (0x01L << 60) | (cf << 20) | i;
+  }
+  if (cm != 0) {
+    assert(cm <= 0xfffff && i <= 0xfffff);
+    return (0x02L << 60) | (cm << 20) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
+  return i;
+}
+constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
+  if (c != 0) {
+    assert(c <= 0xfffff && ii <= 0xfffff && i <= 0xfffff);
+    return (0x03L << 60) | (c << 20) | (ii << 40) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
+  return i;
+}
+constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
+constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
+constexpr bool isEncodedMul(uint64_t v) { return (v >> 60) == 0x03; }
+constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xfffffu; }
+constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 40) & 0xfffffu; }
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 22ae70a61d95e..5c09aa4e4b60c 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,13 +38,23 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
+  // Map from dimRank in to lvlRank out.
   template <typename T>
   inline void pushforward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < lvlRank; ++i)
-        out[i] = in[lvl2dim[i]];
+      for (uint64_t l = 0; l < lvlRank; l++) {
+        out[l] = in[dim2lvl[l]];
+      }
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c;
+      for (uint64_t l = 0; l < lvlRank; l++)
+        if (isFloor(l, i, c)) {
+          out[l] = in[i] / c;
+        } else if (isMod(l, i, c)) {
+          out[l] = in[i] % c;
+        } else {
+          out[l] = in[dim2lvl[l]];
+        }
     }
   }
 
@@ -52,13 +62,20 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
+  // Map from lvlRank in to dimRank out.
   template <typename T>
   inline void pushbackward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < dimRank; ++i)
-        out[i] = in[dim2lvl[i]];
+      for (uint64_t d = 0; d < dimRank; d++)
+        out[d] = in[lvl2dim[d]];
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c, ii;
+      for (uint64_t d = 0; d < dimRank; d++)
+        if (isMul(d, i, c, ii)) {
+          out[d] = in[i] + c * in[ii];
+        } else {
+          out[d] = in[lvl2dim[d]];
+        }
     }
   }
 
@@ -68,6 +85,10 @@ class MapRef final {
 private:
   bool isPermutationMap() const;
 
+  bool isFloor(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMod(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMul(uint64_t d, uint64_t &i, uint64_t &c, uint64_t &ii) const;
+
   const uint64_t dimRank;
   const uint64_t lvlRank;
   const uint64_t *const dim2lvl; // non-owning pointer
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 298ff09883556..98b412c8ec9eb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -688,25 +688,70 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
     return dimSizesBuffer;
   }
   // Otherwise, some code needs to be generated to set up the buffers.
-  // TODO: use the lvl2dim once available and deal with non-permutations!
+  // This code deals with permutations as well as non-permutations that
+  // arise from rank changing blocking.
   const auto dimToLvl = stt.getDimToLvl();
-  assert(dimToLvl.isPermutation());
-  SmallVector<Value> dim2lvlValues(dimRank);
-  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> dim2lvlValues(lvlRank); // for each lvl, expr in dim vars
+  SmallVector<Value> lvl2dimValues(dimRank); // for each dim, expr in lvl vars
   SmallVector<Value> lvlSizesValues(lvlRank);
+  // Generate dim2lvl.
+  assert(lvlRank == dimToLvl.getNumResults());
   for (Level l = 0; l < lvlRank; l++) {
-    // The `d`th source variable occurs in the `l`th result position.
-    Dimension d = dimToLvl.getDimPosition(l);
-    Value lvl = constantIndex(builder, loc, l);
-    Value dim = constantIndex(builder, loc, d);
-    dim2lvlValues[d] = lvl;
-    lvl2dimValues[l] = dim;
-    if (stt.isDynamicDim(d))
-      lvlSizesValues[l] =
-          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
-    else
-      lvlSizesValues[l] = dimShapesValues[d];
+    AffineExpr exp = dimToLvl.getResult(l);
+    // We expect:
+    //   (1) l = d
+    //   (2) l = d / c
+    //   (3) l = d % c
+    Dimension d = 0;
+    uint64_t cf = 0, cm = 0;
+    switch (exp.getKind()) {
+    case AffineExprKind::DimId:
+      d = exp.cast<AffineDimExpr>().getPosition();
+      break;
+    case AffineExprKind::FloorDiv:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cf = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    case AffineExprKind::Mod:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cm = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    default:
+      llvm::report_fatal_error("unsupported dim2lvl in sparse tensor type");
+    }
+    dim2lvlValues[l] = constantIndex(builder, loc, encodeDim(d, cf, cm));
+    lvl2dimValues[d] = constantIndex(builder, loc, l); // FIXME, use lvlToDim
+    // Compute the level sizes.
+    //   (1) l = d     : size(d)
+    //   (2) l = d / c : size(d) / c
+    //   (3) l = d % c : c
+    Value lvlSz;
+    if (cm == 0) {
+      lvlSz = dimShapesValues[d];
+      if (stt.isDynamicDim(d))
+        lvlSz = builder.create<memref::LoadOp>(loc, dimSizesBuffer,
+                                               constantIndex(builder, loc, d));
+      if (cf != 0)
+        lvlSz = builder.create<arith::DivUIOp>(loc, lvlSz,
+                                               constantIndex(builder, loc, cf));
+    } else {
+      lvlSz = constantIndex(builder, loc, cm);
+    }
+    lvlSizesValues[l] = lvlSz;
   }
+  // Return buffers.
   dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
   lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
   return allocaBuffer(builder, loc, lvlSizesValues);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
index ee4d6fa0d34b4..ace6ac8152a29 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
+#include "mlir/Dialect/SparseTensor/IR/Enums.h"
 
 mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l,
                                     const uint64_t *l2d)
     : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d),
       isPermutation(isPermutationMap()) {
   if (isPermutation) {
-    for (uint64_t i = 0; i < dimRank; i++)
-      assert(lvl2dim[dim2lvl[i]] == i);
+    for (uint64_t l = 0; l < lvlRank; l++)
+      assert(lvl2dim[dim2lvl[l]] == l);
   }
 }
 
@@ -22,11 +23,42 @@ bool mlir::sparse_tensor::MapRef::isPermutationMap() const {
   if (dimRank != lvlRank)
     return false;
   std::vector<bool> seen(dimRank, false);
-  for (uint64_t i = 0; i < dimRank; i++) {
-    const uint64_t j = dim2lvl[i];
-    if (j >= dimRank || seen[j])
+  for (uint64_t l = 0; l < lvlRank; l++) {
+    const uint64_t d = dim2lvl[l];
+    if (d >= dimRank || seen[d])
       return false;
-    seen[j] = true;
+    seen[d] = true;
   }
   return true;
 }
+
+bool mlir::sparse_tensor::MapRef::isFloor(uint64_t l, uint64_t &i,
+                                          uint64_t &c) const {
+  if (isEncodedFloor(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMod(uint64_t l, uint64_t &i,
+                                        uint64_t &c) const {
+  if (isEncodedMod(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMul(uint64_t d, uint64_t &i, uint64_t &c,
+                                        uint64_t &ii) const {
+  if (isEncodedMul(lvl2dim[d])) {
+    i = decodeIndex(lvl2dim[d]);
+    c = decodeMulc(lvl2dim[d]);
+    ii = decodeMuli(lvl2dim[d]);
+    return true;
+  }
+  return false;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index f5890ebb6f3ff..40805a179d4b3 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -24,8 +24,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
     : dimSizes(dimSizes, dimSizes + dimRank),
       lvlSizes(lvlSizes, lvlSizes + lvlRank),
       lvlTypes(lvlTypes, lvlTypes + lvlRank),
-      dim2lvlVec(dim2lvl, dim2lvl + dimRank),
-      lvl2dimVec(lvl2dim, lvl2dim + lvlRank),
+      dim2lvlVec(dim2lvl, dim2lvl + lvlRank),
+      lvl2dimVec(lvl2dim, lvl2dim + dimRank),
       map(dimRank, lvlRank, dim2lvlVec.data(), lvl2dimVec.data()) {
   assert(dimSizes && lvlSizes && lvlTypes && dim2lvl && lvl2dim);
   // Validate dim-indexed parameters.
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 36d888a08de6d..7a6756e689b27 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -185,8 +185,8 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
   const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
   const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
   ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
-  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
-  ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
+  ASSERT_USIZE_EQ(dim2lvlRef, lvlRank);
+  ASSERT_USIZE_EQ(lvl2dimRef, dimRank);
   const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
@@ -423,10 +423,10 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
     ASSERT_NO_STRIDE(cref); \
     ASSERT_NO_STRIDE(vref); \
     const uint64_t dimRank = reader.getRank(); \
-    const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \
+    const uint64_t lvlRank = MEMREF_GET_USIZE(dim2lvlRef); \
     const uint64_t cSize = MEMREF_GET_USIZE(cref); \
     const uint64_t vSize = MEMREF_GET_USIZE(vref); \
-    ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \
+    ASSERT_USIZE_EQ(lvl2dimRef, dimRank); \
     assert(cSize >= lvlRank * vSize); \
     assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \
     (void)dimRank; \
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
new file mode 100755
index 0000000000000..d0b5e77bd4a72
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#BSR = #sparse_tensor.encoding<{
+  map = (i, j) ->
+    ( i floordiv 2 : dense
+    , j floordiv 2 : compressed
+    , i mod 2      : dense
+    , j mod 2      : dense
+    )
+}>
+
+!Filename = !llvm.ptr
+
+//
+// Example 2x2 block storage:
+//
+//  +-----+-----+-----+    +-----+-----+-----+
+//  | 1 2 | . . | 4 . |    | 1 2 |     | 4 0 |
+//  | . 3 | . . | . 5 |    | 0 3 |     | 0 5 |
+//  +-----+-----+-----+ => +-----+-----+-----+
+//  | . . | 6 7 | . . |    |     | 6 7 |     |
+//  | . . | 8 . | . . |    |     | 8 0 |     |
+//  +-----+-----+-----+    +-----+-----+-----+
+//
+// Stored as:
+//
+//    positions[1]   : 0 2 3
+//    coordinates[1] : 0 2 1
+//    values         : 1.000000 2.000000 0.000000 3.000000 4.000000 0.000000 0.000000 5.000000 6.000000 7.000000 8.000000 0.000000
+//
+module {
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %f0 = arith.constant 0.0 : f64
+
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %A = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #BSR>
+
+    // CHECK:      ( 0, 2, 3 )
+    // CHECK-NEXT: ( 0, 2, 1 )
+    // CHECK-NEXT: ( 1, 2, 0, 3, 4, 0, 0, 5, 6, 7, 8, 0 )
+    %pos = sparse_tensor.positions %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecp : vector<3xindex>
+    %crd = sparse_tensor.coordinates %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecc : vector<3xindex>
+    %val = sparse_tensor.values %A : tensor<?x?xf64, #BSR> to memref<?xf64>
+    %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<12xf64>
+    vector.print %vecv : vector<12xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %A: tensor<?x?xf64, #BSR>
+
+    return
+  }
+}
diff --git a/mlir/test/Integration/data/block.mtx b/mlir/test/Integration/data/block.mtx
new file mode 100755
index 0000000000000..9bb3ea7d50a10
--- /dev/null
+++ b/mlir/test/Integration/data/block.mtx
@@ -0,0 +1,10 @@
+%%MatrixMarket matrix coordinate real general
+4 6 8
+1 1 1.0
+1 2 2.0
+1 5 4.0
+2 2 3.0
+2 6 5.0
+3 3 6.0
+3 4 7.0
+4 3 8.0
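//
// Illustrative sketch, not part of the patch above: a small standalone C++
// program that exercises the affine encoding helpers added to Enums.h, using
// the same 2x2 BSR map as the block.mlir test, i.e.
//   (i, j) -> (i floordiv 2, j floordiv 2, i mod 2, j mod 2).
// The encode/decode helpers are adapted (asserts elided) from the patch so the
// example is self-contained; the hypothetical main() below simply mimics what
// MapRef::pushforward computes for this map.
//
#include <cassert>
#include <cstdint>
#include <cstdio>

// Compact 64-bit encoding of one dim2lvl entry, as introduced in Enums.h.
constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
  if (cf != 0)
    return (0x01L << 60) | (cf << 20) | i; // level = dim i floordiv cf
  if (cm != 0)
    return (0x02L << 60) | (cm << 20) | i; // level = dim i mod cm
  return i;                                // level = dim i (plain index)
}
constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xfffffu; }
constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }

int main() {
  // dim2lvl for the BSR map: one encoded entry per level.
  const uint64_t dim2lvl[4] = {
      encodeDim(0, 2, 0), // l0 = i floordiv 2
      encodeDim(1, 2, 0), // l1 = j floordiv 2
      encodeDim(0, 0, 2), // l2 = i mod 2
      encodeDim(1, 0, 2), // l3 = j mod 2
  };
  // Simplified pushforward: map dim coordinates (i, j) to level coordinates.
  const uint64_t dims[2] = {3, 4}; // element at row 3, column 4
  uint64_t lvls[4];
  for (uint64_t l = 0; l < 4; l++) {
    const uint64_t v = dim2lvl[l];
    if (isEncodedFloor(v))
      lvls[l] = dims[decodeIndex(v)] / decodeConst(v);
    else if (isEncodedMod(v))
      lvls[l] = dims[decodeIndex(v)] % decodeConst(v);
    else
      lvls[l] = dims[v];
  }
  // Element (3, 4) lands in block (1, 2) at in-block position (1, 0).
  assert(lvls[0] == 1 && lvls[1] == 2 && lvls[2] == 1 && lvls[3] == 0);
  std::printf("lvls = (%llu, %llu, %llu, %llu)\n",
              (unsigned long long)lvls[0], (unsigned long long)lvls[1],
              (unsigned long long)lvls[2], (unsigned long long)lvls[3]);
  return 0;
}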