diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 08887abcd0f10..c65a27567d59d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -31,6 +31,7 @@
 // NOTE: Client code will need to include "mlir/ExecutionEngine/Float16bits.h"
 // if they want to use the `MLIR_SPARSETENSOR_FOREVERY_V` macro.
 
+#include <cassert>
 #include <cinttypes>
 #include <complex>
 #include <optional>
@@ -443,6 +444,56 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
                !isUniqueDLT(DimLevelType::LooseCompressedNuNo)),
               "isUniqueDLT definition is broken");
 
+/// Bit manipulations for affine encoding.
+///
+/// Note that because the indices in the mappings refer to dimensions
+/// and levels (and *not* the sizes of these dimensions and levels), the
+/// 64-bit encoding gives ample room for a compact encoding of affine
+/// operations in the higher bits. Pure permutations still allow for
+/// 60-bit indices. But non-permutations reserve 20-bits for the
+/// potential three components (index i, constant, index ii).
+///
+/// The compact encoding is as follows:
+///
+///  0xffffffffffffffff
+/// |0000      |                       60-bit idx| e.g. i
+/// |0001 floor|           20-bit const|20-bit idx| e.g. i floor c
+/// |0010 mod  |           20-bit const|20-bit idx| e.g. i mod c
+/// |0011 mul  |20-bit idx|20-bit const|20-bit idx| e.g. i + c * ii
+///
+/// This encoding provides sufficient generality for currently supported
+/// sparse tensor types. To generalize this more, we will need to provide
+/// a broader encoding scheme for affine functions. Also, the library
+/// encoding may be replaced with pure "direct-IR" code in the future.
+///
+constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
+  if (cf != 0) {
+    assert(cf <= 0xfffff && cm == 0 && i <= 0xfffff);
+    return (0x01L << 60) | (cf << 20) | i;
+  }
+  if (cm != 0) {
+    assert(cm <= 0xfffff && i <= 0xfffff);
+    return (0x02L << 60) | (cm << 20) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
+  return i;
+}
+constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
+  if (c != 0) {
+    assert(c <= 0xfffff && ii <= 0xfffff && i <= 0xfffff);
+    return (0x03L << 60) | (c << 20) | (ii << 40) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
+  return i;
+}
+constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
+constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
+constexpr bool isEncodedMul(uint64_t v) { return (v >> 60) == 0x03; }
+constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xfffffu; }
+constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 40) & 0xfffffu; }
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 22ae70a61d95e..5c09aa4e4b60c 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -38,13 +38,23 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
+  // Map from dimRank in to lvlRank out.
   template <typename T>
   inline void pushforward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < lvlRank; ++i)
-        out[i] = in[lvl2dim[i]];
+      for (uint64_t l = 0; l < lvlRank; l++) {
+        out[l] = in[dim2lvl[l]];
+      }
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c;
+      for (uint64_t l = 0; l < lvlRank; l++)
+        if (isFloor(l, i, c)) {
+          out[l] = in[i] / c;
+        } else if (isMod(l, i, c)) {
+          out[l] = in[i] % c;
+        } else {
+          out[l] = in[dim2lvl[l]];
+        }
     }
   }
 
@@ -52,13 +62,20 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
+  // Map from lvlRank in to dimRank out.
   template <typename T>
   inline void pushbackward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < dimRank; ++i)
-        out[i] = in[dim2lvl[i]];
+      for (uint64_t d = 0; d < dimRank; d++)
+        out[d] = in[lvl2dim[d]];
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c, ii;
+      for (uint64_t d = 0; d < dimRank; d++)
+        if (isMul(d, i, c, ii)) {
+          out[d] = in[i] + c * in[ii];
+        } else {
+          out[d] = in[lvl2dim[d]];
+        }
     }
   }
 
@@ -68,6 +85,10 @@ class MapRef final {
 private:
   bool isPermutationMap() const;
 
+  bool isFloor(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMod(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMul(uint64_t d, uint64_t &i, uint64_t &c, uint64_t &ii) const;
+
   const uint64_t dimRank;
   const uint64_t lvlRank;
   const uint64_t *const dim2lvl; // non-owning pointer
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 298ff09883556..98b412c8ec9eb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -688,25 +688,70 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
     return dimSizesBuffer;
   }
   // Otherwise, some code needs to be generated to set up the buffers.
-  // TODO: use the lvl2dim once available and deal with non-permutations!
+  // This code deals with permutations as well as non-permutations that
+  // arise from rank changing blocking.
   const auto dimToLvl = stt.getDimToLvl();
-  assert(dimToLvl.isPermutation());
-  SmallVector<Value> dim2lvlValues(dimRank);
-  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> dim2lvlValues(lvlRank); // for each lvl, expr in dim vars
+  SmallVector<Value> lvl2dimValues(dimRank); // for each dim, expr in lvl vars
   SmallVector<Value> lvlSizesValues(lvlRank);
+  // Generate dim2lvl.
+  assert(lvlRank == dimToLvl.getNumResults());
   for (Level l = 0; l < lvlRank; l++) {
-    // The `d`th source variable occurs in the `l`th result position.
-    Dimension d = dimToLvl.getDimPosition(l);
-    Value lvl = constantIndex(builder, loc, l);
-    Value dim = constantIndex(builder, loc, d);
-    dim2lvlValues[d] = lvl;
-    lvl2dimValues[l] = dim;
-    if (stt.isDynamicDim(d))
-      lvlSizesValues[l] =
-          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
-    else
-      lvlSizesValues[l] = dimShapesValues[d];
+    AffineExpr exp = dimToLvl.getResult(l);
+    // We expect:
+    //   (1) l = d
+    //   (2) l = d / c
+    //   (3) l = d % c
+    Dimension d = 0;
+    uint64_t cf = 0, cm = 0;
+    switch (exp.getKind()) {
+    case AffineExprKind::DimId:
+      d = exp.cast<AffineDimExpr>().getPosition();
+      break;
+    case AffineExprKind::FloorDiv:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cf = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    case AffineExprKind::Mod:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cm = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    default:
+      llvm::report_fatal_error("unsupported dim2lvl in sparse tensor type");
+    }
+    dim2lvlValues[l] = constantIndex(builder, loc, encodeDim(d, cf, cm));
+    lvl2dimValues[d] = constantIndex(builder, loc, l); // FIXME, use lvlToDim
+    // Compute the level sizes.
+    //   (1) l = d     : size(d)
+    //   (2) l = d / c : size(d) / c
+    //   (3) l = d % c : c
+    Value lvlSz;
+    if (cm == 0) {
+      lvlSz = dimShapesValues[d];
+      if (stt.isDynamicDim(d))
+        lvlSz = builder.create<memref::LoadOp>(loc, dimSizesBuffer,
+                                               constantIndex(builder, loc, d));
+      if (cf != 0)
+        lvlSz = builder.create<arith::DivUIOp>(loc, lvlSz,
+                                               constantIndex(builder, loc, cf));
+    } else {
+      lvlSz = constantIndex(builder, loc, cm);
+    }
+    lvlSizesValues[l] = lvlSz;
   }
+  // Return buffers.
   dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
   lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
   return allocaBuffer(builder, loc, lvlSizesValues);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
index ee4d6fa0d34b4..ace6ac8152a29 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
+#include "mlir/Dialect/SparseTensor/IR/Enums.h"
 
 mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l,
                                     const uint64_t *l2d)
     : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d),
       isPermutation(isPermutationMap()) {
   if (isPermutation) {
-    for (uint64_t i = 0; i < dimRank; i++)
-      assert(lvl2dim[dim2lvl[i]] == i);
+    for (uint64_t l = 0; l < lvlRank; l++)
+      assert(lvl2dim[dim2lvl[l]] == l);
   }
 }
 
@@ -22,11 +23,42 @@ bool mlir::sparse_tensor::MapRef::isPermutationMap() const {
   if (dimRank != lvlRank)
     return false;
   std::vector<bool> seen(dimRank, false);
-  for (uint64_t i = 0; i < dimRank; i++) {
-    const uint64_t j = dim2lvl[i];
-    if (j >= dimRank || seen[j])
+  for (uint64_t l = 0; l < lvlRank; l++) {
+    const uint64_t d = dim2lvl[l];
+    if (d >= dimRank || seen[d])
       return false;
-    seen[j] = true;
+    seen[d] = true;
   }
   return true;
 }
+
+bool mlir::sparse_tensor::MapRef::isFloor(uint64_t l, uint64_t &i,
+                                          uint64_t &c) const {
+  if (isEncodedFloor(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMod(uint64_t l, uint64_t &i,
+                                        uint64_t &c) const {
+  if (isEncodedMod(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMul(uint64_t d, uint64_t &i, uint64_t &c,
+                                        uint64_t &ii) const {
+  if (isEncodedMul(lvl2dim[d])) {
+    i = decodeIndex(lvl2dim[d]);
+    c = decodeMulc(lvl2dim[d]);
+    ii = decodeMuli(lvl2dim[d]);
+    return true;
+  }
+  return false;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index f5890ebb6f3ff..40805a179d4b3 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -24,8 +24,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
     : dimSizes(dimSizes, dimSizes + dimRank),
       lvlSizes(lvlSizes, lvlSizes + lvlRank),
       lvlTypes(lvlTypes, lvlTypes + lvlRank),
-      dim2lvlVec(dim2lvl, dim2lvl + dimRank),
-      lvl2dimVec(lvl2dim, lvl2dim + lvlRank),
+      dim2lvlVec(dim2lvl, dim2lvl + lvlRank),
+      lvl2dimVec(lvl2dim, lvl2dim + dimRank),
       map(dimRank, lvlRank, dim2lvlVec.data(), lvl2dimVec.data()) {
   assert(dimSizes && lvlSizes && lvlTypes && dim2lvl && lvl2dim);
   // Validate dim-indexed parameters.
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 36d888a08de6d..7a6756e689b27 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -185,8 +185,8 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
   const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
   const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
   ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
-  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
-  ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
+  ASSERT_USIZE_EQ(dim2lvlRef, lvlRank);
+  ASSERT_USIZE_EQ(lvl2dimRef, dimRank);
   const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
@@ -423,10 +423,10 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
     ASSERT_NO_STRIDE(cref); \
     ASSERT_NO_STRIDE(vref); \
     const uint64_t dimRank = reader.getRank(); \
-    const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \
+    const uint64_t lvlRank = MEMREF_GET_USIZE(dim2lvlRef); \
     const uint64_t cSize = MEMREF_GET_USIZE(cref); \
     const uint64_t vSize = MEMREF_GET_USIZE(vref); \
-    ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \
+    ASSERT_USIZE_EQ(lvl2dimRef, dimRank); \
     assert(cSize >= lvlRank * vSize); \
     assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \
     (void)dimRank; \
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
new file mode 100755
index 0000000000000..d0b5e77bd4a72
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#BSR = #sparse_tensor.encoding<{
+  map = (i, j) ->
+    ( i floordiv 2 : dense
+    , j floordiv 2 : compressed
+    , i mod 2      : dense
+    , j mod 2      : dense
+    )
+}>
+
+!Filename = !llvm.ptr
+
+//
+// Example 2x2 block storage:
+//
+//  +-----+-----+-----+    +-----+-----+-----+
+//  | 1 2 | . . | 4 . |    | 1 2 |     | 4 0 |
+//  | . 3 | . . | . 5 |    | 0 3 |     | 0 5 |
+//  +-----+-----+-----+ => +-----+-----+-----+
+//  | . . | 6 7 | . . |    |     | 6 7 |     |
+//  | . . | 8 . | . . |    |     | 8 0 |     |
+//  +-----+-----+-----+    +-----+-----+-----+
+//
+// Stored as:
+//
+//    positions[1]   : 0 2 3
+//    coordinates[1] : 0 2 1
+//    values         : 1.000000 2.000000 0.000000 3.000000 4.000000 0.000000 0.000000 5.000000 6.000000 7.000000 8.000000 0.000000
+//
+module {
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %f0 = arith.constant 0.0 : f64
+
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %A = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #BSR>
+
+    // CHECK:      ( 0, 2, 3 )
+    // CHECK-NEXT: ( 0, 2, 1 )
+    // CHECK-NEXT: ( 1, 2, 0, 3, 4, 0, 0, 5, 6, 7, 8, 0 )
+    %pos = sparse_tensor.positions %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecp : vector<3xindex>
+    %crd = sparse_tensor.coordinates %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecc : vector<3xindex>
+    %val = sparse_tensor.values %A : tensor<?x?xf64, #BSR> to memref<?xf64>
+    %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<12xf64>
+    vector.print %vecv : vector<12xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %A: tensor<?x?xf64, #BSR>
+
+    return
+  }
+}
diff --git a/mlir/test/Integration/data/block.mtx b/mlir/test/Integration/data/block.mtx
new file mode 100755
index 0000000000000..9bb3ea7d50a10
--- /dev/null
+++ b/mlir/test/Integration/data/block.mtx
@@ -0,0 +1,10 @@
+%%MatrixMarket matrix coordinate real general
+4 6 8
+1 1 1.0
+1 2 2.0
+1 5 4.0
+2 2 3.0
+2 6 5.0
+3 3 6.0
+3 4 7.0
+4 3 8.0
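//
// Illustrative sketch, not part of the patch above: a small standalone C++
// program that exercises the affine encoding helpers added to Enums.h, using
// the same 2x2 BSR map as the block.mlir test, i.e.
//   (i, j) -> (i floordiv 2, j floordiv 2, i mod 2, j mod 2).
// The encode/decode helpers are adapted (asserts elided) from the patch so the
// example is self-contained; the hypothetical main() below simply mimics what
// MapRef::pushforward computes for this map.
//
#include <cassert>
#include <cstdint>
#include <cstdio>

// Compact 64-bit encoding of one dim2lvl entry, as introduced in Enums.h.
constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
  if (cf != 0)
    return (0x01L << 60) | (cf << 20) | i; // level = dim i floordiv cf
  if (cm != 0)
    return (0x02L << 60) | (cm << 20) | i; // level = dim i mod cm
  return i;                                // level = dim i (plain index)
}
constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xfffffu; }
constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }

int main() {
  // dim2lvl for the BSR map: one encoded entry per level.
  const uint64_t dim2lvl[4] = {
      encodeDim(0, 2, 0), // l0 = i floordiv 2
      encodeDim(1, 2, 0), // l1 = j floordiv 2
      encodeDim(0, 0, 2), // l2 = i mod 2
      encodeDim(1, 0, 2), // l3 = j mod 2
  };
  // Simplified pushforward: map dim coordinates (i, j) to level coordinates.
  const uint64_t dims[2] = {3, 4}; // element at row 3, column 4
  uint64_t lvls[4];
  for (uint64_t l = 0; l < 4; l++) {
    const uint64_t v = dim2lvl[l];
    if (isEncodedFloor(v))
      lvls[l] = dims[decodeIndex(v)] / decodeConst(v);
    else if (isEncodedMod(v))
      lvls[l] = dims[decodeIndex(v)] % decodeConst(v);
    else
      lvls[l] = dims[v];
  }
  // Element (3, 4) lands in block (1, 2) at in-block position (1, 0).
  assert(lvls[0] == 1 && lvls[1] == 2 && lvls[2] == 1 && lvls[3] == 0);
  std::printf("lvls = (%llu, %llu, %llu, %llu)\n",
              (unsigned long long)lvls[0], (unsigned long long)lvls[1],
              (unsigned long long)lvls[2], (unsigned long long)lvls[3]);
  return 0;
}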