Upstream MLIR PyTACO implementation.

bixia1 · bixia1 · commit b7fd91c84b4e · 2022-01-21T08:38:36.000-08:00
Add TACO tests to test/Integration/Dialect/SparseTensor/taco. Add the MLIR PyTACO implementation as tools under the directory. Reviewed By: aartbik, mehdi_amini Differential Revision: https://reviews.llvm.org/D117260
diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt
@@ -1,3 +1,4 @@
 numpy
 pybind11>=2.8.0
 PyYAML
+dataclasses
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/README.md b/mlir/test/Integration/Dialect/SparseTensor/taco/README.md
@@ -0,0 +1,27 @@
+# MLIR-PyTACO: Implementing PyTACO with MLIR
+
+TACO (http://tensor-compiler.org/) is a tensor algebra compiler. TACO defines
+PyTACO, a domain specific language in Python, for writing tensor algebra
+applications.
+
+This directory contains the implementation of PyTACO using MLIR. In particular,
+we implement a Python layer that accepts the PyTACO language, generates MLIR
+linalg.generic ops with sparse tensor annotations to represent the tensor
+computation, and invokes the MLIR sparse tensor code generator
+(https://mlir.llvm.org/docs/Dialects/SparseTensorOps/) as well as other MLIR
+compilation passes to generate an executable. Then, we invoke the MLIR execution
+engine to execute the program and pass the result back to the Python layer.
+
+As can be seen from the tests in this directory, in order to port a PyTACO
+program to MLIR-PyTACO, we basically only need to replace this line that imports
+PyTACO:
+
+```python
+import pytaco as pt
+```
+
+with this line to import MLIR-PyTACO:
+
+```python
+from tools import mlir_pytaco_api as pt
+```
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns
@@ -0,0 +1,50 @@
+1 1 12
+1 2 12
+1 3 12
+1 4 12
+1 5 12
+1 6 12
+1 7 12
+1 8 12
+1 9 12
+1 10 12
+1 11 12
+1 12 12
+1 13 12
+1 14 12
+1 15 12
+1 16 12
+1 17 12
+1 18 12
+1 19 12
+1 20 12
+1 21 12
+1 22 12
+1 23 12
+1 24 12
+1 25 12
+2 1 6
+2 2 6
+2 3 6
+2 4 6
+2 5 6
+2 6 6
+2 7 6
+2 8 6
+2 9 6
+2 10 6
+2 11 6
+2 12 6
+2 13 6
+2 14 6
+2 15 6
+2 16 6
+2 17 6
+2 18 6
+2 19 6
+2 20 6
+2 21 6
+2 22 6
+2 23 6
+2 24 6
+2 25 6
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns
@@ -0,0 +1,4 @@
+# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format
+1 37102
+2 -20.4138
+3 804927
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
@@ -0,0 +1,5 @@
+1 1 1 1.0
+1 2 2 2.0
+1 3 4 3.0
+2 1 1 1.0
+2 4 3 2.0
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx b/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx
@@ -0,0 +1,11 @@
+%%MatrixMarket matrix coordinate real symmetric
+%-------------------------------------------------------------------------------
+% To download a matrix for a real world application
+% https://math.nist.gov/MatrixMarket/
+%-------------------------------------------------------------------------------
+3 3 5
+1 1 37423.0879671
+2 1 -22.4050781162
+3 1 -300.654980157
+3 2 -.00869762944058
+3 3 805225.750212
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
@@ -0,0 +1,53 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+###### This PyTACO part is taken from the TACO open-source project. ######
+# See http://tensor-compiler.org/docs/data_analytics/index.html.
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Define formats for storing the sparse tensor and dense matrices.
+csf = pt.format([compressed, compressed, compressed])
+rm = pt.format([dense, dense])
+
+# Load a sparse three-dimensional tensor from file (stored in the FROSTT
+# format) and store it as a compressed sparse fiber tensor. We use a small
+# tensor for the purpose of testing. To run the program using the data from
+# the real application, please download the data from:
+# http://frostt.io/tensors/nell-2/
+B = pt.read(os.path.join(_SCRIPT_PATH, "data/nell-2.tns"), csf)
+
+# These two lines have been modified from the original program to use static
+# data to support result comparison.
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
+D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
+
+# Declare the result to be a dense matrix.
+A = pt.tensor([B.shape[0], 25], rm)
+
+# Declare index vars.
+i, j, k, l = pt.get_index_vars(4)
+
+# Define the MTTKRP computation.
+A[i, j] = B[i, k, l] * D[l, j] * C[k, j]
+
+##########################################################################
+
+# CHECK: Compare result True
+# Perform the MTTKRP computation and write the result to file.
+with tempfile.TemporaryDirectory() as test_dir:
+  actual_file = os.path.join(test_dir, "A.tns")
+  pt.write(actual_file, A)
+  actual = np.loadtxt(actual_file, np.float64)
+  expected = np.loadtxt(
+      os.path.join(_SCRIPT_PATH, "data/gold_A.tns"), np.float64)
+  print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
@@ -0,0 +1,54 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+###### This PyTACO part is taken from the TACO open-source project. ######
+# See http://tensor-compiler.org/docs/scientific_computing/index.html.
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Define formats for storing the sparse matrix and dense vectors.
+csr = pt.format([dense, compressed])
+dv = pt.format([dense])
+
+# Load a sparse matrix (stored in the Matrix Market format) and store it
+# as a CSR matrix.  The matrix in this test is a reduced version of the data
+# downloaded from here:
+# https://www.cise.ufl.edu/research/sparse/MM/Boeing/pwtk.tar.gz
+# In order to run the program using the matrix above, you can download the
+# matrix and replace this path with the actual path to the file.
+A = pt.read(os.path.join(_SCRIPT_PATH, "data/pwtk.mtx"), csr)
+
+# These two lines have been modified from the original program to use static
+# data to support result comparison.
+x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
+z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
+
+# Declare the result to be a dense vector
+y = pt.tensor([A.shape[0]], dv)
+
+# Declare index vars
+i, j = pt.get_index_vars(2)
+
+# Define the SpMV computation
+y[i] = A[i, j] * x[j] + z[i]
+
+##########################################################################
+
+# CHECK: Compare result True
+# Perform the SpMV computation and write the result to file
+with tempfile.TemporaryDirectory() as test_dir:
+  actual_file = os.path.join(test_dir, "y.tns")
+  pt.write(actual_file, y)
+  actual = np.loadtxt(actual_file, np.float64)
+  expected = np.loadtxt(
+      os.path.join(_SCRIPT_PATH, "data/gold_y.tns"), np.float64)
+  print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
@@ -0,0 +1,30 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import os
+import sys
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Ensure that we can run an unmodified PyTACO program with a simple tensor
+# algebra expression using tensor index notation, and produce the expected
+# result.
+i, j = pt.get_index_vars(2)
+A = pt.tensor([2, 3])
+B = pt.tensor([2, 3])
+C = pt.tensor([2, 3])
+D = pt.tensor([2, 3], dense)
+A.insert([0, 1], 10)
+A.insert([1, 2], 40)
+B.insert([0, 0], 20)
+B.insert([1, 2], 30)
+C.insert([0, 1], 5)
+C.insert([1, 2], 7)
+D[i, j] = A[i, j] + B[i, j] - C[i, j]
+
+# CHECK: [20. 5. 0. 0. 0. 63.]
+print(D.to_array().reshape(6))
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg
@@ -0,0 +1,2 @@
+# Files in this directory are tools, not tests.
+config.unsupported = True
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py

-Original file line number
+Diff line change
@@ @@ -1,3 +1,4 @@ @@
 numpy
 pybind11>=2.8.0
 PyYAML
 +dataclasses
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Files in this directory are tools, not tests.`
	`2`	`+config.unsupported = True`