Abstract

We show how to build a compiler for a sparse array language that supports shape operators such as reshaping or concatenating arrays, in addition to compute operators. Existing sparse array programming systems implement generic shape operators for only some sparse data structures, reduce shape operators on other data structures to those, and do not support fusion. Our system compiles sparse array expressions to code that efficiently iterates over reshaped views of irregular sparse data structures, without needing to materialize temporary storage for intermediates. Our evaluation shows that our approach generates sparse array code competitive with popular sparse array libraries: our generated shape operators achieve geometric mean speed-ups of 1.66x-15.3x when compared to hand-written kernels in scipy.sparse and 1.67x-651x when compared to generic implementations in pydata/sparse. For operators that require data structure conversions in these libraries, our generated code achieves geometric mean speed-ups of 7.29x-13.0x when compared to scipy.sparse and 21.3x-511x when compared to pydata/sparse. Finally, our evaluation demonstrates that fusing shape and compute operators improves the performance of several expressions by geometric mean speed-ups of 1.22x-2.23x.

Article

pdf

ACM Digital Library

Code

Code is publicly available here.

BibTeX

@comment{root2024burrito: typed as @article because the work is published in a
journal (journal/volume/number fields, no booktitle). Article-number venue:
articleno/numpages are used in place of a page range.}
@article{root2024burrito,
  author     = {Root, Alexander J and Yan, Bobby and Liu, Peiming and Gyurgyik, Christophe and Bik, Aart J. C. and Kjolstad, Fredrik},
  title      = {Compilation of Shape Operators on Sparse Arrays},
  journal    = {Proc. ACM Program. Lang.},
  volume     = {8},
  number     = {OOPSLA2},
  articleno  = {312},
  numpages   = {27},
  year       = {2024},
  month      = oct,
  issue_date = {October 2024},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  doi        = {10.1145/3689752},
  url        = {https://doi.org/10.1145/3689752},
  keywords   = {sparse array programming, sparse data structures, sparse iteration theory},
  abstract   = {We show how to build a compiler for a sparse array language that supports shape operators such as reshaping or concatenating arrays, in addition to compute operators. Existing sparse array programming systems implement generic shape operators for only some sparse data structures, reduce shape operators on other data structures to those, and do not support fusion. Our system compiles sparse array expressions to code that efficiently iterates over reshaped views of irregular sparse data structures, without needing to materialize temporary storage for intermediates. Our evaluation shows that our approach generates sparse array code competitive with popular sparse array libraries: our generated shape operators achieve geometric mean speed-ups of 1.66x-15.3x when compared to hand-written kernels in scipy.sparse and 1.67x-651x when compared to generic implementations in pydata/sparse. For operators that require data structure conversions in these libraries, our generated code achieves geometric mean speed-ups of 7.29x-13.0x when compared to scipy.sparse and 21.3x-511x when compared to pydata/sparse. Finally, our evaluation demonstrates that fusing shape and compute operators improves the performance of several expressions by geometric mean speed-ups of 1.22x-2.23x.},
}