BibTeX
% Conference paper (SC '21): reverse-mode automatic differentiation of GPU kernels with Enzyme.
% ad_tools / ad_theotech are non-standard fields; standard styles ignore them
% (presumably classification tags for the hosting database -- confirm before renaming).
@inproceedings{Moses2021RMA,
  author      = {Moses, William S. and Churavy, Valentin and Paehler, Ludger and
                 H{\"u}ckelheim, Jan and Narayanan, Sri Hari Krishna and Schanen, Michel and
                 Doerfert, Johannes},
  title       = {Reverse-Mode Automatic Differentiation and Optimization of {GPU} Kernels via {Enzyme}},
  booktitle   = {Proceedings of the International Conference for High Performance Computing,
                 Networking, Storage and Analysis},
  series      = {SC '21},
  year        = {2021},
  publisher   = {Association for Computing Machinery},
  address     = {New York, NY, USA},
  location    = {St. Louis, Missouri},
  articleno   = {61},
  numpages    = {16},
  isbn        = {9781450384421},
  doi         = {10.1145/3458817.3476165},
  url         = {https://doi.org/10.1145/3458817.3476165},
  keywords    = {AD, GPU, HPC, LLVM, automatic differentiation, ROCm, CUDA},
  abstract    = {Computing derivatives is key to many algorithms in scientific computing and machine
                 learning such as optimization, uncertainty quantification, and stability analysis.
                 Enzyme is a LLVM compiler plugin that performs reverse-mode automatic
                 differentiation (AD) and thus generates high performance gradients of programs in
                 languages including C/C++, Fortran, Julia, and Rust. Prior to this work, Enzyme and
                 other AD tools were not capable of generating gradients of GPU kernels. Our paper
                 presents a combination of novel techniques that make Enzyme the first fully
                 automatic reverse-mode AD tool to generate gradients of GPU kernels. Since unlike
                 other tools Enzyme performs automatic differentiation within a general-purpose
                 compiler, we are able to introduce several novel GPU and AD-specific optimizations.
                 To show the generality and efficiency of our approach, we compute gradients of five
                 GPU-based HPC applications, executed on NVIDIA and AMD GPUs. All benchmarks run
                 within an order of magnitude of the original program's execution time. Without GPU
                 and AD-specific optimizations, gradients of GPU kernels either fail to run from a
                 lack of resources or have infeasible overhead. Finally, we demonstrate that
                 increasing the problem size by either increasing the number of threads or
                 increasing the work per thread, does not substantially impact the overhead from
                 differentiation.},
  ad_tools    = {Enzyme},
  ad_theotech = {Parallelism},
}
|