Use upstream math dialect polynomial approximation pass (#5068)

- Note the approximations don't have finite math assumption, so they are
on by default.
diff --git a/iree/compiler/Conversion/LLVMToLLVM/BUILD b/iree/compiler/Conversion/LLVMToLLVM/BUILD
deleted file mode 100644
index 1656647..0000000
--- a/iree/compiler/Conversion/LLVMToLLVM/BUILD
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-cc_library(
-    name = "LLVMToLLVM",
-    srcs = [
-        "FastExpConversion.cpp",
-    ],
-    hdrs = [
-        "Passes.h",
-    ],
-    deps = [
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:LLVMDialect",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:Transforms",
-    ],
-)
diff --git a/iree/compiler/Conversion/LLVMToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LLVMToLLVM/CMakeLists.txt
deleted file mode 100644
index 583a8fb..0000000
--- a/iree/compiler/Conversion/LLVMToLLVM/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Conversion/LLVMToLLVM/BUILD                                    #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_cc_library(
-  NAME
-    LLVMToLLVM
-  HDRS
-    "Passes.h"
-  SRCS
-    "FastExpConversion.cpp"
-  DEPS
-    LLVMSupport
-    MLIRIR
-    MLIRLLVMIR
-    MLIRPass
-    MLIRTransforms
-  PUBLIC
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Conversion/LLVMToLLVM/FastExpConversion.cpp b/iree/compiler/Conversion/LLVMToLLVM/FastExpConversion.cpp
deleted file mode 100644
index df84517..0000000
--- a/iree/compiler/Conversion/LLVMToLLVM/FastExpConversion.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace {
-
-// Fast polynomial approximation of exp(x) using its reduced range exp(y)
-// where y is in the range [0, ln(2)], let y = x - floor(x / ln(2)) * ln(2)
-// = x - k * ln(2), exp(x) = exp(y) * 2^k. exp(y) is computed with 4th degree
-// polyomial: exp(y) = c0 + c1 * y + c2 * y^2 + c3 * y^3 + c4 * y^4
-struct FastExpConversionPattern : public OpRewritePattern<LLVM::ExpOp> {
-  using OpRewritePattern<LLVM::ExpOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(LLVM::ExpOp op,
-                                PatternRewriter &rewriter) const override {
-    constexpr float ln2Const = 0.693147181f;
-    constexpr float ln2InvConst = 1.44269504f;
-
-    // Least squares polynomial fit computed :
-    // cValues = np.polyfit(np.linspace(0, math.log(2), 10000), np.exp(x), 4)
-    constexpr float cValues[5] = {0.05924867f, 0.15514645f, 0.50308552f,
-                                  0.99968939f, 1.00000721531f};
-    auto loc = op.getLoc();
-    Value x = op.getOperand();
-
-    auto floatType = Float32Type::get(rewriter.getContext());
-    auto i32Type = IntegerType::get(rewriter.getContext(), 32);
-
-    Value ln2 = rewriter.create<LLVM::ConstantOp>(
-        loc, floatType, rewriter.getF32FloatAttr(ln2Const));
-    Value ln2Inv = rewriter.create<LLVM::ConstantOp>(
-        loc, floatType, rewriter.getF32FloatAttr(ln2InvConst));
-
-    // Compute reduced range input y = x - floor(x / ln(2)) * ln(2)
-    Value xL2Inv = rewriter.create<LLVM::FMulOp>(loc, floatType, x, ln2Inv);
-    Value kF32 = rewriter.create<LLVM::FFloorOp>(loc, floatType, xL2Inv);
-    Value kLn2 = rewriter.create<LLVM::FMulOp>(loc, floatType, kF32, ln2);
-    Value y = rewriter.create<LLVM::FSubOp>(loc, floatType, x, kLn2);
-
-    SmallVector<Value, 4> PConst(5);
-    for (int i = 0; i < 5; ++i) {
-      PConst[i] = rewriter.create<LLVM::ConstantOp>(
-          loc, floatType, rewriter.getF32FloatAttr(cValues[i]));
-    }
-    // Evaluate exp(y) = sum(c[i] * y**i, i)
-    Value expY = rewriter.create<LLVM::FMulOp>(loc, floatType, y, PConst[0]);
-    expY = rewriter.create<LLVM::FAddOp>(loc, floatType, expY, PConst[1]);
-    expY = rewriter.create<LLVM::FMulOp>(loc, floatType, expY, y);
-    expY = rewriter.create<LLVM::FAddOp>(loc, floatType, expY, PConst[2]);
-    expY = rewriter.create<LLVM::FMulOp>(loc, floatType, expY, y);
-    expY = rewriter.create<LLVM::FAddOp>(loc, floatType, expY, PConst[3]);
-    expY = rewriter.create<LLVM::FMulOp>(loc, floatType, expY, y);
-    expY = rewriter.create<LLVM::FAddOp>(loc, floatType, expY, PConst[4]);
-
-    // Compute exp2(k) with integer bitshift:
-    // exp2(k) = f32_bitcast((127 + k) << 23)
-    Value fPBias = rewriter.create<LLVM::ConstantOp>(
-        loc, i32Type, rewriter.getI32IntegerAttr(127));
-    Value k = rewriter.create<LLVM::FPToSIOp>(loc, i32Type, kF32);
-    Value kPlusfPBias = rewriter.create<LLVM::AddOp>(loc, i32Type, k, fPBias);
-    Value shiftConst = rewriter.create<LLVM::ConstantOp>(
-        loc, i32Type, rewriter.getI32IntegerAttr(23));
-    Value twoPowkI =
-        rewriter.create<LLVM::ShlOp>(loc, i32Type, kPlusfPBias, shiftConst);
-    Value twoPowk = rewriter.create<LLVM::BitcastOp>(loc, floatType, twoPowkI);
-    expY = rewriter.create<LLVM::FMulOp>(loc, floatType, expY, twoPowk);
-    rewriter.replaceOp(op, {expY});
-    // TODO(ataei): Handle overflow and underflow cases (e.g |k| > 128).
-    return success();
-  }
-};
-
-struct FastExpConversionPass
-    : public PassWrapper<FastExpConversionPass, OperationPass<ModuleOp>> {
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<LLVM::LLVMDialect>();
-  }
-  void runOnOperation() override;
-};
-
-}  // namespace
-
-void populateFastExpConversionPatterns(OwningRewritePatternList &patterns,
-                                       MLIRContext *context) {
-  patterns.insert<FastExpConversionPattern>(context);
-}
-
-void FastExpConversionPass::runOnOperation() {
-  auto moduleOp = getOperation();
-  auto context = moduleOp.getContext();
-  OwningRewritePatternList patterns;
-  populateFastExpConversionPatterns(patterns, context);
-  (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns));
-}
-
-std::unique_ptr<OperationPass<ModuleOp>>
-createFastExpApproximationConversionPass() {
-  return std::make_unique<FastExpConversionPass>();
-}
-
-static PassRegistration<OperationPass<ModuleOp>> pass(
-    "iree-codegen-linalg-to-llvm-fast-exp-conversion-pass",
-    "Convert llvm.intr.exp into its fast polynomial approximation version",
-    [] { return std::make_unique<FastExpConversionPass>(); });
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Conversion/LLVMToLLVM/Passes.h b/iree/compiler/Conversion/LLVMToLLVM/Passes.h
deleted file mode 100644
index e40fe90..0000000
--- a/iree/compiler/Conversion/LLVMToLLVM/Passes.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IREE_COMPILER_CONVERSION_LLVMTOLLVM_PASSES_H_
-#define IREE_COMPILER_CONVERSION_LLVMTOLLVM_PASSES_H_
-
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Creates a pass to rewrite llvm.intr.exp using its reduced range polynomial
-// approximation.
-std::unique_ptr<OperationPass<ModuleOp>>
-createFastExpApproximationConversionPass();
-
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_CONVERSION_LLVMTOLLVM_PASSES_H_
diff --git a/iree/compiler/Conversion/LinalgToLLVM/BUILD b/iree/compiler/Conversion/LinalgToLLVM/BUILD
index ce8cd2b..cbbeb45 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/BUILD
+++ b/iree/compiler/Conversion/LinalgToLLVM/BUILD
@@ -40,7 +40,6 @@
         "//iree/compiler/Conversion/Common",
         "//iree/compiler/Conversion/HLOToHLO",
         "//iree/compiler/Conversion/HLOToLinalg",
-        "//iree/compiler/Conversion/LLVMToLLVM",
         "//iree/compiler/Dialect/HAL/IR",
         "//iree/compiler/Dialect/HAL/IR:HALDialect",
         "//iree/compiler/Dialect/IREE/IR",
diff --git a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
index 32bb64d..cc3cccd 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -51,7 +51,6 @@
     iree::compiler::Conversion::Common
     iree::compiler::Conversion::HLOToHLO
     iree::compiler::Conversion::HLOToLinalg
-    iree::compiler::Conversion::LLVMToLLVM
     iree::compiler::Dialect::HAL::IR
     iree::compiler::Dialect::HAL::IR::HALDialect
     iree::compiler::Dialect::IREE::IR
diff --git a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
index 140a7e3..37e1bc0 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
@@ -642,6 +642,13 @@
                                        std::move(vectorToLoopsPatterns));
   }
 
+  // math dialect elementry functions -> polynomial form.
+  {
+    OwningRewritePatternList mathPatterns;
+    populateMathPolynomialApproximationPatterns(mathPatterns, &getContext());
+    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(mathPatterns));
+  }
+
   auto module = getOperation();
 
   LLVMTypeConverter converter(&getContext());
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
index c080d51..2fa6794 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -17,7 +17,6 @@
 #include "iree/compiler/Conversion/Common/Attributes.h"
 #include "iree/compiler/Conversion/Common/Passes.h"
 #include "iree/compiler/Conversion/HLOToHLO/Passes.h"
-#include "iree/compiler/Conversion/LLVMToLLVM/Passes.h"
 #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
 #include "iree/compiler/Dialect/Shape/Transforms/Passes.h"
 #include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
@@ -40,12 +39,6 @@
                    "linag.matmul"),
     llvm::cl::init(false));
 
-static llvm::cl::opt<bool> fastExpConversion(
-    "iree-codegen-linalg-to-llvm-fast-exp",
-    llvm::cl::desc("If true convert llvm.intr.exp into its range reduced "
-                   "polynomial approximation."),
-    llvm::cl::init(false));
-
 void addLinalgToLLVMPasses(OpPassManager &passManager) {
   // Distribute linalg op among a 3d grid of parallel threads. Tile each
   // workgroup thread memory then vectorize the linalg op.
@@ -85,11 +78,6 @@
 
   nestedModulePM.addPass(createCanonicalizerPass());
   nestedModulePM.addPass(createCSEPass());
-
-  // Approximate llvm.intr.exp with a 4-th order ploynmial in range[0, ln2].
-  if (fastExpConversion) {
-    nestedModulePM.addPass(createFastExpApproximationConversionPass());
-  }
 }
 
 void buildLLVMTransformPassPipeline(OpPassManager &passManager) {
diff --git a/iree/test/e2e/llvm_specific/BUILD b/iree/test/e2e/llvm_specific/BUILD
index dffd947..9c7d633 100644
--- a/iree/test/e2e/llvm_specific/BUILD
+++ b/iree/test/e2e/llvm_specific/BUILD
@@ -34,15 +34,3 @@
     driver = "dylib",
     target_backend = "dylib-llvm-aot",
 )
-
-iree_check_single_backend_test_suite(
-    name = "check_llvm-aot-exponential_fast",
-    srcs = [
-        "exponential.mlir",
-    ],
-    compiler_flags = [
-        "-iree-codegen-linalg-to-llvm-fast-exp=true",
-    ],
-    driver = "dylib",
-    target_backend = "dylib-llvm-aot",
-)
diff --git a/iree/test/e2e/llvm_specific/CMakeLists.txt b/iree/test/e2e/llvm_specific/CMakeLists.txt
index 6627261..b7f1add 100644
--- a/iree/test/e2e/llvm_specific/CMakeLists.txt
+++ b/iree/test/e2e/llvm_specific/CMakeLists.txt
@@ -23,17 +23,4 @@
     "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true"
 )
 
-iree_check_single_backend_test_suite(
-  NAME
-    check_llvm-aot-exponential_fast
-  SRCS
-    "exponential.mlir"
-  TARGET_BACKEND
-    "dylib-llvm-aot"
-  DRIVER
-    "dylib"
-  COMPILER_FLAGS
-    "-iree-codegen-linalg-to-llvm-fast-exp=true"
-)
-
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/llvm_specific/exponential.mlir b/iree/test/e2e/llvm_specific/exponential.mlir
deleted file mode 100644
index 6e91326..0000000
--- a/iree/test/e2e/llvm_specific/exponential.mlir
+++ /dev/null
@@ -1,27 +0,0 @@
-func @tensor() attributes { iree.module.export } {
-  %input = iree.unfoldable_constant dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>
-  %result = "mhlo.exponential"(%input) : (tensor<4xf32>) -> tensor<4xf32>
-  check.expect_almost_eq_const(%result, dense<[1.0, 2.7183, 7.3891, 54.5981]> : tensor<4xf32>) : tensor<4xf32>
-  return
-}
-
-func @scalar() attributes { iree.module.export } {
-  %input = iree.unfoldable_constant dense<1.0> : tensor<f32>
-  %result = "mhlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
-  check.expect_almost_eq_const(%result, dense<2.7183> : tensor<f32>) : tensor<f32>
-  return
-}
-
-func @double() attributes { iree.module.export } {
-  %input = iree.unfoldable_constant dense<1.0> : tensor<f64>
-  %result = "mhlo.exponential"(%input) : (tensor<f64>) -> tensor<f64>
-  check.expect_almost_eq_const(%result, dense<2.7183> : tensor<f64>) : tensor<f64>
-  return
-}
-
-func @negative() attributes { iree.module.export } {
-  %input = iree.unfoldable_constant dense<-1.0> : tensor<f32>
-  %result = "mhlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
-  check.expect_almost_eq_const(%result, dense<0.367879> : tensor<f32>) : tensor<f32>
-  return
-}