Bump to llvm/llvm-project@b959532 (#18384)

This PR contains reverts of the following upstream commits:

- https://github.com/llvm/llvm-project/commit/b6603e1bf11dee4761e49af6581c8b8f074b705d
  Causes a build failure in
  https://github.com/llvm/torch-mlir/blob/main/lib/Dialect/Torch/Transforms/InlineGlobalSlots.cpp
- https://github.com/iree-org/llvm-project/commit/d7073c527457dc0a71126381afb3c6f0efa1821c
  Causes various errors with EmitC and a failure in
  https://github.com/iree-org/iree/blob/main/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
- https://github.com/llvm/llvm-project/commit/1387ba48a312b6e9b174d850f8c9a1322f44c623
  Causes a failure in
  https://github.com/iree-org/iree/blob/main/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir

---------

Signed-off-by: MaheshRavishankar <mahesh.ravishankar@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
index 086f1a4..a7f8804 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
@@ -144,6 +144,9 @@
       auto tilingOp = sliceOp.getSource().getDefiningOp<TilingInterface>();
       if (!tilingOp)
         continue;
+      if (isa<tensor::PadOp>(sliceOp.getSource().getDefiningOp())) {
+        continue;
+      }
       // Restrict to fully parallel ops for now for simplicity.
       auto isParallel = [](utils::IteratorType it) {
         return linalg::isParallelIterator(it);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
index 6380fa0..b801927 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
@@ -117,11 +117,18 @@
   IRRewriter rewriter(context);
   SmallVector<OpFoldResult> tileSizesOfr =
       getAsIndexOpFoldResult(context, tileSizes);
+  scf::SCFTilingOptions tilingOptions;
+  tilingOptions.setTileSizes(tileSizesOfr);
+  scf::SCFTileAndFuseOptions tileAndFuseOptions;
+  tileAndFuseOptions.setTilingOptions(tilingOptions);
+  tileAndFuseOptions.setFusionControlFn(
+      [](tensor::ExtractSliceOp sliceOp, OpResult origProducer,
+         bool isDestinationOperand) -> std::tuple<bool, bool> {
+        return {!isa<tensor::PadOp>(origProducer.getOwner()), false};
+      });
   FailureOr<scf::SCFTileAndFuseResult> tileAndFuseResult =
-      scf::tileConsumerAndFuseProducersUsingSCF(
-          rewriter, consumerOp,
-          scf::SCFTileAndFuseOptions().setTilingOptions(
-              scf::SCFTilingOptions().setTileSizes(tileSizesOfr)));
+      scf::tileConsumerAndFuseProducersUsingSCF(rewriter, consumerOp,
+                                                tileAndFuseOptions);
 
   if (failed(tileAndFuseResult)) {
     return consumerOp.emitOpError("failed tiling and fusing producers");
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 154ab37..659949a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -546,7 +546,7 @@
                                      /*scopeline=*/1,
                                      LLVM::DISubprogramFlags::Definition |
                                          LLVM::DISubprogramFlags::Optimized,
-                                     subroutineTypeAttr);
+                                     subroutineTypeAttr, /*retainedNodes =*/{});
 }
 
 // Returns the most local DISubprogramAttr starting from |forOp|.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
index 40c8314..ce7e3b7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
@@ -183,6 +183,9 @@
     // Traverse the slices in BFS fashion.
     tensor::ExtractSliceOp candidateSliceOp = candidates.front();
     candidates.pop_front();
+    if (candidateSliceOp.getSource().getDefiningOp<tensor::PadOp>()) {
+      continue;
+    }
 
     // Materialize the slice of the producer in place.
     std::optional<scf::SCFFuseProducerOfSliceResult> fusedProducer =
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 085448c..f6935c7 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 085448c918aa3b730cdd3e497892cfeff0ed60a6
+Subproject commit f6935c777f675490ecb2327887dbac5c7d7fce1f