Fix low-frequency typos in compiler/Codegen. NFC. (3/6) (#23603)

Preparation for adding a typos pre-commit spell checker (coming in 6/6). Rename
gpu_pack_to_instrinsics.mlir -> gpu_pack_to_intrinsics.mlir.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir
index 8d2616d..162cf2d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir
@@ -55,7 +55,7 @@
 
 // -----
 
-func.func @batch_mmt4d_with_extened_inputs(%arg0: tensor<1x10x32x8x1xi8>, %arg1: tensor<1x80x32x4x1xi8>, %arg2: tensor<1x10x80x8x4xi32>) -> tensor<1x10x80x8x4xi32> attributes {
+func.func @batch_mmt4d_with_extended_inputs(%arg0: tensor<1x10x32x8x1xi8>, %arg1: tensor<1x80x32x4x1xi8>, %arg2: tensor<1x10x80x8x4xi32>) -> tensor<1x10x80x8x4xi32> attributes {
   hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "mmt4d", target_triple="x86_64-xyz-xyz", cpu_features=""}>
 } {
   %c0_i32 = arith.constant 0 : i32
@@ -83,7 +83,7 @@
 }
 
 // CHECK:      #[[MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-// CHECK:      func.func @batch_mmt4d_with_extened_inputs
+// CHECK:      func.func @batch_mmt4d_with_extended_inputs
 // CHECK-SAME:   %[[LHS:.+]]: tensor<1x10x32x8x1xi8>,
 // CHECK-SAME:   %[[RHS:.+]]: tensor<1x80x32x4x1xi8>,
 // CHECK-SAME:   %[[OUT:.+]]: tensor<1x10x80x8x4xi32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertUnsupportedFloatArithPass.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertUnsupportedFloatArithPass.cpp
index 3f9d38f..377a122 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConvertUnsupportedFloatArithPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConvertUnsupportedFloatArithPass.cpp
@@ -7,7 +7,7 @@
 //===--------------- ConvertUnsupportedFloatArithPass.cpp ----------------===//
 //
 //   Emulate arith and vector floating point operations that use float types
-//   which are unspported on a target by inserting extf/truncf pairs around all
+//   which are unsupported on a target by inserting extf/truncf pairs around all
 //   such operations in order to produce arithmetic that can be performed while
 //   preserving the original rounding behavior.
 //
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
index 8f54333..e5f37c2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
@@ -97,7 +97,7 @@
   }
 
   // 3. Calculate new tiling levels.
-  // Note that this will basically erase the _H_ dims from the orignal lowering
+  // Note that this will basically erase the _H_ dims from the original lowering
   // config.
   auto dims = linalg::inferConvolutionDims(convOp);
   SmallVector<unsigned> hDimsToErase = {dims->outputImage[0],
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
index cce4970..f981bc8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
@@ -75,7 +75,7 @@
   std::optional<PackUnPackControlFn> controlFn;
 };
 
-/// A warpper pattern that calls linalg::lowerUnPack on linalg::UnPackOp. It
+/// A wrapper pattern that calls linalg::lowerUnPack on linalg::UnPackOp. It
 /// lowers a linalg.unpack op to tensor.empty + linalg.transpose +
 /// tensor.collapse_shape + tensor.extract_slice ops.
 struct LowerUnPackPattern : public OpRewritePattern<linalg::UnPackOp> {
diff --git a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
index 4d54bc2..0edce93 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
@@ -161,7 +161,7 @@
 
   RewritePatternSet patterns(ctx);
 
-  // Try to flatten memrefs as a prerequiste for narrow type emulation,
+  // Try to flatten memrefs as a prerequisite for narrow type emulation,
   // so we can have simplified checks in the emulation patterns.
   memref::populateFlattenMemrefsPatterns(patterns);
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
index 8f230df..c188248 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
@@ -144,7 +144,7 @@
       //   %to_clone = vector.extract %arg1[%arg0] : f32 from vector<4xf32>
       //   ...
       // }
-      // %new_clone = vector.extact %cst[%c1] : f32 from vector<4xf32>
+      // %new_clone = vector.extract %cst[%c1] : f32 from vector<4xf32>
       // ```
       IRMapping mapping;
       mapping.map(iterArg, initArgs[index]);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCreateFastSlowPath.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCreateFastSlowPath.cpp
index b9f4cea..3cc78b0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCreateFastSlowPath.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCreateFastSlowPath.cpp
@@ -134,7 +134,7 @@
   };
   scf::IfOp::create(rewriter, padOp.getLoc(), ifCond, thenBuilder, elseBuilder);
 
-  // All of these ops have been cloned to both regions. Erease them now.
+  // All of these ops have been cloned to both regions. Erase them now.
   for (Operation *op : llvm::reverse(cloneOps)) {
     rewriter.eraseOp(op);
   }
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeCopyUsingForall.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeCopyUsingForall.cpp
index 9122303..7c975de 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeCopyUsingForall.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeCopyUsingForall.cpp
@@ -25,7 +25,7 @@
 
 namespace {
 //====---------------------------------------------------------------------===//
-// Pass to lower workgroup memory copy to distibuted
+// Pass to lower workgroup memory copy to distributed
 // transfer_read/transfer_write ops.
 //====---------------------------------------------------------------------===//
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
index a38a177..5fff53c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
@@ -17,7 +17,7 @@
 namespace mlir::iree_compiler {
 
 namespace {
-/// Applies tranformation to drop unit dims in destination vector.transfer_read
+/// Applies transformation to drop unit dims in vector.transfer_read
 /// destination so that the resulting vector is 2D.
 //
 /// Example:
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
index 328c892..65e73ce 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
@@ -163,7 +163,7 @@
       }
     }
 
-    // Fuse the candidate immeidate operands into the tiled loop.
+    // Fuse the candidate immediate operands into the tiled loop.
     OpBuilder::InsertionGuard guard(rewriter);
     while (!candidates.empty()) {
       tensor::ExtractSliceOp sliceOp = candidates.back();
@@ -213,7 +213,7 @@
                                          bool fuseInputProducer,
                                          bool coalesceLoops) {
   {
-    // Tile again at the workgroup level since redution dimension were
+    // Tile again at the workgroup level since reduction dimensions were
     // ignored. Dimensions already tiled will be ignore since we tile to the
     // same size.
     RewritePatternSet wgTilingPatterns(funcOp.getContext());
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileAndConvertConvToMatmul.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileAndConvertConvToMatmul.cpp
index 19be905..5053f34 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileAndConvertConvToMatmul.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileAndConvertConvToMatmul.cpp
@@ -99,7 +99,7 @@
 void GPUTileAndConvertConvToMatmulPass::runOnOperation() {
   MLIRContext *context = &getContext();
   mlir::FunctionOpInterface funcOp = getOperation();
-  // Collect candiates that need to be tiled to convert to matmul.
+  // Collect candidates that need to be tiled to convert to matmul.
   IRRewriter rewriter(funcOp);
   SmallVector<linalg::LinalgOp> convCandidates;
   funcOp->walk([&](linalg::LinalgOp linalgOp) {
@@ -139,7 +139,7 @@
                                         targetTileMap))) {
     funcOp.emitError() << "tiling of level  convolution failed\n";
   }
-  // Collect candiates again since the old candidates are not valid
+  // Collect candidates again since the old candidates are not valid
   // after convolution tiling.
   convCandidates = {};
   funcOp->walk([&](linalg::LinalgOp linalgOp) {
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.cpp
index da283d2..912d1e9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.cpp
@@ -364,7 +364,7 @@
     LLVM_DEBUG(llvm::dbgs() << "Layout Analysis Failed\n");
     return failure();
   }
-  LLVM_DEBUG(llvm::dbgs() << "Layout Analysis Succeded\n");
+  LLVM_DEBUG(llvm::dbgs() << "Layout Analysis Succeeded\n");
   LLVM_DEBUG(llvm::dbgs() << "\n\n");
 
   // Go to each operation, and set its distribution signature.
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
index f37a5bf..7af1c32e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -33,7 +33,7 @@
   // be vector registers carried between stages.
   loadStoreStage0 = 1,
   // Schedule optimized when using nvidia tensorcore with async copies. It will
-  // set all the copies in stage 0 then it will prefecth part of loads in `depth
+  // set all the copies in stage 0 then it will prefetch part of loads in `depth
   // - 2` stage and keep the rest of the load and compute into `depth - 1`.
   nvidiaTensorCore = 2,
 };
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
index 4661f2c..9b456ea 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
@@ -245,7 +245,7 @@
   let options = [
     Option<"epiloguePeeling", "epilogue-peeling", "bool",
             /*default=*/"true",
-           "Try to use un-peeling epilogue when false, peeled epilouge o.w.">,
+           "Try to use un-peeling epilogue when false, peeled epilogue o.w.">,
     Option<"depth", "pipeline-depth", "int64_t",
             /*default=*/"2",
            "Number of stages ">,
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
index 59c0261..3afc12d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
@@ -53,7 +53,7 @@
             "gpu_nested_layout_vector_distribution_mask.mlir",
             "gpu_nested_layout_vector_distribution_multi_reduce.mlir",
             "gpu_nested_layout_vector_distribution_step.mlir",
-            "gpu_pack_to_instrinsics.mlir",
+            "gpu_pack_to_intrinsics.mlir",
             "gpu_pad_convs.mlir",
             "gpu_pad_operands.mlir",
             "gpu_pipeline.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
index 1b1d474..6489b96 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
@@ -48,7 +48,7 @@
     "gpu_nested_layout_vector_distribution_mask.mlir"
     "gpu_nested_layout_vector_distribution_multi_reduce.mlir"
     "gpu_nested_layout_vector_distribution_step.mlir"
-    "gpu_pack_to_instrinsics.mlir"
+    "gpu_pack_to_intrinsics.mlir"
     "gpu_pad_convs.mlir"
     "gpu_pad_operands.mlir"
     "gpu_pipeline.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_intrinsics.mlir
similarity index 100%
rename from compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir
rename to compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_intrinsics.mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp b/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
index f27cf8a..2ad5ce1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
@@ -336,7 +336,7 @@
   funcPassManager.addPass(createCSEPass());
   // There are redundant memcpy (with linalg.generic form) ops created, which
   // can be deleted by canonicalizer. We have to run it again because the
-  // memrefs are unified in CSE pass, so we can truely remove redundant memcpy.
+  // memrefs are unified in CSE pass, so we can truly remove redundant memcpy.
   funcPassManager.addPass(createIREECodegenCanonicalizerPass());
   funcPassManager.addPass(createCleanupBufferAllocViewPass());
 }
diff --git a/compiler/src/iree/compiler/Codegen/Common/LinkTuningSpecsPass.cpp b/compiler/src/iree/compiler/Codegen/Common/LinkTuningSpecsPass.cpp
index 0039de5..1afb63a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/LinkTuningSpecsPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/LinkTuningSpecsPass.cpp
@@ -201,7 +201,8 @@
   llvm::DenseSet<StringRef> seenNames;
   SmallVector<NamedSequenceOp> nameConflictOps;
 
-  // Detect name conflicts across named sequence ops from differnt tuning specs.
+  // Detect name conflicts across named sequence ops from different tuning
+  // specs.
   for (NamedSequenceOp op : namedSequenceOpsToMove) {
     StringRef name = op.getName();
     if (!seenNames.insert(name).second) {
@@ -332,7 +333,7 @@
     auto symbol = SymbolRefAttr::get(
         parentSymbol, FlatSymbolRefAttr::get(spec.getSymNameAttr()));
 
-    // Surpress silenceable errors so that failures to match in child tuning
+    // Suppress silenceable errors so that failures to match in child tuning
     // specs can be ignored.
     operand = transform::IncludeOp::create(
                   builder, loc, anyOpType, symbol,
@@ -402,7 +403,7 @@
   module->setAttr(kTuningSpecDefaultEntrypointAttrName, builder.getUnitAttr());
 
   // Step 2-c: Create a new block inside the NamedSequenceOp and merge the
-  // ForeachMatchOp from each inner module into one ForachMatchOp.
+  // ForeachMatchOp from each inner module into one ForeachMatchOp.
   Type anyOpType = builder.getType<transform::AnyOpType>();
   SmallVector<Type, 4> resultTypes = {anyOpType};
   SmallVector<Attribute> mergedMatchers;
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp
index e61c9e7..9914e5e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp
@@ -283,7 +283,7 @@
 };
 
 //===---------------------------------------------------------------------===//
-// Patterns for layout transfers. They decompse load/store ops into
+// Patterns for layout transfers. They decompose load/store ops into
 // set_encoding/unset_encoding + load/store, if the converted types mismatch.
 //===---------------------------------------------------------------------===//
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.h b/compiler/src/iree/compiler/Codegen/Common/Passes.h
index ec4b081..3cd6955 100644
--- a/compiler/src/iree/compiler/Codegen/Common/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Common/Passes.h
@@ -131,7 +131,7 @@
                                   PatternBenefit baseBenefit = 1);
 
 /// Collect patterns to fold tensor.extract_slice -> vector.transfer_read and
-/// vector.transfer_write -> tensor.insert_slice op chains into vector tranfer
+/// vector.transfer_write -> tensor.insert_slice op chains into vector transfer
 /// read and write ops.
 void populateVectorTransferTensorSliceTransforms(RewritePatternSet &patterns,
                                                  PatternBenefit benefit = 1);
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
index 2d20262..d2223a6 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
@@ -56,7 +56,7 @@
 ///   the static values that the workload of the dispatch corresponds to.
 // TODO: Remove the use of static loop ranges. This is used to set the number of
 // workgroups to a static value. Ideally this should not be done and the static
-// and dyamic cases are handled the same way. When the tile+distribute moves
+// and dynamic cases are handled the same way. When the tile+distribute moves
 // away from using `scf.for` to using a construct that better captures
 // distribution (like `scf.forall`) this information can be dropped.
 static LogicalResult
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp
index 5b8f500..21ac9d6 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp
@@ -77,8 +77,8 @@
     int64_t innerTileIdx = i + innerTiles.size();
     // Compute the LCM with the initial multiples for both the inner tile and
     // the corresponding outer tile. The multiples for the packedMultiples will
-    // then be these LCMs, and the multiple for the unPackedMultipes will be the
-    // product of these LCMs.
+    // then be these LCMs, and the multiple for the unPackedMultiples will be
+    // the product of these LCMs.
     int64_t lcmInnerTileMultiple = tile;
     int64_t lcmOuterTileMultiple = 1;
     if (initialPackedMultiples) {
@@ -218,7 +218,7 @@
   return expandedMultiples;
 }
 
-/// Find a set of required workgroup tile size mulitples for the given OpResult
+/// Find a set of required workgroup tile size multiples for the given OpResult
 /// by walking the producer chain of the OpResult's owner, and finding ops that
 /// require specific tile size multiples. For now, the only ops that need
 /// special workgroup tile size multiples are pack and unpack ops. The returned
@@ -291,7 +291,7 @@
       });
 }
 
-/// Find a set of required workgroup tile size mulitples for the given OpOperand
+/// Find a set of required workgroup tile size multiples for the given OpOperand
 /// by walking the use chain of the OpOperand's owner, and finding ops that
 /// require specific tile size multiples. For now, the only ops that need
 /// special workgroup tile size multiples are pack and unpack ops. The returned
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
index d5c5ca5..f0c225b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
@@ -730,7 +730,7 @@
 
 // Important note: this transform is load-bearing and is the glue between
 // different dialects that want to operate on tensors.
-// Originaly, it used to just call `addIREEComprehensiveBufferizePasses` but
+// Originally, it used to just call `addIREEComprehensiveBufferizePasses` but
 // this introduces a lot of complexity in the registration process due to the
 // use of nested pass pipelines, to a point that it is a major endeavor to
 // connect a new dialect.
@@ -814,7 +814,7 @@
                                                      Location loc, Value from,
                                                      Value to) {
   // Insert barriers for copies from and to shared memory. We use
-  // workgroup-scope barriers here because we do not currenly produce code that
+  // workgroup-scope barriers here because we do not currently produce code that
   // destructively overwrites global memory or uses it as a communication
   // mechanism, thus eliminating the need for us to force glomal reads/writes to
   // conclude at the barrier. This produces performance improvements on backends
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
index 19ca7fc..ba66871 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
@@ -92,7 +92,7 @@
   let description = [{
     Indicates that tensor.extract_slice -> vector.transfer_read and
     vector.transfer_write -> tensor.insert_slice op chains should be folded into
-    vector tranfer read and write ops
+    vector transfer read and write ops
   }];
 
   let cppNamespace = "mlir::iree_compiler::IREE::transform_dialect";
@@ -595,7 +595,7 @@
     However this can still be unsafe wrt parallelism so use carefully!
 
     Sharing consists in rewriting all uses of the operands passed as
-    `shared_outs` that are also captured wihtin the `scf.forall` region
+    `shared_outs` that are also captured within the `scf.forall` region
     into the matching `shared_outs` bbarg.
 
     Only those operands whose indices are specified in `share_operands` are
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
index b057f32..c6d1b54 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
@@ -1417,7 +1417,7 @@
   #hal.pipeline.binding<storage_buffer>
 ]>
 #map = affine_map<()[s0] -> (s0 * 32)>
-func.func @cast_follwed_by_store() {
+func.func @cast_followed_by_store() {
   %cst = arith.constant 0.000000e+00 : f32
   %c4 = arith.constant 4 : index
   %c64 = arith.constant 64 : index
@@ -1450,7 +1450,7 @@
   }
   return
 }
-// CHECK-LABEL: func.func @cast_follwed_by_store()
+// CHECK-LABEL: func.func @cast_followed_by_store()
 //   CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32
 //   CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4x32x1024xf32, #hal.descriptor_type<storage_buffer>>
 //   CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4x1024x64xf32, #hal.descriptor_type<storage_buffer>>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
index f867c8b..a47d3b2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
@@ -58,7 +58,7 @@
 #pipeline_layout = #hal.pipeline.layout<bindings = [
   #hal.pipeline.binding<storage_buffer>
 ]>
-hal.executable private @err_mistmatched_workgroup_size {
+hal.executable private @err_mismatched_workgroup_size {
   hal.executable.variant public @err_mismatched_workgroup_size target(#hal.executable.target<"", "", {}>) {
     // expected-error @+1 {{failed to reconcile workgroup sizes}}
     hal.executable.export public @entry_point layout(#pipeline_layout)
@@ -83,7 +83,7 @@
 #pipeline_layout = #hal.pipeline.layout<bindings = [
   #hal.pipeline.binding<storage_buffer>
 ]>
-hal.executable private @err_mistmatched_workgroup_size2 {
+hal.executable private @err_mismatched_workgroup_size2 {
   hal.executable.variant public @err_mismatched_workgroup_size2 target(#hal.executable.target<"", "", {}>) {
     // expected-error @+1 {{failed to reconcile workgroup sizes}}
     hal.executable.export public @entry_point layout(#pipeline_layout)
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/vector_layout_analysis.mlir b/compiler/src/iree/compiler/Codegen/Common/test/vector_layout_analysis.mlir
index 72c55e2..af3e4fc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/vector_layout_analysis.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/vector_layout_analysis.mlir
@@ -733,7 +733,8 @@
   %rootl = iree_vector_ext.to_layout %root to layout(#layout) : vector<64x64xf16>
   %reshape = vector.shape_cast %rootl : vector<64x64xf16> to vector<16x4x16x4xf16>
   // expected-remark @above {{subgroup_tile = [1, 1, 1, 1], batch_tile = [4, 1, 4, 1], outer_tile = [1, 1, 1, 1], thread_tile = [4, 4, 4, 1], element_tile = [1, 1, 1, 4]}}
-  // expexted-remark @above {{subgroup_strides = [0, 0, 0, 0], thread_strides = [16, 4, 1, 0]}}
+  // TODO: The strides remark check was previously disabled by a misspelled directive.
+  // Enable once the pass emits: subgroup_strides = [0, 0, 0, 0], thread_strides = [16, 4, 1, 0]
   func.return %reshape : vector<16x4x16x4xf16>
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td
index a59e341..6b314d6 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td
@@ -365,7 +365,7 @@
 
     Note that different `kind` attributes may adopt different conventions as to
     the intrinsic vector tile formats. Some may use only flattened 1-D vectors,
-    and it may be necessary to use `opaque = true` to accomodate them. Others
+    and it may be necessary to use `opaque = true` to accommodate them. Others
     may use higher-rank vectors with shapes reflecting semantics, allowing to
     use `opaque = false`, which is preferable when possible at all thanks to the
     stricter semantics. However, that is not only a function of the `kind`
@@ -418,7 +418,7 @@
 
   let arguments = (ins Variadic<AnyRankedTensorOrVector>:$inputs,
       Variadic<AnyRankedTensorOrVector>:$outputs,
-      TypedArrayAttrBase<AffineMapAttr, "indxing affine maps">:$indexing_maps,
+      TypedArrayAttrBase<AffineMapAttr, "indexing affine maps">:$indexing_maps,
       IteratorTypeArrayAttr:$iterator_types,
       IREECodegen_AnyInnerTileDescAttr:$kind,
       IREECodegen_AnyInnerTiledSemanticscAttr:$semantics,
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils/unittests/UtilsTest.cpp b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils/unittests/UtilsTest.cpp
index 9131d95..026fc6d 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils/unittests/UtilsTest.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils/unittests/UtilsTest.cpp
@@ -64,7 +64,7 @@
   EXPECT_TRUE(dictAttr.contains("permutation"));
 
   // Verify if the sizes match. The check of values is done by the comparison
-  // between deserialzation result and the original struct.
+  // between deserialization result and the original struct.
   auto expandShapeArrayAttr =
       dyn_cast<ArrayAttr>(dictAttr.getNamed("expandShape")->getValue());
   EXPECT_EQ(expandShapeArrayAttr.size(), swizzle.expandShape.size());
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
index e62c482..b2e1071 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
@@ -548,7 +548,7 @@
     in a chained-matmul operation.
 
     If set to true (similar to MMAAttr), |col_major| indicates the computation
-    is perfomed as below:
+    is performed as below:
 
     ```
       C^T += B^T x A^T
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp
index 3378a4f..064adda 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp
@@ -36,7 +36,7 @@
 #define DEBUG_TYPE "iree-gpu-config-utils"
 
 static llvm::cl::opt<bool> clGPUTestCpromotion(
-    "iree-codegen-test-c-promtion",
+    "iree-codegen-test-c-promotion",
     llvm::cl::desc("C promote in specific case of elemetwise operations that "
                    "codegen cant yet support without it if also doing padding"),
     llvm::cl::init(true));
@@ -1450,7 +1450,7 @@
   const unsigned scaleToByte =
       std::max(8 / distInfo.minBitwidth, static_cast<unsigned>(1));
 
-  // Distribute workload to the given `numThreads` by allowing a potental loss.
+  // Distribute workload to the given `numThreads` by allowing a potential loss.
   auto distributeToThreads = [&](int64_t numThreads,
                                  std::optional<int64_t> lossFactor =
                                      std::nullopt) -> int64_t {
@@ -1628,7 +1628,7 @@
   // Heuristic value chosen to limit maximum vector sizes when tiling below.
   const unsigned maxVectorSize = 32;
 
-  // Try to tile all reductions by some small factor, preferrably 4, when
+  // Try to tile all reductions by some small factor, preferably 4, when
   // possible. This gives us a chance to perform vector4 load if an input has
   // its innnermost dimension being reduction. It also avoids generating too
   // many instructions when unrolling vector later. We limit the expected
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
index 4227e7a..a019044 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
@@ -1159,7 +1159,7 @@
                                          StringRef resolver) {
   if (resolver == kDataTilingEncodingLayoutResolverName) {
     // Return a GPUEncodingResolverAttr with an empty configuration. The
-    // addtional attributes will be attached by the `cloneWithSimplifiedConfig`
+    // additional attributes will be attached by the `cloneWithSimplifiedConfig`
     // interface method when the resolver needs to be configured.
     return IREE::GPU::GPUEncodingResolverAttr::get(target.getContext(), {});
   }
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ReductionConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ReductionConfigUtils.cpp
index 7eef109..c9866b9 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ReductionConfigUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ReductionConfigUtils.cpp
@@ -459,7 +459,7 @@
       loweringConfigs.push_back({linalgOp, *loweringConfig});
     }
   }
-  // Only set lowering configs once we've sucessfully determined them for all
+  // Only set lowering configs once we've successfully determined them for all
   // operations, to avoid leaving the IR in an inconsistent state on failure.
   for (auto &[linalgOp, loweringConfig] : loweringConfigs) {
     setLoweringConfig(linalgOp, loweringConfig);
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtAttrs.td
index c9c9bee..8744ae2 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtAttrs.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtAttrs.td
@@ -37,7 +37,7 @@
     `vector<subgroup1 x subgroup2 x batch1 x batch2 x ... x element1 x element2>`
 
     Now, when the vector<subgroup x batch x outer x thread x element> is
-    indexed, the indices of 'subgroup' and `thread` are not directly refferring
+    indexed, the indices of 'subgroup' and `thread` are not directly referring
     to the subgroup_id and thread_id in the GPU context. lets define them
     as virtual_subgroup_id and virtual_thread_id and they hold the following
     definition:
@@ -83,7 +83,7 @@
 
     The total number of subgroups used (computed by multiplying each dim in
     subgroup_tile) should be a multiple of number of subgroups in the
-    harware. If the total number of subgroups used exceeds the number of
+    hardware. If the total number of subgroups used exceeds the number of
     subgroups of the hardware, then the subgroup used (say x) is
     x mod num_subgroups:
 
@@ -99,7 +99,7 @@
     #### Threads per Subgroup:
 
     This level of tiling is also known as "thread distribution" within a subgroup.
-    The logic is quite similiar to subgroup distribution using the tile sizes
+    The logic is quite similar to subgroup distribution using the tile sizes
     and the 'thread_strides'.
 
     #### Element distribution on a thread
diff --git a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp
index 8e2fa9c..d89f69c 100644
--- a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp
+++ b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp
@@ -366,7 +366,7 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Interface methods implementaion for iree_cpu.cpu_encoding_resolver.
+// Interface methods implementation for iree_cpu.cpu_encoding_resolver.
 //===----------------------------------------------------------------------===//
 
 // Enumerate tile sizes to choose from on riscv32.
@@ -720,7 +720,7 @@
       return info;
     }
     auto narrowDim = IREE::Encoding::getPo2MatmulNarrowDim(encoding);
-    // Choose a final matmul TileMxNxK from the above-enumarated tile shapes,
+    // Choose a final matmul TileMxNxK from the above-enumerated tile shapes,
     // taking narrow dimensions into account.
     TileMxNxK chosenTileMxNxK =
         chooseMatmulTile(enumeratedTileMxNxK, narrowDim);
@@ -837,7 +837,7 @@
 };
 
 //===----------------------------------------------------------------------===//
-// Interface methods implementaion for iree_cpu.vmvx_encoding_resolver.
+// Interface methods implementation for iree_cpu.vmvx_encoding_resolver.
 //===----------------------------------------------------------------------===//
 
 // Enumerate tile sizes to choose from when no specific architecture is
@@ -903,7 +903,7 @@
       return info;
     }
     auto narrowDim = IREE::Encoding::getPo2MatmulNarrowDim(encoding);
-    // Choose a final matmul TileMxNxK from the above-enumarated tile shapes,
+    // Choose a final matmul TileMxNxK from the above-enumerated tile shapes,
     // taking narrow dimensions into account.
     TileMxNxK chosenTileMxNxK =
         chooseMatmulTile(enumeratedTileMxNxK, narrowDim);
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.td b/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.td
index f5667c9..603a279 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.td
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.td
@@ -24,7 +24,7 @@
       /*methodName=*/"lowerToFunctionCall",
       /*args=*/(ins "RewriterBase &":$rewriter),
       /*methodBody=*/"",
-      /*defautImplementation=*/"return failure();"
+      /*defaultImplementation=*/"return failure();"
     >,
   ];
 }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 766aa6a..ae8fb1f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -1077,7 +1077,7 @@
 
   populateComplexToLLVMConversionPatterns(typeConverter, patterns);
   populateMathToLLVMConversionPatterns(typeConverter, patterns);
-  // Note: workaround needed due to `memref.subview` returnd from an `if`.
+  // Note: workaround needed due to `memref.subview` returned from an `if`.
   memref::populateExpandStridedMetadataPatterns(patterns);
   iree_compiler::populateIREEResolveExtractStridedMetadataPatterns(patterns);
   populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 90dc0b2..29b5185 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -934,7 +934,7 @@
       // CPU features are typically prefixed with a +, e.g. +avx,+avx2,+fma.
       featureString.consume_front("+");
       // Silently skip unknown CPU features, more flexible for now. Note that
-      // some featurs occurring here are not standard CPU features but internal
+      // some features occurring here are not standard CPU features but internal
       // things such as the "+reserve-x18" that we add on arm64.
       if (featureToBitPattern.count(featureString)) {
         specifiedCpuDataField0 |= featureToBitPattern.lookup(featureString);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
index ab341cd..c7f8826 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
@@ -351,7 +351,7 @@
   /// Given a calling convention `cConv`, and callee with return of
   /// `resultTypes` and operands with type `argTypes`, along with extra fields
   /// to append to argument list specified in `extraFields`; modify the `callOp`
-  /// to implement the specified ABI. The calleee signature is expected to have
+  /// to implement the specified ABI. The callee signature is expected to have
   /// been/to be modified separately, i.e. it isnt done within this method.
   FailureOr<SmallVector<Value>>
   materializeABI(Operation *callOp, StringRef symbolName,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 85b9601..fab61b4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -489,7 +489,7 @@
     int64_t newSize = std::min(currSize * 2, workload[index]);
     int64_t vectorSize = vectorSizeHints ? vectorSizeHints.value()[index] : 0;
 
-    // Chech if it's the ideal size with vector size hint. And skip if the new
+    // Check if it's the ideal size with vector size hint. And skip if the new
     // size will break the ideal size.
     if (vectorSize > 1 &&
         (currSize % vectorSize == 0 && workload[index] % currSize == 0) &&
@@ -519,7 +519,7 @@
     int64_t nwg = llvm::divideCeil(workload[i], distributedTileSizes[i]);
     int64_t newSize = llvm::divideCeil(workload[i], nwg);
 
-    // Chech if it's the ideal size with vector size hint. And skip if the new
+    // Check if it's the ideal size with vector size hint. And skip if the new
     // size will break the ideal size.
     int64_t vectorSize = vectorSizeHints ? vectorSizeHints.value()[i] : 0;
     if (vectorSize > 1 &&
@@ -810,7 +810,7 @@
 }
 
 // Clamps in-place `vecTileSizes`, ensuring that the resulting vector tile sizes
-// for each opearand of `op` satisfy two requirements:
+// for each operand of `op` satisfy two requirements:
 // 1. No resulting operand tile size exceeds `eachOperandMaxTileBits`.
 // 2. The sum of all resulting operand tile size does not exceed
 // `allOperandsMaxTileBits`.
@@ -1494,7 +1494,7 @@
   if (targetAttr && isRISCV(targetAttr.getConfiguration())) {
     // RISC-V natively supports scalar x vector operations so we don't have to
     // vectorize dimension k. Vectorizing dimension k results in a vector load
-    // and a sequence of vrgather ops to implemement the broadcast explicitly.
+    // and a sequence of vrgather ops to implement the broadcast explicitly.
     // We should tile and/or unroll that dimension without vectorization, which
     // is not possible right now.
     sizes.append({8, 32, 1});
@@ -1609,7 +1609,7 @@
     return;
   }
 
-  // nativeVectorSize is cacluated with VLEN and LMUL=2.
+  // nativeVectorSize is calculated with VLEN and LMUL=2.
   int64_t nativeVectorSize = getNativeVectorSizeInBytes(entryPointFn);
   int64_t elementSize;
   if (elementType->isF16()) {
@@ -4158,7 +4158,7 @@
     return failure();
   }
 
-  // The transform dialect codegen has differnet logics and codegen flow.
+  // The transform dialect codegen has different logic and codegen flow.
   // Ignore the tile sizes adjustment.
   auto pipeline = getTranslationInfo(entryPointFn).getPassPipeline().getValue();
   if (pipeline != DispatchLoweringPassPipeline::TransformDialectCodegen) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 05fe6e8..1670e6a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -171,7 +171,7 @@
   LoweringConfigAttrInterface loweringConfig = getRootLoweringConfig(funcOp);
   OpPassManager passManager(func::FuncOp::getOperationName());
   switch (pipeline) {
-  // No pipleline specified, nothing to do.
+  // No pipeline specified, nothing to do.
   case IREE::Codegen::DispatchLoweringPassPipeline::None:
     return;
   case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index df7e37f..c7da5af 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -643,7 +643,7 @@
       .addPass(createCPUPropagateDataLayoutPass)
       .addPass(createRematerializeParallelOpsPass)
       // TODO(#13888): This(createExpandF16OpToF32Pass()) pass is being added
-      // way to late and should insted be be done during lowering to LLVM.
+      // way too late and should instead be done during lowering to LLVM.
       .addPass(createExpandF16OpToF32Pass)
       .addPass(createConvertAccGEMMToGEMMPass)
       // TODO: Remove the following pass the plumb support for
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
index c07f05c..3656a74 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
@@ -87,7 +87,7 @@
   bool lowerToAVX2 = false;
 };
 
-/// Populates the passes to lower linalg ops on buffers. Currenly this
+/// Populates the passes to lower linalg ops on buffers. Currently this
 /// pipeline is only used for dispatches that just copy data from input
 /// interfaces to output interface.
 void addCPUBufferOpsTileAndVectorizePipeline(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
index 3f18973..0983e77 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
@@ -32,11 +32,11 @@
 def ExpandF16OpToF32Pass :
     Pass<"iree-llvmcpu-expand-f16-op-to-f32", ""> {
   let summary =
-      "Preform f16 opertaions by expanding them to f32.";
+      "Preform f16 operations by expanding them to f32.";
   let description = [{
-    Pass to handel F16 bit operations, but converting f16 operands to F32.
-    Currently this pass is handeling fmaxf conversion from f16 to f32,
-    and then returing a f16 output back after preforming the operation.
+    Pass to handle F16 bit operations by converting f16 operands to F32.
+    Currently this pass is handling fmaxf conversion from f16 to f32,
+    and then returning an f16 output after performing the operation.
     Can handle more operations if required in future.
   }];
 }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
index 8fbb96c..6e35441 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
@@ -77,7 +77,7 @@
 // CHECK-LABEL: llvm.func @interleave_and_bitcast_lowering(
 // vector.interleave should be gone entirely
 //   CHECK-NOT:   vector.interleave
-// 2D vector.bitcast tha followed should be replaced with 1D vector.bitcast
+// 2D vector.bitcast that followed should be replaced with 1D vector.bitcast
 //       CHECK:   llvm.bitcast {{.*}} : vector<4xi8> to vector<8xi4>
 //   CHECK-NOT:   vector.bitcast %{{.*}} : vector<4x4xi8> to vector<4x8xi4>
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
index 3c9c5ad..f537baa 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
@@ -2,7 +2,7 @@
 // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=false --split-input-file %s | FileCheck %s
 // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-mlir-opt-level=O0 --split-input-file %s | FileCheck %s
 
-// CPU backend enables fp reassociation strating from O2, so the checks should be the same.
+// CPU backend enables fp reassociation starting from O2, so the checks should be the same.
 // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=true --split-input-file %s | FileCheck %s --check-prefix=REORDERCHECK
 // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-mlir-opt-level=O2 --split-input-file %s | FileCheck %s --check-prefix=REORDERCHECK
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureTensorLayouts.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureTensorLayouts.cpp
index 4bdcc79..13c2847 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureTensorLayouts.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureTensorLayouts.cpp
@@ -203,7 +203,7 @@
 
   // MMA intrinsics can be weird and usually don't have a single subgroup
   // iteration space, so we need to find their value subgroup iteration space
-  // indvidually.
+  // individually.
   auto getFragmentLayout = [&](int operandIndex, int64_t outerDim,
                                int64_t innerDim,
                                AffineMap map) -> VectorLayoutInterface {
@@ -353,7 +353,7 @@
 /// is only defined on standard "@" function, it may be a different
 /// transformation for other indexing maps.
 ///
-/// For linalg operands, since the indexing maps are part of the op defination,
+/// For linalg operands, since the indexing maps are part of the op definition,
 /// we can achieve the same transformation by simply swapping the operands.
 static void swapOperandsToTransposeIntrinsic(RewriterBase &rewriter,
                                              linalg::GenericOp contractOp) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
index 852238b..1ee77ac 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
@@ -66,7 +66,7 @@
 
 static LogicalResult tileToSerialLoops(mlir::FunctionOpInterface funcOp) {
   {
-    // Tile again at the workgroup level since redution dimension were
+    // Tile again at the workgroup level since reduction dimensions were
     // ignored. Dimensions already tiled will be ignore since we tile to the
     // same size.
     if (failed(tileReductionLoops(funcOp))) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLBufferInstructionsOptimization.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLBufferInstructionsOptimization.cpp
index f83ed9c..2212b89 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLBufferInstructionsOptimization.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLBufferInstructionsOptimization.cpp
@@ -87,7 +87,7 @@
 // %read = vector.transfer_read %memref : memref<1x?xbf16, amdgpu.raw_fat_buffer> // no mask!
 // %masked_read = arith.select %cmp, %read, %padding : vector<1x8xbf16>
 // clang-format on
-// Note we currently dont support cases where muliple masks are ANDed or ORed
+// Note we currently don't support cases where multiple masks are ANDed or ORed
 // together to form the final mask to a read but such support can be added where
 // we track a set of valid masks and add that an AND or OR of valid masks is
 // valid.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
index de3a38c..a84197f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
@@ -418,7 +418,7 @@
     This applies software pipelining to a given scf.for loop. The pipelining
     strategy will look for a copy to shared memory and pipeline it to overlap
     it with the rest of the loop.
-    It is user responsability to ensure that there are no dependency between
+    It is the user's responsibility to ensure that there are no dependencies between
     `depth` iterations of the loop by using multi-buffering.
 
     `depth` will indicate how many stages the software pipeline should have.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
index 3df57bd..b47d9f1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
@@ -419,7 +419,7 @@
   }
 
   // Pack all the allocations into one i8 alloc.
-  // We may need to add extra barriers to make sure we are done writting or
+  // We may need to add extra barriers to make sure we are done writing or
   // reading from the previous alias group before starting a new one.
   for (size_t i = 0; i < aliasGroups.size(); i++) {
     for (Operation *alloc : aliasGroups[i]) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_igemm_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_igemm_tile_and_fuse.mlir
index 0143899..25f1a57 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_igemm_tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_igemm_tile_and_fuse.mlir
@@ -338,7 +338,7 @@
 //           CHECK: promote_operands = [0, 1, 2]
 
 // -----
-// Check that we dont c promote if there is no additonal operand
+// Check that we don't C promote if there is no additional operand
 func.func @nhwc_conv_mfma_truncf(%3: tensor<2x35x35x128xf32>, %4: tensor<3x3x128x64xf32>, %5 : tensor<2x33x33x64xf32>) -> tensor<2x33x33x64xf16> {
   %cst = arith.constant 0.000000e+00 : f32
   %empty = tensor.empty() : tensor<2x33x33x64xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_reduction_gfx942.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_reduction_gfx942.mlir
index 42cdd2b..10f7982 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_reduction_gfx942.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_reduction_gfx942.mlir
@@ -181,7 +181,7 @@
 
 // The same IR as 'test_multiple_reduction' but with an unsupported operation,
 // preventing this from going down vector distribute. Previously lowering configs
-// would be attached to the suppported operations even though the full dispatch
+// would be attached to the supported operations even though the full dispatch
 // is unsupported.
 func.func @test_negative_multiple_reduction() {
   %cst = arith.constant 0.000000e+00 : f32
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_lower_to_llvmgpu.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_lower_to_llvmgpu.mlir
index 61c08d4..fc3b9df 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_lower_to_llvmgpu.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_lower_to_llvmgpu.mlir
@@ -82,7 +82,7 @@
   }
 }
 // The purpose of this test is to make sure that we dont merge basic blocks
-// If some pass accidentaly does this then we would have arguments to
+// If some pass accidentally does this then we would have arguments to
 // ^bb4 and also have 4 preds instead of 2 due to back edges.
 // CHECK-LABEL: func @no_merge_basic_blocks(
 //       CHECK:   ^bb4:
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
index f3fa9f9..a063c6c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
@@ -38,7 +38,7 @@
 
 
 // -----
-// Test that maximum and minum are converted to max and min on rocm
+// Test that maximum and minimum are converted to max and min on rocm
 #pipeline_layout = #hal.pipeline.layout<bindings = [
   #hal.pipeline.binding<storage_buffer>,
   #hal.pipeline.binding<storage_buffer>,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
index 15339be..e38ccac 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
@@ -125,7 +125,7 @@
 
 // -----
 
-func.func @ksplitmatmul_4D_allone(%a: memref<128x16x32x256xf32>) -> vector<1x1x1x1xf32> {
+func.func @ksplitmatmul_4D_alone(%a: memref<128x16x32x256xf32>) -> vector<1x1x1x1xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
@@ -135,7 +135,7 @@
   return %0 : vector<1x1x1x1xf32>
 }
 
-// CHECK-LABEL: func.func @ksplitmatmul_4D_allone
+// CHECK-LABEL: func.func @ksplitmatmul_4D_alone
 //   CHECK-DAG: %[[ID:.+]] = arith.constant 0 : index
 //   CHECK-DAG: arith.constant 0.000000e+00 : f32
 //       CHECK: %[[M:.*]] = memref.subview
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
index e296200..ca91dab 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
@@ -222,7 +222,7 @@
   SymbolTable symbolTable(module);
   InterfaceResourceMap interfaceToResourceInfo;
 
-  // We insert each new global variable at the begining of the module,
+  // We insert each new global variable at the beginning of the module,
   // therefore, to preserve the original order, we process all functions and all
   // subspan ops in the reverse order.
   auto functions = llvm::to_vector(module.getOps<func::FuncOp>());
@@ -735,8 +735,8 @@
   patterns.add<HALInterfaceLoadConstantConverter>(typeConverter, context,
                                                   supportsAssume);
 
-  // Performs a prelimiary step to analyze all hal.interface.binding.subspan ops
-  // and creates spirv.GlobalVariables.
+  // Performs a preliminary step to analyze all hal.interface.binding.subspan
+  // ops and creates spirv.GlobalVariables.
   InterfaceResourceMap interfaceToResourceVars =
       useIndirectBindings ? createIndirectResourceVariables(moduleOp)
                           : createResourceVariables(moduleOp);
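
For context on the insertion-order comment fixed above: inserting each new
global at the beginning of the module while visiting producers in reverse
preserves the original order. A minimal standalone sketch in plain C++ (not
the pass code; container names are illustrative):

    #include <deque>
    #include <vector>

    int main() {
      std::vector<int> subspans = {1, 2, 3}; // original program order
      std::deque<int> globals;
      // Reverse visit + front insertion: 3 -> {3}, 2 -> {2,3}, 1 -> {1,2,3}.
      for (auto it = subspans.rbegin(); it != subspans.rend(); ++it)
        globals.push_front(*it);
      return globals.front() == 1 ? 0 : 1;
    }
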
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
index 6d59589..7e0af22 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
@@ -88,7 +88,7 @@
 /// `translation_info` attribute to the entry point containing `rootOp` and a
 /// `lowering_config` attribute to `rootOp`.
 ///
-/// Returns success when a configuration is successfullly attached as attribute.
+/// Returns success if a configuration is successfully attached as an attribute.
 /// Returns failure otherwise.
 
 LogicalResult setAdrenoCodeGenConfig(IREE::GPU::TargetAttr target,
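
The success/failure wording above follows MLIR's LogicalResult convention.
A short caller-side sketch; the call site is hypothetical and assumes the
usual mlir::failed/mlir::failure/mlir::success helpers are in scope:

    // Hypothetical call site inside a configuration pass.
    if (failed(setAdrenoCodeGenConfig(target, rootOp)))
      return failure(); // nothing was attached; propagate the failure
    // translation_info and lowering_config are now in place.
    return success();
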
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
index d2c9a06..4a38927 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
@@ -148,7 +148,7 @@
 addSPIRVBufferizePasses(OpPassManager &funcPassManager,
                         BufferizationOptions::AllocationFn allocationFn) {
   // Resolve dim ops first so that we don't have compute Linalg ops lingering on
-  // becuase of dim op usage. This avoids bufferizing those compute ops just for
+  // because of dim op usage. This avoids bufferizing those compute ops just for
   // their shape dimensions.
   funcPassManager.addPass(memref::createResolveShapedTypeResultDimsPass());
   addBufferizePasses(funcPassManager, allocationFn);
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
index 1eb3469..819b31c 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
@@ -106,8 +106,8 @@
   auto funcOp = getOperation();
 
   // Collect all storage buffer subspan ops with 1-D static shapes. We only need
-  // to handle such cases here--high-D static shapes are expected to be flattend
-  // into 1-D by a previous pass.
+  // to handle such cases here--high-D static shapes are expected to be
+  // flattened into 1-D by a previous pass.
   SmallVector<IREE::HAL::InterfaceBindingSubspanOp> subspanOps;
   funcOp.walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
     if (is1DStaticShapedStorageBuffer(subspanOp)) {
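
The hunk above shows the common collect-then-rewrite shape: ops are gathered
during the walk and only mutated afterwards, so the traversal never erases
what it is still visiting. A hedged sketch reusing the hunk's names; the
rewrite helper is hypothetical:

    SmallVector<IREE::HAL::InterfaceBindingSubspanOp> subspanOps;
    funcOp.walk([&](IREE::HAL::InterfaceBindingSubspanOp op) {
      if (is1DStaticShapedStorageBuffer(op))
        subspanOps.push_back(op);
    });
    for (auto op : subspanOps)
      rewriteToDynamic1DShape(op); // hypothetical rewrite step
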
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMaterializeExecutableConditions.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMaterializeExecutableConditions.cpp
index 4075085..2c0e2a2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMaterializeExecutableConditions.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMaterializeExecutableConditions.cpp
@@ -44,7 +44,7 @@
   // Note that i32 or i1 is assumed to always exist and does not appear in
   // this bitfield.
   uint32_t computeInt;
-  // Storage bitwidth requirement bitfiled:
+  // Storage bitwidth requirement bitfield:
   // * 0b01: 8-bit
   // * 0b10: 16-bit
   uint32_t storage;
@@ -169,7 +169,7 @@
 
     // We explicitly perform exact match here given that 1) we need to have the
     // corresponding query in the runtime, and 2) we are not using a lot of
-    // configuarations in CodeGen yet.
+    // configurations in CodeGen yet.
     if (inputType.isF16() && outputType.isF16()) {
       if (mSize == 16 && nSize == 16 && kSize == 16) {
         features.coopMatrix |= 0b1;
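
A small standalone sketch of the bitfield encoding documented in the first
hunk above; field names mirror the source, and the exact bit assignments
beyond the two storage bits are assumptions:

    #include <cstdint>

    struct SpirvFeatureBits {
      uint32_t storage = 0;    // 0b01: 8-bit storage, 0b10: 16-bit storage
      uint32_t coopMatrix = 0; // one bit per supported (type, shape) config
    };

    int main() {
      SpirvFeatureBits features;
      features.storage |= 0b10;   // require 16-bit storage
      features.coopMatrix |= 0b1; // f16 16x16x16, as in the hunk above
      bool needs16Bit = (features.storage & 0b10) != 0;
      return needs16Bit ? 0 : 1;
    }
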
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
index 6e3b1a0..a9d4619 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
@@ -362,7 +362,7 @@
       return WalkResult::advance();
     });
     if (!rootOp) {
-      funcOp.emitError("expected lowering confg on a (batch) matmul op");
+      funcOp.emitError("expected lowering config on a (batch) matmul op");
       return signalPassFailure();
     }
 
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
index cc89a03..1428d14 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
@@ -141,7 +141,7 @@
 
     // GPU subgroup MMA ops do not care about the memref element type. But we
     // still need to make sure we can load/store with good strides.
-    // The `leadingDimension` attributes specifies the stride (numer of
+    // The `leadingDimension` attribute specifies the stride (number of
     // *elements*) over the memref for the leading dimension.
     auto memrefType = cast<MemRefType>(memrefVal.getType());
     std::optional<unsigned> elementBits =
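
The stride comment fixed above counts in elements, so byte-level reasoning
needs a unit conversion. A standalone hedged sketch (not the pass code;
sub-byte handling is deliberately left out):

    #include <cstdint>
    #include <optional>

    // leadingDimension counts *elements*; byte strides need the element width.
    std::optional<int64_t> leadingStrideInBytes(int64_t strideInElements,
                                                unsigned elementBits) {
      if (elementBits == 0 || elementBits % 8 != 0)
        return std::nullopt; // sub-byte element types need packing rules
      return strideInElements * static_cast<int64_t>(elementBits / 8);
    }

    int main() {
      // A 64-element leading dimension of f16 (16 bits) spans 128 bytes.
      return leadingStrideInBytes(64, 16) == 128 ? 0 : 1;
    }
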
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
index 6236e16..a5381cf 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
@@ -79,7 +79,7 @@
 
 // -----
 
-// K dim size not divisble by 32.
+// K dim size not divisible by 32.
 
 func.func @batch_matmul_16x1024x1024x80(%3: tensor<16x1024x80xf16>, %4: tensor<16x80x1024xf16>) -> tensor<16x1024x1024xf16> {
   %cst = arith.constant 0.000000e+00 : f16
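
The comment fixed above gates cooperative-matrix configs on divisibility of
the problem dims by the intrinsic tile. A hedged sketch; the 16x16x32 tile
is an assumption for illustration, and K = 80 fails the check:

    #include <cstdint>

    // Assumed intrinsic tile 16x16x32; 80 % 32 != 0, so this batch_matmul
    // cannot use the cooperative-matrix pipeline.
    bool fitsCoopMatrixTile(int64_t m, int64_t n, int64_t k) {
      return m % 16 == 0 && n % 16 == 0 && k % 32 == 0;
    }

    int main() { return fitsCoopMatrixTile(1024, 1024, 80) ? 1 : 0; }
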
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index 725002a..267074f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
@@ -79,7 +79,7 @@
 
 // -----
 
-// K dim size not divisble by 32.
+// K dim size not divisible by 32.
 
 func.func @batch_matmul_16x1024x1024x80(%3: tensor<16x1024x80xf16>, %4: tensor<16x80x1024xf16>) -> tensor<16x1024x1024xf16> {
   %cst = arith.constant 0.000000e+00 : f16
diff --git a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
index 87fe5b1..9680289 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
@@ -153,7 +153,7 @@
 bool hasAMDGPUFatRawBufferAddressSpace(MemRefType memrefType);
 
 /// Return true if the given memref has one of the global address spaces - no
-/// adress space, explicit integer 0, #gpu.address_space<global>, or
+/// address space, explicit integer 0, #gpu.address_space<global>, or
 /// #amdgpu.address_space<fat_raw_buffer>
 bool hasGlobalMemoryAddressSpace(MemRefType memrefType);
 
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
index ad89a84..95393fd 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
@@ -881,7 +881,7 @@
   auto stepApplyOp = forOp.getStep().getDefiningOp<affine::AffineApplyOp>();
 
   if (!lbApplyOp || !stepApplyOp) {
-    // Try to see if this is a specical case where we have:
+    // Try to see if this is a special case where we have:
     //   scf.for %iv = %id to %ub step %count
     std::optional<unsigned> idDim;
     if (auto ifx = dyn_cast_if_present<ProcessorIDInterface>(
@@ -1177,7 +1177,7 @@
   return std::nullopt;
 }
 
-/// Get strides for row-major oredering of a tensor with the given `shape`.
+/// Get strides for row-major ordering of a tensor with the given `shape`.
 static SmallVector<int64_t> getStridesFromShape(ArrayRef<int64_t> shape) {
   if (shape.empty()) {
     return {};
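
For the row-major ordering comment fixed above: strides are suffix products
of the shape, with the innermost dimension at stride 1. A standalone plain
C++ sketch mirroring (not copying) the IREE helper:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> stridesFromShape(const std::vector<int64_t> &shape) {
      std::vector<int64_t> strides(shape.size());
      int64_t running = 1;
      for (size_t i = shape.size(); i-- > 0;) {
        strides[i] = running; // innermost dim gets stride 1
        running *= shape[i];
      }
      return strides;
    }

    int main() {
      auto s = stridesFromShape({2, 3, 4});
      assert((s == std::vector<int64_t>{12, 4, 1}));
      return 0;
    }
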
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerExecutableTargetPass.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerExecutableTargetPass.cpp
index 695af39..84d4091 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerExecutableTargetPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerExecutableTargetPass.cpp
@@ -70,7 +70,7 @@
   auto target = IREE::HAL::ExecutableTargetAttr::lookup(funcOp);
   bool enableUKernels = target && hasUkernel(target.getConfiguration());
   switch (translationInfo.getDispatchLoweringPassPipeline()) {
-  // No pipleline specified, nothing to do.
+  // No pipeline specified, nothing to do.
   case IREE::Codegen::DispatchLoweringPassPipeline::None:
     return;
   case IREE::Codegen::DispatchLoweringPassPipeline::VMVXDefault:
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp
index 650e870..5cbf771 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLowerLinalgMicrokernels.cpp
@@ -569,7 +569,7 @@
     OpOperand *result = op.getDpsInitOperand(0);
 
     // Returns an emitter for a generic binary compatible operation where
-    // |binaryOp| has a 1:1 correspondance with |opcode|.
+    // |binaryOp| has a 1:1 correspondence with |opcode|.
     auto configureGenericBinary =
         [&](Operation *binaryOp,
             StringRef opcode) -> std::optional<BinaryEmitter> {
@@ -751,7 +751,7 @@
     OpOperand *result = op.getDpsInitOperand(0);
 
-    // Returns an emitter for a generic binary compatible operation where
-    // |binaryOp| has a 1:1 correspondance with |opcode|.
+    // Returns an emitter for a generic unary compatible operation where
+    // |unaryOp| has a 1:1 correspondence with |opcode|.
     auto configureGenericUnary =
         [&](Operation *unaryOp,
             StringRef opcode) -> std::optional<UnaryEmitter> {
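
The 1:1 correspondence described in the two hunks above lends itself to a
lookup table. A hedged standalone sketch; the op names are real arith ops,
but the opcode strings are hypothetical:

    #include <optional>
    #include <string_view>
    #include <utility>

    constexpr std::pair<std::string_view, std::string_view> kBinaryOpcodes[] = {
        {"arith.addf", "add"},
        {"arith.subf", "sub"},
        {"arith.mulf", "mul"},
    };

    std::optional<std::string_view> opcodeFor(std::string_view opName) {
      for (const auto &[name, opcode] : kBinaryOpcodes)
        if (name == opName)
          return opcode;
      return std::nullopt; // no 1:1 match: fall back to generic lowering
    }

    int main() { return opcodeFor("arith.mulf").has_value() ? 0 : 1; }
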