Fix promote i1 to i8 order to avoid invalid IR (#6119)

It turns out that the first user returned by getUsers() is not guaranteed to
be the earliest user in block order. Updated to scan all users and set the
insertion point just before the one that appears first in the block.
diff --git a/iree/compiler/Dialect/Flow/Transforms/PromoteI1ToI8Pass.cpp b/iree/compiler/Dialect/Flow/Transforms/PromoteI1ToI8Pass.cpp
index c134793..02afb20 100644
--- a/iree/compiler/Dialect/Flow/Transforms/PromoteI1ToI8Pass.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/PromoteI1ToI8Pass.cpp
@@ -59,7 +59,13 @@
     // needed as it is possible we are reusing an existing ConstantOp
     // containing the same values that occurs in a future line. Moving to the
     // first use case avoids declaring out of order operations.
-    rewriter.setInsertionPoint(*op.getResult().getUsers().begin());
+    Operation *firstUser = *op.getResult().getUsers().begin();
+    for (auto checkOp : op.getResult().getUsers()) {
+      if (checkOp->isBeforeInBlock(firstUser)) {
+        firstUser = checkOp;
+      }
+    }
+    rewriter.setInsertionPoint(firstUser);
 
     auto initTensor = rewriter.create<linalg::InitTensorOp>(
         loc, ArrayRef<Value>({}), resultTy.getShape(),