Move ClockGate to use HasBlackBoxResource.

- Bring in lowRISC primitives from
  f243e6802143374741739d2c164c4f2f61697669
- Move ClockGate to use HasBlackBoxResource
- Adjustments for CoreMiniAxi and Matcha to accommodate changes.

Change-Id: I4f9dc17cd6d91493d1a09f9069a4daf01a853858
diff --git a/hdl/chisel/kelvin.core.in b/hdl/chisel/kelvin.core.in
index 95f7fa3..8e19e7b 100644
--- a/hdl/chisel/kelvin.core.in
+++ b/hdl/chisel/kelvin.core.in
@@ -8,7 +8,6 @@
   files_rtl:
     files:
       - kelvin.sv
-      - ClockGate.sv
       - Sram_1rw_256x256.v
       - Sram_1rwm_256x288.v
     file_type: systemVerilogSource
diff --git a/hdl/chisel/src/chai/BUILD b/hdl/chisel/src/chai/BUILD
index 2fe1b1e..0591bf8 100644
--- a/hdl/chisel/src/chai/BUILD
+++ b/hdl/chisel/src/chai/BUILD
@@ -37,7 +37,6 @@
     emit_class = "chai.EmitChAI",
     module_name = "ChAI",
     verilog_deps = [
-        "//hdl/verilog:clock_gate",
         "//hdl/verilog:sram_1rw_256x256",
         "//hdl/verilog:sram_1rw_256x288",
         "//hdl/verilog:tlul_adapter_sram",
diff --git a/hdl/chisel/src/chai/ChAI.scala b/hdl/chisel/src/chai/ChAI.scala
index 91aaf5c..d8d0683 100644
--- a/hdl/chisel/src/chai/ChAI.scala
+++ b/hdl/chisel/src/chai/ChAI.scala
@@ -18,6 +18,8 @@
 import chisel3.util._
 
 import bus._
+import java.nio.file.{Paths, Files, StandardOpenOption}
+import java.nio.charset.{StandardCharsets}
 import _root_.circt.stage.ChiselStage
 
 case class Parameters() {
@@ -118,5 +120,33 @@
 
 object EmitChAI extends App {
   val p = new Parameters()
-  ChiselStage.emitSystemVerilogFile(new ChAI(p), args)
+  // Split off our own `--target-dir=<dir>` flag; everything else goes to Chisel.
+  val targetDirFlag = "--target-dir="
+  val (dirArgs, chiselArgs) = args.toList.partition(_.startsWith(targetDirFlag))
+  // The last occurrence wins; an empty value is treated as "not given".
+  val targetDir: Option[String] =
+    dirArgs.lastOption.map(_.stripPrefix(targetDirFlag)).filter(_.nonEmpty)
+
+  // Kept lazy so the module is first constructed when ChiselStage evaluates it.
+  lazy val core = new ChAI(p)
+  val systemVerilogSource = ChiselStage.emitSystemVerilog(
+    core, chiselArgs.toArray)
+  // CIRCT adds a little extra data to the sv file at the end. Remove it as we
+  // don't want it (it prevents the sv from being verilated).
+  val resourcesSeparator =
+      "// ----- 8< ----- FILE \"firrtl_black_box_resource_files.f\" ----- 8< -----"
+  // Literal search (not String.split, which would treat the marker as a regex).
+  val separatorAt = systemVerilogSource.indexOf(resourcesSeparator)
+  val strippedVerilogSource =
+    if (separatorAt < 0) systemVerilogSource
+    else systemVerilogSource.substring(0, separatorAt)
+
+  // CREATE alone does not truncate: overwriting a longer stale file would leave
+  // trailing garbage, so TRUNCATE_EXISTING is requested explicitly.
+  targetDir.foreach { dir =>
+    Files.write(
+        Paths.get(dir, core.name + ".sv"),
+        strippedVerilogSource.getBytes(StandardCharsets.UTF_8),
+        StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)
+  }
 }
diff --git a/hdl/chisel/src/kelvin/BUILD b/hdl/chisel/src/kelvin/BUILD
index 35aac9a..75af30b 100644
--- a/hdl/chisel/src/kelvin/BUILD
+++ b/hdl/chisel/src/kelvin/BUILD
@@ -114,10 +114,37 @@
     ],
 )
 
+# Dependencies for `clock_gate` target. These resources need to be in their own
+# target so resource_strip_prefix can be set correctly.
+chisel_library(
+    name = "clock_gate_deps",
+    srcs = [],
+    resources = [
+        "//third_party/ip/lowrisc:prim_clock_gating.sv",
+        "//third_party/ip/lowrisc:prim_generic_clock_gating.sv",
+        "//third_party/ip/lowrisc:prim_xilinx_clock_gating.sv",
+    ],
+    resource_strip_prefix = "third_party/ip",
+    visibility = ["//visibility:private"],
+)
+
+chisel_library(
+    name = "clock_gate",
+    srcs = [
+        "ClockGate.scala",
+    ],
+    resources = [
+        "//hdl/verilog:ClockGate.sv",
+    ],
+    resource_strip_prefix = "hdl/verilog",
+    deps = [
+        ":clock_gate_deps",
+    ],
+)
+
 chisel_library(
     name = "kelvin_base",
     srcs = [
-        "ClockGate.scala",
         "DBus2Axi.scala",
         "DBusMux.scala",
         "IBus2Axi.scala",
@@ -128,6 +155,7 @@
         "Parameters.scala",
     ],
     deps = [
+        ":clock_gate",
         "//hdl/chisel/src/bus",
         "//hdl/chisel/src/common",
     ],
@@ -209,6 +237,7 @@
         "TCM.scala",
     ],
     deps = [
+        ":clock_gate",
         ":kelvin_base",
         ":kelvin_scalar",
         ":kelvin_vector",
@@ -289,9 +318,6 @@
     ],
     module_name = "CoreMiniAxi",
     systemc = False,
-    verilog_deps = [
-        "//hdl/verilog:clock_gate",
-    ],
     verilog_file_path = "CoreMiniAxi.sv",
 )
 
diff --git a/hdl/chisel/src/kelvin/ClockGate.scala b/hdl/chisel/src/kelvin/ClockGate.scala
index d66a7b0..f56654e 100644
--- a/hdl/chisel/src/kelvin/ClockGate.scala
+++ b/hdl/chisel/src/kelvin/ClockGate.scala
@@ -16,12 +16,15 @@
 
 import chisel3._
 import chisel3.util._
-import common._
 
-class ClockGate extends BlackBox {
+class ClockGate extends BlackBox with HasBlackBoxResource {
   val io = IO(new Bundle {
     val clk_i  = Input(Clock())
     val enable = Input(Bool())  // '1' passthrough, '0' disable.
     val clk_o  = Output(Clock())
   })
+  addResource("ClockGate.sv")
+  addResource("lowrisc/prim_clock_gating.sv")
+  addResource("lowrisc/prim_generic_clock_gating.sv")
+  addResource("lowrisc/prim_xilinx_clock_gating.sv")
 }
diff --git a/hdl/chisel/src/matcha/BUILD b/hdl/chisel/src/matcha/BUILD
index 30fe3ef..79647b9 100644
--- a/hdl/chisel/src/matcha/BUILD
+++ b/hdl/chisel/src/matcha/BUILD
@@ -27,6 +27,7 @@
     deps = [
         "//hdl/chisel/src/bus:bus",
         "//hdl/chisel/src/common:common",
+        "//hdl/chisel/src/kelvin:clock_gate",
         "//hdl/chisel/src/kelvin:kelvin",
         "//hdl/chisel/src/kelvin:kelvin_base",
     ],
@@ -38,7 +39,6 @@
     emit_class = "matcha.EmitKelvin",
     module_name = "Kelvin",
     verilog_deps = [
-        "//hdl/verilog:clock_gate",
         "//hdl/verilog:sram_1rw_256x256",
         "//hdl/verilog:sram_1rw_256x288",
     ],
@@ -53,7 +53,6 @@
     emit_class = "matcha.EmitKelvin",
     module_name = "KelvinScalar",
     verilog_deps = [
-        "//hdl/verilog:clock_gate",
         "//hdl/verilog:sram_1rw_256x256",
         "//hdl/verilog:sram_1rw_256x288",
     ],
@@ -73,7 +72,6 @@
     emit_class = "matcha.EmitKelvin",
     module_name = "KelvinMini",
     verilog_deps = [
-        "//hdl/verilog:clock_gate",
         "//hdl/verilog:sram_1rw_256x256",
         "//hdl/verilog:sram_1rw_256x288",
     ],
diff --git a/hdl/verilog/BUILD b/hdl/verilog/BUILD
index 121d0f7..bedcf9d 100644
--- a/hdl/verilog/BUILD
+++ b/hdl/verilog/BUILD
@@ -31,7 +31,7 @@
     srcs = ["ClockGate.sv"],
     visibility = ["//visibility:public"],
     deps = [
-        "//third_party/ip/lowrisc:prim",
+        "//third_party/ip/lowrisc:prim_clock_gate",
     ],
 )
 
diff --git a/hdl/verilog/ClockGate.sv b/hdl/verilog/ClockGate.sv
index 02c7f09..90170c2 100644
--- a/hdl/verilog/ClockGate.sv
+++ b/hdl/verilog/ClockGate.sv
@@ -18,7 +18,7 @@
   output        clk_o
 );
 
-prim_clock_gating u_cg(
+lowrisc_prim_clock_gating u_cg(
   .clk_i(clk_i),
   .en_i(enable),
   .test_en_i('0),
diff --git a/third_party/ip/lowrisc/BUILD b/third_party/ip/lowrisc/BUILD
index 0a97a9c..c97c602 100644
--- a/third_party/ip/lowrisc/BUILD
+++ b/third_party/ip/lowrisc/BUILD
@@ -14,6 +14,25 @@
 
 load("@rules_hdl//verilog:providers.bzl", "verilog_library")
 
+exports_files(
+    [
+        "prim_clock_gating.sv",
+        "prim_generic_clock_gating.sv",
+        "prim_xilinx_clock_gating.sv",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+verilog_library(
+    name = "prim_clock_gate",
+    srcs = [
+        "prim_clock_gating.sv",
+        "prim_generic_clock_gating.sv",
+        "prim_xilinx_clock_gating.sv",
+    ],
+    visibility = ["//visibility:public"],
+)
+
 verilog_library(
     name = "prim_generic",
     srcs = [
diff --git a/third_party/ip/lowrisc/README.md b/third_party/ip/lowrisc/README.md
new file mode 100644
index 0000000..5b8b32a
--- /dev/null
+++ b/third_party/ip/lowrisc/README.md
@@ -0,0 +1,4 @@
+# lowRISC OpenTitan
+
+Kelvin leverages some primitives from [lowRISC OpenTitan](https://github.com/lowRISC/opentitan).
+We include these in the Kelvin tree directly to simplify the SystemVerilog bundling process.
diff --git a/third_party/ip/lowrisc/prim_clock_gating.sv b/third_party/ip/lowrisc/prim_clock_gating.sv
new file mode 100644
index 0000000..954253a
--- /dev/null
+++ b/third_party/ip/lowrisc/prim_clock_gating.sv
@@ -0,0 +1,44 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Abstract primitives wrapper.
+//
+// This file is a stop-gap until the DV file list is generated by FuseSoC.
+// Its contents are taken from the file which would be generated by FuseSoC.
+// https://github.com/lowRISC/ibex/issues/893
+
+package lowrisc_prim_pkg;
+
+  // Implementation target specialization
+  typedef enum integer {
+    ImplGeneric,
+    ImplXilinx
+  } impl_e;
+endpackage : lowrisc_prim_pkg
+
+`ifndef LOWRISC_PRIM_DEFAULT_IMPL
+  `define LOWRISC_PRIM_DEFAULT_IMPL lowrisc_prim_pkg::ImplGeneric
+`endif
+
+module lowrisc_prim_clock_gating (
+  input        clk_i,
+  input        en_i,
+  input        test_en_i,
+  output logic clk_o
+);
+  parameter lowrisc_prim_pkg::impl_e Impl = `LOWRISC_PRIM_DEFAULT_IMPL;
+
+  if (Impl == lowrisc_prim_pkg::ImplGeneric) begin : gen_generic
+    lowrisc_prim_generic_clock_gating u_impl_generic (
+      .*
+    );
+  end else if (Impl == lowrisc_prim_pkg::ImplXilinx) begin : gen_xilinx
+    lowrisc_prim_xilinx_clock_gating u_impl_xilinx (
+      .*
+    );
+  end else begin : gen_failure
+    // TODO: Find code that works across tools and causes a compile failure
+  end
+
+endmodule
diff --git a/third_party/ip/lowrisc/prim_generic_clock_gating.sv b/third_party/ip/lowrisc/prim_generic_clock_gating.sv
new file mode 100644
index 0000000..4c2e5ff
--- /dev/null
+++ b/third_party/ip/lowrisc/prim_generic_clock_gating.sv
@@ -0,0 +1,28 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Common Library: Clock Gating cell
+//
+// The logic assumes that en_i is synchronized (so the instantiation site might need to put a
+// synchronizer before en_i).
+
+module lowrisc_prim_generic_clock_gating #(
+  parameter bit NoFpgaGate = 1'b0, // this parameter has no function in generic
+  parameter bit FpgaBufGlobal = 1'b1 // this parameter has no function in generic
+) (
+  input        clk_i,
+  input        en_i,
+  input        test_en_i,
+  output logic clk_o
+);
+
+  logic en_latch /* verilator clock_enable */;
+  always_latch begin
+    if (!clk_i) begin
+      en_latch = en_i | test_en_i;
+    end
+  end
+  assign clk_o = en_latch & clk_i;
+
+endmodule
diff --git a/third_party/ip/lowrisc/prim_xilinx_clock_gating.sv b/third_party/ip/lowrisc/prim_xilinx_clock_gating.sv
new file mode 100644
index 0000000..db8abea
--- /dev/null
+++ b/third_party/ip/lowrisc/prim_xilinx_clock_gating.sv
@@ -0,0 +1,44 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+module lowrisc_prim_xilinx_clock_gating #(
+  parameter bit NoFpgaGate = 1'b0,
+  parameter bit FpgaBufGlobal = 1'b1
+) (
+  input        clk_i,
+  input        en_i,
+  input        test_en_i,
+  output logic clk_o
+);
+
+  if (NoFpgaGate) begin : gen_no_gate
+    assign clk_o = clk_i;
+  end else begin : gen_gate
+    if (FpgaBufGlobal) begin : gen_bufgce
+      // By default, we use BUFG(CE)s, i.e., global clock buffers (with enable input).
+      // These resources are scarce (32 in monolithic 7 series devices) and under some
+      // circumstances cannot be cascaded. They should especially be used for (gating)
+      // clocks that span big parts of the design/multiple clock regions.
+      BUFGCE #(
+        .SIM_DEVICE("ULTRASCALE_PLUS")
+      ) u_bufgce (
+        .I (clk_i),
+        .CE(en_i | test_en_i),
+        .O (clk_o)
+      );
+    end else begin : gen_bufhce
+      // The BUFH(CE) is a horizontal or local clock buffer (with enable input). Every clock
+      // region has 12 of these buffers. They should be used for (gating) clocks that are
+      // being used locally.
+      BUFHCE u_bufhce (
+        .I (clk_i),
+        .CE(en_i | test_en_i),
+        .O (clk_o)
+      );
+    end
+  end
+
+
+
+endmodule