Add back vsliden and vslidep encodings

They are heavily used in the test/example code.
Also add documentation on how they are used.

Change-Id: I7170161d037850a6f8c3959df1154dc36c0b0c7b
diff --git a/docs/kelvin_isa.md b/docs/kelvin_isa.md
index 9054d28..33cbf66 100644
--- a/docs/kelvin_isa.md
+++ b/docs/kelvin_isa.md
@@ -1716,29 +1716,54 @@
 
 Slide next register by index.
 
+For the horizontal mode, it treats the stripmine `vm` register based on
+`vs1` as a contiguous block, and only the first `index` elements from
+`vs2` are used.
+For the vertical mode, each stripmine vector register `op_index` is mapped
+separately. It mimics the tile shift of the image tiling process:
+
+```
+  |--------|--------|
+  | 4xVLEN | 4xVLEN |
+  |  (vs1) |  (vs2) |
+  |--------|--------|
+```
+
+The vertical mode also supports a non-stripmine version to handle the
+last columns of the image.
+
 **Encodings**
 
+Horizontal slide:
+
 vslidehn.[b,h,w].[1,2,3,4].vv.m vd, vs1, vs2 \
+vslidehn.[b,h,w].[1,2,3,4].vx.m vd, vs1, xs2
+
+Vertical slide:
+
+vsliden.[b,h,w].[1,2,3,4].vv vd, vs1, vs2 \
 vslidevn.[b,h,w].[1,2,3,4].vv.m vd, vs1, vs2 \
-vslidehn.[b,h,w].[1,2,3,4].vx.m vd, vs1, xs2 \
 vslidevn.[b,h,w].[1,2,3,4].vx.m vd, vs1, xs2
 
 **Operation**
 
 ```
 assert vd != vs1 && vd != vs2
-if Op.h
-  va = {{vs1+3},{vs1+2},{vs1+1},{vs1+0}}
-  vb = {{vs2+0},{vs1+3},{vs1+2},{vs1+1}}
-if Op.v
-  va = {{vs1+3},{vs1+2},{vs1+1},{vs1+0}}
-  vb = {{vs2+3},{vs2+2},{vs2+1},{vs2+0}}
-for M in Op.m
+if Op.h  // A contiguous horizontal slide based on vs1
+  va = {{vs1},{vs1+1},{vs1+2},{vs1+3}}
+  vb = {{vs1+1},{vs1+2},{vs1+3},{vs2}}
+if Op.v  // vs1/vs2 vertical slide
+  va = {{vs1},{vs1+1},{vs1+2},{vs1+3}}
+  vb = {{vs2},{vs2+1},{vs2+2},{vs2+3}}
+
+sm = Op.m ? 4 : 1  // stripmine operates on 4 registers, otherwise 1
+
+for M in sm
   for L in Op.typelen
     if (L + index < Op.typelen)
       vd[L] = va[M][L + index]
     else
-      vd[L] = vb[M][L + index - Op.typelen]
+      vd[L] = is_vx ? xs2 : vb[M][L + index - Op.typelen]
 ```
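+
+Below is a minimal C++ sketch of the per-register slide-next step (a
+hypothetical reference model, not the ISA or hardware implementation; the
+function name `SlideNextRef` and `int` element type are illustrative):
+
+```
+#include <cstddef>
+#include <vector>
+
+// vd takes elements [index, n) of va followed by elements [0, index)
+// of vb, mirroring the Operation pseudocode above for one register M.
+std::vector<int> SlideNextRef(const std::vector<int>& va,
+                              const std::vector<int>& vb, size_t index) {
+  const size_t n = va.size();  // Op.typelen
+  std::vector<int> vd(n);
+  for (size_t i = 0; i < n; ++i) {
+    vd[i] = (i + index < n) ? va[i + index] : vb[i + index - n];
+  }
+  return vd;
+}
+```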
 
 --------------------------------------------------------------------------------
@@ -1747,27 +1772,55 @@
 
 Slide previous register by index.
 
+For the horizontal mode, it treats the stripmine `vm` register based on
+`vs2` as a contiguous block; only the *last* `index` elements from the
+stripmine `vm` register based on `vs1` are used, at the *beginning* of
+the result.
+For the vertical mode, each stripmine vector register `op_index` is mapped
+separately. It mimics the tile shift of the image tiling process:
+
+```
+  |--------|--------|
+  | 4xVLEN | 4xVLEN |
+  |  (vs1) |  (vs2) |
+  |--------|--------|
+```
+
+The vertical mode also supports a non-stripmine version to handle the
+last columns of the image.
+
 **Encodings**
 
+Horizontal slide:
+
 vslidehp.[b,h,w].[1,2,3,4].vv.m vd, vs1, vs2 \
-vslidevp.[b,h,w].[1,2,3,4].vv.m vd, vs1, vs2
+vslidehp.[b,h,w].[1,2,3,4].vx.m vd, vs1, xs2
+
+Vertical slide:
+
+vslidep.[b,h,w].[1,2,3,4].vv vd, vs1, vs2 \
+vslidevp.[b,h,w].[1,2,3,4].vv.m vd, vs1, vs2 \
+vslidevp.[b,h,w].[1,2,3,4].vx.m vd, vs1, xs2
 
 **Operation**
 
 ```
 assert vd != vs1 && vd != vs2
-if Op.h
-  va = {{vs1+3},{vs1+2},{vs1+1},{vs1+0}}
-  vb = {{vs2+0},{vs1+3},{vs1+2},{vs1+1}}
-if Op.v
-  va = {{vs1+3},{vs1+2},{vs1+1},{vs1+0}}
-  vb = {{vs1+2},{vs1+1},{vs1+0},{vs2+3}}
-for M in Op.m
+
+if Op.h  // A contiguous horizontal slide based on vs2
+  va = {{vs1+3},{vs2},{vs2+1},{vs2+2}}
+  vb = {{vs2},{vs2+1},{vs2+2},{vs2+3}}
+if Op.v  // vs1/vs2 vertical slide
+  va = {{vs1},{vs1+1},{vs1+2},{vs1+3}}
+  vb = {{vs2},{vs2+1},{vs2+2},{vs2+3}}
+
+sm = Op.m ? 4 : 1  // stripmine operates on 4 registers, otherwise 1
+
+for M in sm
   for L in Op.typelen
-    if (L >= index)
-      vd[L] = va[M][L - index]
+    if (L < index)
+      vd[L] = va[M][Op.typelen + L - index]
     else
-      vd[L] = vb[M][Op.typelen + L - index]
+      vd[L] = is_vx ? xs2 : vb[M][L - index]
 ```
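+
+And a matching C++ sketch of the per-register slide-previous step (again a
+hypothetical reference model; the function name `SlidePrevRef` and `int`
+element type are illustrative):
+
+```
+#include <cstddef>
+#include <vector>
+
+// vd takes the last `index` elements of va first, then elements
+// [0, n - index) of vb, mirroring the Operation pseudocode above.
+std::vector<int> SlidePrevRef(const std::vector<int>& va,
+                              const std::vector<int>& vb, size_t index) {
+  const size_t n = va.size();  // Op.typelen
+  std::vector<int> vd(n);
+  for (size_t i = 0; i < n; ++i) {
+    vd[i] = (i < index) ? va[n + i - index] : vb[i - index];
+  }
+  return vd;
+}
+```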
 
 --------------------------------------------------------------------------------
diff --git a/host_tools/encoding_main.cc b/host_tools/encoding_main.cc
index 57f5854..af3556d 100644
--- a/host_tools/encoding_main.cc
+++ b/host_tools/encoding_main.cc
@@ -964,16 +964,19 @@
   const uint32_t vmatch_base = VMatch(base);
   const uint32_t vmask_base = VMask(base);
 
-  std::vector<std::string> slide_group = {"vslidehn", "vslidevn", "vslidehp",
-                                          "vslidevp"};
+  std::vector<std::string> slide_group = {"vsliden", "vslidehn", "vslidevn",
+                                          "vslidep", "vslidehp", "vslidevp"};
 
   bool has_range = CheckVariant(name, slide_group);
   int range = has_range ? 3 : 0;
-  bool is_m = has_range;
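+  // Non-stripmine forms (no .m suffix): vsliden/vslidep.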
+  bool is_m0 = CheckVariant(name, {"vsliden", "vslidep"});
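+  // Stripmine forms (.m suffix required): the h/v variants.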
+  bool is_m1 =
+      CheckVariant(name, {"vslidehn", "vslidehp", "vslidevn", "vslidevp"});
 
   for (int sz = 0; sz < 3; ++sz) {
     for (auto m : {false, true}) {
-      if (!m && is_m) continue;
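+      // Emit only the non-stripmine form for vsliden/vslidep, and only
+      // the stripmine form for the h/v variants.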
+      if (!m && is_m1) continue;
+      if (m && is_m0) continue;
       for (int n = 0; n <= range; ++n) {
         for (auto x : {false, true}) {
           std::string op = name;
@@ -1131,8 +1134,10 @@
 
   Space();
   Comment("110 Shuffle");
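+  // vsliden/vslidep reuse the vslidevn/vslidevp function offsets; the
+  // stripmine bit distinguishes the encodings.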
+  Encode110("vsliden", 0);
   Encode110("vslidevn", 0);
   Encode110("vslidehn", 4);
+  Encode110("vslidep", 8);
   Encode110("vslidevp", 8);
   Encode110("vslidehp", 12);
   Encode110("vsel", 16);