Merge branch '25-rename-refs-to-ids'

See PR #35
DigitalCommons · Oct 28, 2024 · 7a118eb · 7a118eb
2 parents 0c818ec + 859958a
commit 7a118eb
Show file tree

Hide file tree

Showing 5 changed files with 213 additions and 19 deletions.
diff --git a/apps/back-end/src/routes.ts b/apps/back-end/src/routes.ts
@@ -118,15 +118,17 @@ export function MykomapRouter(
     },
 
     async getDatasetItem({
-      params: { datasetId, datasetItemId },
+      params: { datasetId, datasetItemIdOrIx },
       request,
       reply,
     }) {
       if (
+        // datasetItemIdOrIx could be either an ID or an Index. But for the purposes here,
+        // which is a stub implementation, we don't distinguish.
         !sendJson(
           request,
           reply,
-          filePath("datasets", datasetId, "items", String(datasetItemId)),
+          filePath("datasets", datasetId, "items", String(datasetItemIdOrIx)),
         )
       )
         throw new TsRestResponseError(contract.getDatasetItem, {

diff --git a/libs/common/src/api/contract.ts b/libs/common/src/api/contract.ts
@@ -26,11 +26,22 @@ function ZodRegex(rx: RegExp, message: string) {
 
 const Location = z.array(z.number()).min(2).max(2);
 const DatasetId = z.string().regex(Rx.UrlSafeBase64);
-const DatasetItemId = z.coerce.number().int().nonnegative();
 const DatasetItem = z.object({}).passthrough();
 const Dataset = z.array(Location);
 const NCName = ZodRegex(Rx.NCName, "Invalid NCName format");
 const QName = ZodRegex(Rx.QName, "Invalid QName format");
+const DatasetItemId = ZodRegex(
+  Rx.DatasetItemId,
+  "Invalid DatasetItemId format",
+);
+const DatasetItemIx = ZodRegex(
+  Rx.DatasetItemIx,
+  "Invalid DatasetItemIx format",
+);
+const DatasetItemIdOrIx = ZodRegex(
+  Rx.DatasetItemIdOrIx,
+  "Invalid DatasetItemIdOrIx format",
+);
 // Developer note: PrefixUri is regex based, as it attempts to avoid the .url() deficiencies in
 // https://github.com/colinhacks/zod/issues/2236. But also our concept of a URI is narrowed, see
 // documentation for Rx.PrefixUri.
@@ -64,6 +75,8 @@ export const schemas = {
   ConfigData,
   DatasetId,
   DatasetItemId,
+  DatasetItemIdOrIx,
+  DatasetItemIx,
   DatasetItem,
   Dataset,
   I18nVocabDefs,
@@ -141,16 +154,16 @@ export const contract = c.router({
   },
   getDatasetItem: {
     method: "GET",
-    path: "/dataset/:datasetId/item/:datasetItemId",
+    path: "/dataset/:datasetId/item/:datasetItemIdOrIx",
     summary: "obtains a dataset item by its unique ID",
     description:
-      "Obtains a single dataset item by its ID and the dataset's ID.",
+      "Obtains a single dataset item by its ID or its index, and the dataset's ID.",
     pathParams: z.object({
       datasetId: DatasetId.openapi({
         // description: "uniquely specifies the dataset wanted",
       }),
-      datasetItemId: DatasetItemId.openapi({
-        // description: "uniquely specifies the dataset item wanted",
+      datasetItemIdOrIx: DatasetItemIdOrIx.openapi({ // accepts an item ID or an `@`-prefixed index
+        // description: "uniquely specifies the dataset item wanted within the dataset",
       }),
     }),
     responses: {

diff --git a/libs/common/src/api/mykomap-openapi.json b/libs/common/src/api/mykomap-openapi.json
@@ -127,9 +127,7 @@
                 "schema": {
                   "type": "array",
                   "items": {
-                    "type": "integer",
-                    "minimum": 0,
-                    "nullable": true
+                    "type": "string"
                   }
                 }
               }
@@ -176,9 +174,9 @@
         }
       }
     },
-    "/dataset/{datasetId}/item/{datasetItemId}": {
+    "/dataset/{datasetId}/item/{datasetItemIdOrIx}": {
       "get": {
-        "description": "Obtains a single dataset item by its ID and the dataset's ID.",
+        "description": "Obtains a single dataset item by its ID or its index, and the dataset's ID.",
         "summary": "obtains a dataset item by its unique ID",
         "tags": [],
         "parameters": [
@@ -192,13 +190,11 @@
             }
           },
           {
-            "name": "datasetItemId",
+            "name": "datasetItemIdOrIx",
             "in": "path",
             "required": true,
             "schema": {
-              "type": "integer",
-              "minimum": 0,
-              "nullable": true
+              "type": "string"
             }
           }
         ],

diff --git a/libs/common/src/rxdefs.ts b/libs/common/src/rxdefs.ts
@@ -1,6 +1,6 @@
 /** Regular expression definitions */
 import RxUtils from "./rxutils.js";
-const { min, seq, conc, maybe } = RxUtils;
+const { min, seq, conc, maybe, oneOf } = RxUtils;
 
 /** A regex testing for an *URL-safe* base64 string (RFC4648 sect 5) */
 export const UrlSafeBase64 = /^[A-Za-z0-9_-]+$/imsu;
@@ -131,3 +131,53 @@ const Path = seq(min(0, seq("/", PathSegment)), maybe("/"));
  *
  */
 export const PrefixUri = conc(Scheme, Domain, Path, maybe("#"));
+
+/** A valid (non-percent-encoded) URI path character, excluding `@`
+ *
+ * Characters can be any of:
+ * - unreserved chars /[A-Za-z0-9._~-]/
+ * - percent-encoded chars, i.e. /%[A-Za-z0-9]{2}/ (not matched by this regex; handled separately, via PctEnc)
+ * - sub-delims chars /[!$&'()*+,;=]/
+ * - colon chars /[:]/
+ *
+ * Modified version of PathChar above.
+ */
+const NonAtPathChar = /[A-Za-z0-9._~!$&'()*+,;=:-]/;
+
+/** Match a DatasetItemId (identifier)
+ *
+ * This ID needs to be flexible enough to match user-supplied IDs. It can be
+ * anything a URI path segment contains. Percent-encoding is needed for
+ * anything not *literally* allowed in a path segment.
+ *
+ * It also needs to be distinct from DatasetItemIx, however. Therefore we don't use
+ * the exact same definition as PathSegment: we disallow a `@` character at the start. If the
+ * ID needs such a thing, it must use percent encoding for that character, i.e.
+ * `%40`.
+ *
+ */
+export const DatasetItemId = seq(
+  min(1, PctEnc, NonAtPathChar),
+  min(0, PctEnc, PathChar),
+);
+
+/** Match a DatasetItemIx (index)
+ *
+ * This is basically a non-negative integer, representing an offset into the dataset.
+ * However, it needs to be a bit distinct from a DatasetItemId, so the rule is that
+ * it starts with an `@` symbol.
+ *
+ */
+export const DatasetItemIx = seq(/@/, min(1, /\d/));
+
+/** Match a DatasetItemId or a DatasetItemIx,
+ *
+ * Nominally this means:
+ *
+ *    oneOf(DatasetItemId, DatasetItemIx);
+ *
+ * However, we simplify that by using PathSegment, which is a close superset
+ * (DatasetItemId is designed to match PathSegment patterns, minus those starting
+ * with `@`). Note it also accepts non-index `@` forms such as `@` and `@-1`.
+ */
+export const DatasetItemIdOrIx = PathSegment;
diff --git a/libs/common/test/validation.test.ts b/libs/common/test/validation.test.ts
@@ -7,7 +7,16 @@ import { slurpJsonSync } from "./file-utils.js";
 import { globSync } from "glob";
 import { join } from "node:path";
 
-const { DatasetId, QName, PrefixUri, Iso639Set1Code, ConfigData } = schemas;
+const {
+  DatasetId,
+  DatasetItemId,
+  DatasetItemIx,
+  DatasetItemIdOrIx,
+  QName,
+  PrefixUri,
+  Iso639Set1Code,
+  ConfigData,
+} = schemas;
 
 /** Creates expectations on validating each of an array of cases
  *
@@ -52,7 +61,131 @@ test("testing DatasetId validation", async (t) => {
     "01234",
     "Quick-Brown-Fox_42",
   ]);
-  expectInvalid(DatasetId, ["", " ", "/", "?", "&", ":", ".", "="]);
+  expectInvalid(DatasetId, [
+    "",
+    " ",
+    "/",
+    "?",
+    "&",
+    ":",
+    ".",
+    "=",
+    "a a",
+    " a",
+    "a ",
+  ]);
+});
+
+test("testing DatasetItemId validation", async (t) => {
+  expectValid(DatasetItemId, [
+    "0",
+    "A",
+    "z",
+    "_",
+    "-",
+    "-1",
+    "01234",
+    "Quick-Brown-Fox_42",
+    "Azaz09._~!$&'()*+,;=:@-",
+    "%61", // `a`
+    "%61%41", // `aA`
+    "%61%20%0a", // `a <line feed>`
+    "%401", // `@1`
+    "A@",
+    "%20foo%20bar%20", // embedded spaces allowed if encoded
+  ]);
+  expectInvalid(DatasetItemId, [
+    "",
+    " ",
+    "/",
+    "@",
+    "@1",
+    "@12334567890",
+    " foo", // no literal spaces
+    "foo ",
+    " foo ",
+  ]);
+});
+
+test("testing DatasetItemIx validation", async (t) => {
+  expectValid(DatasetItemIx, ["@1", "@12334567890"]);
+  expectInvalid(DatasetItemIx, [
+    "",
+    " ",
+    "/",
+    "@",
+    "0",
+    "A",
+    "z",
+    "_",
+    "-",
+    "-1",
+    "01234",
+    "Quick-Brown-Fox_42",
+    "Azaz09._~!$&'()*+,;=:@-",
+    "%61", // `a`
+    "%61%41", // `aA`
+    "%61%20%0a", // `a <line feed>`
+    "%401", // `@1`
+    "A@",
+    " @1", // no leading or trailing or embedded spaces
+    "@1 ",
+    " @1 ",
+    "@ 1",
+    "@1 1",
+    "%401", // No percent encoding
+    "@%31",
+  ]);
+});
+
+test("testing DatasetItemIdOrIx validation", async (t) => {
+  expectValid(DatasetItemIdOrIx, [
+    "0",
+    "A",
+    "z",
+    "_",
+    "-",
+    "-1",
+    "01234",
+    "Quick-Brown-Fox_42",
+    "Azaz09._~!$&'()*+,;=:@-",
+    "%61", // `a`
+    "%61%41", // `aA`
+    "%61%20%0a", // `a <line feed>`
+    "%401", // `@1`
+    "A@",
+    "@",
+    "@-1",
+    "-@1",
+    "0",
+    "-1",
+    "A",
+    "z",
+    "_",
+    "-",
+    "01234",
+    "Quick-Brown-Fox_42",
+    "Azaz09._~!$&'()*+,;=:@-",
+    "%61", // `a`
+    "%61%41", // `aA`
+    "%61%20%0a", // `a <line feed>`
+    "%401", // `@1`
+    "A@",
+    "@12334567890",
+  ]);
+  expectInvalid(DatasetItemIdOrIx, [
+    "",
+    " ",
+    "/",
+    " @1", // no leading or trailing or embedded spaces
+    "@1 ",
+    " @1 ",
+    "@ 1",
+    "@1 1",
+    " foo",
+    "foo ",
+    " foo ",
+  ]);
 });
 
 test("testing QName validation", async (t) => {