Merge pull request #32 from DigitalCommons/add-vocabs-route

Add vocabs API route
DigitalCommons · Oct 22, 2024 · 7bb9fb1 · 7bb9fb1
2 parents 537e5c3 + ddcf330
commit 7bb9fb1
Show file tree

Hide file tree

Showing 8 changed files with 757 additions and 61 deletions.
diff --git a/apps/back-end/src/routes.ts b/apps/back-end/src/routes.ts
@@ -137,6 +137,18 @@ export function MykomapRouter(
       return reply;
     },
 
+    async getConfig({ params: { datasetId }, request, reply }) {
+      // Try to send the dataset's "config" file via sendJson. A falsy result is reported
+      // as a 404, raised against this route's own contract entry (getConfig).
+      if (!sendJson(request, reply, filePath("datasets", datasetId, "config")))
+        throw new TsRestResponseError(contract.getConfig, {
+          status: 404,
+          body: { message: `unknown datasetId '${datasetId}'` },
+        });
+
+      return reply;
+    },
+
     async getVersion(req) {
       return {
         body: __BUILD_INFO__,

diff --git a/apps/back-end/test/data/datasets/test-A/config.json b/apps/back-end/test/data/datasets/test-A/config.json
@@ -0,0 +1,30 @@
+{
+  "prefixes": {
+    "https://dev.lod.coop/essglobal/2.1/standard/activities-modified/": "am"
+  },
+  "vocabs": {
+    "am": {
+      "en": {
+        "title": "Activities (Modified)",
+        "terms": {
+          "AM10": "Arts, Media, Culture & Leisure",
+          "AM20": "Campaigning, Activism & Advocacy",
+          "AM30": "Community & Collective Spaces",
+          "AM40": "Education",
+          "AM50": "Energy",
+          "AM60": "Food",
+          "AM70": "Goods & Services",
+          "AM80": "Health, Social Care & Wellbeing",
+          "AM90": "Housing",
+          "AM100": "Money & Finance",
+          "AM110": "Nature, Conservation & Environment",
+          "AM120": "Reduce, Reuse, Repair & Recycle",
+          "AM130": "Agriculture",
+          "AM140": "Industry",
+          "AM150": "Utilities",
+          "AM160": "Transport"
+        }
+      }
+    }
+  }
+}
diff --git a/apps/back-end/test/validation.test.ts b/apps/back-end/test/validation.test.ts
@@ -3,7 +3,7 @@
 import { expect, test } from "vitest";
 import { schemas } from "@mykomap/common";
 
-const { DatasetId, QName } = schemas;
+const { DatasetId, QName, PrefixUri } = schemas;
 
 test("testing DatasetId validation", async (t) => {
   const expectTrue = ["0", "A", "z", "_", "-", "01234", "Quick-Brown-Fox_42"];
@@ -42,3 +42,86 @@ test("testing QName validation", async (t) => {
     expect(QName.safeParse(it).success, `parsing '${it}'`).toBeFalsy(),
   );
 });
+
+test("testing PrefixUri validation", async (t) => {
+  const expectTrue = [
+    "http://a",
+    "http://a/",
+    "http://e.a",
+    "http://e.a/",
+    "http://example.com",
+    "http://EXAMPLE.COM",
+    "http://example.Com",
+    "https://example.com",
+    "http://www.example.com",
+    "http://www.example.com/",
+    "https://w3-example/",
+    "https://w3-example1.com/",
+    "http://example.com#",
+    "http://example.com/#",
+    "http://example.com/foo",
+    "http://example.com/foo/",
+    "http://example.com/foo/bar",
+    "http://example.com/foo/bar/",
+    "http://example.com/foo#",
+    "http://example.com/foo/#",
+    "http://example.com/foo/bar#",
+    "http://example.com/foo/bar/#",
+    "http://example.com/%2e%4F",
+    "http://example.com/A-Za-z0-9._~!$&'()*+,;=:@-%20/",
+  ];
+  const expectFalse = [
+    "http://",
+    "http://-",
+    "http://.",
+    "http://.a",
+    "http://-a",
+    "http://a-",
+    "http://a.",
+    "http://3a.com",
+    "http://a.3com",
+    "http://-a.com",
+    "http://a-.com",
+    "http://a.-com",
+    "http://a.com-",
+    "http://a_b",
+    "http://a_b.c",
+    // Authorities carrying userinfo ("@") or a port (":") are expected to be rejected
+    "http://a@b",
+    "http://a@b.c",
+    "http://a:b.c",
+    "http://b.c:8000",
+    "HTTP://example.com",
+    "Http://example.com",
+    "htt://example.com",
+    "httpss://example.com",
+    "http//example.com",
+    "http:/example.com",
+    "http:///example.com",
+    "http//:/example.com",
+    "http/example.com",
+    "http/:example.com",
+    "http//:example.com",
+    "http://example.com?",
+    "http://example.com/?",
+    "http://example.com?q",
+    "http://example.com/?q",
+    "http://example.com/foo?q",
+    "http://example.com/foo/?q",
+    "http://example.com/#?",
+    "http://example.com#?",
+    "http://example.com#q",
+    "http://example.com/#q",
+    'http://example.com/foo"bar',
+    "http://example.com//foobar",
+    "http://example.com/foo//bar",
+    "http://example.com/foobar//",
+  ];
+
+  expectTrue.forEach((it) =>
+    expect(PrefixUri.safeParse(it).success, `parsing '${it}'`).toBeTruthy(),
+  );
+  expectFalse.forEach((it) =>
+    expect(PrefixUri.safeParse(it).success, `parsing '${it}'`).toBeFalsy(),
+  );
+});
diff --git a/libs/common/src/api/contract.ts b/libs/common/src/api/contract.ts
@@ -1,73 +1,55 @@
 import { initContract } from "@ts-rest/core";
 import { z } from "zod";
 import { extendZodWithOpenApi } from "@anatine/zod-openapi";
+import * as Rx from "../rxdefs.js";
+import RxUtils from "../rxutils.js";
+import { Iso639Set1Codes } from "../iso639-1.js";
 
 extendZodWithOpenApi(z);
 
 const c = initContract();
 
-/** A regex testing for an *URL-safe* base64 string (RFC4648 sect 5) */
-const UrlSafeBase64Rx = /^[A-Z0-9_-]+$/i;
-
-/** A string regular expression matching an XML NCName
- *
- * - An NCName is an XML Name, but with no colons allowed.
- * - An XML Name is a complicated beast, but loosely a Unicode version of \w,
- *   or in other words, a unicode alphanumeric symbolic identifier.
- *   - It can contain digits, letters, hyphens, periods and underscores and
- *     certain unicode equivalents.
- *   - But it must not start with digits, a hyphen or a period, nor certain
- *     unicode equivalents.
- *
- * The regex for an XML Name is adapted from O'Reilly Regex Cookbook section 8.4,
- * "XML 1.0 names (approximate)" - but the colon is removed.
- *
- * Paraphrasing that book's explanation:
- * - the name start character can be a [:_] or
- * - any of the following Unicode categories:
- *   - Lowercase letter (Ll)
- *   - Uppercase letter (Lu)
- *   - Titlecase letter (Lt)
- *   - Letter without case (Lo)
- *   - Letter number (Nl)
- * - subsequent characters can also include [.-] or
- *   - Mark (M)
- *   - Modifier letter (Lm)
- *   - Decimal digit (Nd)
+/** Helper function to generate Zod refinements from a RegExp
  *
- * This definition is not compiled, as it is intended for composition below.
- * Therefore it is wrapped in a non-capturing group to isolate it without affecting
- * the captures which may be defined around it.
+ * It promotes the RegExp to be a unicode, entire-string match, if it is
+ * not already.
  *
- * This regex requires node 10+ to be able to use /u and \p.
+ * It also sets the validation error message attribute from the message parameter.
  *
+ * @returns a Zod validator generated by the Zod.string().refine() method
  */
-const NCName =
-  "(?:[_\\p{Ll}\\p{Lu}\\p{Lt}\\p{Lo}\\p{Nl}][_.\\p{L}\\p{M}\\p{Nd}\\p{Nl}-]*)";
-
-/** Match a QName
- *
- * Paraphrasing https://en.wikipedia.org/wiki/QName
- * - A QName is an NCName (see above)
- * - Or two of them delimited by a colon.
- *
- * Note: for our purposes, we *require* a colon delimiter - it can't just be a NCName.
- * This is because we need an abbreviation with which to look up the URL prefix.
- *
- * FIXME Perhaps we should also disallow common URI scheme prefixes.
- * FIXME Maybe we don't care about unicode?
- *
- * This regex requires node 10+ to be able to use /u and \p.
- *
- */
-const QNameRx = new RegExp(`^${NCName}[:]${NCName}$`, "gsu");
+function ZodRegex(rx: RegExp, message: string) {
+  // z.string() has already narrowed `value` to a string: no `any`/String() needed.
+  const matches = (value: string) => RxUtils.uaon(rx).test(value);
+  return z.string().refine(matches, { message });
+}
 
 const Location = z.array(z.number()).min(2).max(2);
-const DatasetId = z.string().regex(UrlSafeBase64Rx);
+const DatasetId = z.string().regex(Rx.UrlSafeBase64);
 const DatasetItemId = z.coerce.number().int().nonnegative();
 const DatasetItem = z.object({}).passthrough();
 const Dataset = z.array(Location);
-const QName = z.string().regex(QNameRx);
+const NCName = ZodRegex(Rx.NCName, "Invalid NCName format");
+const QName = ZodRegex(Rx.QName, "Invalid QName format");
+// Developer note: PrefixUri is regex based, as it attempts to avoid the .url() deficiencies in
+// https://github.com/colinhacks/zod/issues/2236. But also our concept of a URI is narrowed, see
+// documentation for Rx.PrefixUri.
+const PrefixUri = ZodRegex(Rx.PrefixUri, "Invalid prefix URI format");
+const PrefixIndex = z.record(PrefixUri, NCName);
+// z.enum wants a non-empty tuple, so the first key is split off to show there is at least
+// one. NOTE(review): lang0 would be undefined if Iso639Set1Codes were empty - confirm not.
+const [lang0, ...langs] = Object.keys(Iso639Set1Codes);
+const Iso639Set1Code = z.enum([lang0, ...langs]);
+const VocabDef = z.object({
+  title: z.string(),
+  terms: z.record(NCName, z.string()),
+});
+const I18nVocabDefs = z.record(Iso639Set1Code, VocabDef);
+const VocabIndex = z.record(NCName, I18nVocabDefs);
+const ConfigData = z.object({
+  prefixes: PrefixIndex,
+  vocabs: VocabIndex,
+});
 const VersionInfo = z.object({
   name: z.string(),
   buildTime: z.string().datetime({ offset: false }),
@@ -79,12 +61,19 @@ const ErrorInfo = z.object({ message: z.string() }).passthrough();
 
 export const schemas = {
   Location,
+  ConfigData,
   DatasetId,
   DatasetItemId,
   DatasetItem,
   Dataset,
+  I18nVocabDefs,
+  Iso639Set1Code,
+  NCName,
+  PrefixUri,
+  PrefixIndex,
   QName,
   VersionInfo,
+  VocabIndex,
   ErrorInfo,
 };
 
@@ -176,6 +165,32 @@ export const contract = c.router({
       }),
     },
   },
+  getConfig: {
+    method: "GET",
+    path: "/dataset/:datasetId/config",
+    summary: "obtain various configured parameters for a map",
+    description:
+      "Obtains configured parameters for a map, which amongst other things, " +
+      "include default values for various options, and definitions of " +
+      "vocabulary terms with their localised labels, that are used to " +
+      "interpret identifiers in the data and/or elsewhere.",
+    pathParams: z.object({
+      datasetId: DatasetId.openapi({
+        // description: "uniquely specifies the dataset wanted",
+      }),
+    }),
+    responses: {
+      200: ConfigData.openapi({
+        // description: "various configured parameters for a map",
+      }),
+      400: ErrorInfo.openapi({
+        // description: "bad input parameter",
+      }),
+      404: ErrorInfo.openapi({
+        // description: "no such map",
+      }),
+    },
+  },
   getVersion: {
     method: "GET",
     path: "/version",