Skip to content

Commit f378630

Browse files
authored
Merge pull request #23 from DigitalCommons/20_backend_class_structure
[CWM] Create back-end class structure
2 parents 06bcde9 + b25c7b7 commit f378630

File tree

8 files changed

+538
-8
lines changed

8 files changed

+538
-8
lines changed

.github/workflows/docs.yml

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
name: docs
55
on:
66
push:
7+
paths:
8+
- "docs/**"
79
branches:
810
- main
911
permissions:

apps/back-end/src/routes.ts

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { contract } from "@mykomap/common";
44
import { FastifyPluginOptions, FastifyReply, FastifyRequest } from "fastify";
55
import fs from "node:fs";
66
import path from "node:path";
7+
import { initDatasets } from "./services/datasetService.js";
78

89
/** Provides the shared configuration options for the Mykomap router implementation. */
910
export interface MykomapRouterConfig extends FastifyPluginOptions {
@@ -77,6 +78,9 @@ export function MykomapRouter(
7778
`'${opts.mykomap.dataRoot}'.`,
7879
);
7980

81+
// TODO: uncomment this when the test/data has been created with the updated structure
82+
// initDatasets(opts.mykomap.dataRoot);
83+
8084
// Concatenates the path components into an absolute file path
8185
const filePath = (...components: string[]): string => {
8286
const p = path.join(opts.mykomap.dataRoot ?? "", ...components) + ".json";

apps/back-end/src/services/Dataset.ts

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
import { TsRestResponseError } from "@ts-rest/core";
4+
5+
import { contract } from "@mykomap/common";
6+
7+
export class Dataset {
8+
id: string;
9+
folderPath: string;
10+
searchable: ({ [prop: string]: string } & { searchString: string })[];
11+
12+
constructor(id: string, dataRoot: string) {
13+
this.id = id;
14+
this.folderPath = path.join(dataRoot, "datasets", id);
15+
this.searchable = JSON.parse(
16+
fs.readFileSync(path.join(this.folderPath, "searchable.json"), "utf8"),
17+
);
18+
}
19+
20+
getItem = (itemId: number) => {
21+
if (
22+
!fs.existsSync(
23+
path.join(this.folderPath, "initiatives", `${itemId}.json`),
24+
)
25+
) {
26+
throw new TsRestResponseError(contract.getDatasetItem, {
27+
status: 404,
28+
body: {
29+
message: `can't retrieve data for dataset ${this.id} item ${itemId}`,
30+
},
31+
});
32+
}
33+
34+
return JSON.parse(
35+
fs.readFileSync(
36+
path.join(this.folderPath, "initiatives", `${itemId}.json`),
37+
"utf8",
38+
),
39+
);
40+
};
41+
42+
getConfig = () => {
43+
// TODO: implementation
44+
return {};
45+
};
46+
47+
getLocations = (): fs.ReadStream =>
48+
fs.createReadStream(path.join(this.folderPath, "locations.json"), "utf8");
49+
50+
search = (filter?: string[], text?: string): number[] => {
51+
// TODO: implementation
52+
return [];
53+
};
54+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
import { TsRestResponseError } from "@ts-rest/core";
4+
5+
import { contract } from "@mykomap/common";
6+
import { Dataset } from "./Dataset.js";
7+
8+
const datasets: { [id: string]: Dataset } = {};
9+
10+
/**
11+
* This method instantiates a Dataset object for each of the datasets in the dataRoot/datasets
12+
* directory in the filesystem.
13+
*/
14+
export const initDatasets = (dataRoot: string) => {
15+
const datasetIds = fs
16+
.readdirSync(path.join(dataRoot, "datasets"), { withFileTypes: true })
17+
.filter((f) => f.isDirectory())
18+
.map((f) => f.name);
19+
20+
console.log("Found datasets:", datasetIds);
21+
22+
for (const datasetId of datasetIds) {
23+
datasets[datasetId] = new Dataset(datasetId, dataRoot);
24+
}
25+
};
26+
27+
const getDatasetOrThrow404 = (datasetId: string): Dataset => {
28+
const dataset = datasets[datasetId];
29+
30+
if (!dataset)
31+
throw new TsRestResponseError(contract.searchDataset, {
32+
status: 404,
33+
body: { message: `dataset ${datasetId} doesn't exist` },
34+
});
35+
36+
return dataset;
37+
};
38+
39+
export const getDatasetItem = (datasetId: string, datasetItemId: number) => {
40+
const dataset = getDatasetOrThrow404(datasetId);
41+
return dataset.getItem(datasetItemId);
42+
};
43+
44+
export const getDatasetConfig = (datasetId: string) => {
45+
const dataset = getDatasetOrThrow404(datasetId);
46+
return dataset.getConfig();
47+
};
48+
49+
export const getDatasetLocations = (datasetId: string): fs.ReadStream => {
50+
const dataset = getDatasetOrThrow404(datasetId);
51+
return dataset.getLocations();
52+
};
53+
54+
export const searchDataset = (
55+
datasetId: string,
56+
filter?: string[],
57+
text?: string,
58+
): number[] => {
59+
const dataset = getDatasetOrThrow404(datasetId);
60+
return dataset.search(filter, text);
61+
};

docs/architecture.md

+111
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,114 @@
1818
subscribe to the Redux store without React, but this would be more complicated.
1919

2020
## Back-end architecture
21+
22+
![Diagram](images/architecture-back-end.drawio.svg)
23+
24+
### Dataset files
25+
26+
All persistent data is stored on the back-end server as JSON files, in the following folder structure
27+
as seen from the SERVER_DATA_ROOT location:
28+
29+
```
30+
├── datasets
31+
│ ├── some-dataset
32+
│ │ ├── locations.json (array of lng-lat coordinates for each initiative)
33+
│ │ ├── searchable.json (array of the property values and searchable strings for each initiative)
34+
│ │ ├── initiatives
35+
│ │ │ ├── 0.json (full info of first initiative in the above aggregate JSONs)
36+
│ │ │ ├── 1.json
37+
│ │ │ ├── ...
38+
│ ├── other-dataset
39+
│ │ ├── ...
40+
│ ├── ...
41+
```
42+
43+
Additionally, for each dataset there's a `config.json`. This contains config for displaying the map
44+
in the UI, including the vocabs (translations of data IDs), default sidebar panel, and popup
45+
appearance. This config is not generated into the above folder structure, but kept in source control
46+
in the `@mykomap/config` library.
47+
48+
### Example file contents
49+
50+
`locations.json`:
51+
52+
```
53+
[ [1.21419, 50.45254], [0.21002, 49.33954], … ]
54+
```
55+
56+
`searchable.json`:
57+
58+
```
59+
{
60+
"fields": ["coun", "sz", "searchString"],
61+
"values": [
62+
["GB", "Small", "some co-op 2 green lane london n4 9qr"],
63+
["GB", "Large", "another co-op 15 brown street sheffield s7 0hg"],
64+
...
65+
]
66+
}
67+
```
68+
69+
#### Potential optimisation:
70+
71+
Since there will be one row per item, with 100k items, every 10 characters adds a new megabyte. The really bulky bit is the text searchString part, so maybe it could be kept in its own plain text file, with one line per item. Searching it could be done by streaming it from disk, which avoids loading the entire file permanently into memory (for each dataset).
72+
73+
For instance, this [SO thread](https://stackoverflow.com/questions/20187145/how-to-search-stream-for-string-in-node-js) has some sample stream-searching code, and a reference to a module which performs the streaming by what appears to be a fast non-buffering algorithm.
74+
75+
`0.json`:
76+
77+
```
78+
{ "name": "Some Co-op", "desc": "A co-op that sells stuff", "lng": 1.21419, "lat": 50.45254, "coun": "GB", "sz": "Small", ... }
79+
```
80+
81+
`config.json`:
82+
83+
```
84+
{
85+
"prefixes": {
86+
"https://example.com/sizes/1.1/": "sz",
87+
...
88+
},
89+
"vocabs": {
90+
"sz": {
91+
"EN": {
92+
"title": "Sizes",
93+
"terms": {
94+
"large": "Large",
95+
"medium": "Medium",
96+
"small": "Small"
97+
}
98+
}
99+
},
100+
...
101+
},
102+
"popupFields": {
103+
"sz": "text",
104+
"websites": "clickable-list",
105+
...
106+
},
107+
"ui": { ... },
108+
...
109+
}
110+
111+
```
112+
113+
### Data generation
114+
115+
These directories of JSONs, including the searchable strings in the `searchable.json` files, need to be pre-generated by a script. This script will be written in JS/TS and live in the monorepo, to be run on the back-end server.
116+
117+
The script will take the full data CSV for a map (generated by the data factory) as inputs, and write the full data into the required JSON files in the directory structure specified above.
118+
119+
#### Note:
120+
121+
We will need to manually copy the `standard.csv` from the data factory server to the back-end. Maybe in the future, the data factory pipeline can be enhanced to write the JSON files to the back-end server so that no manual duplication is necessary (and maybe we can eventually get rid of the separate data server altogether). Or, the back-end server could be given a URL to the appropriate `standard.csv` file(s) as published by the data factory and download it from there as part of a `build-data` script (possibly when notified by a webhook, or possibly polling and checking the file modification date).
122+
123+
### Dataset instances
124+
125+
- For each dataset available in the `datasets` directory on server start, a dataset instance is created
126+
by the Dataset service. Each Dataset instance has a:
127+
- `searchable` property, which is just the `searchable.json` loaded as an in-memory object
128+
- `getItem` method
129+
- `getConfig` method, which includes the vocabs
130+
- `getLocations` method, which returns a stream of the data
131+
- `search` method, which involves iterating through `searchable` to find matching initiatives

0 commit comments

Comments
 (0)