From a296945ff9c8add5262825d7492c8f6b926eeb91 Mon Sep 17 00:00:00 2001 From: Bryan Kok Date: Wed, 24 Feb 2021 19:43:48 +0800 Subject: [PATCH] Add Babelstone Han PUA to the ETL process, add new CharacterVariant --- etl/constants.ts | 5 +++++ etl/genBaseForwardReverse.ts | 8 ++++++++ etl/genReadings.ts | 17 +++++++++++++++++ etl/genVariants.ts | 16 ++++++++++++++++ src/types/common.ts | 2 ++ 5 files changed, 48 insertions(+) diff --git a/etl/constants.ts b/etl/constants.ts index d6d4882..2011253 100644 --- a/etl/constants.ts +++ b/etl/constants.ts @@ -33,3 +33,8 @@ export const CJKVI_VARIANTS_SUBDIR_NAME = "cjkvi-variants"; export const MANUAL_CJKVI_VARIANTS_SUBDIR_NAME = "manual-cjkvi-variants"; export const CJKVI_TABLES_SUBDIR_NAME = "cjkvi-tables"; + +export const BABELSTONE_PUA_JSON_NAME = "pua-latest.json"; +export const BABELSTONE_PUA_JSON_URL = + "https://babelstone-pua.transfusion.eu.org/pua-latest.json"; +export const BABELSTONE_SUBDIR_NAME = "babel"; diff --git a/etl/genBaseForwardReverse.ts b/etl/genBaseForwardReverse.ts index 8812e20..1d2a11e 100644 --- a/etl/genBaseForwardReverse.ts +++ b/etl/genBaseForwardReverse.ts @@ -24,6 +24,7 @@ import { writeJSON, // writeData, writeReverseMapProtobuf } from "./writer"; +import { getPUAData } from "./babelstone-pua-fetcher"; // import protobuf from "protobufjs"; // import { getRawIRGSources } from "./unihan-fetcher"; @@ -364,6 +365,13 @@ const main = async () => { resolvedIDSData = resolvedIDSData.concat(getAllResolvedIDSData(sourceFile)); } + const babelstonePUAData = await getPUAData(); + + for (let pua of babelstonePUAData.data) { + const entry = [pua.cp, pua.char, pua.ids]; + resolvedIDSData.push(entry); + } + const { baseRadicals, forwardMap, diff --git a/etl/genReadings.ts b/etl/genReadings.ts index 0f4ac4a..1708606 100644 --- a/etl/genReadings.ts +++ b/etl/genReadings.ts @@ -2,6 +2,7 @@ import { getRawReadings } from "./unihan-fetcher"; import { writeJSON } from "./writer"; import { JSON_FILE_NAMES } from "../src/constants"; +import { getPUAData } from "./babelstone-pua-fetcher"; const main = async () => { const RawReadingsString = (await getRawReadings()).split("\n"); @@ -19,6 +20,22 @@ const main = async () => { map[char][field] = entry[2]; } + const babelstonePUAData = await getPUAData(); + for (let { char, note, src, src_refs, enc_stat } of babelstonePUAData.data) { + map[char] = { + note, + src, + src_refs, + enc_stat, + }; + + for (let key of Object.keys(map[char])) { + if (map[char][key].length === 0) { + delete map[char][key]; + } + } + } + // write to file writeJSON(JSON.stringify(map), JSON_FILE_NAMES.readings); }; diff --git a/etl/genVariants.ts b/etl/genVariants.ts index c0c999d..b06e4bc 100644 --- a/etl/genVariants.ts +++ b/etl/genVariants.ts @@ -18,6 +18,7 @@ import { } from "./variants-fetcher"; import { Hanja as everydayHanja1800 } from "./hanja-for-everyday-use-1800.json"; +import { getPUAData } from "./babelstone-pua-fetcher"; const IVS = require("ivs"); const utfstring = require("utfstring"); @@ -221,6 +222,20 @@ const addRadicals = (map: VariantsSet) => { } }; +const addBabelstoneHanPUA = async (map: VariantsSet) => { + const createMapEntry = (char: string) => { + if (!(char in map)) { + map[char] = new Set(); + } + }; + + const babelstonePUAData = await getPUAData(); + for (let pua of babelstonePUAData.data) { + createMapEntry(pua.char); + map[pua.char].add(CharacterVariant.unicode_pua); + } +}; + export const generateVariantIslands = (inputMap: { [key: string]: Set; }) => { @@ -400,6 +415,7 @@ const main = async () => { addChinese(map); // jp-old-style.txt makes use of IVS characters which I strip await addJapaneseShinKyuPromise(map); + await addBabelstoneHanPUA(map); const mapArr = {} as { [key: string]: number[] }; for (let char in map) { diff --git a/src/types/common.ts b/src/types/common.ts index cb14e7a..eec03c9 100644 --- a/src/types/common.ts +++ b/src/types/common.ts @@ -15,6 +15,8 @@ export enum CharacterVariant { sawndip_simplified = 12, // TODO sawndip = 13, radical = 14, + + unicode_pua = 15, } // nothing to do with any existing standards, self-defined