Skip to content

Commit

Permalink
use raw jlpt data
Browse files: browse the repository at this point in the history
  • Loading branch information
CaptainDario committed Aug 7, 2023
1 parent d0f5374 commit 4482faf
Showing 1 changed file with 37 additions and 41 deletions.
78 changes: 37 additions & 41 deletions database_builder/lib/src/jlpt/jlpt.dart
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import 'dart:io';
import 'package:kana_kit/kana_kit.dart';
import 'package:path/path.dart' as p;

import 'package:csv/csv.dart';
import 'package:database_builder/database_builder.dart';
import 'package:isar/isar.dart';

Expand All @@ -13,58 +14,53 @@ Future<void> addJLPTToDict(Isar dictIsar) async {

List<List> jlptData = [];

for (int i = 1; i < 5; i++){
for (int i = 1; i < 6; i++){
String jlptFile = File(
p.join(RepoPathManager.getInputFilesPath(), "jlpt", "term_meta_bank_$i.json")
p.join(RepoPathManager.getInputFilesPath(), "jlpt", "n$i.csv")
).readAsStringSync();

// read json file
List json = jsonDecode(jlptFile);
KanaKit k = KanaKit(config:
KanaKitConfig(passRomaji: true, passKanji: true, upcaseKatakana: false)
);
for (var entry in json){
// read csv file: parse it into rows, then split each row on commas
List<dynamic> jlptRows =
const CsvToListConverter(
fieldDelimiter: "\t",
eol: "\n",
).convert(jlptFile);
List<List<String>> jlptRowsSplit = jlptRows
.map((e) => (e[0] as String).split(",")).toList();


bool containsKanji = entry[2].containsKey("reading");
for (List row in jlptRowsSplit) {
if(row[0] == "jmdict_seq" || row[0] == "" || row[row.length-2] == "jmdict"){
continue;
}

jlptData.add([
containsKanji ? (entry[0]).toString().toHalfWidth() : null, // kanji if available
k.toHiragana(containsKanji ? entry[2]["reading"] : entry[0]), // hiragana
(containsKanji
? entry[2]["frequency"]["displayValue"]
: entry[2]["displayValue"]
).toString() // JLPT
]);
}
}

// add JLPT data to the matching Isar entry
int i = 0;
for (List row in jlptData) {
List<JMdict> entries = dictIsar.jmdict.where()
.idEqualTo(int.parse(row[0]))
.findAllSync();

if(entries.length != 1){
entries = dictIsar.jmdict.filter()
.kanjiIndexesElementEqualTo(row[1])
.and()
.group((q) =>
q.hiraganasElementEqualTo(row[2])
.or()
.readingsElementEqualTo(row[2])
)
.findAllSync();
}

List<JMdict> matches = dictIsar.jmdict.where()
.optional(row[0] != null, (q) =>
q.kanjiIndexesElementEqualTo(row[0])
)
.filter()
.hiraganasElementEqualTo(row[1])
.findAllSync();
JMdict entry = entries.first;

for (JMdict match in matches) {
entry.jlptLevel ??= [];
entry.jlptLevel = List.from(entry.jlptLevel!)..add("N$i (${row[1] != '' ? row[1] : row[2]})");

List<String>? levels = List.from(match.jlptLevel ?? []);
levels.add("${row[2]}");
match.jlptLevel = levels.toSet().toList();

dictIsar.writeTxnSync(() {
dictIsar.jmdict.putSync(match);
dictIsar.jmdict.putSync(entry);
});
}
if(matches.isEmpty){
print("No JMdict entry found for JLPT $i/${jlptData.length}: $row");
}
i++;
}

print("Added JLPT N$i word info");
}

}

0 comments on commit 4482faf

Please sign in to comment.