Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ambiguity #114

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 3 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions migration/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls"] }
tokio = { version = "1.5", features = ["full", "time"] }
futures-retry = "0.6"
anyhow = "*"
itertools = "*"
itertools = "0.10"
futures = "*"
dotenv = "0.15"
regex = "1.3"
rayon = "1.4"
chrono = "*"
rand = "*"
lazy_static = "*"
base64 = "*"
log = "0.4"
Expand Down
11 changes: 7 additions & 4 deletions migration/src/early_vocab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,12 @@ async fn parse_early_vocab(
english_gloss: vec![gloss],
line_break: None,
page_break: None,
position: dailp::PositionInDocument::new(
meta.id.clone(),
page_number,
index as i32 + 1,
position: dailp::PositionInDocument::IndependentPosition(
dailp::IndependentPosition::new(
meta.id.clone(),
page_number,
index as i32 + 1,
),
),
date_recorded: meta.date.clone(),
id,
Expand All @@ -187,6 +189,7 @@ async fn parse_early_vocab(
let doc = dailp::AnnotatedDoc {
meta,
segments: None,
characters: None,
};
crate::update_document(&[doc]).await?;

Expand Down
38 changes: 28 additions & 10 deletions migration/src/lexical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::spreadsheets::{LexicalEntryWithForms, SheetResult};
use anyhow::Result;
use dailp::{
convert_udb, seg_verb_surface_forms, AnnotatedDoc, AnnotatedForm, Contributor, Date,
DocumentMetadata, LexicalConnection, MorphemeId, MorphemeSegment, PositionInDocument,
DocumentMetadata, IndependentPosition, LexicalConnection, MorphemeId, MorphemeSegment,
PositionInDocument,
};
use log::info;

Expand Down Expand Up @@ -100,6 +101,7 @@ pub async fn migrate_dictionaries() -> Result<()> {
is_reference: true,
},
segments: None,
characters: None,
},
AnnotatedDoc {
meta: DocumentMetadata {
Expand All @@ -115,6 +117,7 @@ pub async fn migrate_dictionaries() -> Result<()> {
is_reference: true,
},
segments: None,
characters: None,
},
];
crate::update_document(&docs).await?;
Expand Down Expand Up @@ -146,8 +149,11 @@ async fn parse_numerals(sheet_id: &str, doc_id: &str, year: i32) -> Result<()> {
let _numeric = values.next()?;
let simple_phonetics = values.next()?;
let syllabary = values.next()?;
let position =
PositionInDocument::new(dailp::DocumentId(doc_id.to_string()), page_num, key);
let position = PositionInDocument::IndependentPosition(IndependentPosition::new(
dailp::DocumentId(doc_id.to_string()),
page_num,
key,
));
let segments = vec![MorphemeSegment::new(root_dailp, gloss.clone(), None)];
Some(AnnotatedForm {
id: position.make_id(&gloss, true),
Expand Down Expand Up @@ -212,7 +218,11 @@ async fn parse_appendix(sheet_id: &str, to_skip: usize) -> Result<()> {
let mut values = row.into_iter();
let index = values.next()?.parse().unwrap_or(1);
let page_num = values.next()?;
let position = PositionInDocument::new(meta.id.clone(), page_num, index);
let position = PositionInDocument::IndependentPosition(IndependentPosition::new(
meta.id.clone(),
page_num,
index,
));
for _ in 0..to_skip {
values.next()?;
}
Expand Down Expand Up @@ -260,6 +270,7 @@ async fn parse_appendix(sheet_id: &str, to_skip: usize) -> Result<()> {
let doc = AnnotatedDoc {
meta,
segments: None,
characters: None,
};
let docs = vec![doc];
crate::update_document(&docs).await?;
Expand Down Expand Up @@ -296,8 +307,11 @@ fn parse_new_df1975(
let root_gloss = root_values.next().filter(|s| !s.is_empty())?;
let mut form_values = root_values.clone().skip(after_root + translations);
let date = Date::new(chrono::NaiveDate::from_ymd(year, 1, 1));
let pos =
PositionInDocument::new(doc_id.clone(), page_number, key.parse().unwrap_or(1));
let pos = PositionInDocument::IndependentPosition(IndependentPosition::new(
doc_id.clone(),
page_number,
key.parse().unwrap_or(1),
));
Some(LexicalEntryWithForms {
forms: seg_verb_surface_forms(
&pos,
Expand Down Expand Up @@ -349,8 +363,11 @@ async fn ingest_particle_index(document_id: &str) -> Result<()> {
let translation = row.next()?;
let source_str = row.next()?;
let source = MorphemeId::parse(&source_str)?;
let pos =
PositionInDocument::new(source.document_id.clone()?, source.gloss, index as i32);
let pos = PositionInDocument::IndependentPosition(IndependentPosition::new(
source.document_id.clone()?,
source.gloss,
index as i32,
));
Some(AnnotatedForm {
id: pos.make_raw_id(&translation, false),
simple_phonetics: Some(simple_phonetics),
Expand Down Expand Up @@ -388,7 +405,7 @@ async fn ingest_ac1995(sheet_id: &str) -> Result<()> {
let _romanized = row.next()?;
let normalized = row.next()?;
let translation = row.next()?;
let pos = PositionInDocument::new(meta.id.clone(), "1".to_owned(), index);
let pos = IndependentPosition::new(meta.id.clone(), "1".to_owned(), index);
Some(AnnotatedForm {
id: form_id,
simple_phonetics: Some(normalized),
Expand All @@ -401,7 +418,7 @@ async fn ingest_ac1995(sheet_id: &str) -> Result<()> {
segments: None,
date_recorded: meta.date.clone(),
source: syllabary,
position: pos,
position: PositionInDocument::IndependentPosition(pos),
})
})
.collect::<Vec<_>>();
Expand All @@ -410,6 +427,7 @@ async fn ingest_ac1995(sheet_id: &str) -> Result<()> {
crate::update_document(&[AnnotatedDoc {
meta,
segments: None,
characters: None,
}])
.await?;

Expand Down
3 changes: 2 additions & 1 deletion migration/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,10 @@ async fn fetch_sheet(
all_lines.append(&mut lines);
tokio::time::sleep(Duration::from_millis(1700)).await;
}
let characters = AnnotatedLine::chars_from_semantic(&all_lines);
let annotated = AnnotatedLine::many_from_semantic(&all_lines, &meta);
let segments = AnnotatedLine::lines_into_segments(annotated, &meta.id, &meta.date);
let doc = dailp::AnnotatedDoc::new(meta, segments);
let doc = dailp::AnnotatedDoc::new(meta, segments, characters);

Ok(Some((doc, refs)))
} else {
Expand Down
Loading