Skip to content

Commit

Permalink
Merge pull request #308 from microbiomedata/issue-258-add-external-se…
Browse files Browse the repository at this point in the history
…quencing-data-classes

Add classes for external sequencing data
  • Loading branch information
pkalita-lbl authored Feb 6, 2025
2 parents e5e1872 + a1a8ae5 commit bc77d3e
Show file tree
Hide file tree
Showing 20 changed files with 331 additions and 9 deletions.
20 changes: 20 additions & 0 deletions data_harmonizer/menu.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,26 @@
"name": "WaterInterface",
"status": "published",
"display": true
},
"MetagenomeSequencingNonInterleavedDataInterface": {
"name": "MetagenomeSequencingNonInterleavedDataInterface",
"status": "published",
"display": true
},
"MetagenomeSequencingInterleavedDataInterface": {
"name": "MetagenomeSequencingInterleavedDataInterface",
"status": "published",
"display": true
},
"MetatranscriptomeSequencingNonInterleavedDataInterface": {
"name": "MetatranscriptomeSequencingNonInterleavedDataInterface",
"status": "published",
"display": true
},
"MetatranscriptomeSequencingInterleavedDataInterface": {
"name": "MetatranscriptomeSequencingInterleavedDataInterface",
"status": "published",
"display": true
}
}
}
3 changes: 2 additions & 1 deletion project.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ test-deploy-docs-gh-action: clean schema-clean src/nmdc_submission_schema/schema
################################################

# Build the submission schema YAML from the modified schema and the GOLD ecosystem tree.
#   $<           -> local/with_modifications.yaml (first prerequisite)
#   $(word 2,$^) -> project/thirdparty/GoldEcosystemTree.json (second prerequisite)
# Step 1: inject-gold-pathway-terms reads $< (-i) and the GOLD tree (-g).
# Step 2: instrument_enums.py reads the step-1 output ($<.tmp1) and writes the target ($@).
# NOTE(review): this listing comes from a rendered diff ("2 additions & 1 deletion"), so the
# first recipe line below (writing straight to $@) is presumably the removed version and the
# two lines after it are its replacement -- confirm against the real project.Makefile.
# NOTE(review): $<.tmp1 is created next to the prerequisite; no rule for it and no cleanup
# of it are visible in this excerpt -- verify it is covered by a clean target.
src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml: local/with_modifications.yaml project/thirdparty/GoldEcosystemTree.json
$(RUN) inject-gold-pathway-terms -g $(word 2,$^) -i $< -o $@
$(RUN) inject-gold-pathway-terms -g $(word 2,$^) -i $< -o $<.tmp1
$(RUN) python src/nmdc_submission_schema/scripts/instrument_enums.py -i $<.tmp1 -o $@
#cp $< $@

# remove the multivalued true annotation from these global slot definitions for the sake of linkml-convert
Expand Down
5 changes: 5 additions & 0 deletions sheets_and_friends/tsv_in/import_slots_regardless.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,8 @@ Biosample local/nmdc.yaml window_vert_pos mixs_core_section 216 BuiltEnvInterfac
Biosample local/nmdc.yaml window_water_mold mixs_core_section 217 BuiltEnvInterface
Biosample local/nmdc.yaml xylene mixs_core_section 389 HcrCoresInterface|HcrFluidsSwabsInterface
Biosample local/nmdc.yaml zinc mixs_inspired_section 1004 SoilInterface
Instrument local/nmdc.yaml model sequencing_section 2 MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface
NucleotideSequencing local/nmdc.yaml processing_institution sequencing_section 3 MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface
NucleotideSequencing local/nmdc.yaml protocol_link sequencing_section 4 MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface
NucleotideSequencing local/nmdc.yaml insdc_bioproject_identifiers sequencing_section 5 MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface
NucleotideSequencing local/nmdc.yaml insdc_experiment_identifiers sequencing_section 6 MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface
24 changes: 20 additions & 4 deletions sheets_and_friends/tsv_in/modifications_long.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ AirInterface|BiofilmInterface|BuiltEnvInterface|HcrCoresInterface|HcrFluidsSwabs
AirInterface|BiofilmInterface|BuiltEnvInterface|HcrCoresInterface|HcrFluidsSwabsInterface|HostAssociatedInterface|MiscEnvsInterface|PlantAssociatedInterface|SedimentInterface|SoilInterface|WastewaterSludgeInterface lat_lon replace_attribute required true
AirInterface|BiofilmInterface|BuiltEnvInterface|HcrCoresInterface|HcrFluidsSwabsInterface|HostAssociatedInterface|MiscEnvsInterface|PlantAssociatedInterface|SedimentInterface|SoilInterface|WastewaterSludgeInterface lat_lon replace_attribute string_serialization {lat lon}
PlantAssociatedInterface|SoilInterface light_regm replace_attribute recommended true
SedimentInterface|SoilInterface microbial_biomass_c remove_attribute string_serialization
SedimentInterface|SoilInterface microbial_biomass_c remove_attribute string_serialization
SedimentInterface|SoilInterface microbial_biomass_c replace_attribute pattern ^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [^;|\t\r\n]+$
SedimentInterface|SoilInterface microbial_biomass_n remove_attribute string_serialization
SedimentInterface|SoilInterface microbial_biomass_n remove_attribute string_serialization
SedimentInterface|SoilInterface microbial_biomass_n replace_attribute pattern ^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [^;|\t\r\n]+$
SoilInterface non_microb_biomass remove_attribute string_serialization
SoilInterface non_microb_biomass remove_attribute string_serialization
SoilInterface non_microb_biomass replace_attribute pattern ^([^;\t\r\x0A]+;[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [^;\t\r\x0A]+\|)*([^;\t\r\x0A]+;[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [^;\t\r\x0A]+)$
BuiltEnvInterface occup_samp replace_attribute range integer
AirInterface|BiofilmInterface|BuiltEnvInterface|HcrCoresInterface|HcrFluidsSwabsInterface|HostAssociatedInterface|MiscEnvsInterface|PlantAssociatedInterface|SedimentInterface|SoilInterface|WaterInterface|WastewaterSludgeInterface organism_count replace_attribute pattern ^(\S+.*\S+;[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? \S+.*\S+;(qPCR|ATP|MPN|other)\|)*(\S+.*\S+;[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? \S+.*\S+;(qPCR|ATP|MPN|other))$
Expand Down Expand Up @@ -251,7 +251,7 @@ EmslInterface sample_shipped replace_attribute recommended false
EmslInterface sample_type replace_attribute required true
EmslInterface sample_type replace_attribute recommended false
EmslInterface sample_type overwrite_examples examples soil - water extract
BuiltEnvInterface season remove_attribute string_serialization
BuiltEnvInterface season remove_attribute string_serialization
BuiltEnvInterface season replace_attribute range SeasonEnum
BuiltEnvInterface season overwrite_examples examples autumn
SoilInterface season_precpt overwrite_examples examples 0.4 inch|10.16 mm
Expand Down Expand Up @@ -317,3 +317,19 @@ WaterInterface env_local_scale replace_attribute any_of.0.range EnvLocalScaleW
WaterInterface env_local_scale replace_attribute any_of.1.range string
WaterInterface env_medium replace_attribute any_of.0.range EnvMediumWaterEnum
WaterInterface env_medium replace_attribute any_of.1.range string
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface model replace_attribute required true
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface model replace_attribute title instrument model
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface model replace_attribute description The model of the Illumina sequencing instrument used to generate the data.
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface model replace_attribute range IlluminaInstrumentModelEnum
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface processing_institution replace_attribute title processing institution
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface protocol_link replace_attribute title protocol
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface protocol_link replace_attribute description A URL to a description of the sequencing protocol used to generate the data.
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface protocol_link replace_attribute range string
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_bioproject_identifiers replace_attribute title INSDC bioproject identifier
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_bioproject_identifiers replace_attribute range string
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_bioproject_identifiers replace_attribute multivalued false
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_bioproject_identifiers overwrite_examples examples bioproject:PRJNA366857
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_experiment_identifiers replace_attribute title INSDC experiment identifiers
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_experiment_identifiers replace_attribute range string
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_experiment_identifiers replace_attribute multivalued false
MetagenomeSequencingNonInterleavedDataInterface|MetagenomeSequencingInterleavedDataInterface|MetatranscriptomeSequencingNonInterleavedDataInterface|MetatranscriptomeSequencingInterleavedDataInterface insdc_experiment_identifiers replace_attribute description If multiple identifiers are provided, separate them with a semicolon. The number of identifiers must match the number of sequencing files.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
metagenome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: solarix_7T
interleaved_url: https://example.com/data.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metagenome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: solarix_7T
read_1_url: https://example.com/read1.fastq
read_2_url: https://example.com/read2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
metatranscriptome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: solarix_7T
interleaved_url: https://example.com/data.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metatranscriptome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: solarix_7T
read_1_url: https://example.com/read1.fastq
read_2_url: https://example.com/read2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
metagenome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: novaseq_6000
processing_institution: UCSD
protocol_link: https://example.com/protocol.html
insdc_bioproject_identifiers: bioproject:PRJNA366857
insdc_experiment_identifiers: insdc.sra:ERX012345
interleaved_url: https://example.com/read1.fastq
interleaved_md5_checksum: 0123456789abcdef0123456789abcdef
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
metagenome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: novaseq_6000
interleaved_url: https://example.com/data.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
metagenome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: novaseq_6000
processing_institution: UCSD
protocol_link: https://example.com/protocol.html
insdc_bioproject_identifiers: bioproject:PRJNA366857
insdc_experiment_identifiers: insdc.sra:ERX012345
read_1_url: https://example.com/read1.fastq
read_1_md5_checksum: 0123456789abcdef0123456789abcdef
read_2_url: https://example.com/read2.fastq
read_2_md5_checksum: 0123456789abcdef0123456789abcdef

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metagenome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metagenomics
model: novaseq_6000
read_1_url: https://example.com/read1.fastq
read_2_url: https://example.com/read2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
metatranscriptome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: novaseq_6000
processing_institution: UCSD
protocol_link: https://example.com/protocol.html
insdc_bioproject_identifiers: bioproject:PRJNA366857
insdc_experiment_identifiers: insdc.sra:ERX012345
interleaved_url: https://example.com/read1.fastq
interleaved_md5_checksum: 0123456789abcdef0123456789abcdef
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
metatranscriptome_sequencing_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: novaseq_6000
interleaved_url: https://example.com/data.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
metatranscriptome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: novaseq_6000
processing_institution: UCSD
protocol_link: https://example.com/protocol.html
insdc_bioproject_identifiers: bioproject:PRJNA366857
insdc_experiment_identifiers: insdc.sra:ERX012345
read_1_url: https://example.com/read1.fastq
read_1_md5_checksum: 0123456789abcdef0123456789abcdef
read_2_url: https://example.com/read2.fastq
read_2_md5_checksum: 0123456789abcdef0123456789abcdef

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metatranscriptome_sequencing_non_interleaved_data:
- samp_name: sample name
analysis_type:
- metatranscriptomics
model: novaseq_6000
read_1_url: https://example.com/read1.fastq
read_2_url: https://example.com/read2.fastq
Loading

0 comments on commit bc77d3e

Please sign in to comment.