-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathterms.yaml
10458 lines (10458 loc) · 322 KB
/
terms.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
id: http://w3id.org/mixs/terms
name: terms
imports:
- linkml:types
- ranges
prefixes:
linkml: https://w3id.org/linkml/
mixs.vocab: https://w3id.org/mixs/vocab/
MIXS: https://w3id.org/mixs/terms/
default_prefix: mixs.vocab
slots:
core field:
abstract: true
description: basic fields
investigation field:
abstract: true
description: field describing aspect of the investigation/study to which the sample
belongs
nucleic acid sequence source field:
abstract: true
sequencing field:
abstract: true
mixs extension field:
abstract: true
environment field:
abstract: true
description: field describing environmental aspect of a sample
submitted_to_insdc:
is_a: investigation field
title: submitted to insdc
description: Depending on the study (large-scale e.g. done with next generation
sequencing technology, or small-scale) sequences have to be submitted to SRA
(Sequence Read Archive), DRA (DDBJ Read Archive) or via the classical Webin/Sequin
systems to Genbank, ENA and DDBJ. Although this field is mandatory, it is meant
as a self-test field, therefore it is not necessary to include this field in
contextual data submitted to databases
range: string
multivalued: false
examples:
- value: 'yes'
comments:
- 'Expected value: boolean'
aliases:
- submitted to insdc
deprecated: Deprecated in mixs6
string_serialization: '{boolean}'
slot_uri: MIXS:0000004
investigation_type:
is_a: investigation field
title: investigation type
description: Nucleic Acid Sequence Report is the root element of all MIGS/MIMS
compliant reports as standardized by Genomic Standards Consortium. This field
is either eukaryote, bacteria, virus, plasmid, organelle, metagenome, mimarks-survey,
mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled
genome, or uncultivated viral genome
range: investigation_type_enum
multivalued: false
examples:
- value: metagenome
comments:
- 'Expected value: eukaryote, bacteria_archaea, plasmid, virus, organelle, metagenome, mimarks-survey,
mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled
genome, or uncultivated viral genomes'
aliases:
- investigation type
deprecated: Deprecated in mixs6
string_serialization: '[eukaryote|bacteria_archaea|plasmid|virus|organelle|metagenome|metatranscriptome|mimarks-survey|mimarks-specimen|misag|mimag|miuvig]'
slot_uri: MIXS:0000007
samp_name:
is_a: investigation field
title: sample name
description: A local identifier or name for the material sample used for
extracting nucleic acids, and subsequent sequencing. It can refer either to
the original material collected or to any derived sub-samples. It can have any
format, but we suggest that you make it concise, unique and consistent within
your lab, and as informative as possible. INSDC requires every sample name from
a single Submitter to be unique. Use of a globally unique identifier for the
field source_mat_id is recommended in addition to sample_name.
range: string
multivalued: false
examples:
- value: ISDsoil1
comments:
- 'Expected value: text'
aliases:
- sample name
string_serialization: '{text}'
slot_uri: MIXS:0001107
samp_taxon_id:
is_a: investigation field
title: Taxonomy ID of DNA sample
description: "NCBI taxon id of the sample. May be a single taxon or mixed taxa\
\ sample. Use 'synthetic metagenome' for mock community/positive controls,\
\ or 'blank sample' for negative controls."
range: string
multivalued: false
examples:
- value: Gut Metagenome [NCBI:txid749906]
comments:
- 'Expected value: Taxonomy ID'
aliases:
- Taxonomy ID of DNA sample
string_serialization: '{text} [NCBI:txid]'
slot_uri: MIXS:0001320
project_name:
is_a: investigation field
title: project name
description: Name of the project within which the sequencing was organized
range: string
multivalued: false
examples:
- value: Forest soil metagenome
comments: []
aliases:
- project name
string_serialization: '{text}'
slot_uri: MIXS:0000092
experimental_factor:
is_a: investigation field
title: experimental factor
description: Experimental factors are essentially the variable aspects of an experiment
design which can be used to describe an experiment, or set of experiments, in
an increasingly detailed manner. This field accepts ontology terms from Experimental
Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). For
a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO;
for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI
range: string
multivalued: false
examples:
- value: time series design [EFO:EFO_0001779]
comments:
- 'Expected value: text or EFO and/or OBI'
aliases:
- experimental factor
string_serialization: '{termLabel} {[termID]}|{text}'
slot_uri: MIXS:0000008
lat_lon:
is_a: environment field
title: geographic location (latitude and longitude)
description: The geographical origin of the sample as defined by latitude and
longitude. The values should be reported in decimal degrees and in WGS84 system
range: string
multivalued: false
examples:
- value: 50.586825 6.408977
comments:
- 'Expected value: decimal degrees, limit to 8 decimal points'
aliases:
- geographic location (latitude and longitude)
string_serialization: '{float} {float}'
slot_uri: MIXS:0000009
depth:
is_a: environment field
title: depth
description: The vertical distance below local surface, e.g. for sediment or soil
samples depth is measured from sediment or soil surface, respectively. Depth
can be reported as an interval for subsurface samples.
range: quantity value
multivalued: false
examples:
- value: 10 meter
comments:
- 'Expected value: measurement value'
aliases:
- depth
slot_uri: MIXS:0000018
alt:
is_a: environment field
title: altitude
description: Altitude is a term used to identify heights of objects such as airplanes,
space shuttles, rockets, atmospheric balloons and heights of places such as
atmospheric layers and clouds. It is used to measure the height of an object
which is above the earth's surface. In this context, the altitude measurement
is the vertical distance between the earth's surface above sea level and the
sampled position in the air
range: quantity value
multivalued: false
examples:
- value: 100 meter
comments:
- 'Expected value: measurement value'
aliases:
- altitude
slot_uri: MIXS:0000094
elev:
is_a: environment field
title: elevation
description: Elevation of the sampling site is its height above a fixed reference
point, most commonly the mean sea level. Elevation is mainly used when referring
to points on the earth's surface, while altitude is used for points above the
surface, such as an aircraft in flight or a spacecraft in orbit.
range: quantity value
multivalued: false
examples:
- value: 100 meter
comments:
- 'Expected value: measurement value'
aliases:
- elevation
slot_uri: MIXS:0000093
temp:
is_a: environment field
title: temperature
description: Temperature of the sample at the time of sampling.
range: quantity value
multivalued: false
examples:
- value: 25 degree Celsius
comments:
- 'Expected value: measurement value'
- 'Preferred unit: degree Celsius'
aliases:
- temperature
slot_uri: MIXS:0000113
geo_loc_name:
is_a: environment field
title: geographic location (country and/or sea,region)
description: The geographical origin of the sample as defined by the country or
sea name followed by specific region name. Country or sea names should be chosen
from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology
(http://purl.bioontology.org/ontology/GAZ)
range: string
multivalued: false
examples:
- value: 'USA: Maryland, Bethesda'
comments:
- 'Expected value: country or sea name (INSDC or GAZ): region(GAZ), specific location
name'
aliases:
- geographic location (country and/or sea,region)
string_serialization: '{term}: {term}, {text}'
slot_uri: MIXS:0000010
collection_date:
is_a: environment field
title: collection date
description: 'The time of sampling, either as an instant (single point in time)
or interval. In case no exact time is available, the date/time can be right
truncated, i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10;
2008-01-23; 2008-01; 2008. All of these except 2008-01 and 2008 are ISO8601 compliant'
range: date
multivalued: false
examples:
- value: 2018-05-11T10:00:00+01:00; 2018-05-11
comments:
- 'Expected value: date and time'
aliases:
- collection date
slot_uri: MIXS:0000011
neg_cont_type:
is_a: investigation field
title: negative control type
description: The substance or equipment used as a negative control in an investigation
range: neg_cont_type_enum
multivalued: false
examples:
- value: ''
comments:
- 'Expected value: enumeration or text'
aliases:
- negative control type
string_serialization: '[distilled water|phosphate buffer|empty collection device|empty
collection tube|DNA-free PCR mix|sterile swab|sterile syringe]'
slot_uri: MIXS:0001321
pos_cont_type:
is_a: investigation field
title: positive control type
description: The substance, mixture, product, or apparatus used to verify that
a process which is part of an investigation delivers a true positive.
range: string
multivalued: false
examples:
- value: ''
comments: []
aliases:
- positive control type
string_serialization: '{term} or {text}'
slot_uri: MIXS:0001322
env_broad_scale:
is_a: environment field
title: broad-scale environmental context
description: "Report the major environmental system the sample or specimen came\
\ from. The system(s) identified should have a coarse spatial grain, to provide\
\ the general environmental context of where the sampling was done (e.g. in\
\ the desert or a rainforest). We recommend using subclasses of EnvO\u2019s\
\ biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation\
\ about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS"
range: string
multivalued: false
examples:
- value: oceanic epipelagic zone biome [ENVO:01000033] for annotating a water
sample from the photic zone in middle of the Atlantic Ocean
comments:
- 'Expected value: The major environment type(s) where the sample was collected.
Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated
by one or more pipes.'
aliases:
- broad-scale environmental context
string_serialization: '{termLabel} {[termID]}'
slot_uri: MIXS:0000012
env_local_scale:
is_a: environment field
title: local environmental context
description: "Report the entity or entities which are in the sample or specimen\u2019\
s local vicinity and which you believe have significant causal influences on\
\ your sample or specimen. We recommend using EnvO terms which are of smaller\
\ spatial grain than your entry for env_broad_scale. Terms, such as anatomical\
\ sites, from other OBO Library ontologies which interoperate with EnvO (e.g.\
\ UBERON) are accepted in this field. EnvO documentation about how to use the\
\ field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS."
range: string
multivalued: false
examples:
- value: 'litter layer [ENVO:01000338]; Annotating a pooled sample taken from
various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb
and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [ENVO:01000335]|shrub
layer [ENVO:01000336].'
comments:
- 'Expected value: Environmental entities having causal influences upon the entity
at time of sampling.'
aliases:
- local environmental context
string_serialization: '{termLabel} {[termID]}'
slot_uri: MIXS:0000013
env_medium:
is_a: environment field
title: environmental medium
description: 'Report the environmental material(s) immediately surrounding the
sample or specimen at the time of sampling. We recommend using subclasses of
''environmental material'' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO
documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS
. Terms from other OBO ontologies are permissible as long as they reference
mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities
(e.g. a tree, a leaf, a table top).'
range: string
multivalued: false
examples:
- value: 'soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m
of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating
a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO:00002005]'
comments:
- 'Expected value: The material displaced by the entity at time of sampling. Recommend
subclasses of environmental material [ENVO:00010483].'
aliases:
- environmental medium
string_serialization: '{termLabel} {[termID]}'
slot_uri: MIXS:0000014
env_package:
is_a: mixs extension field
title: environmental package
description: MIxS extension for reporting of measurements and observations obtained
from one or more of the environments where the sample was obtained. All environmental
packages listed here are further defined in separate subtables. By giving the
name of the environmental package, a selection of fields can be made from the
subtables and can be reported
range: env_package_enum
multivalued: false
examples:
- value: soil
comments:
- 'Expected value: enumeration'
aliases:
- environmental package
deprecated: Deprecated in mixs6
string_serialization: '[air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon
resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc
environment|plant-associated|sediment|soil|wastewater/sludge|water]'
slot_uri: MIXS:0000019
subspecf_gen_lin:
is_a: nucleic acid sequence source field
title: subspecific genetic lineage
description: Information about the genetic distinctness of the sequenced organism
below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any
relevant genetic typing schemes like Group I plasmid. Subspecies should not
be recorded in this term, but in the NCBI taxonomy. Supply both the lineage
name and the lineage rank separated by a colon, e.g., biovar:abc123.
range: string
multivalued: false
examples:
- value: serovar:Newport
comments:
- 'Expected value: Genetic lineage below lowest rank of NCBI taxonomy, which is
subspecies, e.g. serovar, biotype, ecotype.'
aliases:
- subspecific genetic lineage
string_serialization: '{rank name}:{text}'
slot_uri: MIXS:0000020
ploidy:
is_a: nucleic acid sequence source field
title: ploidy
description: The ploidy level of the genome (e.g. allopolyploid, haploid, diploid,
triploid, tetraploid). It has implications for the downstream study of duplicated
gene and regions of the genomes (and perhaps for difficulties in assembly).
For terms, please select terms listed under class ploidy (PATO:0001374) of Phenotypic
Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer
to http://purl.bioontology.org/ontology/PATO
range: string
multivalued: false
examples:
- value: allopolyploidy [PATO:0001379]
comments:
- 'Expected value: PATO'
aliases:
- ploidy
string_serialization: '{termLabel} {[termID]}'
slot_uri: MIXS:0000021
num_replicons:
is_a: nucleic acid sequence source field
title: number of replicons
description: Reports the number of replicons in a nuclear genome of eukaryotes,
in the genome of a bacterium or archaea or the number of segments in a segmented
virus. Always applied to the haploid chromosome count of a eukaryote
range: integer
multivalued: false
examples:
- value: '2'
comments:
- 'Expected value: for eukaryotes and bacteria: chromosomes (haploid count); for
viruses: segments'
aliases:
- number of replicons
slot_uri: MIXS:0000022
extrachrom_elements:
is_a: nucleic acid sequence source field
title: extrachromosomal elements
description: Do plasmids of significant phenotypic consequence exist (e.g. ones
that determine virulence or antibiotic resistance)? Megaplasmids? Other plasmids
(Borrelia has 15+ plasmids)
range: integer
multivalued: false
examples:
- value: '5'
comments:
- 'Expected value: number of extrachromosomal elements'
aliases:
- extrachromosomal elements
slot_uri: MIXS:0000023
estimated_size:
is_a: nucleic acid sequence source field
title: estimated size
description: The estimated size of the genome prior to sequencing. Of particular
importance in the sequencing of (eukaryotic) genome which could remain in draft
form for a long or unspecified period.
range: string
multivalued: false
examples:
- value: 300000 bp
comments:
- 'Expected value: number of base pairs'
aliases:
- estimated size
string_serialization: '{integer} bp'
slot_uri: MIXS:0000024
ref_biomaterial:
is_a: nucleic acid sequence source field
title: reference for biomaterial
description: Primary publication if isolated before genome publication; otherwise,
primary genome report.
range: string
multivalued: false
examples:
- value: doi:10.1016/j.syapm.2018.01.009
comments:
- 'Expected value: PMID, DOI or URL'
aliases:
- reference for biomaterial
string_serialization: '{PMID}|{DOI}|{URL}'
slot_uri: MIXS:0000025
source_mat_id:
is_a: nucleic acid sequence source field
title: source material identifiers
description: A unique identifier assigned to a material sample (as defined by
http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular
digital record of a material sample) used for extracting nucleic acids, and
subsequent sequencing. The identifier can refer either to the original material
collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher,
/bio_material, or /culture_collection may or may not share the same value as
the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id
may both contain 'UAM:Herps:14' , referring to both the specimen voucher and
sampled tissue with the same identifier. However, the /culture_collection qualifier
may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id
would refer to an identifier from some derived culture from which the nucleic
acids were extracted (e.g. xatc123 or ark:/2154/R2).
range: string
multivalued: false
examples:
- value: MPI012345
comments:
- 'Expected value: for cultures of microorganisms: identifiers for two culture
collections; for other material a unique arbitrary identifier'
aliases:
- source material identifiers
string_serialization: '{text}'
slot_uri: MIXS:0000026
pathogenicity:
is_a: nucleic acid sequence source field
title: known pathogenicity
description: To what is the entity pathogenic
range: string
multivalued: false
examples:
- value: human, animal, plant, fungi, bacteria
comments:
- 'Expected value: names of organisms that the entity is pathogenic to'
aliases:
- known pathogenicity
string_serialization: '{text}'
slot_uri: MIXS:0000027
biotic_relationship:
is_a: nucleic acid sequence source field
title: observed biotic relationship
description: Description of relationship(s) between the subject organism and other
organism(s) it is associated with. E.g., parasite on species X; mutualist with
species Y. The target organism is the subject of the relationship, and the other
organism(s) is the object
range: biotic_relationship_enum
multivalued: false
examples:
- value: free living
comments:
- 'Expected value: enumeration'
aliases:
- observed biotic relationship
string_serialization: '[free living|parasitism|commensalism|symbiotic|mutualism]'
slot_uri: MIXS:0000028
specific_host:
is_a: nucleic acid sequence source field
title: host scientific name
description: Report the host's taxonomic name and/or NCBI taxonomy ID.
range: string
multivalued: false
examples:
- value: Homo sapiens and/or 9606
comments:
- 'Expected value: host scientific name, taxonomy ID'
aliases:
- host scientific name
string_serialization: '{text}|{NCBI taxid}'
slot_uri: MIXS:0000029
host_spec_range:
is_a: nucleic acid sequence source field
title: host specificity or range
description: The range and diversity of host species that an organism is capable
of infecting, defined by NCBI taxonomy identifier.
range: integer
multivalued: false
examples:
- value: '9606'
comments:
- 'Expected value: NCBI taxid'
aliases:
- host specificity or range
slot_uri: MIXS:0000030
health_disease_stat:
is_a: nucleic acid sequence source field
title: health or disease status of specific host at time of collection
description: Health or disease status of specific host at time of collection
range: health_disease_stat_enum
multivalued: false
examples:
- value: dead
comments:
- 'Expected value: enumeration'
aliases:
- health or disease status of specific host at time of collection
deprecated: Deprecated in mixs6
string_serialization: '[healthy|diseased|dead|disease-free|undetermined|recovering|resolving|pre-existing
condition|pathological|life threatening|congenital]'
slot_uri: MIXS:0000031
host_disease_stat:
is_a: nucleic acid sequence source field
title: host disease status
description: List of diseases with which the host has been diagnosed; can include
multiple diagnoses. The value of the field depends on host; for humans the terms
should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org,
non-human host diseases are free text
range: string
multivalued: false
examples:
- value: rabies [DOID:11260]
comments:
- 'Expected value: disease name or Disease Ontology term'
aliases:
- host disease status
string_serialization: '{termLabel} {[termID]}|{text}'
slot_uri: MIXS:0000031
trophic_level:
is_a: nucleic acid sequence source field
title: trophic level
description: Trophic levels are the feeding position in a food chain. Microbes
can be a range of producers (e.g. chemolithotroph)
range: trophic_level_enum
multivalued: false
examples:
- value: heterotroph
comments:
- 'Expected value: enumeration'
aliases:
- trophic level
string_serialization: '[autotroph|carboxydotroph|chemoautotroph|chemoheterotroph|chemolithoautotroph|chemolithotroph|chemoorganoheterotroph|chemoorganotroph|chemosynthetic|chemotroph|copiotroph|diazotroph|facultative autotroph|heterotroph|lithoautotroph|lithoheterotroph|lithotroph|methanotroph|methylotroph|mixotroph|obligate chemoautolithotroph|oligotroph|organoheterotroph|organotroph|photoautotroph|photoheterotroph|photolithoautotroph|photolithotroph|photosynthetic|phototroph]'
slot_uri: MIXS:0000032
propagation:
is_a: nucleic acid sequence source field
title: propagation
description: 'The type of reproduction from the parent stock. Values for this
field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately
lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual.'
range: string
multivalued: false
examples:
- value: lytic
comments:
- 'Expected value: for virus: lytic, lysogenic, temperate, obligately lytic; for
plasmid: incompatibility group; for eukaryote: asexual, sexual; other more specific
values (e.g., incompatibility group) are allowed'
aliases:
- propagation
string_serialization: '{text}'
slot_uri: MIXS:0000033
encoded_traits:
is_a: nucleic acid sequence source field
title: encoded traits
description: Should include key traits like antibiotic resistance or xenobiotic
degradation phenotypes for plasmids, converting genes for phage
range: string
multivalued: false
examples:
- value: beta-lactamase class A
comments:
- 'Expected value: for plasmid: antibiotic resistance; for phage: converting genes'
aliases:
- encoded traits
string_serialization: '{text}'
slot_uri: MIXS:0000034
rel_to_oxygen:
is_a: nucleic acid sequence source field
title: relationship to oxygen
description: Is this organism an aerobe or an anaerobe? Please note that aerobic and
anaerobic are valid descriptors for microbial environments
range: rel_to_oxygen_enum
multivalued: false
examples:
- value: aerobe
comments:
- 'Expected value: enumeration'
aliases:
- relationship to oxygen
string_serialization: '[aerobe|anaerobe|facultative|microaerophilic|microanaerobe|obligate
aerobe|obligate anaerobe]'
slot_uri: MIXS:0000015
isol_growth_condt:
is_a: nucleic acid sequence source field
title: isolation and growth condition
description: Publication reference in the form of pubmed ID (pmid), digital object
identifier (doi) or url for isolation and growth condition specifications of
the organism/material
range: string
multivalued: false
examples:
- value: 'doi: 10.1016/j.syapm.2018.01.009'
comments:
- 'Expected value: PMID, DOI or URL'
aliases:
- isolation and growth condition
string_serialization: '{PMID}|{DOI}|{URL}'
slot_uri: MIXS:0000003
samp_collec_device:
is_a: nucleic acid sequence source field
title: sample collection device
description: The device used to collect an environmental sample. This field accepts
terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094).
range: string
multivalued: false
examples:
- value: swab, biopsy, niskin bottle, push core, drag swab [GENEPIO:0002713]
comments:
- 'Expected value: device name'
aliases:
- sample collection device
string_serialization: '{termLabel} {[termID]}|{text}'
slot_uri: MIXS:0000002
samp_collec_method:
is_a: nucleic acid sequence source field
title: sample collection method
description: The method employed for collecting the sample.
range: string
multivalued: false
examples:
- value: swabbing
comments:
- 'Expected value: PMID, DOI, URL, or text'
aliases:
- sample collection method
string_serialization: '{PMID}|{DOI}|{URL}|{text}'
slot_uri: MIXS:0001225
samp_mat_process:
is_a: nucleic acid sequence source field
title: sample material processing
description: A brief description of any processing applied to the sample during
or after retrieving the sample from environment, or a link to the relevant protocol(s)
performed.
range: string
multivalued: false
examples:
- value: filtering of seawater, storing samples in ethanol
comments:
- 'Expected value: text'
aliases:
- sample material processing
string_serialization: '{text}'
slot_uri: MIXS:0000016
size_frac:
is_a: nucleic acid sequence source field
title: size fraction selected
description: Filtering pore size used in sample preparation
range: string
multivalued: false
examples:
- value: 0-0.22 micrometer
comments:
- 'Expected value: filter size value range'
aliases:
- size fraction selected
string_serialization: '{float}-{float} {unit}'
slot_uri: MIXS:0000017
samp_size:
is_a: nucleic acid sequence source field
title: amount or size of sample collected
description: The total amount or size (volume (ml), mass (g) or area (m2)) of
sample collected.
range: quantity value
multivalued: false
examples:
- value: 5 liter
comments:
- 'Expected value: measurement value'
- 'Preferred unit: milliliter, gram, milligram, liter'
aliases:
- amount or size of sample collected
slot_uri: MIXS:0000001
samp_vol_we_dna_ext:
is_a: nucleic acid sequence source field
title: sample volume or weight for DNA extraction
description: 'Volume (ml) or mass (g) of total collected sample processed for
DNA extraction. Note: total sample collected should be entered under the term
Sample Size (MIXS:0000001).'
range: quantity value
multivalued: false
examples:
- value: 1500 milliliter
comments:
- 'Expected value: measurement value'
- 'Preferred unit: milliliter, gram, milligram, square centimeter'
aliases:
- sample volume or weight for DNA extraction
slot_uri: MIXS:0000111
source_uvig:
is_a: nucleic acid sequence source field
title: source of UViGs
description: Type of dataset from which the UViG was obtained
range: source_uvig_enum
multivalued: false
examples:
- value: viral fraction metagenome (virome)
comments:
- 'Expected value: enumeration'
aliases:
- source of UViGs
string_serialization: '[metagenome (not viral targeted)|viral fraction metagenome
(virome)|sequence-targeted metagenome|metatranscriptome (not viral targeted)|viral
fraction RNA metagenome (RNA virome)|sequence-targeted RNA metagenome|microbial
single amplified genome (SAG)|viral single amplified genome (vSAG)|isolate microbial
genome|other]'
slot_uri: MIXS:0000035
virus_enrich_appr:
is_a: nucleic acid sequence source field
title: virus enrichment approach
description: List of approaches used to enrich the sample for viruses, if any
range: virus_enrich_appr_enum
multivalued: false
examples:
- value: filtration + FeCl Precipitation + ultracentrifugation + DNAse
comments:
- 'Expected value: enumeration'
aliases:
- virus enrichment approach
string_serialization: '[filtration|ultrafiltration|centrifugation|ultracentrifugation|PEG
Precipitation|FeCl Precipitation|CsCl density gradient|DNAse|RNAse|targeted
sequence capture|other|none]'
slot_uri: MIXS:0000036
nucl_acid_ext:
is_a: sequencing field
title: nucleic acid extraction
description: A link to a literature reference, electronic resource or a standard
operating procedure (SOP), that describes the material separation to recover
the nucleic acid fraction from a sample
range: string
multivalued: false
examples:
- value: https://mobio.com/media/wysiwyg/pdfs/protocols/12888.pdf
comments:
- 'Expected value: PMID, DOI or URL'
aliases:
- nucleic acid extraction
string_serialization: '{PMID}|{DOI}|{URL}'
slot_uri: MIXS:0000037
nucl_acid_amp:
is_a: sequencing field
title: nucleic acid amplification
description: A link to a literature reference, electronic resource or a standard
operating procedure (SOP), that describes the enzymatic amplification (PCR,
TMA, NASBA) of specific nucleic acids
range: string
multivalued: false
examples:
- value: https://phylogenomics.me/protocols/16s-pcr-protocol/
comments:
- 'Expected value: PMID, DOI or URL'
aliases:
- nucleic acid amplification
string_serialization: '{PMID}|{DOI}|{URL}'
slot_uri: MIXS:0000038
lib_size:
is_a: sequencing field
title: library size
description: Total number of clones in the library prepared for the project
range: integer
multivalued: false
examples:
- value: '50'
comments:
- 'Expected value: number of clones'
aliases:
- library size
slot_uri: MIXS:0000039
lib_reads_seqd:
is_a: sequencing field
title: library reads sequenced
description: Total number of clones sequenced from the library
range: integer
multivalued: false
examples:
- value: '20'
comments:
- 'Expected value: number of reads sequenced'
aliases:
- library reads sequenced
slot_uri: MIXS:0000040
lib_layout:
is_a: sequencing field
title: library layout
description: Specify whether to expect single, paired, or other configuration
of reads
range: lib_layout_enum
multivalued: false
examples:
- value: paired
comments:
- 'Expected value: enumeration'
aliases:
- library layout
string_serialization: '[paired|single|vector|other]'
slot_uri: MIXS:0000041
lib_vector:
is_a: sequencing field
title: library vector
description: Cloning vector type(s) used in construction of libraries
range: string
multivalued: false
examples:
- value: Bacteriophage P1
comments:
- 'Expected value: vector'
aliases:
- library vector
string_serialization: '{text}'
slot_uri: MIXS:0000042
lib_screen:
is_a: sequencing field
title: library screening strategy
description: Specific enrichment or screening methods applied before and/or after
creating libraries
range: string
multivalued: false
examples:
- value: enriched, screened, normalized
comments:
- 'Expected value: screening strategy name'
aliases:
- library screening strategy
string_serialization: '{text}'
slot_uri: MIXS:0000043
target_gene:
is_a: sequencing field
title: target gene
description: Targeted gene or locus name for marker gene studies
range: string
multivalued: false
examples:
- value: 16S rRNA, 18S rRNA, nif, amoA, rpo
comments:
- 'Expected value: gene name'
aliases:
- target gene
string_serialization: '{text}'
slot_uri: MIXS:0000044
target_subfragment:
is_a: sequencing field
title: target subfragment
description: Name of subfragment of a gene or locus. Important to e.g. identify
special regions on marker genes like V6 on 16S rRNA
range: string
multivalued: false
examples:
- value: V6, V9, ITS
comments:
- 'Expected value: gene fragment name'
aliases:
- target subfragment
string_serialization: '{text}'
slot_uri: MIXS:0000045
pcr_primers:
is_a: sequencing field
title: pcr primers
description: PCR primers that were used to amplify the sequence of the targeted
gene, locus or subfragment. This field should contain all the primers used for
a single PCR reaction if multiple forward or reverse primers are present in
a single PCR reaction. The primer sequence should be reported in uppercase letters
range: string
multivalued: false
examples:
- value: FWD:GTGCCAGCMGCCGCGGTAA;REV:GGACTACHVGGGTWTCTAAT
comments:
- 'Expected value: FWD: forward primer sequence;REV:reverse primer sequence'
aliases:
- pcr primers
string_serialization: FWD:{dna};REV:{dna}
slot_uri: MIXS:0000046
mid:
is_a: sequencing field
title: multiplex identifiers
description: Molecular barcodes, called Multiplex Identifiers (MIDs), that are
used to specifically tag unique samples in a sequencing run. Sequence should
be reported in uppercase letters
range: string
multivalued: false
examples:
- value: GTGAATAT
comments:
- 'Expected value: multiplex identifier sequence'
aliases:
- multiplex identifiers
string_serialization: '{dna}'
slot_uri: MIXS:0000047
adapters:
is_a: sequencing field
title: adapters
description: Adapters provide priming sequences for both amplification and sequencing
of the sample-library fragments. Both adapters should be reported; in uppercase
letters
range: string
multivalued: false
examples:
- value: AATGATACGGCGACCACCGAGATCTACACGCT;CAAGCAGAAGACGGCATACGAGAT
comments:
- 'Expected value: adapter A and B sequence'
aliases:
- adapters
string_serialization: '{dna};{dna}'
slot_uri: MIXS:0000048
pcr_cond:
is_a: sequencing field
title: pcr conditions
description: Description of reaction conditions and components of PCR in the form
of 'initial denaturation:94degC_1.5min; annealing:...'
range: string
multivalued: false
examples:
- value: initial denaturation:94_3;annealing:50_1;elongation:72_1.5;final elongation:72_10;35
comments:
- 'Expected value: initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final
elongation:degrees_minutes;total cycles'
aliases:
- pcr conditions
string_serialization: initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final
elongation:degrees_minutes;total cycles
slot_uri: MIXS:0000049
seq_meth:
is_a: sequencing field
title: sequencing method
description: Sequencing machine used. Where possible the term should be taken
from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103).
range: string
multivalued: false
examples:
- value: 454 Genome Sequencer FLX [OBI:0000702]
comments:
- 'Expected value: Text or OBI'
aliases:
- sequencing method