|
22 | 22 | "allele_length": pa.uint16(), |
23 | 23 | "sequence": pa.binary(), |
24 | 24 | "spanning_reads": pa.uint16(), |
| 25 | + "phase_set": pa.uint32(), |
| 26 | + "haplotype": pa.uint16(), |
25 | 27 | "length_range_lower": pa.uint16(), |
26 | 28 | "length_range_upper": pa.uint16(), |
27 | 29 | "average_methylation": pa.float32()} |
28 | 30 |
|
29 | 31 | L_COLUMNS = ["LocusID", "chrom", "start", "end"] |
30 | 32 | A_COLUMNS = ["LocusID", "allele_number", "allele_length", "sequence"] |
31 | | -S_COLUMNS = ["LocusID", "allele_number", "spanning_reads", "length_range_lower", |
32 | | - "length_range_upper", "average_methylation"] |
| 33 | +S_COLUMNS = ["LocusID", "allele_number", "spanning_reads", "phase_set", "haplotype", |
| 34 | + "length_range_lower", "length_range_upper", "average_methylation"] |
33 | 35 |
|
34 | 36 | L_SCHEMA = pa.schema({key: DTYPES[key] for key in L_COLUMNS}) |
35 | 37 | A_SCHEMA = pa.schema({key: DTYPES[key] for key in A_COLUMNS}) |
@@ -82,15 +84,18 @@ def sample_extract(locus_id, fmt, o_alleles, n_alleles): |
82 | 84 | """ |
83 | 85 | ret = [] |
84 | 86 | gts = [_ for _ in fmt['GT'] if _ is not None] |
85 | | - view = zip(gts, fmt['SD'], fmt['ALLR'], |
86 | | - fmt.get('AM', [None] * len(gts))) |
87 | | - for an, sd, allr, am in view: |
88 | | - if an is None: |
89 | | - continue |
| 87 | + view = zip(gts, |
| 88 | + fmt['SD'], |
| 89 | + fmt['ALLR'], |
| 90 | + fmt.get('AM', [None] * len(gts)), |
| 91 | + range(len(gts))# if fmt.phased else [None, None] |
| 92 | + ) |
| 93 | + for an, sd, allr, am, hp in view: |
90 | 94 | # Map allele number to new, deduplicated allele number |
91 | 95 | an = n_alleles.index(o_alleles[an]) |
92 | 96 | lrl, lru = map(int, allr.split('-')) |
93 | | - ret.append([locus_id, an, sd, lrl, lru, am]) |
| 97 | + ps = fmt.get('PS', None) |
| 98 | + ret.append([locus_id, an, sd, ps, hp, lrl, lru, am]) |
94 | 99 | return ret |
95 | 100 |
|
96 | 101 |
|
|
0 commit comments