Skip to content

Commit

Permalink
[MRG] update mapping notebook to include SNP graph, etc. (#168)
Browse files Browse the repository at this point in the history
* fix gz problem with test-private

* copy_local_genomes now gzips files

* fix mapping notebook fig #s, include SNP figure
  • Loading branch information
ctb authored Feb 16, 2022
1 parent 1516fd4 commit a22092b
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 24 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,18 @@ databases/podar-ref/:
mkdir -p databases/podar-ref
curl -L https://osf.io/vbhy5/download -o databases/podar-ref.tar.gz
cd databases/podar-ref/ && tar xzf ../podar-ref.tar.gz
parallel -j 4 gzip {} ::: $$(ls databases/podar-ref/*.fa)

# sketch the ref genomes
databases/podar-ref.zip: databases/podar-ref/
sourmash sketch dna -p k=31,scaled=1000 --name-from-first \
databases/podar-ref/*.fa -o databases/podar-ref.zip
databases/podar-ref/*.fa.gz -o databases/podar-ref.zip

# download taxonomy
databases/podar-ref.tax.csv:
curl -L https://osf.io/4yhjw/download -o databases/podar-ref.tax.csv

# create info file and genomes directory:
databases/podar-ref.info.csv:
python -m genome_grist.copy_local_genomes databases/podar-ref/*.fa -o databases/podar-ref.info.csv -d databases/podar-ref.d
python -m genome_grist.copy_local_genomes databases/podar-ref/*.fa.gz -o databases/podar-ref.info.csv -d databases/podar-ref.d
python -m genome_grist.make_info_file databases/podar-ref.info.csv
20 changes: 17 additions & 3 deletions genome_grist/copy_local_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import csv
import os
import shutil

import gzip
import contextlib

def main():
p = argparse.ArgumentParser()
Expand Down Expand Up @@ -50,8 +51,21 @@ def main():
print(f"read identifer '{ident}' and name '{remainder}'")

destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz")
print(f"copying '{filename}' to '{destfile}'")
shutil.copyfile(filename, destfile)

is_gzipped = False
with contextlib.suppress(OSError):
with gzip.open(filename) as fp:
fp.read(1)
is_gzipped = True

if is_gzipped:
print(f"copying '{filename}' to '{destfile}'")
shutil.copyfile(filename, destfile)
else:
print(f"compressing '{filename}' into '{destfile}'")
with open(filename, 'rb') as fp:
with gzip.open(destfile, 'w') as outfp:
outfp.write(fp.read())

w.writerow(dict(ident=ident, display_name=remainder,
genome_filename=destfile))
Expand Down
Loading

0 comments on commit a22092b

Please sign in to comment.