-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1300 from izcram/feature/cleanupTarBall
Feature/cleanup tar ball
- Loading branch information
Showing
7 changed files
with
273 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,64 @@ | ||
#!/usr/bin/env python | ||
import os | ||
import argparse | ||
import hashlib | ||
import logging | ||
import os | ||
import sys | ||
from pathlib import Path | ||
from typing import Set | ||
|
||
|
||
def read_file_list(path: Path) -> Set[Path]:
    """Read a text file containing one path per line into a set of paths.

    Leading and trailing whitespace is stripped from every line. Blank
    (or whitespace-only) lines are skipped: ``Path("")`` normalises to
    ``Path(".")``, so a stray blank line would otherwise inject a bogus
    ``.`` entry, and two list files that both contain a blank line would
    appear to overlap.

    Args:
        path: Location of the list file.

    Returns:
        The set of paths named in the file. Pathlib normalisation applies,
        so ``./a/b`` and ``a/b`` compare equal.
    """
    with open(path, "r") as f:
        # Iterate the file lazily; there is no need to materialise all lines.
        entries = (line.strip() for line in f)
        return {Path(entry) for entry in entries if entry}
|
||
|
||
def _md5_of_file(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.md5()
    # Chunked reads keep memory bounded for large artifacts, and the
    # context manager guarantees the handle is closed.
    with open(path, 'rb') as f_in:
        for chunk in iter(lambda: f_in.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def main(argv=None):
    """Write md5 checksums for all keep-listed files below an input directory.

    The input directory is walked recursively. Every file found must be
    named in either the keep list or the discard list (both hold paths
    relative to the input directory); keep-listed files get a line in the
    output file in GNU ``md5sum`` format, discard-listed files are only
    logged. Files whose name equals the output file's base name are skipped.

    Args:
        argv: Command line arguments to parse. Defaults to ``None``, in
            which case ``sys.argv[1:]`` is used.

    Raises:
        SystemExit: If a required option is missing, if the keep list and
            the discard list overlap, or if a file is found that is in
            neither list.
    """
    parser = argparse.ArgumentParser()
    # All four options are needed below; marking them required yields a
    # usage error instead of a TypeError when one is omitted.
    parser.add_argument("-i", "--inputDir", required=True,
                        help="Input directory for generating md5sums")
    parser.add_argument("-o", "--outputFile", required=True,
                        help="Output file for md5sums")
    parser.add_argument("-f", "--keepListFilePath", required=True,
                        help="only consider files whose paths are given in this file. "
                             "Paths are expected to be relative to --inputDir")
    parser.add_argument("-d", "--discardListFilePath", required=True,
                        help="paths in this file are ignored, i.e. not included in final tarball. Paths are expected to be relative to --inputDir")

    args = parser.parse_args(argv)

    logging.getLogger().setLevel(logging.INFO)

    keep_list = read_file_list(args.keepListFilePath)
    discard_list = read_file_list(args.discardListFilePath)

    lists_intersec = keep_list.intersection(discard_list)
    if lists_intersec:
        sys.exit(f"Keep list and discard list are not disjoint! Got {', '.join([str(p) for p in lists_intersec])}")

    input_dir = Path(args.inputDir)
    output_file_name = args.outputFile
    # Loop invariant: base name used to recognise the output file itself.
    output_base_name = Path(output_file_name).name

    # Recurses through a directory and its subdirectories and generates md5 hashes for each file in the keep list.
    # All hashes are written to the output file specified with the -o flag.
    with open(output_file_name, 'w') as f_out:
        for subdir, _, files in os.walk(input_dir):
            # Keep/discard lists hold paths relative to the input directory.
            subdir_path = Path(subdir).relative_to(input_dir)

            for file in files:
                # Don't hash the output file
                if file == output_base_name:
                    continue

                filepath = subdir_path / file
                if filepath in keep_list:
                    md5hash = _md5_of_file(Path(subdir) / file)
                    f_out.write(f"{md5hash}  {filepath}\n")  # 2 whitespaces in order to be compatible with GNU md5sum
                    continue

                logging.info(f"Won't include file {filepath} in tarball")

                if filepath not in discard_list:
                    # If a file is neither in the keep list nor the discard list, fail, as it is an unexpected file.
                    # This way we are making sure we don't forget to include a newly created file into the tarball
                    sys.exit(f"Found file {filepath} neither in keep list nor discard list. ")
|
||
|
||
# Script entry point: run main() only when executed directly, not when imported. | ||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
./1000G.sorted.hg38.vcffor_pipeline | ||
./release/artifacts/1000_Genomesready.vcf | ||
./release/artifacts/1000_Genomes.vcf | ||
./release/artifacts/aggregated.tsv | ||
./release/artifacts/bayesdel.vcf | ||
./release/artifacts/BICready.vcf | ||
./release/artifacts/BIC.vcf | ||
./release/artifacts/built.tsv | ||
./release/artifacts/built_with_bayesdel.tsv | ||
./release/artifacts/built_with_ca_ids.tsv | ||
./release/artifacts/built_with_mupit.tsv | ||
./release/artifacts/built_with_priors_clean.tsv | ||
./release/artifacts/built_with_priors.tsv | ||
./release/artifacts/built_with_vr_ids.tsv | ||
./release/artifacts/ClinVarready.vcf | ||
./release/artifacts/ClinVar.vcf | ||
./release/artifacts/enigma_from_clinvar.tsv | ||
./release/artifacts/ESPready.vcf | ||
./release/artifacts/ESP.vcf | ||
./release/artifacts/ExACready.vcf | ||
./release/artifacts/ExAC.vcf | ||
./release/artifacts/exLOVDready.vcf | ||
./release/artifacts/exLOVD.vcf | ||
./release/artifacts/Findlay_BRCA1_Ring_Function_Scoresready.vcf | ||
./release/artifacts/Findlay_BRCA1_Ring_Function_Scores.vcf | ||
./release/artifacts/GnomADready.vcf | ||
./release/artifacts/GnomAD.vcf | ||
./release/artifacts/LOVDready.vcf | ||
./release/artifacts/LOVD.vcf | ||
./release/artifacts/releaseDiff.log | ||
./release/artifacts/right1000_Genomes | ||
./release/artifacts/rightBIC | ||
./release/artifacts/rightClinVar | ||
./release/artifacts/rightESP | ||
./release/artifacts/rightExAC | ||
./release/artifacts/rightexLOVD | ||
./release/artifacts/rightFindlay_BRCA1_Ring_Function_Scores | ||
./release/artifacts/rightGnomAD | ||
./release/artifacts/rightLOVD | ||
./release/artifacts/victor_wdir/input.vcf.gz | ||
./release/artifacts/victor_wdir/input.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.10.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.10.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.10.vcf.gz | ||
./release/artifacts/victor_wdir/output.10.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.11.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.11.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.11.vcf.gz | ||
./release/artifacts/victor_wdir/output.11.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.12.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.12.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.12.vcf.gz | ||
./release/artifacts/victor_wdir/output.12.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.13.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.13.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.13.vcf.gz | ||
./release/artifacts/victor_wdir/output.13.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.14.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.14.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.14.vcf.gz | ||
./release/artifacts/victor_wdir/output.14.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.15.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.15.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.15.vcf.gz | ||
./release/artifacts/victor_wdir/output.15.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.16.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.16.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.16.vcf.gz | ||
./release/artifacts/victor_wdir/output.16.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.17.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.17.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.17.vcf.gz | ||
./release/artifacts/victor_wdir/output.17.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.18.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.18.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.18.vcf.gz | ||
./release/artifacts/victor_wdir/output.18.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.19.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.19.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.19.vcf.gz | ||
./release/artifacts/victor_wdir/output.19.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.1.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.1.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.1.vcf.gz | ||
./release/artifacts/victor_wdir/output.1.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.20.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.20.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.20.vcf.gz | ||
./release/artifacts/victor_wdir/output.20.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.21.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.21.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.21.vcf.gz | ||
./release/artifacts/victor_wdir/output.21.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.22.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.22.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.22.vcf.gz | ||
./release/artifacts/victor_wdir/output.22.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.2.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.2.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.2.vcf.gz | ||
./release/artifacts/victor_wdir/output.2.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.3.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.3.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.3.vcf.gz | ||
./release/artifacts/victor_wdir/output.3.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.4.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.4.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.4.vcf.gz | ||
./release/artifacts/victor_wdir/output.4.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.5.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.5.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.5.vcf.gz | ||
./release/artifacts/victor_wdir/output.5.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.6.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.6.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.6.vcf.gz | ||
./release/artifacts/victor_wdir/output.6.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.7.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.7.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.7.vcf.gz | ||
./release/artifacts/victor_wdir/output.7.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.8.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.8.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.8.vcf.gz | ||
./release/artifacts/victor_wdir/output.8.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.9.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.9.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.9.vcf.gz | ||
./release/artifacts/victor_wdir/output.9.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.for_PROV | ||
./release/artifacts/victor_wdir/output.vqslod | ||
./release/artifacts/victor_wdir/output.X.qc.vcf.gz | ||
./release/artifacts/victor_wdir/output.X.qc.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/output.X.vcf.gz | ||
./release/artifacts/victor_wdir/output.X.vcf.gz.tbi | ||
./release/artifacts/victor_wdir/slurm.annotate.run_1.start | ||
./release/artifacts/victor_wdir/slurm.annotate.run_1.stop | ||
./release/artifacts/victor_wdir/slurm.annotate.run_1.version |
Oops, something went wrong.