Skip to content

Commit

Permalink
Added skip-option
Browse files Browse the repository at this point in the history
  • Loading branch information
emilhaegglund committed Jun 14, 2017
1 parent 25c4be3 commit 30befe0
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 35 deletions.
6 changes: 4 additions & 2 deletions bin/schavott
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def parse_arguments():
help='''Which scaffolder to use.''')
parser.add_argument('--sspace_path', '-p',
help='Path to SSPACE (Only for scaffolding)')
parser.add_argument('--min_read_length', '-l', default=5000,
parser.add_argument('--min_read_length', '-l', default=0,
help='''Minimum read length from MinION to use.''')
parser.add_argument('--min_quality', '-q', default=9,
parser.add_argument('--min_quality', '-q', default=0,
help='''Minimum quality for reads from MinION to use.''')
parser.add_argument('--watch', '-w', required=True,
help='Directory to watch for fast5 files')
Expand All @@ -53,6 +53,8 @@ def parse_arguments():
choices=['time', 'reads'],
help='''Use timer or read count.\
[reads]''')
parser.add_argument('--skip', '-j',
help='''Skips the first j read of the sequencing''')
parser.add_argument('--intensity', '-i', default=100,
help='''How often the scaffolding process should run.\
If run mode is set to reads, scaffolding will run\
Expand Down
2 changes: 1 addition & 1 deletion move_fast5.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
times = times.sort_values(["moveTimes"])
# Create a list with all filenames in the dataframe
path = list(times['filename'])

print(times["unix_timestamp_end"])
# Wait until each read is completed, then move it to the output_path
prev_time = 0
print("Start copying files to " + output_path)
Expand Down
56 changes: 32 additions & 24 deletions schavott/MainApp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,22 @@
import schavott.Assembler

class MainApp(object):
# TODO: Add switch to discard xx number of reads in the beginning
# to avoid reads from wrong organism in the scaffolding.
def __init__(self, args):
self.readQue = []
self.reads = []
self.passCounter = 0
self.failCounter = 0
self.runMode = args.run_mode
self.skip = args.skip
self.skip_counter = 0
self.output = args.output
self.plot = args.plot
self.triggerMode = args.trigger_mode
# Will be added to argument list
self.readLengths = []
self.minQuality = args.min_quality
self.minQuality = int(args.min_quality)
self.minLength = int(args.min_read_length)
self._reset_timer()
self._set_intensity(args.intensity)
Expand Down Expand Up @@ -53,36 +57,40 @@ def __init__(self, args):
def open_read(self, filePath):
"""Open downloaded fast5"""
# Try to read fast5 file.
try:
head, tail = os.path.split(filePath)
root, ext = os.path.splitext(tail)
read = schavott.ReadData.ReadData(filePath)
self.add_read(read)
if read.get_twod():
self.readLengths.append(read.get_length())
if read.get_quality() >= self.minQuality and read.get_length() >= self.minLength:
read.set_pass()
with open(os.path.join(self.output, "reads_fasta", root) + '.fasta', 'w') as f:
f.write(str(read.get_fasta()))
f.close()
self.update_counter(read)
#print("PassCounter: " + str(self.passCounter))
#print("FailCounter: " + str(self.failCounter))
#print("Reads not possible to open: " + str(len(self.readQue)))
except AttributeError:
self.add_to_readQue(filePath)



# If the file is not completely downloaded or is corrupt

self.skip_counter += 1
if self.skip_counter > self.skip:
try:
head, tail = os.path.split(filePath)
root, ext = os.path.splitext(tail)
read = schavott.ReadData.ReadData(filePath)
self.add_read(read)
# Change the if statement to `if read.get_twod():` to use 2D reads (deprecated by ONT).
if False:
self.readLengths.append(read.get_length())
if read.get_quality() >= self.minQuality and read.get_length() >= self.minLength:
read.set_pass()
with open(os.path.join(self.output, "reads_fasta", root) + '.fasta', 'w') as f:
f.write(str(read.get_fasta()))
f.close()
self.update_counter(read)
else:
self.readLengths.append(read.get_length_1d())
if read.get_quality_1d() >= self.minQuality and read.get_length_1d() >= self.minLength:
read.set_pass()
with open(os.path.join(self.output, "reads_fasta", root) + '.fasta', 'w') as f:
f.write(str(read.get_fasta_1d()))
f.close()
self.update_counter(read)
except AttributeError:
self.add_to_readQue(filePath)

def add_read(self, read):
    """Record *read* at the end of the list of reads seen so far."""
    seen = self.reads
    seen.append(read)

def update_counter(self, read):
if read.get_pass():
self.passCounter += 1
print('Reads: ' + str(self.passCounter))
self.run_scaffold()
else:
self.failCounter += 1
Expand Down
58 changes: 52 additions & 6 deletions schavott/ReadData.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,24 @@ def __init__(self, filePath):
self.open_read(filePath)
self.passQuality = False
self.twod = False
self.oned = False
self.set_time()
self.set_1d()
self.set_length_1d()
self.set_quality_1d()
self.set_fastq_1d()
self.set_fasta_1d()
self.set_2d()
self.set_length()
self.set_quality()
self.set_time()
self.set_fastq()
self.set_fasta()
self.close_read()

def open_read(self, path):
    """Open the fast5 file at *path* read-only and keep the handle.

    On success the h5py file handle is stored in ``self._fast5``. If the
    file cannot be opened, a message is printed and ``self._fast5`` is
    left unset.
    """
    try:
        # Request read-only access explicitly: the file is only inspected
        # here, and this avoids relying on h5py's default open mode.
        self._fast5 = h5py.File(path, 'r')
    except IOError:
        print('File was not possible to open')

Expand All @@ -27,17 +34,40 @@ def close_read(self):
self._fast5.close()

def set_2d(self):

try:
self._fast5['Analyses']['Basecall_2D_000']['BaseCalled_2D']
self.twod = True
#print('Has 2D')
except:
print('1D')
print('No 2D sequence')

def set_1d(self):
    """Flag the read as 1D when a template basecall can be looked up.

    Sets ``self.oned`` to True if
    ``Analyses/Basecall_1D_000/BaseCalled_template`` is reachable in
    ``self._fast5``. Otherwise the flag is left untouched and a notice is
    printed.
    """
    try:
        self._fast5['Analyses']['Basecall_1D_000']['BaseCalled_template']
    except Exception:
        # Best-effort probe: any lookup failure is reported as "no
        # template". Catching Exception rather than using a bare except
        # lets KeyboardInterrupt and SystemExit propagate.
        print('No template sequence')
    else:
        self.oned = True

def set_length_1d(self):
    """Store the template sequence length in ``self.length_1d``.

    Does nothing unless ``self.oned`` is truthy. The value is the
    ``sequence_length`` attribute of
    ``Analyses/Basecall_1D_000/Summary/basecall_1d_template``.
    """
    if not self.oned:
        return
    basecall = self._fast5['Analyses']['Basecall_1D_000']
    template_summary = basecall['Summary']['basecall_1d_template']
    self.length_1d = template_summary.attrs['sequence_length']

def set_fastq_1d(self):
    """Store the template FASTQ record as a string in ``self.fastq_1d``.

    Does nothing unless ``self.oned`` is truthy. The record is read from
    ``Analyses/Basecall_1D_000/BaseCalled_template/Fastq`` and converted
    with ``str(np.array(...))``.
    """
    if not self.oned:
        return
    template = self._fast5['Analyses']['Basecall_1D_000']['BaseCalled_template']
    record = np.array(template['Fastq'])
    self.fastq_1d = str(record)

def set_fasta_1d(self):
    """Build a two-line FASTA record from ``self.fastq_1d``.

    Does nothing unless ``self.oned`` is truthy. The header is ``>``
    followed by the first FASTQ line with its first three characters
    dropped; the second FASTQ line is used as the sequence. The result is
    stored in ``self.fasta_1d``.
    """
    if not self.oned:
        return
    lines = self.fastq_1d.split('\n')
    # NOTE(review): the [3:] slice presumably strips a "b'@" prefix left by
    # str() on a bytes value - confirm against real input.
    title = '>' + lines[0][3:] + '\n'
    bases = lines[1] + '\n'
    self.fasta_1d = title + bases

def set_length(self):
    """Store the 2D sequence length in ``self.length``.

    Does nothing unless ``self.twod`` is truthy. The value is the
    ``sequence_length`` attribute of
    ``Analyses/Basecall_2D_000/Summary/basecall_2d``.
    """
    if not self.twod:
        return
    basecall = self._fast5['Analyses']['Basecall_2D_000']
    twod_summary = basecall['Summary']['basecall_2d']
    self.length = twod_summary.attrs['sequence_length']

def set_fastq(self):
if self.twod:
Expand All @@ -56,14 +86,18 @@ def set_time(self):
for key in self._fast5['Raw']['Reads'].keys():
startSample = self._fast5['Raw']['Reads'][key].attrs['start_time']
durationSample = self._fast5['Raw']['Reads'][key].attrs['duration']
self.startTime = datetime.datetime.fromtimestamp(int(expStartTime) + float(startSample)/samplingRate + float(durationSample)/samplingRate)
#self.startTime = datetime.datetime.now().time()
#self.startTime = datetime.datetime.fromtimestamp(int(expStartTime) + float(startSample)/samplingRate + float(durationSample)/samplingRate)
self.startTime = datetime.datetime.now().time()

def set_quality(self):
    """Store the 2D mean quality score in ``self.quality``.

    Does nothing unless ``self.twod`` is truthy. The value is the
    ``mean_qscore`` attribute of
    ``Analyses/Basecall_2D_000/Summary/basecall_2d``.
    """
    if not self.twod:
        return
    basecall = self._fast5['Analyses']['Basecall_2D_000']
    twod_summary = basecall['Summary']['basecall_2d']
    self.quality = twod_summary.attrs['mean_qscore']

def set_quality_1d(self):
    """Store the template mean quality score in ``self.quality_1d``.

    Does nothing unless ``self.oned`` is truthy. The value is the
    ``mean_qscore`` attribute of
    ``Analyses/Basecall_1D_000/Summary/basecall_1d_template``.
    """
    if not self.oned:
        return
    basecall = self._fast5['Analyses']['Basecall_1D_000']
    template_summary = basecall['Summary']['basecall_1d_template']
    self.quality_1d = template_summary.attrs['mean_qscore']

def set_pass(self):
    """Mark this read as having passed the filters."""
    passed = True
    self.passQuality = passed

Expand All @@ -88,5 +122,17 @@ def get_time(self):
def get_twod(self):
    """Return the ``twod`` flag of this read."""
    has_twod = self.twod
    return has_twod

def get_oned(self):
    """Return the ``oned`` flag of this read.

    The flag is the attribute ``self.oned``; the previous spelling
    ``self.oend`` named an attribute that is never set and raised
    AttributeError on every call.
    """
    return self.oned

def get_quality_1d(self):
    """Return the stored template mean quality score (``quality_1d``)."""
    score = self.quality_1d
    return score

def get_length_1d(self):
    """Return the stored template sequence length (``length_1d``)."""
    n_bases = self.length_1d
    return n_bases

def get_fastq_1d(self):
    """Return the stored template FASTQ string (``fastq_1d``)."""
    record = self.fastq_1d
    return record

def get_fasta_1d(self):
    """Return the stored template FASTA string (``fasta_1d``)."""
    record = self.fasta_1d
    return record
4 changes: 2 additions & 2 deletions schavott/Scaffold.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def run_sspace(self, passCounter):
self._create_single_fasta()
# Run SSPACE without alignment step
args = ['perl', self.sspacePath, '-c', self.contigPath,
'-p', self.npReads, '-i', '70', '-a', '1500', '-g' '-5000',
'-b', outdir]
'-p', self.npReads, '-i', '70', '-b', outdir, '-g', '-5000', '-t', '18']
print(args)

process = subprocess.Popen(args, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
Expand Down

0 comments on commit 30befe0

Please sign in to comment.