Skip to content

Commit 7ed0dc3

Browse files
committed
add a progressbar / logging
1 parent 7df2031 commit 7ed0dc3

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

trish.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,21 @@
77
from pathlib import PosixPath
88
from pprint import pprint
99

10+
# tqdm is an optional dependency: use it for progress reporting when it is
# installed, otherwise fall back to a no-op wrapper.
try:
    from tqdm import tqdm
    progressbar = tqdm
except ImportError:
    # Catch only the "package not installed" case; a bare ``except`` would
    # also hide KeyboardInterrupt/SystemExit and real errors raised while
    # importing tqdm.
    def progressbar(x):
        '''Pass-through replacement for tqdm: return the iterable unchanged.'''
        return x
15+
1016
# Splits on single spaces (dropped) or runs of word characters (kept via the
# capturing group). Raw string: '\w' in a normal literal is an invalid escape
# sequence and triggers a SyntaxWarning on recent Python versions.
TOKEN_SPLITTER = re.compile(r' |(\w+)')
1117

1218
DEFAULT_WINDOW_SIZE = 3
1319

20+
import logging
21+
import sys
22+
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG, format='%(asctime)s:%(levelname)s - %(message)s')
23+
logger = logging.getLogger()
24+
1425
def tokenize(line):
1526
is_group = False
1627
for token in re.split(TOKEN_SPLITTER, line):
@@ -134,7 +145,7 @@ def preprocess_source(source_file, window_size):
134145

135146
def preprocess_sources(source_files, window_size):
136147
'''creates line groups'''
137-
for source_file in source_files:
148+
for source_file in progressbar(source_files):
138149
yield from preprocess_source(source_file, window_size)
139150

140151

@@ -216,7 +227,7 @@ def group_codebase_files(matches):
216227
# a map from a pair of codebases to a map of pair of files
217228
# to list of pairs of matches
218229
codebases_map = defaultdict(lambda: defaultdict(list))
219-
for keygroup, occurences in matches:
230+
for keygroup, occurences in progressbar(matches):
220231
for line_a, line_b in pairs(occurences):
221232
base_a, base_b = line_a.codebase, line_b.codebase
222233
if base_a is base_b:
@@ -270,7 +281,7 @@ def group_lines(options, codebases_map):
270281
}
271282
'''
272283
res = {}
273-
for codebase_pair, file_map in codebases_map.items():
284+
for codebase_pair, file_map in progressbar(codebases_map.items()):
274285
file_res = {}
275286
for file_pair, matches in file_map.items():
276287
left_file, right_file = file_pair
@@ -347,7 +358,7 @@ def get_cluster(run):
347358

348359
file_res[file_pair] = clusters
349360
res[codebase_pair] = file_res
350-
return res
361+
return res
351362

352363

353364
def rate_grouped_lines(codebases_map):
@@ -381,6 +392,7 @@ def store(f, *args, **kwargs):
381392
}
382393
}
383394
'''
395+
logger.info('grouping codebases / files')
384396
codebase_file_groups = store(group_codebase_files, matches)
385397
'''
386398
{
@@ -391,6 +403,7 @@ def store(f, *args, **kwargs):
391403
}
392404
}
393405
'''
406+
logger.info('graph madness')
394407
lengthful_matches = store(group_lines, options, codebase_file_groups)
395408
return lengthful_matches, res
396409
# return store(rate_grouped_lines, lengthful_matches), res
@@ -400,9 +413,10 @@ def main(args=sys.argv[1:]):
400413
options = _trish_parser().parse_args(args=args)
401414
source_files = find_sources(options.targets, options.pattern)
402415
window_size = options.window_size
416+
logger.info('correlating sources')
403417
matches = correlate_sources(source_files, window_size)
404418
scores, metadata = process_matches(options, matches)
405-
pprint(metadata)
419+
# pprint(metadata)
406420
# for codebase_pair, score in scores.items():
407421
# codebase_a, codebase_b = codebase_pair
408422
# print(f'{score}\t{codebase_a.name}\t{codebase_b.name}')

0 commit comments

Comments
 (0)