-
Notifications
You must be signed in to change notification settings - Fork 0
/
CsvDiffToSheet.py
193 lines (174 loc) · 8.69 KB
/
CsvDiffToSheet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
__author__ = "Sean Asiala"
__copyright__ = "Copyright (C) 2020 Sean Asiala"
# TODO(sasiala): may need to update naming to match general python conventions
import logger
import re
def check_change_sub(line, out_file):
    """Write a 'Change/Sub' row for a changed-record diff group.

    Two shapes of *line* are recognised:

    * three records: ``- old``, a ``?`` hint record, then ``+ new``
    * two records: ``- old`` then ``+ new`` (no hint record)

    The old and new records are split on ',' and compared column by column.
    Equal columns are written once; differing columns are written as
    ``old||new``.

    Args:
        line: Text of the diff group, records separated by newlines.
        out_file: Writable text file-like object that receives the row.

    Returns:
        True if *line* matched one of the shapes and a row was written,
        otherwise False (nothing is written).
    """
    # Raw strings keep regex escapes such as \+ and \? away from Python's own
    # string-escape processing (non-raw '\+' is an invalid string escape).
    # TODO(sasiala): change to similar format to new/deleted lines
    changed = re.match(r'^- (.*)\n\?.*\n\+ (.*)$', line)
    if not changed:
        # TODO(sasiala): not sure if this check belongs here; will this always be a subtraction?
        # TODO(sasiala): having this check here requires two calls of this function in check_compound. It's awkward.
        changed = re.match(r'^- (.*)\n\+ (.*)$', line)
    if not changed:
        return False

    # The capture groups hold the record text without the '- ' / '+ ' marker
    # and without any trailing newline.
    old_cols = changed.group(1).split(',')
    new_cols = changed.group(2).split(',')

    # zip() stops at the shorter record, so surplus columns are dropped.
    # TODO(sasiala): deal with lines of diff sizes (skipping rest of output, currently)
    merged = [
        old if old == new else f'{old}||{new}'
        for old, new in zip(old_cols, new_cols)
    ]
    out_file.write('Change/Sub,')
    out_file.write(','.join(merged))
    out_file.write('\n')
    return True
def check_change_add(line, out_file):
    """Write a 'Change/Add' row for a changed-record diff group.

    *line* must consist of three records: ``- old``, ``+ new`` and a trailing
    ``? `` hint record. The old and new records are split on ',' and compared
    column by column. Equal columns are written once; differing columns are
    written as ``old||new``.

    Args:
        line: Text of the diff group, records separated by newlines.
        out_file: Writable text file-like object that receives the row.

    Returns:
        True if *line* matched and a row was written, otherwise False
        (nothing is written).
    """
    # Raw string: \+ and \? are regex escapes, not Python string escapes.
    # TODO(sasiala): change to similar format to new/deleted lines
    changed = re.match(r'^- (.*)\n\+ (.*)\n\? .*$', line)
    if not changed:
        return False

    # The capture groups hold the record text without the '- ' / '+ ' marker.
    old_cols = changed.group(1).split(',')
    new_cols = changed.group(2).split(',')

    # zip() stops at the shorter record, so surplus columns are dropped.
    # TODO(sasiala): deal with lines of diff sizes (skipping rest of output, currently)
    merged = [
        old if old == new else f'{old}||{new}'
        for old, new in zip(old_cols, new_cols)
    ]
    out_file.write('Change/Add,')
    out_file.write(','.join(merged))
    out_file.write('\n')
    return True
def check_change_add_and_sub(line, out_file):
    """Write a 'Change/Add/Sub' row for a changed-record diff group.

    *line* must consist of four records: ``- old``, a ``? `` hint record,
    ``+ new`` and a second ``? `` hint record. The old and new records are
    split on ',' and compared column by column. Equal columns are written
    once; differing columns are written as ``old||new``.

    Args:
        line: Text of the diff group, records separated by newlines.
        out_file: Writable text file-like object that receives the row.

    Returns:
        True if *line* matched and a row was written, otherwise False
        (nothing is written).
    """
    # Raw string: \+ and \? are regex escapes, not Python string escapes.
    # Capturing the two data records directly avoids a second split pass
    # over the hint records.
    changed = re.match(r'^- (.*)\n\? .*\n\+ (.*)\n\? .*$', line)
    if not changed:
        return False

    # TODO(sasiala): split on "," or similar, instead of , (need to think about strings w/ comma)
    old_cols = changed.group(1).split(',')
    new_cols = changed.group(2).split(',')

    # zip() stops at the shorter record, so surplus columns are dropped.
    # TODO(sasiala): deal with lines of diff sizes (skipping rest of output, currently)
    merged = [
        old if old == new else f'{old}||{new}'
        for old, new in zip(old_cols, new_cols)
    ]
    out_file.write('Change/Add/Sub,')
    out_file.write(','.join(merged))
    out_file.write('\n')
    return True
# TODO(sasiala): new line is broken
def check_new_line(line, out_file):
    """Write a 'New Line' row if *line* is a single added record.

    An added record starts with ``+ `` followed by at least one character.
    The marker is removed and the rest is written after ``New Line,``.

    Args:
        line: One diff record. A single trailing newline is tolerated and
            does not produce an extra, empty row.
        out_file: Writable text file-like object that receives the row.

    Returns:
        True if *line* was an added record and a row was written, otherwise
        False (nothing is written).
    """
    # Raw string: \+ is a regex escape, not a Python string escape.
    # '.' never matches a newline, so group(1) is exactly the record text.
    added = re.match(r'^\+ (.+)$', line)
    if not added:
        return False
    out_file.write('New Line,')
    out_file.write(added.group(1))
    out_file.write('\n')
    return True
def check_deleted_line(line, out_file):
    """Write a 'Deleted Line' row if *line* is a single removed record.

    A removed record starts with ``- `` followed by at least one character.
    The marker is removed and the rest is written after ``Deleted Line,``.

    Args:
        line: One diff record. A single trailing newline is tolerated and
            does not produce an extra, empty row.
        out_file: Writable text file-like object that receives the row.

    Returns:
        True if *line* was a removed record and a row was written, otherwise
        False (nothing is written).
    """
    # '.' never matches a newline, so group(1) is exactly the record text.
    removed = re.match(r'^- (.+)$', line)
    if not removed:
        return False
    out_file.write('Deleted Line,')
    out_file.write(removed.group(1))
    out_file.write('\n')
    return True
def check_compound(line_in, out_file):
    """Classify one diff group and write the matching sheet rows.

    The group is split into records on newlines. The leading records are
    first tried as a change group, in this order: four-record
    Change/Add/Sub, three-record Change/Add, three-record Change/Sub, and
    (only when the group has exactly two records) two-record Change/Sub.
    Whatever records are not consumed by that step are then handled one at a
    time as empty (skipped), new, deleted or unchanged records. Anything
    else is logged as an unexpected format.

    Args:
        line_in: Text of one diff group, records separated by newlines.
        out_file: Writable text file-like object that receives the rows.

    Returns:
        Always True at present; unexpected records are only logged.
    """
    line = line_in.split('\n')
    left_over = line
    logger.log('diff_to_sheet.log', f'Line: {line_in}', logger.LogLevel.DEBUG)

    # TODO(sasiala): am I sure these can't be in the middle of the string?
    # TODO(sasiala): there is a case for - .*\n+ .*$ that needs to be handled.
    #                It is similar to check_change_sub and can be seen in sheet 0 of test.
    if len(line) >= 4 and check_change_add_and_sub('\n'.join(line[0:4]), out_file):
        left_over = line[4:]
        logger.log('diff_to_sheet.log', 'Compound:Change/Add/Sub', logger.LogLevel.DEBUG)
    elif len(line) >= 3 and check_change_add('\n'.join(line[0:3]), out_file):
        left_over = line[3:]
        logger.log('diff_to_sheet.log', 'Compound:Change/Add', logger.LogLevel.DEBUG)
    elif len(line) >= 3 and check_change_sub('\n'.join(line[0:3]), out_file):
        left_over = line[3:]
        logger.log('diff_to_sheet.log', 'Compound:Change/Sub', logger.LogLevel.DEBUG)
    elif len(line) == 2 and check_change_sub('\n'.join(line), out_file):
        left_over = line[2:]
        logger.log('diff_to_sheet.log', 'Compound:Change/Sub(len 2)', logger.LogLevel.DEBUG)

    for i in left_over:
        # TODO(sasiala): should the check for an empty line be here?
        #                Or should that be fixed when generating the file used as input here?
        # Records come from split('\n') and so contain no newline; exact
        # comparison is therefore enough to spot a bare marker or blank record.
        if i in ('- ', '+ ', ' ', ''):
            logger.log('diff_to_sheet.log', 'Compound:Skipped empty +/- line', logger.LogLevel.DEBUG)
        elif check_new_line(i, out_file):
            logger.log('diff_to_sheet.log', 'Compound:New Line', logger.LogLevel.DEBUG)
        elif check_deleted_line(i, out_file):
            logger.log('diff_to_sheet.log', 'Compound:Deleted Line', logger.LogLevel.DEBUG)
        elif re.match(r'^ .+$', i):
            logger.log('diff_to_sheet.log', 'Compound:No Change', logger.LogLevel.DEBUG)
            out_file.write('No Change,')
            # Only the first leading space is removed from the record.
            out_file.write(i[1:])
            out_file.write('\n')
        else:
            # unexpected format in diff
            logger.log('diff_to_sheet.log', 'Compound:Curious (unexpected diff format)...', logger.LogLevel.ERROR)
            logger.log('diff_to_sheet.log', i, logger.LogLevel.ERROR)
            logger.log('diff_to_sheet.log', '/Curious', logger.LogLevel.ERROR)
            # TODO(sasiala): return False
    return True
def diff_to_sheet(csv_diff_path, out_path):
    """Convert a CSV diff file into a sheet CSV.

    The diff file is read in full and split into groups on the separator
    ``'\\n \\n'`` (a record holding a single space). Each group is passed to
    check_compound, which writes the resulting rows to the output file.

    Args:
        csv_diff_path: Path of the diff file to read.
        out_path: Path of the sheet CSV to create or overwrite.

    Returns:
        True when the sheet was written. False when either file could not
        be opened; the failure is logged.
    """
    logger.log('diff_to_sheet.log', f'Creating sheet csv for {csv_diff_path}', logger.LogLevel.DEBUG)

    # Read the input first so that a missing or unreadable diff file does not
    # leave behind a freshly truncated output file.
    try:
        with open(csv_diff_path, 'r') as csv_diff:
            lines = csv_diff.read().split('\n \n')
    except OSError:
        logger.log('diff_to_sheet.log', f'Failed to create sheet csv for {csv_diff_path}. Error: could not open csv_diff', logger.LogLevel.ERROR)
        return False

    # open() raises on failure rather than falling through, so the failure
    # has to be caught for the error to be logged and False returned.
    try:
        out_file = open(out_path, 'w')
    except OSError:
        logger.log('diff_to_sheet.log', f'Failed to create sheet csv for {csv_diff_path}. Error: could not open out_path', logger.LogLevel.ERROR)
        return False

    with out_file:
        for line in lines:
            if not check_compound(line, out_file):
                # unexpected format in diff
                logger.log('diff_to_sheet.log', 'Curious (unexpected diff format)...', logger.LogLevel.ERROR)
                logger.log('diff_to_sheet.log', line, logger.LogLevel.ERROR)
                logger.log('diff_to_sheet.log', '/Curious', logger.LogLevel.ERROR)
                # TODO(sasiala): return False

    logger.log('diff_to_sheet.log', f'Successfully completed sheet csv for {csv_diff_path}', logger.LogLevel.DEBUG)
    return True