Skip to content

Commit f1b85e6

Browse files
committed
Do git diff within python to get rid of changed-files action
1 parent 361c5a7 commit f1b85e6

File tree

2 files changed

+175
-85
lines changed

2 files changed

+175
-85
lines changed

.github/workflows/readable-data-changes.yml

+4-14
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,12 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- uses: actions/checkout@v4
11+
with:
12+
fetch-depth: '0'
1113

1214
- uses: actions/setup-python@v5
1315
with:
1416
python-version: '3.10'
15-
16-
- name: Get changed files
17-
id: changed-files
18-
uses: tj-actions/changed-files@v43
19-
with:
20-
separator: "\n"
21-
old_new_files_separator: "\n"
22-
output_renamed_files_as_deleted_and_added: true
23-
include_all_old_new_renamed_files: true
24-
recover_deleted_files: true
2517

2618
- name: Parse asset changes to be more readable
2719
id: readable-assets
@@ -30,10 +22,8 @@ jobs:
3022
REPO: ${{ github.repository }}
3123
REPO_PATH: ${{ github.workspace }}
3224
PRNUM: ${{ github.event.number }}
33-
CHANGES_ALL: ${{ steps.changed-files.outputs.all_changed_and_modified_files }}
34-
CHANGES_DELETED: ${{ steps.changed-files.outputs.deleted_files }}
35-
CHANGES_ADDED: ${{ steps.changed-files.outputs.added_files }}
36-
CHANGES_RENAME_PAIRS: ${{ steps.changed-files.outputs.all_old_new_renamed_files }}
25+
COMMIT_A: ${{ github.event.pull_request.base.sha }}
26+
COMMIT_B: ${{ github.event.pull_request.head.sha }}
3727

3828
- name: Comment on PR
3929
uses: thollander/actions-comment-pull-request@v2

readableAssets.py

+171-71
Original file line numberDiff line numberDiff line change
@@ -1,128 +1,216 @@
11
import os
2+
import sys
23
import uuid
34
from pathlib import Path
45
from hashlib import sha256
56

67

7-
### util
8+
### inputs
9+
10+
inAction = os.getenv("GITHUB_ACTIONS") is not None
11+
12+
if inAction:
13+
REPO = os.environ.get("REPO")
14+
PR_NUM = os.environ.get("PRNUM")
15+
COMMIT_A = os.environ.get("COMMIT_A")
16+
COMMIT_B = os.environ.get("COMMIT_B")
17+
else:
18+
19+
args = sys.argv
20+
21+
helpMsg = f"""
22+
Parse asset changes to be more readable.
823
9-
def list_dir(folderpath = ".", file = False, folder = False, silent = True):
10-
results = []
11-
for filename in os.listdir(folderpath):
12-
fullpath = os.path.join(folderpath, filename)
13-
if not file and os.path.isfile(fullpath): continue
14-
if not folder and os.path.isdir(fullpath): continue
15-
# ext = os.path.splitext(filename)[-1].lower()
16-
results.append(filename)
17-
if not silent: print(filename)
18-
return results
24+
Usage:
25+
python {args[0]} <commit_SHA_A> <commit_SHA_B>
26+
27+
If no commit_SHA is provided, the script defaults to HEAD~ and HEAD.
28+
If one commit_SHA is provided, the script defaults to commit_SHA~ and commit_SHA.
29+
30+
"""
31+
32+
if len(args) > 3:
33+
print(helpMsg)
34+
sys.exit()
35+
36+
elif len(args) == 3:
37+
COMMIT_A = args[1]
38+
COMMIT_B = args[2]
39+
elif len(args) == 2:
40+
COMMIT_B = args[1]
41+
COMMIT_A = COMMIT_B + "~"
42+
else:
43+
print(helpMsg)
44+
response = input("Input Y to go with the default, diff-ing HEAD~ and HEAD.")
45+
if response != "Y": sys.exit()
46+
COMMIT_A = "HEAD~"
47+
COMMIT_B = "HEAD"
48+
49+
print("\n")
50+
51+
52+
### util
1953

2054
def read_txt(path):
    """Return the whole content of the text file at ``path``.

    The file is decoded with the ``utf-8-sig`` codec, i.e. as UTF-8 with a
    leading byte-order mark removed when one is present.
    """
    return Path(path).read_text(encoding='utf-8-sig')
2458

59+
def read_file_even_deleted(path):
    """Return the text of ``path``, recovering it from git when it is not on disk.

    When ``path`` is an existing regular file it is read with ``read_txt``.
    Otherwise the content is taken from the output of
    ``git show {COMMIT_A}:{path}``, i.e. the version of the file stored in
    the first of the two commits being compared.

    Args:
        path: File path as a string; it is interpolated into the git command
            as-is, so it is presumably relative to the repository root
            (verify against callers).

    Returns:
        The file content as a string. Whatever ``run_command`` returns is
        passed through, so a failed ``git show`` presumably yields an empty
        string.
    """
    if Path(path).is_file():
        return read_txt(path)

    recovered_file_content = run_command(f"git show {COMMIT_A}:{path}")
    # read_txt() decodes with utf-8-sig and therefore drops a leading
    # byte-order mark; drop it here as well so both branches return the same
    # text for the same file (a leading U+FEFF would otherwise stick to the
    # first token of the recovered content).
    if recovered_file_content.startswith("\ufeff"):
        recovered_file_content = recovered_file_content[1:]
    return recovered_file_content
64+
65+
def run_command(command):
66+
output = os.popen(command).read()
67+
return output
68+
2569
def set_multiline_output(name, value):
    """Append a multi-line step output to the file named by ``GITHUB_OUTPUT``.

    The entry is written in the heredoc-style form::

        {name}<<{delimiter}
        {value}
        {delimiter}

    where the delimiter is a freshly generated UUID.

    Args:
        name: Name of the output.
        value: Value of the output; may span several lines.

    Raises:
        KeyError: If the ``GITHUB_OUTPUT`` environment variable is not set.
    """
    delimiter = uuid.uuid1()
    # Write UTF-8 explicitly instead of relying on the locale's default
    # encoding, so non-ASCII text in the value is stored the same way
    # everywhere.
    with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as fh:
        fh.write(f'{name}<<{delimiter}\n{value}\n{delimiter}\n')
3175

32-
def set_output(name, value):
33-
with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
34-
print(f'{name}={value}', file=fh)
3576

77+
### diff-ing the changes
78+
79+
changes_all, changes_deleted, changes_added, renamed_pairs = [], [], [], []
3680

37-
### processing the inputs
81+
commands = [
82+
f"git diff --name-only --no-renames {COMMIT_A} {COMMIT_B}",
83+
f"git diff --diff-filter=D --name-only --no-renames {COMMIT_A} {COMMIT_B}",
84+
f"git diff --diff-filter=A --name-only --no-renames {COMMIT_A} {COMMIT_B}",
85+
f"git diff --name-status {COMMIT_A} {COMMIT_B}"
86+
]
3887

39-
repo = os.environ.get("REPO")
40-
repo_path = Path( os.environ.get("REPO_PATH") )
41-
pr_number = os.environ.get("PRNUM")
88+
changes = []
4289

43-
changes_all = [e.replace("\\","") for e in os.environ.get("CHANGES_ALL").split("\n")]
44-
changes_deleted = [e.replace("\\","") for e in os.environ.get("CHANGES_DELETED").split("\n")]
45-
changes_added = [e.replace("\\","") for e in os.environ.get("CHANGES_ADDED").split("\n")]
90+
for command in commands:
91+
output = run_command(command)
92+
output_list = []
93+
if output != "":
94+
output_list = output.strip().split("\n")
95+
changes.append(output_list)
4696

47-
renamed_pairs = os.environ.get("CHANGES_RENAME_PAIRS")
97+
changes_all, changes_deleted, changes_added, renamed_pairs = changes
4898

49-
renamed_before, renamed_after = [], []
50-
if ',' in renamed_pairs:
51-
renamed_pairs = [e.replace("\\","") for e in renamed_pairs.split("\n")]
52-
renamed_before = [e.split(',')[0] for e in renamed_pairs]
53-
renamed_after = [e.split(',')[1] for e in renamed_pairs]
99+
if renamed_pairs:
100+
renamed_pairs = [e.split("\t") for e in renamed_pairs if e[0] == "R"]
54101

102+
renamed_before = [e[1] for e in renamed_pairs]
103+
renamed_after = [e[2] for e in renamed_pairs]
55104

56-
### some functions
57105

58-
def path_to_object_id(p):
59-
s = p.replace("objects/","").replace("categories/","").replace(".txt","").strip()
60-
if s.isnumeric(): return int(s)
61-
return -9999
106+
### so we only read each object once
62107

63-
objects_dict = {} # so we read each object only once
108+
objects_dict = {}
64109

65110
def get_object_name_by_id(object_id):
66111
if object_id <= 0: return str(object_id)
67112
if object_id in objects_dict.keys(): return objects_dict[object_id]
68-
object_path = repo_path / "objects" / f"{object_id}.txt"
69-
# if not object_path.is_file():
70-
# object_path = Path("deleted_files") / "objects" / f"{object_id}.txt"
71-
# print( object_path )
72-
object_file_content = read_txt( object_path )
73-
object_name = object_file_content.splitlines()[1]
113+
object_path = f"objects/{object_id}.txt"
114+
object_file_content = read_file_even_deleted( object_path ).splitlines()
115+
if len(object_file_content) < 2:
116+
object_name = str(object_id)
117+
else:
118+
object_name = object_file_content[1]
74119
objects_dict[object_id] = object_name
75120
return object_name
76121

122+
def read_category_as_object_list(content):
123+
list_str = content[content.find("\n", content.find("numObjects="))+1:].splitlines()
124+
list_int = [int(e.split()[0]) for e in list_str]
125+
return list_int
126+
77127

78-
### go through the changed files, summarize each in a line
128+
### go through the changed files, parse object IDs into object names
79129

80130
object_lines, transition_lines, category_lines, other_lines = [], [], [], []
81131

82132
for changed_file in changes_all:
83133

84-
### whether the change is an added file, or a deleted one, or a modified one
85-
### note that renamed files are configured to show as deleted of old and added of new
86-
sign = "`.`"
134+
# whether the change is an added file, or a deleted one, or a modified one
135+
# note that renamed files are configured to show as deleted of old and added of new
136+
sign = "."
87137
if changed_file in changes_added:
88-
sign = "`+`"
138+
sign = "+"
89139
elif changed_file in changes_deleted:
90-
sign = "`-`"
140+
sign = "-"
141+
if inAction:
142+
sign = f"`{sign}`"
91143

92-
### the hash is used to link directly to the changed file on github site
93-
file_change_hash = 0
144+
# the hash is used to link to the changed file on github site
145+
file_hash = 0
94146
if changed_file in renamed_before:
95147
index = renamed_before.index(changed_file)
96-
file_change_hash = sha256(renamed_after[index].encode('utf-8')).hexdigest()
148+
file_hash = sha256(renamed_after[index].encode('utf-8')).hexdigest()
97149
else:
98-
file_change_hash = sha256(changed_file.encode('utf-8')).hexdigest()
150+
file_hash = sha256(changed_file.encode('utf-8')).hexdigest()
99151

100152
change_processed = False
101153

102154
if 'objects/' in changed_file or 'categories/' in changed_file:
103155

104-
object_id = path_to_object_id(changed_file)
105-
if object_id != -9999:
156+
id_str = changed_file.replace("objects/","").replace("categories/","").replace(".txt","").strip()
157+
if id_str.isnumeric():
158+
159+
object_id = int(id_str)
106160
object_name = get_object_name_by_id(object_id)
107161
object_name = object_name.replace("#", "<span>#</span>")
162+
163+
164+
165+
if inAction:
166+
167+
if 'categories/' in changed_file:
168+
category_before_output = run_command(f"git show {COMMIT_A}:{changed_file}")
169+
category_after_output = run_command(f"git show {COMMIT_B}:{changed_file}")
170+
171+
if category_before_output != "" and category_after_output != "":
172+
category_before = read_category_as_object_list(category_before_output)
173+
category_after = read_category_as_object_list(category_after_output)
174+
175+
added = list(set(category_after) - set(category_before))
176+
removed = list(set(category_before) - set(category_after))
177+
178+
category_details = ""
179+
if len(added) > 0:
180+
category_details += "\n".join( [ f"+ {e} {get_object_name_by_id(e)}" for e in added] )
181+
if len(removed) > 0:
182+
category_details += "\n".join( [ f"- {e} {get_object_name_by_id(e)}" for e in removed] )
183+
184+
line = f"""
185+
{sign} [{object_id}](https://github.com/{REPO}/pull/{PR_NUM}/files#diff-{file_hash}) {object_name}
186+
<details>
187+
<summary>Details</summary>
188+
189+
```diff
190+
{category_details}
191+
```
192+
193+
</details>
194+
"""
195+
else:
196+
line = f"{sign} [{object_id}](https://github.com/{REPO}/pull/{PR_NUM}/files#diff-{file_hash}) {object_name}"
197+
198+
else:
199+
line = f"{sign} {object_id} {object_name}"
200+
108201
if 'objects/' in changed_file:
109-
object_lines.append(f"{sign} [{object_id}](https://github.com/{repo}/pull/{pr_number}/files#diff-{file_change_hash}) {object_name}")
202+
object_lines.append(line)
110203
elif 'categories/' in changed_file:
111-
category_lines.append(f"{sign} [{object_id}](https://github.com/{repo}/pull/{pr_number}/files#diff-{file_change_hash}) {object_name}")
204+
category_lines.append(line)
112205

113206
change_processed = True
114207

115208
elif 'transitions/' in changed_file:
116209

117-
filename = changed_file.replace("transitions/","").replace(".txt","")
118-
filename_parts = filename.split("_")
119-
120-
transition_path = repo_path / "transitions" / f"{filename}.txt"
121-
# if not transition_path.is_file():
122-
# transition_path = Path("deleted_files") / "transitions" / f"{filename}.txt"
123-
# print( transition_path )
124-
transition_file_content = read_txt( transition_path )
210+
transition_file_content = read_file_even_deleted( changed_file )
125211
transition_file_content_parts = transition_file_content.split()
212+
213+
filename_parts = changed_file.replace("transitions/","").replace(".txt","").split("_")
126214

127215
a = int(filename_parts[0])
128216
b = int(filename_parts[1])
@@ -162,10 +250,12 @@ def get_object_name_by_id(object_id):
162250

163251
if flag != "": flag = f"({flag})"
164252

165-
transition_details = "\r\n".join( [ f"{e[0]}: {e[1]}" for e in trans_keyValuePairs] )
253+
if inAction:
254+
255+
transition_details = "\n".join( [ f"{e[0]}: {e[1]}" for e in trans_keyValuePairs] )
166256

167-
transition_line = f"""
168-
{sign} [{a} + {b} = {c} + {d} {flag}](https://github.com/{repo}/pull/{pr_number}/files#diff-{file_change_hash})
257+
transition_line = f"""
258+
{sign} [{a} + {b} = {c} + {d} {flag}](https://github.com/{REPO}/pull/{PR_NUM}/files#diff-{file_hash})
169259
<details>
170260
<summary><code class="notranslate">{a_name}</code> + <code class="notranslate">{b_name}</code> = <code class="notranslate">{c_name}</code> + <code class="notranslate">{d_name}</code></summary>
171261
@@ -175,28 +265,38 @@ def get_object_name_by_id(object_id):
175265
176266
</details>
177267
"""
268+
else:
269+
transition_line = f"{sign} {a} + {b} = {c} + {d} {flag}\n{a_name} + {b_name} = {c_name} + {d_name} {flag}\n\n"
270+
178271
transition_lines.append(transition_line)
179272
change_processed = True
180273

181274
if not change_processed:
182-
line = f"{sign} [link](https://github.com/{repo}/pull/{pr_number}/files#diff-{file_change_hash}) {changed_file}"
275+
if inAction:
276+
line = f"{sign} [link](https://github.com/{REPO}/pull/{PR_NUM}/files#diff-{file_hash}) {changed_file}"
277+
else:
278+
line = f"{sign} {changed_file}"
183279
other_lines.append(line)
184-
280+
185281

186282
### assemble the output message
187283

188284
message = ""
189285

190286
if len(object_lines) > 0:
191-
message += "## Objects:\r\n" + "\r\n".join(object_lines) + "\r\n"
287+
message += "## Objects:\n\n" + "\n".join(object_lines) + "\n\n"
192288

193289
if len(category_lines) > 0:
194-
message += "## Categories:\r\n" + "\r\n".join(category_lines) + "\r\n"
290+
message += "## Categories:\n\n" + "\n".join(category_lines) + "\n\n"
195291

196292
if len(transition_lines) > 0:
197-
message += "## Transitions:\r\n" + "".join(transition_lines) + "\r\n"
293+
message += "## Transitions:\n\n" + "".join(transition_lines) + "\n\n"
198294

199295
if len(other_lines) > 0:
200-
message += "## Others:\r\n" + "\r\n".join(other_lines)
296+
message += "## Others:\n\n" + "\n".join(other_lines)
201297

202-
set_multiline_output("OUTPUT_MESSAGE", message)
298+
299+
if inAction:
300+
set_multiline_output("OUTPUT_MESSAGE", message)
301+
else:
302+
print(message)

0 commit comments

Comments
 (0)