-
Notifications
You must be signed in to change notification settings - Fork 0
/
topo_order_commits.py
230 lines (183 loc) · 6.43 KB
/
topo_order_commits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#!/usr/bin/python3
import os, sys
from collections import deque
class CommitNode:
def __init__(self, commit_hash):
"""
:type commit_hash: str
"""
self.commit_hash = commit_hash
self.parents = set()
self.children = set()
"""
This function is used to find the .git directory that is present in the current directory or any of its parent directories.
If no .git directory is found, the program exits with an error message.
"""
def getPath():
# get current working directory
current = os.getcwd()
# returns true if path is an existing directory
is_valid_path = False
while current != "":
# append .git
check = current + "/.git"
# check if path is valid
is_valid_path = os.path.isdir(check)
# if valid, return
if is_valid_path:
return current
# else, delete
else:
current = current[: current.rfind("/")]
# .git not found, print to stderr
print("Not inside a Git repository", file=sys.stderr)
exit(1)
"""
This function navigates to the .git/refs/heads directory to get the list of local branch names and their corresponding commit hashes.
The information is returned as a dictionary with branch names as keys and commit hashes as values.
"""
def getBranch():
path = getPath() + "/.git/refs/heads"
branches = dict()
for root, dirs, files in os.walk(path):
for f in files:
branch = root + "/" + f
head = branch[branch.rfind("heads/") + 6 :]
commit = (open(branch).read()).strip()
branches[head] = commit
return branches
"""
This function creates a mapping of commit hashes to their corresponding branch names, using the information retrieved by getBranch()
"""
# returns dictionary where branch name outputted when given commit hash
def map_hash_to_branch():
branch = getBranch()
hash_to_branch = dict() # maps hash to branch name
for b, commit in branch.items():
hash_to_branch[commit] = b
return hash_to_branch
"""
This function builds the commit graph.
For each branch, it starts from the latest commit and traverses the commit history by following the parent commits.
The commits and their relationships are stored in CommitNode objects and added to the graph dictionary (commit hash to CommitNode mapping).
The function also identifies root commits (those without parents) and returns the graph and the set of root commits.
"""
def getGraph():
# branch
branch = getBranch()
# dictionary
graph = dict() # maps hash to commitnode object
stack = []
root_commits = set()
# traverse commit history by following parents of the head
# create a commitnode object for each new commit seen
# populate commitnode with parents
for b, commit in branch.items():
# add branch to stack
stack.append(commit)
while stack:
# pop off last commit
current_commit = stack.pop()
# create commit node object
c_object = CommitNode(current_commit)
# get git object
decompressed_data = os.popen("git cat-file -p " + current_commit).read()
# get the strings
strings = decompressed_data.split("\n")
has_parents = False
# find strings that start with parent and store them as parents in the commitnode object
for string in strings:
if string.startswith("parent"):
parent = string[7:]
# add parent to parent set
c_object.parents.add(parent)
stack.append(parent)
has_parents = True
# if commit has no parents, it is a root commit
if has_parents == False:
root_commits.add(current_commit)
if current_commit in graph:
pass
else:
# store commit object in graph
graph[current_commit] = c_object
# populate DAG with children using parental relationships
for commit, node in graph.items():
# get "parent" commits
parents = node.parents
# for each parent commit, add current commit as child
for p in parents:
parentnode = graph[p]
parentnode.children.add(commit)
return graph, root_commits
"""
This function does a topological sort using breadth first search.
It then prints it in reverse so that the earliest commits are at the end of the list.
"""
def topo_sort():
graph, roots = getGraph()
result = []
queue = deque()
for root in roots:
queue.append(root)
while queue:
# pop
current = queue.popleft()
# add to result
result.append(current)
# for each child
for child in graph[current].children:
# remove parent from child
graph[child].parents.remove(current)
# append child
if len(graph[child].parents) == 0:
queue.append(child)
return result[::-1]
def topo_order_commits():
graph, _ = getGraph()
order = topo_sort()
htb = map_hash_to_branch()
empty_line = False
for i in range(len(order) - 1):
# get commit
commit = order[i]
# get node object of current node
node = graph[commit]
if empty_line == True:
# sticky start
print("=")
for child in node.children:
print(child, end="")
print("")
# if tip, print branch
if commit in htb:
print(commit, htb[commit])
else:
print(commit)
empty_line = False
# next commit is parent of current commit
elif order[i + 1] in node.parents:
# if tip, print branch
if commit in htb:
print(commit, htb[commit])
else:
print(commit)
else:
# if tip, print branch
if commit in htb:
print(commit, htb[commit])
else:
print(commit)
# sticky end
for parent in node.parents:
print(parent + " ", end="")
print("=")
print("")
empty_line = True
commit = order[-1]
if commit in htb:
print(commit, htb[commit])
else:
print(commit)
if __name__ == "__main__":
topo_order_commits()