# helmchartsduplicatefinder.py
import tarfile
import re
import operator
import sys
import os
# Matches a Helm/Go template expression such as ``{{ .Values.foo }}``.
# ``.`` does not match newlines, so a match never spans lines; the match is
# greedy, so several expressions on one line (and any text between them)
# collapse into a single placeholder.
_TEMPLATE_EXPR = re.compile(r"\{\{.*\}\}")


def dupefinder_template(s, threshold):
    """Count how often each value occurs in the text of one template file.

    Every ``{{ ... }}`` expression is first replaced by the literal word
    ``TEMPLATE``.  Each line is then stripped and split at its first colon;
    the stripped text after that colon is the line's "value".  Lines without
    a colon contribute the empty string as their value.

    Args:
        s: Full text of a template file.
        threshold: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A dict mapping each value to the number of lines that carry it.
    """
    s = _TEMPLATE_EXPR.sub("TEMPLATE", s)
    values = {}
    for line in s.split("\n"):
        # Only the first colon separates key from value, so values that
        # contain colons themselves (URLs, image tags) stay intact.
        _key, _sep, v = line.strip().partition(":")
        v = v.strip()
        values[v] = values.get(v, 0) + 1
    return values
def dupefinder(chartfile, threshold, verbose=False):
    """Find values that occur repeatedly across the templates of a chart.

    ``chartfile`` is either a ``.tgz`` archive or a directory that is walked
    recursively.  Only entries whose path contains ``/templates/``, ends in
    ``.yaml`` and does not contain ``/charts/`` are parsed; each one is
    passed to ``dupefinder_template`` and the per-file counts are summed.

    Args:
        chartfile: Path to a ``.tgz`` archive or to a chart directory.
        threshold: Minimum total number of occurrences for a value to be
            reported.
        verbose: When true, print a ``parse`` or ``skip`` line for every
            entry that is encountered.

    Returns:
        A list of ``(value, count)`` tuples sorted by count and then by value,
        both descending, restricted to values that reach ``threshold`` and
        are not in the built-in blacklist.
    """
    # Values that are never reported: the template placeholder in its
    # unquoted and quoted forms, the empty value, and two API version strings.
    blacklist = ("TEMPLATE", "\"TEMPLATE\"", "'TEMPLATE'", "", "v1", "extensions/v1beta1")
    values = {}

    def wanted(entryname):
        # Compare against a "/"-separated form so the checks also work when
        # the platform's path separator is not "/".
        name = entryname.replace(os.sep, "/")
        keep = "/templates/" in name and name.endswith(".yaml") and "/charts/" not in name
        if verbose:
            print("parse" if keep else "skip", entryname)
        return keep

    def merge(text):
        # Add the counts of one template file to the running totals.
        for v, count in dupefinder_template(text, threshold).items():
            values[v] = values.get(v, 0) + count

    if chartfile.endswith(".tgz"):
        with tarfile.open(chartfile) as tar:
            for entry in tar:
                if not wanted(entry.name):
                    continue
                template = tar.extractfile(entry)
                if template is None:
                    # Not a regular file (or a link to one): nothing to read.
                    continue
                with template:
                    merge(template.read().decode("utf-8"))
    else:
        for root, _directories, filenames in os.walk(chartfile):
            for filename in filenames:
                entryname = os.path.join(root, filename)
                if wanted(entryname):
                    # Read as UTF-8, the same encoding used for archive members.
                    with open(entryname, encoding="utf-8") as template:
                        merge(template.read())

    hitlist = sorted(values.items(), key=operator.itemgetter(1, 0), reverse=True)
    return [hit for hit in hitlist if hit[1] >= threshold and hit[0] not in blacklist]
if __name__ == "__main__":
    # Command-line entry point: exactly one argument, the chart to inspect.
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Syntax: {} <helmchartfile>".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    # The scan runs (and prints its verbose parse/skip lines) before the
    # report header; values must occur at least 3 times to be listed.
    report = dupefinder(arguments[0], 3, verbose=True)

    banner = "-----------------------------------"
    print(banner)
    print("Duplicate values without templates:")
    print(banner)
    for value, occurrences in report:
        print("{:2d} x {}".format(occurrences, value))