-
Notifications
You must be signed in to change notification settings - Fork 4
/
stats.py
98 lines (83 loc) · 2.69 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import json
import operator
import glob
import os
prefix = "serverless"
basefiles = glob.glob("*-literature-base.json")
if len(basefiles) == 1:
prefix = os.path.basename(basefiles[0]).split("-")[0]
analysis_filename = "{}-literature-analysis.json".format(prefix)
biblio_filename = "{}-literature-bibliography.json".format(prefix)
tech_filename = "{}-literature-technologies.json".format(prefix)
f = open(biblio_filename)
biblio = json.load(f)
f = open(analysis_filename)
analysis = json.load(f)
f = open(tech_filename)
tech = json.load(f)
years = {}
for ident in biblio:
y = biblio[ident]["year"]
years[y] = years.get(y, 0) + 1
countries = {}
technologies = {}
dtechnologies = {}
for ident in analysis:
cs = analysis[ident]["countries"]
for c in cs:
countries[c] = countries.get(c, 0) + 1
ts = analysis[ident]["technologies"]
for t in ts:
technologies[t] = technologies.get(t, 0) + 1
if not analysis[ident]["independent"]:
dtechnologies[t] = dtechnologies.get(t, 0) + 1
academic = 0
industry = 0
both = 0
for ident in analysis:
a = analysis[ident]["academic"]
i = analysis[ident]["industry"]
if a and i:
both += 1
elif a:
academic += 1
elif i:
industry += 1
r_academic = academic / len(analysis)
r_industry = industry / len(analysis)
r_both = both / len(analysis)
instmult = []
instuniq = set()
for ident in analysis:
insts = analysis[ident]["institutions"]
for inst in insts:
instmult.append(inst)
instuniq.add(inst)
def searchkeys(dl, keys_list):
if isinstance(dl, dict):
keys_list += dl.keys()
list(map(lambda x: searchkeys(x, keys_list), list(dl.values())))
elif isinstance(dl, list):
list(map(lambda x: searchkeys(x, keys_list), dl))
numkeys = 0
jsonfiles = glob.glob("*.json")
for jsonfile in jsonfiles:
f = open(jsonfile)
entries = json.load(f)
keys = []
searchkeys(entries, keys)
numkeys += len(keys)
allsorted = lambda x, *y: sorted(x.items(), key=operator.itemgetter(*y), reverse=True)
f = open("stats.txt", "w")
print("Years:", allsorted(years, 0), file=f)
print("Number of countries:", len(countries), file=f)
print("Countries:", allsorted(countries, 1, 0), file=f)
print("Ratio academic to industry to both:", academic, ":", industry, ":", both, "=", r_academic, r_industry, r_both, file=f)
print("Number of institutions:", len(instmult) / len(analysis), "per paper;", len(instuniq), "involved in total", file=f)
print("Number of technologies:", len(tech), "; open source:", len([x for x in tech if tech[x]["open-source"]]), file=f)
print("Technologies:", allsorted(technologies, 1, 0), file=f)
print("Technology by authors:", allsorted(dtechnologies, 1, 0), file=f)
print("JSON keys: {}".format(numkeys), file=f)
f.close()
print("Written stats to stats.txt.")