-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'opt-curation' into opt
- Loading branch information
Showing
126 changed files
with
656 additions
and
835 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,4 +25,5 @@ sf*/ | |
sf*.tar | ||
sf*.tar.gz | ||
|
||
paramgen/__pycache__/ | ||
tools/paramgen/__pycache__/ |
122 changes: 64 additions & 58 deletions
122
tools/paramgen/parameter_curation.py → paramgen/parameter_curation.py
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
import math | ||
from operator import itemgetter | ||
|
||
|
||
def allclose(a, b, rtol=1e-05, atol=1e-08):
    """Return True when ``a`` lies within tolerance of the reference ``b``.

    The permitted difference is ``atol + rtol * abs(b)``. The relative part
    scales with ``b`` only, so the check is not symmetric in its arguments.
    """
    return abs(a - b) <= (atol + rtol * abs(b))
|
||
|
||
def readFactors(f):
    """Parse a comma-separated factor table from an open text file.

    Each non-blank line becomes one row: the first field is kept as a
    string, every following field is converted with ``int``.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of producing a junk row, and the line terminator is
    removed so that a single-column row does not carry ``"\\n"`` in its id.

    Raises ValueError if a field after the first is not an integer.
    """
    res = []
    for line in f:
        line = line.rstrip("\r\n")
        if not line.strip():
            continue
        fields = line.split(",")
        res.append([fields[0]] + [int(item) for item in fields[1:]])
    return res
|
||
|
||
class Window:
    """A contiguous run of rows, described by inclusive row indexes."""

    def __init__(self, paramId, start, end):
        self.param = paramId  # column index the statistics below refer to
        self.start = start  # index of the first row (inclusive)
        self.end = end  # index of the last row (inclusive)
        self.avg = 0.0  # placeholder; assigned by the code that builds the window
        self.stddev = 0.0  # placeholder; assigned by the code that builds the window
        self.size = end - start + 1

    def __str__(self):
        res = "[%d, %d] " % (self.start, self.end)
        res += "size: %d, avg: %0.2f, stddev: %0.2f" % (self.size, self.avg, self.stddev)
        return res

    def __repr__(self):
        return "Window(param=%r, start=%r, end=%r, avg=%r, stddev=%r)" % (
            self.param, self.start, self.end, self.avg, self.stddev)
|
||
|
||
def getAverageCost(rows, key):
    """Return the arithmetic mean of ``key(row)`` over ``rows`` as a float.

    Raises ZeroDivisionError if ``rows`` is empty.
    """
    return float(sum(key(r) for r in rows)) / len(rows)
|
||
|
||
def getCostStdDev(rows, avg, key):
    """Return the population standard deviation of ``key(row)`` around ``avg``.

    ``avg`` is supplied by the caller and is not recomputed here. The sum of
    squared deviations is divided by ``len(rows)`` (not ``len(rows) - 1``).

    Raises ZeroDivisionError if ``rows`` is empty.
    """
    return math.sqrt(sum(math.pow(key(r) - avg, 2) for r in rows) / len(rows))
|
||
|
||
def updateAverageCost(avg, oldelem, newelem, samplesize):
    """Return the mean after replacing ``oldelem`` by ``newelem`` in a sample.

    ``avg`` is the mean before the replacement and ``samplesize`` the number
    of elements in the sample, which does not change.

    The difference is converted to float before dividing, matching the other
    averaging helpers in this module, so integer inputs never trigger
    truncating division.
    """
    return avg + float(newelem - oldelem) / samplesize
|
||
|
||
def _buildWindow(param, start, amount, s1, s2):
    """Create the window of ``amount`` rows whose first row index is ``start``.

    ``s1`` and ``s2`` are the sum and the sum of squares of the window's
    ``param`` values; the average and population standard deviation are
    derived from them.
    """
    window = Window(param, start, start + amount - 1)
    window.avg = float(s1) / amount
    # amount * s2 - s1^2 cannot be negative mathematically; the clamp only
    # protects sqrt from float rounding when the values are not integers.
    window.stddev = math.sqrt(max(float(amount * s2 - s1 * s1), 0.0)) / amount
    return window


def findWindows(factors, param, amount, bounds, minValue=10):
    """Find the least-varying windows of ``amount`` consecutive rows.

    Slides a window of ``amount`` rows over ``factors[bounds[0]:bounds[1]]``
    and returns every window whose standard deviation of column ``param``
    equals the minimum found, in order of increasing start index.

    Args:
        factors: sequence of rows; ``row[param]`` must be numeric.
        param: index of the column to analyse.
        amount: number of rows per window.
        bounds: ``(first, stop)`` half-open row range of ``factors`` to scan.
        minValue: the scan stops at the first window (after the initial one)
            whose last row has ``row[param] < minValue``. This presumably
            relies on the rows being sorted in descending order, as
            ``generate`` does -- confirm before using with other orderings.

    Returns:
        A non-empty list of ``Window`` objects. ``start`` and ``end`` are
        indexes into ``factors`` itself (not into the scanned slice), for
        every returned window including the first one.

    Raises:
        ValueError: if ``amount`` is smaller than 1 or larger than the number
            of rows in the scanned range.
    """
    offset = bounds[0]
    data = factors[offset:bounds[1]]
    if amount < 1 or amount > len(data):
        raise ValueError(
            "amount must be between 1 and %d, got %d" % (len(data), amount))

    values = [row[param] for row in data]

    # Running sum and sum of squares of the current window. Every window,
    # the first included, derives its statistics from these two numbers so
    # that equal windows yield bit-identical stddev values and the equality
    # test used for tie detection below is reliable.
    s1 = sum(values[:amount])
    s2 = sum(v * v for v in values[:amount])
    allWindows = [_buildWindow(param, offset, amount, s1, s2)]

    # The upper limit includes the window that ends on the last row.
    for start in range(1, len(values) - amount + 1):
        incoming = values[start + amount - 1]
        if incoming < minValue:
            break
        outgoing = values[start - 1]
        s1 += incoming - outgoing
        s2 += incoming * incoming - outgoing * outgoing
        allWindows.append(_buildWindow(param, offset + start, amount, s1, s2))

    best = min(w.stddev for w in allWindows)
    return [w for w in allWindows if w.stddev == best]
|
||
|
||
def mergeWindows(windows):
    """Fuse runs of adjacent, equally-averaged windows into single windows.

    A window continues the current run when its ``start`` is exactly one
    more than the previous window's ``start`` and its ``avg`` is close to
    the previous window's ``avg`` (see ``allclose``). ``windows`` is
    therefore expected to list neighbouring windows next to each other.

    The first window of each run is extended in place: its ``end`` becomes
    the ``end`` of the last window in the run and its ``size`` grows by one
    per absorbed window. Its ``avg`` and ``stddev`` are left untouched.

    Returns:
        One window per run, in input order. Every input window belongs to
        exactly one run, so a window that cannot be merged is returned
        unchanged (including the last one), and an empty input yields an
        empty list.
    """
    res = []
    merged = None  # first window of the run currently being extended
    previous = None  # most recently visited input window
    for window in windows:
        if (merged is not None
                and window.start == previous.start + 1
                and allclose(window.avg, previous.avg)):
            merged.end = window.end
            merged.size += 1
        else:
            merged = window
            res.append(merged)
        previous = window
    return res
|
||
|
||
def generate(factors, portion):
    """Select the ids of a block of rows with similar factor values.

    ``factors`` is a list of rows ``[id, value1, value2, ...]`` with at
    least one value column. The rows are sorted in descending order on all
    value columns, then ``findWindows`` looks for the windows of
    ``int(len(factors) * portion)`` consecutive rows with the smallest
    standard deviation in the first value column. While more than one
    candidate remains and further columns exist, the candidates are merged
    with ``mergeWindows`` and re-examined on the next column.

    Args:
        factors: list of rows; column 0 is the id, the rest are numeric.
        portion: fraction of the rows to select; expected to be at most 1.

    Returns:
        The ids (column 0) of the selected rows, in sorted order. An empty
        list is returned when ``factors`` is empty or ``portion`` is too
        small to select a single row.
    """
    if not factors:
        return []
    amount = int(len(factors) * portion)
    if amount < 1:
        return []

    params = len(factors[0]) - 1
    keys = list(range(1, params + 1))
    factors = sorted(factors, key=itemgetter(*keys), reverse=True)

    paramId = 1
    current_windows = findWindows(factors, paramId, amount, (0, len(factors)))

    # Break ties between equally good windows using the following columns.
    while len(current_windows) > 1 and paramId < params:
        paramId += 1
        current_windows = mergeWindows(current_windows)

        candidates = []
        for merged in current_windows:
            candidates.extend(
                findWindows(factors, paramId, amount, (merged.start, merged.end + 1)))

        # Keep every candidate that shares the smallest deviation, in the
        # order in which the candidates were found.
        best = min(c.stddev for c in candidates)
        current_windows = [c for c in candidates if c.stddev == best]

    first = current_windows[0].start
    return [row[0] for row in factors[first:first + amount]]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash

LDBC_FINBENCH_DATAGEN_JAR=target/ldbc_finbench_datagen-0.2.0-SNAPSHOT-jar-with-dependencies.jar
OUTPUT_DIR=out/

# Note: generate factor tables with --generate-factors

echo "start factor table generation"

# 'local[*]' is quoted so the shell passes it to Spark literally instead of
# treating [*] as a filename pattern; the variable expansions are quoted so
# paths containing spaces stay a single argument.
time spark-submit --master 'local[*]' \
    --class ldbc.finbench.datagen.LdbcDatagen \
    --driver-memory 480g \
    "${LDBC_FINBENCH_DATAGEN_JAR}" \
    --output-dir "${OUTPUT_DIR}" \
    --factor-format csv \
    --generate-factors

echo "start parameter curation"
Oops, something went wrong.