generated from fun-stack/example
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpostprocessing.go
122 lines (95 loc) · 2.72 KB
/
postprocessing.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main
import (
"bytes"
"context"
"database/sql"
"fmt"
"io"
"strings"
"time"
"github.com/pkg/errors"
"golang.org/x/exp/slog"
)
// SQL ranking expressions. Both end in "desc", so they are presumably used as
// ORDER BY terms; qnRankFormulaSQL is substituted into the qnranks.sql
// template by updateQNRanks.
const (
	// qnRankFormulaSQL raises ageHours times a prior-weighted ratio of
	// cumulative upvotes to fatigue-adjusted expected upvotes to the power
	// 0.8, then divides by an age-based decay term. The literal is split at
	// its top-level sub-expressions purely for readability; the pieces
	// concatenate at compile time into a single string.
	//
	// A commented-out variant kept for reference replaces the ratio with a
	// random draw:
	//
	//	pow(
	//		ageHours *
	//		sample_from_gamma_distribution(
	//			cumulativeUpvotes + overallPriorWeight,
	//			(
	//				1-exp(-fatigueFactor*cumulativeExpectedUpvotes)
	//			) / fatigueFactor + overallPriorWeight
	//		)
	//		, 0.8
	//	) / pow(
	//		ageHours + 2
	//		, gravity/0.8
	//	) desc
	qnRankFormulaSQL = "pow(" +
		"ageHours * (cumulativeUpvotes + overallPriorWeight)" +
		"/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + overallPriorWeight)" +
		", 0.8)" +
		" / pow(ageHours + 2, gravity/0.8) desc"

	// hnRankFormulaSQL divides (score-1) by the same age-based decay term
	// used in qnRankFormulaSQL.
	hnRankFormulaSQL = "(score-1) / pow(ageHours + 2, gravity/0.8) desc"
)
// crawlPostprocess runs the post-crawl SQL scripts inside tx, in a fixed
// order, and then refreshes the QN ranks.
//
// It stops at the first failing step and returns that error; a failure in
// updateQNRanks is wrapped with "updateQNRanks". The elapsed time is reported
// through crawlPostprocessingDuration.UpdateDuration when the function
// returns, and is also logged at Info level on success.
func (app app) crawlPostprocess(ctx context.Context, tx *sql.Tx) error {
	start := time.Now()
	defer crawlPostprocessingDuration.UpdateDuration(start)

	// Scripts run in exactly this order. A commented-out earlier version of
	// this list ran "upvote-rates.sql" as its last entry instead of
	// "penalties.sql".
	scripts := []string{
		"previous-crawl.sql",
		"resubmissions.sql",
		"raw-ranks.sql",
		"penalties.sql",
	}
	for _, script := range scripts {
		if err := executeSQLFile(ctx, tx, script); err != nil {
			return err
		}
	}

	if err := app.updateQNRanks(ctx, tx); err != nil {
		return errors.Wrap(err, "updateQNRanks")
	}

	app.logger.Info("Finished crawl postprocessing", slog.Duration("elapsed", time.Since(start)))
	return nil
}
// qnRanksSQL holds the contents of sql/qnranks.sql, read once when the package
// is initialized (readSQLSource panics if the file cannot be opened or read).
// updateQNRanks uses it as a fmt.Sprintf format string.
var qnRanksSQL = readSQLSource("qnranks.sql")
// updateQNRanks fills the qnranks.sql template with the default front-page
// parameters and the QN ranking formula, then executes the result inside tx.
//
// The statement is prepared with ctx so that cancellation also covers the
// prepare step, and it is closed before returning rather than staying open
// until the transaction ends. The elapsed time is logged at Debug level
// whether or not execution succeeded.
//
// It returns nil on success; otherwise the error is wrapped with the step
// that failed ("preparing updateQNRanksSQL" or "executing updateQNRanksSQL").
func (app app) updateQNRanks(ctx context.Context, tx *sql.Tx) error {
	start := time.Now()

	d := defaultFrontPageParams
	// Named query, not sql, so the database/sql package is not shadowed.
	query := fmt.Sprintf(qnRanksSQL, d.PriorWeight, d.OverallPriorWeight, d.Gravity, d.PenaltyWeight, d.FatigueFactor, qnRankFormulaSQL)

	stmt, err := tx.PrepareContext(ctx, query)
	if err != nil {
		return errors.Wrap(err, "preparing updateQNRanksSQL")
	}
	defer stmt.Close()

	_, err = stmt.ExecContext(ctx)
	app.logger.Debug("Finished executing updateQNRanks", slog.Duration("elapsed", time.Since(start)))

	// errors.Wrap returns nil for a nil error, so success yields nil here.
	return errors.Wrap(err, "executing updateQNRanksSQL")
}
// readSQLSource opens "sql/<filename>" through resources.Open and returns the
// file's entire contents as a string.
//
// It panics if the file cannot be opened or if reading it fails.
func readSQLSource(filename string) string {
	src, openErr := resources.Open("sql/" + filename)
	if openErr != nil {
		panic(openErr)
	}
	defer src.Close()

	var contents bytes.Buffer
	if _, copyErr := io.Copy(&contents, src); copyErr != nil {
		panic(copyErr)
	}
	return contents.String()
}
// executeSQLFile reads the named SQL script via readSQLSource and executes
// its statements one after another inside tx.
//
// The script is split on ";\n" after trimming surrounding spaces, newlines,
// carriage returns and semicolons. The split is purely textual: it does not
// understand string literals or comments that happen to contain ";\n".
// Fragments that are empty or whitespace-only (for example from ";\n;\n")
// are skipped instead of being sent to the database.
//
// Each statement is prepared with ctx, so cancellation also covers the
// prepare step, and is closed as soon as it has run. Execution stops at the
// first failure, which is returned wrapped with the file name; nil is
// returned when every statement succeeds. readSQLSource panics if the file
// cannot be read.
func executeSQLFile(ctx context.Context, tx *sql.Tx, filename string) error {
	// Named script/statement, not sql, so the database/sql package is not
	// shadowed.
	script := strings.Trim(readSQLSource(filename), " \n\r;")

	for _, statement := range strings.Split(script, ";\n") {
		if strings.TrimSpace(statement) == "" {
			continue
		}

		stmt, err := tx.PrepareContext(ctx, statement)
		if err != nil {
			return errors.Wrapf(err, "preparing SQL in file %s", filename)
		}

		_, err = stmt.ExecContext(ctx)
		// Close right away rather than deferring, so statements do not pile
		// up across iterations. The close error is deliberately ignored: the
		// exec result is what matters, and the transaction closes its
		// statements when it ends anyway.
		_ = stmt.Close()
		if err != nil {
			return errors.Wrapf(err, "executing SQL in file %s", filename)
		}
	}
	return nil
}