Skip to content

Commit 508d616

Browse files
authored
Merge pull request #337 from Southclaws/cache-ask-results
cache ask results in db
2 parents 0721fd0 + f69c1f8 commit 508d616

40 files changed

+4874
-113
lines changed

app/resources/datagraph/content.go

+9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/Southclaws/fault"
1515
"github.com/cixtor/readability"
1616
"github.com/microcosm-cc/bluemonday"
17+
"github.com/russross/blackfriday/v2"
1718
"github.com/samber/lo"
1819
"go.uber.org/zap"
1920
"golang.org/x/net/html"
@@ -146,6 +147,14 @@ func NewRichTextWithOptions(raw string, opts ...option) (Content, error) {
146147
return NewRichTextFromReader(strings.NewReader(raw), opts...)
147148
}
148149

150+
func NewRichTextFromMarkdown(md string) (Content, error) {
151+
html := blackfriday.Run([]byte(md), blackfriday.WithExtensions(
152+
blackfriday.NoEmptyLineBeforeBlock,
153+
))
154+
155+
return NewRichTextFromReader(strings.NewReader(string(html)))
156+
}
157+
149158
func NewRichTextFromReader(r io.Reader, opts ...option) (Content, error) {
150159
o := options{baseURL: "ignore:"}
151160
for _, opt := range opts {

app/resources/datagraph/content_test.go

+32
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,38 @@ func TestNewRichText(t *testing.T) {
126126
})
127127
}
128128

129+
func TestNewRichTextFromMarkdown(t *testing.T) {
130+
t.Run("simple", func(t *testing.T) {
131+
fmd, err := NewRichTextFromMarkdown(`To start with data science, it is essential to begin with a practical and step-by-step approach. First, explore platforms like Kaggle, which provide datasets that are accessible for beginners. While Kaggle may appear daunting at first, you can choose beginner-friendly tutorials and datasets that interest you. Begin by downloading and inspecting these datasets to get familiar with their structure and content. Concurrently, work on crafting questions from the data to guide your exploration—this helps in developing a problem-solving mindset.
132+
133+
Consistency in practicing with data, asking for advice, and seeking support, such as shared links or files, are also key steps. Keep in mind that experience and understanding grow steadily through practice rather than seeking perfection right away.
134+
135+
References:
136+
- sdr:thread/cto7n8ifunp55p1bujv0: Emphasized the importance of staying practical and using beginner tutorials and platforms like Kaggle.
137+
- sdr:thread/cto7nm2funp55p1bujvg: Provided advice on starting with data, forming questions, and the value of consistent practice.
138+
`)
139+
140+
check(t, Content{
141+
short: `To start with data science, it is essential to begin with a practical and step-by-step approach. First, explore platforms like...`,
142+
links: []string{},
143+
media: []string{},
144+
})(fmd, err)
145+
146+
rendered := fmd.HTML()
147+
assert.Equal(t, `<body><p>To start with data science, it is essential to begin with a practical and step-by-step approach. First, explore platforms like Kaggle, which provide datasets that are accessible for beginners. While Kaggle may appear daunting at first, you can choose beginner-friendly tutorials and datasets that interest you. Begin by downloading and inspecting these datasets to get familiar with their structure and content. Concurrently, work on crafting questions from the data to guide your exploration—this helps in developing a problem-solving mindset.</p>
148+
149+
<p>Consistency in practicing with data, asking for advice, and seeking support, such as shared links or files, are also key steps. Keep in mind that experience and understanding grow steadily through practice rather than seeking perfection right away.</p>
150+
151+
<p>References:</p>
152+
153+
<ul>
154+
<li>sdr:thread/cto7n8ifunp55p1bujv0: Emphasized the importance of staying practical and using beginner tutorials and platforms like Kaggle.</li>
155+
<li>sdr:thread/cto7nm2funp55p1bujvg: Provided advice on starting with data, forming questions, and the value of consistent practice.</li>
156+
</ul>
157+
</body>`, rendered)
158+
})
159+
}
160+
129161
func TestSplit(t *testing.T) {
130162
r := require.New(t)
131163
a := assert.New(t)

app/resources/question/question.go

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package question
2+
3+
import (
4+
"github.com/Southclaws/storyden/app/resources/account"
5+
"github.com/Southclaws/storyden/app/resources/datagraph"
6+
"github.com/Southclaws/storyden/internal/ent"
7+
"github.com/rs/xid"
8+
)
9+
10+
// Question is the resource model that Map builds from an *ent.Question row.
//
// ID, Slug and Query are copied from the row unchanged. Result is the row's
// stored result string parsed with datagraph.NewRichText, and Author is the
// account mapped from the row's author edge.
type Question struct {
11+
ID xid.ID
12+
Slug string
13+
Query string
14+
Result datagraph.Content
15+
Author account.Account
16+
}
17+
18+
func Map(in *ent.Question) (*Question, error) {
19+
authorEdge, err := in.Edges.AuthorOrErr()
20+
if err != nil {
21+
return nil, err
22+
}
23+
24+
result, err := datagraph.NewRichText(in.Result)
25+
if err != nil {
26+
return nil, err
27+
}
28+
29+
author, err := account.MapAccount(authorEdge)
30+
if err != nil {
31+
return nil, err
32+
}
33+
34+
return &Question{
35+
ID: in.ID,
36+
Slug: in.Slug,
37+
Query: in.Query,
38+
Result: result,
39+
Author: *author,
40+
}, nil
41+
}

app/resources/question/repo.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package question
2+
3+
import (
4+
"context"
5+
6+
"github.com/Southclaws/fault"
7+
"github.com/Southclaws/fault/fctx"
8+
"github.com/Southclaws/storyden/app/resources/account"
9+
"github.com/Southclaws/storyden/app/resources/datagraph"
10+
"github.com/Southclaws/storyden/internal/ent"
11+
"github.com/Southclaws/storyden/internal/ent/question"
12+
"github.com/gosimple/slug"
13+
"github.com/rs/xid"
14+
)
15+
16+
// Repository stores and retrieves questions through an ent client.
type Repository struct {
17+
db *ent.Client
18+
}
19+
20+
func New(db *ent.Client) *Repository {
21+
return &Repository{db: db}
22+
}
23+
24+
func (r *Repository) Store(ctx context.Context, accountID account.AccountID, query string, result datagraph.Content) (*Question, error) {
25+
create := r.db.Question.Create()
26+
mutate := create.Mutation()
27+
28+
slug := slug.Make(query)
29+
30+
mutate.SetSlug(slug)
31+
mutate.SetQuery(query)
32+
mutate.SetResult(result.HTML())
33+
mutate.SetAccountID(xid.ID(accountID))
34+
35+
create.OnConflictColumns("slug").UpdateNewValues()
36+
37+
res, err := create.Save(ctx)
38+
if err != nil {
39+
return nil, fault.Wrap(err, fctx.With(ctx))
40+
}
41+
42+
q, err := r.db.Question.Query().
43+
Where(question.ID(res.ID)).
44+
WithAuthor(func(aq *ent.AccountQuery) {
45+
aq.WithAccountRoles(func(arq *ent.AccountRolesQuery) {
46+
arq.WithRole()
47+
})
48+
}).
49+
Only(ctx)
50+
if err != nil {
51+
return nil, fault.Wrap(err, fctx.With(ctx))
52+
}
53+
54+
return Map(q)
55+
}
56+
57+
func (r *Repository) Get(ctx context.Context, id xid.ID) (*Question, error) {
58+
q, err := r.db.Question.Query().
59+
Where(question.ID(id)).
60+
WithAuthor(func(aq *ent.AccountQuery) {
61+
aq.WithAccountRoles(func(arq *ent.AccountRolesQuery) {
62+
arq.WithRole()
63+
})
64+
}).
65+
Only(ctx)
66+
if err != nil {
67+
return nil, err
68+
}
69+
70+
return Map(q)
71+
}
72+
73+
func (r *Repository) GetByQuerySlug(ctx context.Context, query string) (*Question, error) {
74+
slug := slug.Make(query)
75+
76+
q, err := r.db.Question.Query().
77+
Where(question.Slug(slug)).
78+
WithAuthor(func(aq *ent.AccountQuery) {
79+
aq.WithAccountRoles(func(arq *ent.AccountRolesQuery) {
80+
arq.WithRole()
81+
})
82+
}).
83+
Only(ctx)
84+
if err != nil {
85+
return nil, err
86+
}
87+
88+
return Map(q)
89+
}

app/resources/resources.go

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/Southclaws/storyden/app/resources/profile/follow_querier"
4444
"github.com/Southclaws/storyden/app/resources/profile/follow_writer"
4545
"github.com/Southclaws/storyden/app/resources/profile/profile_search"
46+
"github.com/Southclaws/storyden/app/resources/question"
4647
"github.com/Southclaws/storyden/app/resources/settings"
4748
"github.com/Southclaws/storyden/app/resources/tag/tag_querier"
4849
"github.com/Southclaws/storyden/app/resources/tag/tag_writer"
@@ -94,6 +95,7 @@ func Build() fx.Option {
9495
participant_querier.New,
9596
participant_writer.New,
9697
hydrate.New,
98+
question.New,
9799
),
98100
)
99101
}

app/services/semdex/asker/asker.go

+22-2
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,36 @@ import (
88
"github.com/Southclaws/fault"
99
"github.com/Southclaws/fault/fctx"
1010
"github.com/Southclaws/fault/ftag"
11+
"go.uber.org/zap"
1112

1213
"github.com/Southclaws/storyden/app/resources/pagination"
14+
"github.com/Southclaws/storyden/app/resources/question"
1315
"github.com/Southclaws/storyden/app/services/search/searcher"
1416
"github.com/Southclaws/storyden/app/services/semdex"
1517
"github.com/Southclaws/storyden/internal/config"
1618
"github.com/Southclaws/storyden/internal/infrastructure/ai"
1719
)
1820

19-
func New(cfg config.Config, searcher semdex.Searcher, prompter ai.Prompter) (semdex.Asker, error) {
21+
func New(
22+
cfg config.Config,
23+
logger *zap.Logger,
24+
searcher semdex.Searcher,
25+
prompter ai.Prompter,
26+
questions *question.Repository,
27+
) (semdex.Asker, error) {
28+
asker, err := newAsker(cfg, searcher, prompter)
29+
if err != nil {
30+
return nil, err
31+
}
32+
33+
return newCachedAsker(
34+
logger,
35+
asker,
36+
questions,
37+
)
38+
}
39+
40+
func newAsker(cfg config.Config, searcher semdex.Searcher, prompter ai.Prompter) (semdex.Asker, error) {
2041
if cfg.SemdexProvider != "" && cfg.LanguageModelProvider == "" {
2142
return nil, fault.New("semdex requires a language model provider to be enabled")
2243
}
@@ -32,7 +53,6 @@ func New(cfg config.Config, searcher semdex.Searcher, prompter ai.Prompter) (sem
3253
return newPerplexityAsker(cfg, searcher)
3354

3455
default:
35-
3656
return &defaultAsker{
3757
searcher: searcher,
3858
prompter: prompter,

app/services/semdex/asker/cached.go

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package asker
2+
3+
import (
4+
"context"
5+
"strings"
6+
"time"
7+
8+
htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
9+
"github.com/Southclaws/fault"
10+
"github.com/Southclaws/fault/fctx"
11+
"go.uber.org/zap"
12+
13+
"github.com/Southclaws/storyden/app/resources/account"
14+
"github.com/Southclaws/storyden/app/resources/datagraph"
15+
"github.com/Southclaws/storyden/app/resources/question"
16+
"github.com/Southclaws/storyden/app/services/authentication/session"
17+
"github.com/Southclaws/storyden/app/services/semdex"
18+
)
19+
20+
// cachedAsker is an Asker that first looks for a stored answer to the query
// in the question repository and only calls the wrapped asker when that
// lookup returns an error. Answers produced by the wrapped asker are written
// back to the repository; a failure to do so is reported through logger.
type cachedAsker struct {
21+
logger *zap.Logger
22+
asker semdex.Asker
23+
questions *question.Repository
24+
}
25+
26+
func newCachedAsker(
27+
logger *zap.Logger,
28+
asker semdex.Asker,
29+
questions *question.Repository,
30+
) (semdex.Asker, error) {
31+
return &cachedAsker{
32+
logger: logger,
33+
asker: asker,
34+
questions: questions,
35+
}, nil
36+
}
37+
38+
func (a *cachedAsker) Ask(ctx context.Context, q string) (func(yield func(string, error) bool), error) {
39+
cached, err := a.questions.GetByQuerySlug(ctx, q)
40+
if err == nil {
41+
return a.cachedResult(ctx, cached)
42+
}
43+
44+
return a.livePrompt(ctx, q)
45+
}
46+
47+
func (a *cachedAsker) cachedResult(ctx context.Context, q *question.Question) (func(yield func(string, error) bool), error) {
48+
md, err := htmltomarkdown.ConvertNode(q.Result.HTMLTree())
49+
if err != nil {
50+
return nil, fault.Wrap(err, fctx.With(ctx))
51+
}
52+
53+
chunks := strings.SplitAfter(string(md), " ")
54+
55+
return func(yield func(string, error) bool) {
56+
for _, ch := range chunks {
57+
select {
58+
case <-ctx.Done():
59+
return
60+
61+
default:
62+
if !yield(ch, nil) {
63+
return
64+
}
65+
}
66+
time.Sleep(time.Millisecond * 10)
67+
}
68+
}, nil
69+
}
70+
71+
func (a *cachedAsker) livePrompt(ctx context.Context, q string) (func(yield func(string, error) bool), error) {
72+
accountID, err := session.GetAccountID(ctx)
73+
if err != nil {
74+
return nil, fault.Wrap(err, fctx.With(ctx))
75+
}
76+
77+
iter, err := a.asker.Ask(ctx, q)
78+
if err != nil {
79+
return nil, fault.Wrap(err, fctx.With(ctx))
80+
}
81+
82+
return func(yield func(string, error) bool) {
83+
acc := []string{}
84+
85+
defer func() {
86+
err := a.cacheResult(ctx, accountID, q, acc)
87+
if err != nil {
88+
a.logger.Error("failed to cache result", zap.Error(err))
89+
}
90+
}()
91+
92+
for chunk, err := range iter {
93+
if err != nil {
94+
yield("", err)
95+
return
96+
}
97+
98+
acc = append(acc, chunk)
99+
if !yield(chunk, nil) {
100+
return
101+
}
102+
}
103+
}, nil
104+
}
105+
106+
func (a *cachedAsker) cacheResult(ctx context.Context, accountID account.AccountID, q string, chunks []string) error {
107+
result := strings.Join(chunks, "")
108+
109+
acc, err := datagraph.NewRichTextFromMarkdown(result)
110+
if err != nil {
111+
return fault.Wrap(err, fctx.With(ctx))
112+
}
113+
114+
_, err = a.questions.Store(ctx, accountID, q, acc)
115+
if err != nil {
116+
return fault.Wrap(err, fctx.With(ctx))
117+
}
118+
119+
return nil
120+
}

app/services/semdex/asker/default.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ type defaultAsker struct {
1616
prompter ai.Prompter
1717
}
1818

19-
func (a *defaultAsker) Ask(ctx context.Context, q string) (chan string, chan error) {
19+
func (a *defaultAsker) Ask(ctx context.Context, q string) (func(yield func(string, error) bool), error) {
2020
t, err := buildContextPrompt(ctx, a.searcher, q)
2121
if err != nil {
22-
ech := make(chan error, 1)
23-
ech <- fault.Wrap(err, fctx.With(ctx))
24-
return nil, ech
22+
return nil, fault.Wrap(err, fctx.With(ctx))
2523
}
2624

27-
chch, ech := a.prompter.PromptStream(ctx, t)
25+
iter, err := a.prompter.PromptStream(ctx, t)
26+
if err != nil {
27+
return nil, fault.Wrap(err, fctx.With(ctx))
28+
}
2829

29-
return chch, ech
30+
return iter, nil
3031
}

0 commit comments

Comments
 (0)