-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest.yaml
83 lines (83 loc) · 2.07 KB
/
ingest.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Source of the pipeline: a local CSV file read through the csv scanner.
input:
  file:
    paths:
      - "./amazon.csv"
    scanner:
      # Empty mapping = csv scanner with all of its default options.
      csv: {}
# Attach an embedding of a short textual summary to every record.
pipeline:
  processors:
    - branch:
        # Processors executed inside the branch; their result is only merged
        # back into the message through result_map below.
        processors:
          - ollama_embeddings:
              model: mxbai-embed-large
              # Prompt text that is embedded, interpolated from the record's
              # user, product, price and review fields.
              text: |
                User ${!this.user_name} bought a product called ${!this.product_name},
                which has the category ${!this.category} for ${!this.discounted_price}.
                The product description is ${!this.about_product}
                They gave a review: ${!this.review_content}
        # Store the branch result (the embedding) under `text_embedding`.
        result_map: |
          root.text_embedding = this
# Sink of the pipeline: insert every enriched record into the ClickHouse
# `sales` table.
output:
  sql_insert:
    driver: "clickhouse"
    dsn: "clickhouse://localhost:9000"
    # Creates the target table when it does not exist yet.
    # NOTE(review): the column `dicount_percentage` is misspelled. The name is
    # kept on purpose so that tables already created by this statement and any
    # queries against them keep working; rename it only with a migration.
    init_statement: |
      CREATE TABLE IF NOT EXISTS sales (
        txn_id TEXT,
        product_id TEXT,
        product_name TEXT,
        category TEXT,
        discounted_price TEXT,
        actual_price TEXT,
        dicount_percentage TEXT,
        rating TEXT,
        rating_count TEXT,
        about_product TEXT,
        user_id TEXT,
        user_name TEXT,
        review_title TEXT,
        review_content TEXT,
        img_link TEXT,
        product_link TEXT,
        text_embedding Array(Float32)
      ) ENGINE = MergeTree()
      ORDER BY txn_id
      PRIMARY KEY txn_id
    table: sales
    # Column order must match the order of values produced by args_mapping.
    columns:
      - txn_id
      - product_id
      - product_name
      - category
      - discounted_price
      - actual_price
      - dicount_percentage
      - rating
      - rating_count
      - about_product
      - user_id
      - user_name
      - review_title
      - review_content
      - img_link
      - product_link
      - text_embedding
    # One value per entry in `columns`, in the same order.
    # - txn_id is populated from the record's review_id field.
    # - dicount_percentage is read from `discount_percentage` and falls back
    #   to the misspelled `dicount_percentage` field, so the column is filled
    #   whichever spelling the CSV header uses (presumably the correct one;
    #   verify against the file).
    args_mapping: |
      root = [
        this.review_id,
        this.product_id,
        this.product_name,
        this.category,
        this.discounted_price,
        this.actual_price,
        this.discount_percentage.or(this.dicount_percentage),
        this.rating,
        this.rating_count,
        this.about_product,
        this.user_id,
        this.user_name,
        this.review_title,
        this.review_content,
        this.img_link,
        this.product_link,
        this.text_embedding
      ]