Skip to content

Commit 07c1078

Browse files
Fix #28 prompts.duckdb is not picked up unless I pass --duckdb (#31)
1 parent d5b2dc3 commit 07c1078

File tree

4 files changed

+143
-161
lines changed

4 files changed

+143
-161
lines changed

.github/FUNDING.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
github: [Florents-Tselai]

README.md

+23-71
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,20 @@
88
[![codecov](https://codecov.io/gh/Florents-Tselai/tsellm/branch/main/graph/badge.svg)](https://codecov.io/gh/Florents-Tselai/tsellm)
99
[![License](https://img.shields.io/badge/BSD%20license-blue.svg)](https://github.com/Florents-Tselai/tsellm/blob/main/LICENSE)
1010

11-
**tsellm** is the easiest way to access LLMs through your SQLite or DuckDB database.
11+
**tsellm** is the easiest way to access LLMs from SQLite or DuckDB.
1212

1313
```shell
1414
pip install tsellm
1515
```
1616

17-
```shell
18-
usage: tsellm [-h] [--sqlite | --duckdb] [-v] [filename] [sql]
19-
20-
Use LLMs in SQLite and DuckDB
21-
22-
positional arguments:
23-
filename SQLite/DuckDB database to open (defaults to SQLite ':memory:').
24-
A new database is created if the file does not previously exist.
25-
sql An SQL query to execute. Any returned rows are printed to
26-
stdout.
27-
28-
options:
29-
-h, --help show this help message and exit
30-
--sqlite SQLite mode
31-
--duckdb DuckDB mode
32-
-v, --version Print tsellm version
33-
17+
```bash
18+
cat <<EOF | tee >(sqlite3 prompts.sqlite3) | duckdb prompts.duckdb
19+
CREATE TABLE prompts ( p TEXT);
20+
INSERT INTO prompts VALUES('hello world!');
21+
INSERT INTO prompts VALUES('how are you?');
22+
INSERT INTO prompts VALUES('is this real life?');
23+
INSERT INTO prompts VALUES('1+1=?');
24+
EOF
3425
```
3526

3627
Behind the scenes, **tsellm** is based on the beautiful [llm](https://llm.datasette.io) library,
@@ -42,95 +33,56 @@ For example, to access `gpt4all` models
4233

4334
```shell
4435
llm install llm-gpt4all
45-
# Then pick any gpt4all (it will be downloaded automatically the first time you use any model
46-
tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-mini-3b-gguf2-q4_0')"
47-
tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-2-7b')"
36+
```
37+
38+
```sql
39+
tsellm prompts.duckdb "select prompt(p, 'orca-mini-3b-gguf2-q4_0') from prompts"
40+
tsellm prompts.sqlite3 "select prompt(p, 'orca-2-7b') from prompts"
4841
```
4942

5043
## Embeddings
5144

5245
```shell
5346
llm install llm-sentence-transformers
5447
llm sentence-transformers register all-MiniLM-L12-v2
55-
tsellm :memory: "select embed('Hello', 'sentence-transformers/all-MiniLM-L12-v2')"
48+
```
49+
50+
```sql
51+
tsellm prompts.sqlite3 "select embed(p, 'sentence-transformers/all-MiniLM-L12-v2') from prompts"
5652
```
5753

5854
### Embeddings for binary (`BLOB`) columns
5955

6056
```shell
6157
wget https://tselai.com/img/flo.jpg
62-
sqlite3 images.db <<EOF
58+
sqlite3 images.sqlite3 <<EOF
6359
CREATE TABLE images(name TEXT, type TEXT, img BLOB);
6460
INSERT INTO images(name,type,img) VALUES('flo','jpg',readfile('flo.jpg'));
6561
EOF
6662
```
6763

6864
```shell
6965
llm install llm-clip
70-
tsellm images.db "select embed(img, 'clip') from images"
71-
```
72-
73-
## Examples
74-
75-
Things get more interesting if you
76-
combine models in your standard queries.
77-
78-
First, create a db with some data.
79-
You can easily toggle between SQLite and DuckDB,
80-
and **tsellm** will pick this up automatically.
81-
82-
### SQLite
83-
```bash
84-
sqlite3 prompts.db <<EOF
85-
CREATE TABLE prompts (
86-
p TEXT
87-
);
88-
INSERT INTO prompts VALUES('hello world!');
89-
INSERT INTO prompts VALUES('how are you?');
90-
INSERT INTO prompts VALUES('is this real life?');
91-
INSERT INTO prompts VALUES('1+1=?');
92-
EOF
9366
```
9467

95-
With a single query you can access get prompt
96-
responses from different LLMs:
97-
9868
```sql
99-
tsellm prompts.db "
100-
select p,
101-
prompt(p, 'orca-2-7b'),
102-
prompt(p, 'orca-mini-3b-gguf2-q4_0'),
103-
embed(p, 'sentence-transformers/all-MiniLM-L12-v2')
104-
from prompts"
69+
tsellm images.sqlite3 "select embed(img, 'clip') from images"
10570
```
10671

107-
### DuckDB
72+
### Multiple Prompts
10873

109-
```bash
110-
duckdb prompts.duckdb <<EOF
111-
CREATE TABLE prompts (
112-
p TEXT
113-
);
114-
INSERT INTO prompts VALUES('hello world!');
115-
INSERT INTO prompts VALUES('how are you?');
116-
INSERT INTO prompts VALUES('is this real life?');
117-
INSERT INTO prompts VALUES('1+1=?');
118-
EOF
119-
```
120-
121-
With a single query you can access get prompt
74+
With a single query you can easily get prompt
12275
responses from different LLMs:
12376

12477
```sql
125-
tsellm prompts.duckdb "
78+
tsellm prompts.sqlite3 "
12679
select p,
12780
prompt(p, 'orca-2-7b'),
12881
prompt(p, 'orca-mini-3b-gguf2-q4_0'),
12982
embed(p, 'sentence-transformers/all-MiniLM-L12-v2')
13083
from prompts"
13184
```
13285

133-
13486
## Interactive Shell
13587

13688
If you don't provide an SQL query,

tests/test_tsellm.py

+43-37
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,7 @@
1010
from llm import cli as llm_cli
1111

1212
from tsellm.__version__ import __version__
13-
from tsellm.cli import (
14-
cli,
15-
TsellmConsole,
16-
SQLiteConsole,
17-
TsellmConsoleMixin,
18-
)
13+
from tsellm.cli import cli, TsellmConsole, SQLiteConsole, DuckDBConsole, DBSniffer
1914

2015

2116
def new_tempfile():
@@ -25,17 +20,33 @@ def new_tempfile():
2520
def new_sqlite_file():
2621
f = new_tempfile()
2722
with sqlite3.connect(f) as db:
28-
db.execute("SELECT 1")
23+
db.execute("CREATE TABLE my(x text)")
2924
return f
3025

3126

3227
def new_duckdb_file():
3328
f = new_tempfile()
3429
con = duckdb.connect(f.__str__())
35-
con.sql("SELECT 1")
30+
con.sql("CREATE TABLE my(x text)")
3631
return f
3732

3833

34+
class TestDBSniffer(unittest.TestCase):
35+
def setUp(self):
36+
self.sqlite_fp = new_sqlite_file()
37+
self.duckdb_fp = new_duckdb_file()
38+
39+
def test_sniff_sqlite(self):
40+
sqlite_sni = DBSniffer(self.sqlite_fp)
41+
self.assertTrue(sqlite_sni.is_sqlite)
42+
self.assertFalse(sqlite_sni.is_duckdb)
43+
44+
def test_snif_duckdb(self):
45+
duckdb_sni = DBSniffer(self.duckdb_fp)
46+
self.assertFalse(duckdb_sni.is_sqlite)
47+
self.assertTrue(duckdb_sni.is_duckdb)
48+
49+
3950
class TsellmConsoleTest(unittest.TestCase):
4051
def setUp(self):
4152
super().setUp()
@@ -69,23 +80,15 @@ def expect_failure(self, *args):
6980
self.assertEqual(out, "")
7081
return err
7182

72-
def test_sniff_sqlite(self):
73-
self.assertTrue(TsellmConsoleMixin().is_sqlite(new_sqlite_file()))
74-
75-
def test_sniff_duckdb(self):
76-
self.assertTrue(TsellmConsoleMixin().is_duckdb(new_duckdb_file()))
77-
7883
def test_console_factory_sqlite(self):
7984
s = new_sqlite_file()
80-
self.assertTrue(TsellmConsoleMixin().is_sqlite(s))
8185
obj = TsellmConsole.create_console(s)
8286
self.assertIsInstance(obj, SQLiteConsole)
8387

84-
# def test_console_factory_duckdb(self):
85-
# s = new_duckdb_file()
86-
# self.assertTrue(TsellmConsole.is_duckdb(s))
87-
# obj = TsellmConsole.create_console(s)
88-
# self.assertIsInstance(obj, DuckDBConsole)
88+
d = new_duckdb_file()
89+
self.assertTrue(TsellmConsole.create_console(d))
90+
obj = TsellmConsole.create_console(d)
91+
self.assertIsInstance(obj, DuckDBConsole)
8992

9093
def test_cli_help(self):
9194
out = self.expect_success("-h")
@@ -98,11 +101,6 @@ def test_cli_version(self):
98101
def test_choose_db(self):
99102
self.expect_failure("--sqlite", "--duckdb")
100103

101-
def test_deault_sqlite(self):
102-
f = new_tempfile()
103-
self.expect_success(str(f), "select 1")
104-
self.assertTrue(TsellmConsoleMixin().is_sqlite(f))
105-
106104
MEMORY_DB_MSG = "Connected to :memory:"
107105
PS1 = "tsellm> "
108106
PS2 = "... "
@@ -112,7 +110,7 @@ def run_cli(self, *args, commands=()):
112110
captured_stdin() as stdin,
113111
captured_stdout() as stdout,
114112
captured_stderr() as stderr,
115-
self.assertRaises(SystemExit) as cm
113+
self.assertRaises(SystemExit) as cm,
116114
):
117115
for cmd in commands:
118116
stdin.write(cmd + "\n")
@@ -121,8 +119,9 @@ def run_cli(self, *args, commands=()):
121119

122120
out = stdout.getvalue()
123121
err = stderr.getvalue()
124-
self.assertEqual(cm.exception.code, 0,
125-
f"Unexpected failure: {args=}\n{out}\n{err}")
122+
self.assertEqual(
123+
cm.exception.code, 0, f"Unexpected failure: {args=}\n{out}\n{err}"
124+
)
126125
return out, err
127126

128127
def test_interact(self):
@@ -197,13 +196,6 @@ def test_cli_execute_incomplete_sql(self):
197196
stderr = self.expect_failure(*self.path_args, "sel")
198197
self.assertIn("OperationalError (SQLITE_ERROR)", stderr)
199198

200-
def test_cli_on_disk_db(self):
201-
self.addCleanup(unlink, TESTFN)
202-
out = self.expect_success(TESTFN, "create table t(t)")
203-
self.assertEqual(out, "")
204-
out = self.expect_success(TESTFN, "select count(t) from t")
205-
self.assertIn("(0,)", out)
206-
207199
def assertMarkovResult(self, prompt, generated):
208200
# Every word should be one of the original prompt (see https://github.com/simonw/llm-markov/blob/657ca504bcf9f0bfc1c6ee5fe838cde9a8976381/tests/test_llm_markov.py#L20)
209201
for w in prompt.split(" "):
@@ -256,7 +248,7 @@ class DiskSQLiteTest(InMemorySQLiteTest):
256248

257249
def setUp(self):
258250
super().setUp()
259-
self.db_fp = str(new_tempfile())
251+
self.db_fp = str(new_sqlite_file())
260252
self.path_args = (
261253
"--sqlite",
262254
self.db_fp,
@@ -265,7 +257,7 @@ def setUp(self):
265257
def test_embed_default_hazo_leaves_valid_db_behind(self):
266258
# This should probably be called for all test cases
267259
super().test_embed_default_hazo()
268-
self.assertTrue(TsellmConsoleMixin().is_sqlite(self.db_fp))
260+
self.assertTrue(DBSniffer(self.db_fp).is_sqlite)
269261

270262

271263
class InMemoryDuckDBTest(InMemorySQLiteTest):
@@ -299,5 +291,19 @@ def test_embed_hazo_binary(self):
299291
pass
300292

301293

294+
class DiskDuckDBTest(InMemoryDuckDBTest):
295+
db_fp = None
296+
path_args = ()
297+
298+
def setUp(self):
299+
super().setUp()
300+
self.db_fp = str(new_duckdb_file())
301+
self.path_args = (self.db_fp,)
302+
303+
def test_duckdb_is_picked_up(self):
304+
# https://github.com/Florents-Tselai/tsellm/issues/28
305+
super().test_cli_execute_sql()
306+
307+
302308
if __name__ == "__main__":
303309
unittest.main()

0 commit comments

Comments
 (0)