From e1142eb32c39d29350eb5f2493d61a2c5b9df4df Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 07:35:10 -0500 Subject: [PATCH 1/7] gitignore pycharm setup --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b36e0ad..2d15c12 100644 --- a/.gitignore +++ b/.gitignore @@ -363,7 +363,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ ### Python Patch ### # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration From abd83652923d1ee4df987533e984fc277d6b3ab2 Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 08:25:29 -0500 Subject: [PATCH 2/7] refactoring assignmentstore and responsestore config --- .../JSONFileAssignmentStore.py | 6 ++- freetext/assignment_stores/__init__.py | 54 ++++++++++++++++++- freetext/config.example.py | 18 +++---- freetext/response_stores/__init__.py | 52 ++++++++++++++++++ freetext/server.py | 45 ++++------------ 5 files changed, 127 insertions(+), 48 deletions(-) diff --git a/freetext/assignment_stores/JSONFileAssignmentStore.py b/freetext/assignment_stores/JSONFileAssignmentStore.py index 3714738..fb1aa67 100644 --- a/freetext/assignment_stores/JSONFileAssignmentStore.py +++ b/freetext/assignment_stores/JSONFileAssignmentStore.py @@ -5,6 +5,8 @@ import json import os import uuid +from typing import Union +import pathlib class JSONFileAssignmentStore(AssignmentStore): @@ -12,8 +14,8 @@ class JSONFileAssignmentStore(AssignmentStore): A AssignmentStore that stores assignments in a JSON file. 
""" - def __init__(self, filename: str): - self._filename = filename + def __init__(self, path: Union[str, pathlib.Path]): + self._filename = path def get_assignment(self, key: AssignmentID) -> Assignment: """ diff --git a/freetext/assignment_stores/__init__.py b/freetext/assignment_stores/__init__.py index 09aa179..52ba03c 100644 --- a/freetext/assignment_stores/__init__.py +++ b/freetext/assignment_stores/__init__.py @@ -1,4 +1,56 @@ from .AssignmentStore import AssignmentStore, InMemoryAssignmentStore from .JSONFileAssignmentStore import JSONFileAssignmentStore +from .DynamoAssignmentStore import DynamoAssignmentStore +from pydantic import BaseModel +from typing import Literal, Union -__all__ = ["AssignmentStore", "InMemoryAssignmentStore", "JSONFileAssignmentStore"] + +class InMemoryAssignmentStoreConfig(BaseModel): + type: str = Literal["in_memory"] + + +class JSONAssignmentStoreConfig(BaseModel): + type: str = Literal["json"] + path: str = "assignments.json" + + +class DynamoAssignmentStoreConfig(BaseModel): + type: str = Literal["dynamo"] + aws_access_key_id: str + aws_secret_access_key: str + aws_region: str + table_name: str + + +AssignmentStoreConfig = Union[ + InMemoryAssignmentStoreConfig, + JSONAssignmentStoreConfig, + DynamoAssignmentStoreConfig, +] + + +def create_assignment_store(config: AssignmentStoreConfig) -> AssignmentStore: + """Factory function for creating a assignment store from a config object.""" + if config.type == Literal["in_memory"]: + return InMemoryAssignmentStore() + elif config.type == Literal["json"]: + return JSONFileAssignmentStore(config.path) + elif config.type == Literal["dynamo"]: + return DynamoAssignmentStore( + config.aws_access_key_id, + config.aws_secret_access_key, + config.aws_region, + config.table_name, + ) + else: + raise ValueError(f"Unknown assignment store type: {config.type}") + + +__all__ = [ + "AssignmentStore", + "InMemoryAssignmentStore", + "JSONFileAssignmentStore", + "DynamoAssignmentStore", + 
"AssignmentStoreConfig", + "create_assignment_store", +] diff --git a/freetext/config.example.py b/freetext/config.example.py index 0f983c0..2264256 100644 --- a/freetext/config.example.py +++ b/freetext/config.example.py @@ -1,4 +1,6 @@ -from pydantic import BaseSettings +from pydantic import BaseSettings, Field +from freetext.assignment_stores import AssignmentStoreConfig +from freetext.response_stores import ResponseStoreConfig class OpenAIConfig(BaseSettings): @@ -13,11 +15,9 @@ class ApplicationSettings(BaseSettings): # your OpenAI API key) assignment_creation_secret: str = "I'm totally allowed to make a project" - # AWS credentials and table names for storing assignments and responses. - # If you're using local (e.g., JSON-based) stores, you can set these all to - # empty strings or ignore them entirely. - aws_access_key_id: str = "AKIA###" - aws_secret_access_key: str = "###" - aws_region: str = "us-east-1" - assignments_table: str = "llm4_freetext_assignments" - responses_table: str = "llm4_freetext_responses" + # To override the config for stores, replace Field(..., discriminator="type") with the config you want, e.g.: + # assignment_store: AssignmentStoreConfig = JSONAssignmentStoreConfig(path="assignments.json") + # or + # assignment_store: AssignmentStoreConfig = InMemoryAssignmentStoreConfig() + assignment_store: AssignmentStoreConfig = Field(..., discriminator="type") + response_store: ResponseStoreConfig = Field(..., discriminator="type") diff --git a/freetext/response_stores/__init__.py b/freetext/response_stores/__init__.py index 7304394..1b16da4 100644 --- a/freetext/response_stores/__init__.py +++ b/freetext/response_stores/__init__.py @@ -1,2 +1,54 @@ from .ResponseStore import ResponseStore, InMemoryResponseStore from .JSONFileResponseStore import JSONFileResponseStore +from .DynamoResponseStore import DynamoResponseStore +from pydantic import BaseModel +from typing import Literal, Union + + +class InMemoryResponseStoreConfig(BaseModel): + 
type: str = Literal["in_memory"] + + +class JSONResponseStoreConfig(BaseModel): + type: str = Literal["json"] + path: str = "responses.json" + + +class DynamoResponseStoreConfig(BaseModel): + type: str = Literal["dynamo"] + aws_access_key_id: str + aws_secret_access_key: str + aws_region: str + table_name: str + + +ResponseStoreConfig = Union[ + InMemoryResponseStoreConfig, JSONResponseStoreConfig, DynamoResponseStoreConfig +] + + +def create_response_store(config: ResponseStoreConfig) -> ResponseStore: + """Factory function for creating a response store from a config object.""" + if config.type == Literal["in_memory"]: + return InMemoryResponseStore() + elif config.type == Literal["json"]: + return JSONFileResponseStore(config.path) + elif config.type == Literal["dynamo"]: + return DynamoResponseStore( + config.aws_access_key_id, + config.aws_secret_access_key, + config.aws_region, + config.table_name, + ) + else: + raise ValueError(f"Unknown response store type: {config.type}") + + +__all__ = [ + "ResponseStore", + "InMemoryResponseStore", + "JSONFileResponseStore", + "DynamoResponseStore", + "ResponseStoreConfig", + "create_response_store", +] diff --git a/freetext/server.py b/freetext/server.py index d0b39bd..0c783cb 100644 --- a/freetext/server.py +++ b/freetext/server.py @@ -6,19 +6,9 @@ from fastapi.responses import HTMLResponse, PlainTextResponse from mangum import Mangum -from .assignment_stores import ( - AssignmentStore, - InMemoryAssignmentStore, - JSONFileAssignmentStore, -) -from .assignment_stores.DynamoAssignmentStore import DynamoAssignmentStore +from freetext.assignment_stores import AssignmentStore, create_assignment_store +from freetext.response_stores import ResponseStore, create_response_store from .config import ApplicationSettings -from .response_stores.ResponseStore import ( - ResponseStore, - InMemoryResponseStore, -) -from .response_stores.DynamoResponseStore import DynamoResponseStore - from .feedback_providers.FeedbackProvider import 
FeedbackProvider from .feedback_providers.OpenAIFeedbackProvider import OpenAIChatBasedFeedbackProvider from .llm4text_types import ( @@ -39,9 +29,9 @@ class FeedbackRouter: def __init__( self, + assignment_store: AssignmentStore, + response_store: ResponseStore, feedback_providers: Optional[list[FeedbackProvider]] = None, - assignment_store: Optional[AssignmentStore] = None, - response_store: Optional[ResponseStore] = None, fallback_feedback_provider: Optional[FeedbackProvider] = None, ): """ @@ -53,15 +43,9 @@ def __init__( assignment_store: An assignment store to use. """ + self._assignment_store = assignment_store + self._response_store = response_store self._feedback_providers = feedback_providers or [] - self._assignment_store = ( - assignment_store - if (assignment_store is not None) - else InMemoryAssignmentStore() - ) - self._response_store = ( - response_store if (response_store is not None) else InMemoryResponseStore() - ) self._fallback_feedback_provider = fallback_feedback_provider def add_feedback_provider(self, feedback_provider: FeedbackProvider) -> None: @@ -120,20 +104,9 @@ def get_commons(): config = ApplicationSettings() return Commons( feedback_router=FeedbackRouter( - [OpenAIChatBasedFeedbackProvider()], - assignment_store=DynamoAssignmentStore( - aws_access_key_id=config.aws_access_key_id, - aws_secret_access_key=config.aws_secret_access_key, - aws_region=config.aws_region, - table_name=config.assignments_table, - ), - response_store=DynamoResponseStore( - aws_access_key_id=config.aws_access_key_id, - aws_secret_access_key=config.aws_secret_access_key, - aws_region=config.aws_region, - table_name=config.responses_table, - ), - # JSONFileAssignmentStore("assignments.json"), + assignment_store=create_assignment_store(config.assignment_store), + response_store=create_response_store(config.response_store), + feedback_providers=[OpenAIChatBasedFeedbackProvider()], ) ) From f5f3fee820927a4b57be8d48afefe5bcf61a736d Mon Sep 17 00:00:00 2001 From: 
wrongu Date: Sat, 11 Nov 2023 13:44:31 -0500 Subject: [PATCH 3/7] Fix missing Config classes in __all__ declarations --- freetext/assignment_stores/__init__.py | 5 ++++- freetext/response_stores/__init__.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/freetext/assignment_stores/__init__.py b/freetext/assignment_stores/__init__.py index 52ba03c..f0eb151 100644 --- a/freetext/assignment_stores/__init__.py +++ b/freetext/assignment_stores/__init__.py @@ -48,9 +48,12 @@ def create_assignment_store(config: AssignmentStoreConfig) -> AssignmentStore: __all__ = [ "AssignmentStore", + "AssignmentStoreConfig", "InMemoryAssignmentStore", + "InMemoryAssignmentStoreConfig", "JSONFileAssignmentStore", + "JSONAssignmentStoreConfig", "DynamoAssignmentStore", - "AssignmentStoreConfig", + "DynamoAssignmentStoreConfig", "create_assignment_store", ] diff --git a/freetext/response_stores/__init__.py b/freetext/response_stores/__init__.py index 1b16da4..5800ba1 100644 --- a/freetext/response_stores/__init__.py +++ b/freetext/response_stores/__init__.py @@ -46,9 +46,12 @@ def create_response_store(config: ResponseStoreConfig) -> ResponseStore: __all__ = [ "ResponseStore", + "ResponseStoreConfig", "InMemoryResponseStore", + "InMemoryResponseStoreConfig", "JSONFileResponseStore", + "JSONResponseStoreConfig", "DynamoResponseStore", - "ResponseStoreConfig", + "DynamoResponseStoreConfig", "create_response_store", ] From 62e1f46dc6b3ad2e49c009a47aa166e054362c80 Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 13:44:05 -0500 Subject: [PATCH 4/7] PromptStore protocol and two simple implementations (InMemory, PlainText) --- .../prompt_stores/PlainTextPromptStore.py | 88 ++++++++++++++++++ freetext/prompt_stores/PromptStore.py | 42 +++++++++ freetext/prompt_stores/__init__.py | 1 + tests/test_plain_text_prompt_store.py | 92 +++++++++++++++++++ 4 files changed, 223 insertions(+) create mode 100644 freetext/prompt_stores/PlainTextPromptStore.py create mode 
100644 freetext/prompt_stores/PromptStore.py create mode 100644 freetext/prompt_stores/__init__.py create mode 100644 tests/test_plain_text_prompt_store.py diff --git a/freetext/prompt_stores/PlainTextPromptStore.py b/freetext/prompt_stores/PlainTextPromptStore.py new file mode 100644 index 0000000..83b2e25 --- /dev/null +++ b/freetext/prompt_stores/PlainTextPromptStore.py @@ -0,0 +1,88 @@ +from .PromptStore import PromptStore, PromptID +import pathlib +from typing import Union +from functools import lru_cache + + +class PlainTextPromptStore(PromptStore): + extension = ".txt" + delimiter = "." + + def __init__(self, path: Union[str, pathlib.Path]): + self._root = pathlib.Path(path) + self._keys = set() + self._update_by_traversal() + + def _validate_path(self, path: Union[str, pathlib.Path]) -> pathlib.Path: + if not isinstance(path, pathlib.Path): + path = pathlib.Path(path) + + # All paths must be relative OR absolute and in the root directory + if path.is_absolute() and not path.is_relative_to(self._root): + raise ValueError(f"Path {path} is not in root {self._root}.") + + # All paths must point to files with the correct extension + if path.suffix != self.extension: + raise ValueError(f"File {path} is not a {self.extension} file.") + + # Return the combined root + path + return self._root.joinpath(path.relative_to(self._root)) + + def _relative_path(self, path: Union[str, pathlib.Path]) -> pathlib.Path: + return self._validate_path(path).relative_to(self._root) + + def _path_to_key(self, path: Union[str, pathlib.Path]) -> PromptID: + rel_path = self._relative_path(path) + return self.delimiter.join(rel_path.parts[:-1] + (rel_path.stem,)) + + def _key_to_path(self, key: PromptID) -> pathlib.Path: + parts = key.split(self.delimiter) + parts[-1] += self.extension + return self._validate_path(self._root.joinpath(*parts)) + + def _add_file(self, path: pathlib.Path): + path = self._validate_path(path) + path.parent.mkdir(parents=True, exist_ok=True) + path.touch() + 
self._keys.add(self._path_to_key(path)) + + def _add_key(self, key: PromptID): + self._add_file(self._key_to_path(key)) + + def _del_file(self, path: pathlib.Path): + path = self._validate_path(path) + path.unlink() + self._keys.remove(self._path_to_key(path)) + # TODO - more efficient clear that only clears the cache for this key. Can't be done with built-in lru_cache. + # See here: https://bugs.python.org/issue28178 + self.get_prompt.cache_clear() + + def _del_key(self, key: PromptID): + self._del_file(self._key_to_path(key)) + + def _update_by_traversal(self): + for path in self._root.glob("**/*" + self.extension): + self._add_file(path) + + @lru_cache(maxsize=128) + def get_prompt(self, prompt_id: PromptID) -> str: + with self._key_to_path(prompt_id).open("r") as f: + return f.read() + + def set_prompt(self, prompt_id: PromptID, prompt: str): + path = self._key_to_path(prompt_id) + self._add_file(path) + with path.open("w") as f: + f.write(prompt) + # TODO - more efficient clear that only clears the cache for this key. Can't be done with built-in lru_cache. + # See here: https://bugs.python.org/issue28178 + self.get_prompt.cache_clear() + + def __delitem__(self, key: PromptID): + self._del_key(key) + + def get_prompt_ids(self) -> list[PromptID]: + return list(self._keys) + + def __contains__(self, key: PromptID) -> bool: + return key in self._keys diff --git a/freetext/prompt_stores/PromptStore.py b/freetext/prompt_stores/PromptStore.py new file mode 100644 index 0000000..764929b --- /dev/null +++ b/freetext/prompt_stores/PromptStore.py @@ -0,0 +1,42 @@ +from typing import Protocol + + +PromptID = str + + +class PromptStore(Protocol): + def get_prompt(self, prompt_id: PromptID) -> str: + ... + + def set_prompt(self, prompt_id: PromptID, prompt: str): + ... + + def __delitem__(self, key: PromptID): + ... + + def get_prompt_ids(self) -> list[PromptID]: + ... + + def __contains__(self, key: PromptID) -> bool: + ... 
+ + +class InMemoryPromptStore(PromptStore): + def __init__(self): + self._prompts = {} + + def get_prompt(self, prompt_id: PromptID) -> str: + return self._prompts[prompt_id] + + def set_prompt(self, prompt_id: PromptID, prompt: str): + self._prompts[prompt_id] = prompt + + def __delitem__(self, key: PromptID): + del self._prompts[key] + + def get_prompt_ids(self) -> list[PromptID]: + return list(self._prompts.keys()) + + def __contains__(self, key: PromptID) -> bool: + return key in self._prompts + diff --git a/freetext/prompt_stores/__init__.py b/freetext/prompt_stores/__init__.py new file mode 100644 index 0000000..3daac65 --- /dev/null +++ b/freetext/prompt_stores/__init__.py @@ -0,0 +1 @@ +from .PromptStore import PromptStore, InMemoryPromptStore diff --git a/tests/test_plain_text_prompt_store.py b/tests/test_plain_text_prompt_store.py new file mode 100644 index 0000000..d7c471d --- /dev/null +++ b/tests/test_plain_text_prompt_store.py @@ -0,0 +1,92 @@ +import unittest +from freetext.prompt_stores.PlainTextPromptStore import PlainTextPromptStore +import tempfile +import shutil +import pathlib + + +class TestPlainTextPromptStore(unittest.TestCase): + def setUp(self): + # Create a temporary directory for the file storage + self.test_dir = tempfile.mkdtemp() + self.store = PlainTextPromptStore(self.test_dir) + + def tearDown(self): + # Remove the temporary directory after the test + shutil.rmtree(self.test_dir) + + def test_initialization(self): + """Test the initialization of the PlainTextPromptStore.""" + self.assertEqual(self.store._root, pathlib.Path(self.test_dir)) + self.assertIsInstance(self.store._keys, set) + # Assuming the directory is empty to begin with, keys should be empty + self.assertEqual(len(self.store._keys), 0) + + def test_path_and_key_conversion(self): + """Test conversion between paths and keys.""" + p = pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt") + self.assertEqual(self.store._path_to_key(p), "a.b.c") + + def 
test_addition_and_deletion_of_files(self): + """Test adding and deleting files updates keys and filesystem.""" + key, content = "a.b.c", "This is a test prompt." + self.assertFalse(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.store.set_prompt(key, content) + self.assertTrue(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.assertEqual(len(self.store._keys), 1) + self.assertIn(key, self.store._keys) + self.assertEqual(self.store.get_prompt(key), content) + del self.store[key] + self.assertFalse(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.assertEqual(len(self.store._keys), 0) + self.assertNotIn(key, self.store._keys) + with self.assertRaises(FileNotFoundError): + self.store.get_prompt(key) + + def test_get_prompt_ids(self): + """Test get_prompt_ids method.""" + self.store.set_prompt("a.b.c", "This is a test prompt.") + self.store.set_prompt("a.b.d", "This is another test prompt.") + self.assertEqual(len(self.store.get_prompt_ids()), 2) + self.assertSetEqual(set(self.store.get_prompt_ids()), {"a.b.c", "a.b.d"}) + + def test_no_modification_outside_of_root(self): + with self.assertRaises(ValueError): + self.store._add_file(pathlib.Path("/not/root/a/b/c.data")) + + def test_init_from_existing_files(self): + self.assertEqual(len(self.store._keys), 0) + content = "This is a test prompt, hard-coded." 
+ path = pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt") + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w") as f: + f.write(content) + + # Re-initialize store and check that the file was loaded + self.store = PlainTextPromptStore(self.test_dir) + self.assertEqual(len(self.store._keys), 1) + self.assertTrue("a.b.c" in self.store) + + def test_value_error_on_wrong_extension(self): + with self.assertRaises(ValueError): + self.store._add_file(pathlib.Path(self.test_dir).joinpath("a", "b", "c.data")) + + def test_cache_invalidated_on_set(self): + key, content = "a.b.c", "This is a test prompt." + self.store.set_prompt(key, content) + self.assertEqual(self.store.get_prompt(key), content) + new_content = "This is a new test prompt." + self.store.set_prompt(key, new_content) + self.assertEqual(self.store.get_prompt(key), new_content) + + def test_cache_invalidated_on_del(self): + key, content = "a.b.c", "This is a test prompt." + self.store.set_prompt(key, content) + self.assertEqual(self.store.get_prompt(key), content) + del self.store[key] + with self.assertRaises(FileNotFoundError): + self.store.get_prompt(key) + + +if __name__ == "__main__": + unittest.main() From a30610a220b6d9760981bdad284469c6e40a6eee Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 13:45:00 -0500 Subject: [PATCH 5/7] PromptStoreConfig as part of ApplicationSettings --- freetext/config.example.py | 2 ++ freetext/prompt_stores/PromptStore.py | 1 - freetext/prompt_stores/__init__.py | 36 +++++++++++++++++++++++++++ tests/test_plain_text_prompt_store.py | 16 +++++++++--- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/freetext/config.example.py b/freetext/config.example.py index 2264256..ca4b7f6 100644 --- a/freetext/config.example.py +++ b/freetext/config.example.py @@ -1,6 +1,7 @@ from pydantic import BaseSettings, Field from freetext.assignment_stores import AssignmentStoreConfig from freetext.response_stores import ResponseStoreConfig +from 
freetext.prompt_stores import PromptStoreConfig class OpenAIConfig(BaseSettings): @@ -21,3 +22,4 @@ class ApplicationSettings(BaseSettings): # assignment_store: AssignmentStoreConfig = InMemoryAssignmentStoreConfig() assignment_store: AssignmentStoreConfig = Field(..., discriminator="type") response_store: ResponseStoreConfig = Field(..., discriminator="type") + prompt_store: PromptStoreConfig = Field(..., discriminator="type") diff --git a/freetext/prompt_stores/PromptStore.py b/freetext/prompt_stores/PromptStore.py index 764929b..5021c79 100644 --- a/freetext/prompt_stores/PromptStore.py +++ b/freetext/prompt_stores/PromptStore.py @@ -39,4 +39,3 @@ def get_prompt_ids(self) -> list[PromptID]: def __contains__(self, key: PromptID) -> bool: return key in self._prompts - diff --git a/freetext/prompt_stores/__init__.py b/freetext/prompt_stores/__init__.py index 3daac65..876c477 100644 --- a/freetext/prompt_stores/__init__.py +++ b/freetext/prompt_stores/__init__.py @@ -1 +1,37 @@ from .PromptStore import PromptStore, InMemoryPromptStore +from .PlainTextPromptStore import PlainTextPromptStore +from pydantic import BaseModel +from typing import Literal, Union + + +class InMemoryPromptStoreConfig(BaseModel): + type: str = Literal["in_memory"] + + +class PlainTextPromptStoreConfig(BaseModel): + type: str = Literal["plain_text"] + root: str + + +PromptStoreConfig = Union[InMemoryPromptStoreConfig, PlainTextPromptStoreConfig] + + +def create_prompt_store(config: PromptStoreConfig) -> PromptStore: + """Factory function for creating a prompt store from a config object.""" + if config.type == Literal["in_memory"]: + return InMemoryPromptStore() + elif config.type == Literal["plain_text"]: + return PlainTextPromptStore(config.root) + else: + raise ValueError(f"Unknown prompt store type: {config.type}") + + +__all__ = [ + "PromptStore", + "PromptStoreConfig", + "InMemoryPromptStore", + "InMemoryPromptStoreConfig", + "PlainTextPromptStore", + "PlainTextPromptStoreConfig", + 
"create_prompt_store", +] diff --git a/tests/test_plain_text_prompt_store.py b/tests/test_plain_text_prompt_store.py index d7c471d..9592947 100644 --- a/tests/test_plain_text_prompt_store.py +++ b/tests/test_plain_text_prompt_store.py @@ -30,14 +30,20 @@ def test_path_and_key_conversion(self): def test_addition_and_deletion_of_files(self): """Test adding and deleting files updates keys and filesystem.""" key, content = "a.b.c", "This is a test prompt." - self.assertFalse(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.assertFalse( + pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists() + ) self.store.set_prompt(key, content) - self.assertTrue(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.assertTrue( + pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists() + ) self.assertEqual(len(self.store._keys), 1) self.assertIn(key, self.store._keys) self.assertEqual(self.store.get_prompt(key), content) del self.store[key] - self.assertFalse(pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists()) + self.assertFalse( + pathlib.Path(self.test_dir).joinpath("a", "b", "c.txt").exists() + ) self.assertEqual(len(self.store._keys), 0) self.assertNotIn(key, self.store._keys) with self.assertRaises(FileNotFoundError): @@ -69,7 +75,9 @@ def test_init_from_existing_files(self): def test_value_error_on_wrong_extension(self): with self.assertRaises(ValueError): - self.store._add_file(pathlib.Path(self.test_dir).joinpath("a", "b", "c.data")) + self.store._add_file( + pathlib.Path(self.test_dir).joinpath("a", "b", "c.data") + ) def test_cache_invalidated_on_set(self): key, content = "a.b.c", "This is a test prompt." From 162948a8bc8cf059cdfbf912dbba2ce9774afc1e Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 13:58:58 -0500 Subject: [PATCH 6/7] Moved all default prompts into txt files. Set example config to use this. 
--- freetext/config.example.py | 4 +- .../OpenAIFeedbackProvider.py | 147 ++---------------- freetext/server.py | 7 +- prompts/default/grader/draft_criteria.txt | 35 +++++ prompts/default/grader/draft_response.txt | 18 +++ prompts/default/grader/feedback.txt | 48 ++++++ prompts/default/grader/improve_question.txt | 24 +++ 7 files changed, 142 insertions(+), 141 deletions(-) create mode 100644 prompts/default/grader/draft_criteria.txt create mode 100644 prompts/default/grader/draft_response.txt create mode 100644 prompts/default/grader/feedback.txt create mode 100644 prompts/default/grader/improve_question.txt diff --git a/freetext/config.example.py b/freetext/config.example.py index ca4b7f6..f00df0d 100644 --- a/freetext/config.example.py +++ b/freetext/config.example.py @@ -1,7 +1,7 @@ from pydantic import BaseSettings, Field from freetext.assignment_stores import AssignmentStoreConfig from freetext.response_stores import ResponseStoreConfig -from freetext.prompt_stores import PromptStoreConfig +from freetext.prompt_stores import PromptStoreConfig, PlainTextPromptStoreConfig class OpenAIConfig(BaseSettings): @@ -22,4 +22,4 @@ class ApplicationSettings(BaseSettings): # assignment_store: AssignmentStoreConfig = InMemoryAssignmentStoreConfig() assignment_store: AssignmentStoreConfig = Field(..., discriminator="type") response_store: ResponseStoreConfig = Field(..., discriminator="type") - prompt_store: PromptStoreConfig = Field(..., discriminator="type") + prompt_store: PromptStoreConfig = PlainTextPromptStoreConfig(root="prompts/default") diff --git a/freetext/feedback_providers/OpenAIFeedbackProvider.py b/freetext/feedback_providers/OpenAIFeedbackProvider.py index d7c80c9..0d05288 100644 --- a/freetext/feedback_providers/OpenAIFeedbackProvider.py +++ b/freetext/feedback_providers/OpenAIFeedbackProvider.py @@ -3,6 +3,7 @@ from ..config import OpenAIConfig from ..feedback_providers.FeedbackProvider import FeedbackProvider +from ..prompt_stores import PromptStore from 
..llm4text_types import Assignment, Feedback, Submission @@ -16,7 +17,10 @@ class OpenAIChatBasedFeedbackProvider(FeedbackProvider): more cost-effective API use, at the cost of a more constrained prompt. """ - def __init__(self, config_override: Optional[OpenAIConfig] = None): + def __init__( + self, prompt_store: PromptStore, config_override: Optional[OpenAIConfig] = None + ): + self.prompts = prompt_store if config_override is not None: self.config = config_override else: @@ -39,58 +43,7 @@ async def get_feedback( openai_kwargs = self.config.dict() guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) - grader = guidance.Program( - """ - {{#system~}} - You are a helpful instructor, who knows that students need precise and terse feedback. Students are most motivated if you are engaging and remain positive, but it is more important to be honest and accurate than cheerful. - {{~/system}} - - {{#user~}} - The student has been given the following prompt by the instructor: - - ---- - {{prompt}} - ---- - - The secret, grader-only criteria for grading are: - ---- - {{criteria}} - ---- - - Please give your OWN answer to the prompt: - - {{~/user}} - - {{#assistant~}} - {{gen '_machine_answer'}} - {{~/assistant}} - - {{#user~}} - The complete student response is as follows: - ---- - - {{response}} - - ---- - - Thinking about the differences between your answer and the student's, provide your feedback to the student as a bulleted list indicating both what the student got right and what they got wrong. Give details about what they are missing or misunderstood, and mention points they overlooked, if any. - - Do not instruct the student to review the criteria, as this is not provided to the student. Write to the student using "you" in the second person. The student will not see your answer to the prompt, so do not refer to it. - - Be particularly mindful of scientific rigor issues including confusing correlation with causation, biases, and logical fallacies. 
You must also correct factual errors using your extensive domain knowledge, even if the errors are subtle or minor. - - Do not say "Keep up the good work" or other encouragement "fluff." Write only the response to the student; do not write any other text. - - {{audience_caveat}} - - {{fact_check_caveat}} - {{~/user}} - - {{#assistant~}} - {{gen 'feedback'}} - {{~/assistant}} - """ - ) + grader = guidance.Program(self.prompts.get_prompt("grader.feedback")) response = submission.submission_string feedback = grader( @@ -131,45 +84,7 @@ async def suggest_criteria(self, assignment: Assignment) -> List[str]: openai_kwargs = self.config.dict() guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) - grader = guidance.Program( - """ - {{#system~}} - You are a helpful instructor, who knows that students need precise and terse feedback. - {{~/system}} - - {{#user~}} - The student has been given the following prompt by the instructor: - - ---- - {{prompt}} - ---- - - The secret, grader-only criteria for grading are: - ---- - {{criteria}} - ---- - - Please give your OWN answer to the prompt: - - {{~/user}} - - {{#assistant~}} - {{gen '_machine_answer'}} - {{~/assistant}} - - {{#user~}} - Thinking about the important points that must be addressed in this question, provide a bulleted list of criteria that should be used to grade the student's response. These criteria should be specific and precise, and should be able to be applied to the student's response to determine a grade. You may include the criteria that were provided to the student if you agree with them, or you may modify them or replace them entirely. - - In general, you should provide 3-5 criteria. You can provide fewer if you think that is appropriate. 
- - {{audience_caveat}} - {{~/user}} - - {{#assistant~}} - {{gen 'criteria'}} - {{~/assistant}} - """ - ) + grader = guidance.Program(self.prompts.get_prompt("grader.draft_criteria")) response = assignment.student_prompt criteria = grader( @@ -208,26 +123,7 @@ async def suggest_question(self, assignment: Assignment) -> str: guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) draft_response = guidance.Program( - """ - {{#system~}} - You are a knowledgeable assistant who is working to develop a course. - {{~/system}} - - {{#user~}} - You must answer the following question to the best of your ability. - - ---- - {{prompt}} - ---- - - Please give your OWN answer to this question: - - {{~/user}} - - {{#assistant~}} - {{gen '_machine_answer'}} - {{~/assistant}} - """ + self.prompts.get_prompt("grader.draft_response") ) criteria = draft_response(prompt=assignment.student_prompt) @@ -240,32 +136,7 @@ async def suggest_question(self, assignment: Assignment) -> str: ) question_improver = guidance.Program( - """ - {{#system~}} - You are a knowledgeable instructor who is working to develop a course. - {{~/system}} - - {{#user~}} - A student has been given the following prompt by the instructor: - - ---- - {{prompt}} - ---- - - The student has received the following feedback from the grader: - - ---- - {{feedback}} - ---- - - You are concerned that the student may have been confused by the question. You want to improve the question so that students are less likely to be confused. You should not change the meaning of the question, but you may clarify the question so that the requirements of the grader are more clear. Do not explicitly refer to the feedback in your question. Your question should take the form of a question that a student would be asked. 
- - {{~/user}} - - {{#assistant~}} - {{gen 'improved_question'}} - {{~/assistant}} - """ + self.prompts.get_prompt("grader.improve_question") ) improved_question = question_improver( diff --git a/freetext/server.py b/freetext/server.py index 0c783cb..5b4e7a2 100644 --- a/freetext/server.py +++ b/freetext/server.py @@ -8,6 +8,7 @@ from freetext.assignment_stores import AssignmentStore, create_assignment_store from freetext.response_stores import ResponseStore, create_response_store +from freetext.prompt_stores import PromptStore, create_prompt_store from .config import ApplicationSettings from .feedback_providers.FeedbackProvider import FeedbackProvider from .feedback_providers.OpenAIFeedbackProvider import OpenAIChatBasedFeedbackProvider @@ -106,7 +107,11 @@ def get_commons(): feedback_router=FeedbackRouter( assignment_store=create_assignment_store(config.assignment_store), response_store=create_response_store(config.response_store), - feedback_providers=[OpenAIChatBasedFeedbackProvider()], + feedback_providers=[ + OpenAIChatBasedFeedbackProvider( + create_prompt_store(config.prompt_store) + ) + ], ) ) diff --git a/prompts/default/grader/draft_criteria.txt b/prompts/default/grader/draft_criteria.txt new file mode 100644 index 0000000..d95e971 --- /dev/null +++ b/prompts/default/grader/draft_criteria.txt @@ -0,0 +1,35 @@ +{{#system~}} +You are a helpful instructor, who knows that students need precise and terse feedback. +{{~/system}} + +{{#user~}} +The student has been given the following prompt by the instructor: + +---- +{{prompt}} +---- + +The secret, grader-only criteria for grading are: +---- +{{criteria}} +---- + +Please give your OWN answer to the prompt: + +{{~/user}} + +{{#assistant~}} +{{gen '_machine_answer'}} +{{~/assistant}} + +{{#user~}} +Thinking about the important points that must be addressed in this question, provide a bulleted list of criteria that should be used to grade the student's response. 
These criteria should be specific and precise, and should be able to be applied to the student's response to determine a grade. You may include the criteria that were provided to the student if you agree with them, or you may modify them or replace them entirely. + +In general, you should provide 3-5 criteria. You can provide fewer if you think that is appropriate. + +{{audience_caveat}} +{{~/user}} + +{{#assistant~}} +{{gen 'criteria'}} +{{~/assistant}} \ No newline at end of file diff --git a/prompts/default/grader/draft_response.txt b/prompts/default/grader/draft_response.txt new file mode 100644 index 0000000..adb536b --- /dev/null +++ b/prompts/default/grader/draft_response.txt @@ -0,0 +1,18 @@ +{{#system~}} +You are a knowledgeable assistant who is working to develop a course. +{{~/system}} + +{{#user~}} +You must answer the following question to the best of your ability. + +---- +{{prompt}} +---- + +Please give your OWN answer to this question: + +{{~/user}} + +{{#assistant~}} +{{gen '_machine_answer'}} +{{~/assistant}} \ No newline at end of file diff --git a/prompts/default/grader/feedback.txt b/prompts/default/grader/feedback.txt new file mode 100644 index 0000000..0c9d8cc --- /dev/null +++ b/prompts/default/grader/feedback.txt @@ -0,0 +1,48 @@ +{{#system~}} +You are a helpful instructor, who knows that students need precise and terse feedback. Students are most motivated if you are engaging and remain positive, but it is more important to be honest and accurate than cheerful. 
+{{~/system}} + +{{#user~}} +The student has been given the following prompt by the instructor: + +---- +{{prompt}} +---- + +The secret, grader-only criteria for grading are: +---- +{{criteria}} +---- + +Please give your OWN answer to the prompt: + +{{~/user}} + +{{#assistant~}} +{{gen '_machine_answer'}} +{{~/assistant}} + +{{#user~}} +The complete student response is as follows: +---- + +{{response}} + +---- + +Thinking about the differences between your answer and the student's, provide your feedback to the student as a bulleted list indicating both what the student got right and what they got wrong. Give details about what they are missing or misunderstood, and mention points they overlooked, if any. + +Do not instruct the student to review the criteria, as this is not provided to the student. Write to the student using "you" in the second person. The student will not see your answer to the prompt, so do not refer to it. + +Be particularly mindful of scientific rigor issues including confusing correlation with causation, biases, and logical fallacies. You must also correct factual errors using your extensive domain knowledge, even if the errors are subtle or minor. + +Do not say "Keep up the good work" or other encouragement "fluff." Write only the response to the student; do not write any other text. + +{{audience_caveat}} + +{{fact_check_caveat}} +{{~/user}} + +{{#assistant~}} +{{gen 'feedback'}} +{{~/assistant}} \ No newline at end of file diff --git a/prompts/default/grader/improve_question.txt b/prompts/default/grader/improve_question.txt new file mode 100644 index 0000000..65004ef --- /dev/null +++ b/prompts/default/grader/improve_question.txt @@ -0,0 +1,24 @@ +{{#system~}} +You are a knowledgeable instructor who is working to develop a course. 
+{{~/system}} + +{{#user~}} +A student has been given the following prompt by the instructor: + +---- +{{prompt}} +---- + +The student has received the following feedback from the grader: + +---- +{{feedback}} +---- + +You are concerned that the student may have been confused by the question. You want to improve the question so that students are less likely to be confused. You should not change the meaning of the question, but you may clarify the question so that the requirements of the grader are more clear. Do not explicitly refer to the feedback in your question. Your question should take the form of a question that a student would be asked. + +{{~/user}} + +{{#assistant~}} +{{gen 'improved_question'}} +{{~/assistant}} \ No newline at end of file From d4d6c698dfb9e1096c48d51eed91dca2c669ae4d Mon Sep 17 00:00:00 2001 From: wrongu Date: Sat, 11 Nov 2023 14:29:25 -0500 Subject: [PATCH 7/7] Configurable GPT model. Also moved some more code prompts to txt file prompts. --- freetext/config.example.py | 1 + .../feedback_providers/OpenAIFeedbackProvider.py | 12 +----------- prompts/default/grader/feedback.txt | 4 +--- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/freetext/config.example.py b/freetext/config.example.py index f00df0d..98a15ff 100644 --- a/freetext/config.example.py +++ b/freetext/config.example.py @@ -7,6 +7,7 @@ class OpenAIConfig(BaseSettings): token: str = "sk-###" organization: str = "org-###" + model: str = "gpt-3.5-turbo" class ApplicationSettings(BaseSettings): diff --git a/freetext/feedback_providers/OpenAIFeedbackProvider.py b/freetext/feedback_providers/OpenAIFeedbackProvider.py index 0d05288..739e029 100644 --- a/freetext/feedback_providers/OpenAIFeedbackProvider.py +++ b/freetext/feedback_providers/OpenAIFeedbackProvider.py @@ -25,6 +25,7 @@ def __init__( self.config = config_override else: self.config = OpenAIConfig() + guidance.llm = guidance.llms.OpenAI(**self.config.dict()) async def get_feedback( self, submission: 
Submission, assignment: Assignment @@ -40,9 +41,6 @@ async def get_feedback( # set the default language model used to execute guidance programs try: - openai_kwargs = self.config.dict() - guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) - grader = guidance.Program(self.prompts.get_prompt("grader.feedback")) response = submission.submission_string @@ -52,8 +50,6 @@ async def get_feedback( criteria="\n".join( [f" * {f}" for f in assignment.feedback_requirements] ), - audience_caveat="", # You should provide feedback keeping in mind that the student is a Graduate Student and should be graded accordingly. - fact_check_caveat="You should also fact-check the student's response. If the student's response is factually incorrect, you should provide feedback on the incorrect statements.", ) return [ @@ -81,9 +77,6 @@ async def suggest_criteria(self, assignment: Assignment) -> List[str]: """ try: - openai_kwargs = self.config.dict() - guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) - grader = guidance.Program(self.prompts.get_prompt("grader.draft_criteria")) response = assignment.student_prompt @@ -119,9 +112,6 @@ async def suggest_question(self, assignment: Assignment) -> str: """ try: - openai_kwargs = self.config.dict() - guidance.llm = guidance.llms.OpenAI("gpt-3.5-turbo", **openai_kwargs) - draft_response = guidance.Program( self.prompts.get_prompt("grader.draft_response") ) diff --git a/prompts/default/grader/feedback.txt b/prompts/default/grader/feedback.txt index 0c9d8cc..fab51c9 100644 --- a/prompts/default/grader/feedback.txt +++ b/prompts/default/grader/feedback.txt @@ -38,9 +38,7 @@ Be particularly mindful of scientific rigor issues including confusing correlati Do not say "Keep up the good work" or other encouragement "fluff." Write only the response to the student; do not write any other text. -{{audience_caveat}} - -{{fact_check_caveat}} +You should also fact-check the student's response. 
If the student's response is factually incorrect, you should provide feedback on the incorrect statements. {{~/user}} {{#assistant~}}