Skip to content

Commit

Permalink
Fix remap_column_names (#140)
Browse files Browse the repository at this point in the history
When I run the following, I get an error:

```python
ds = Dataset.from_dict(
    {
        "question": ["question"],
        "answer": ["answer"],
        "contexts": [["context"]],
    }
)

from ragas import evaluate
from ragas.metrics import Faithfulness

evaluate(dataset =ds, metrics=[Faithfulness(batch_size=1)])
```

```
KeyError: "Column ground_truths not in the dataset. Current columns in the dataset: ['question', 'answer', 'contexts']"
```
But `ground_truths` is not needed for `Faithfulness`.

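This PR fixes it: `remap_column_names` now drops any `column_map` entry whose mapped column is not present in the dataset.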
  • Loading branch information
yujonglee authored Sep 15, 2023
1 parent d0bcea1 commit e194caa
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/ragas/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def remap_column_names(dataset: Dataset, column_map: dict[str, str]) -> Dataset:
"""
Remap the column names in case dataset uses different column names
"""
column_map = {k: v for k, v in column_map.items() if v in dataset.column_names}
inverse_column_map = {v: k for k, v in column_map.items()}
return dataset.from_dict(
{inverse_column_map[name]: dataset[name] for name in column_map.values()}
Expand Down
20 changes: 20 additions & 0 deletions tests/unit/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,23 @@ def test_column_remap(column_map):
remapped_dataset = remap_column_names(TEST_DATASET, column_map)

assert remapped_dataset.column_names == list(column_map.keys())


def test_column_remap_omit():
    """
    Mapped columns that are absent from the dataset are omitted from the result
    """
    source = Dataset.from_dict({"query": [""], "answer": [""], "contexts": [[""]]})

    # "ground_truths" is mapped, but the dataset built above has no such column.
    mapping = dict(
        question="query",
        contexts="contexts",
        answer="answer",
        ground_truths="ground_truths",
    )

    expected_columns = ["question", "contexts", "answer"]
    result = remap_column_names(source, mapping)
    assert result.column_names == expected_columns

0 comments on commit e194caa

Please sign in to comment.