@@ -1,25 +1,7 @@
 from unitxt.benchmark import Benchmark
 from unitxt.catalog import add_to_catalog
 from unitxt.standard import DatasetRecipe
-from unitxt.templates import MultipleChoiceTemplate, MultiReferenceTemplate
 
-ai2d_llama_vision_template = MultipleChoiceTemplate(
-    input_format="{context} Look at the scientific diagram carefully and answer the following question: {question}\n{choices}\nRespond only with the correct option digit.",
-    choices_separator="\n",
-    target_field="answer",
-    enumerator="capitals",
-)
-doc_vqa_llama_vision_template = MultiReferenceTemplate(
-    input_format="{context} Read the text in the image carefully and answer the question with the text as seen exactly in the image."
-    " For yes/no questions, just respond Yes or No. If the answer is numeric, just respond with the number and nothing else. "
-    "If the answer has multiple words, just respond with the words and absolutely nothing else. Never respond in a sentence or a phrase.\n Question: {question}",
-    references_field="answers",
-)
-chart_qa_llama_vision_template = MultiReferenceTemplate(
-    input_format="{context} {question}\nAnswer the question with a single word.",
-    references_field="answers",
-    __description__="lmms-evals default template for chartqa.",
-)
 benchmark = Benchmark(
     subsets={
         "doc_vqa_default": DatasetRecipe(
@@ -39,22 +21,22 @@
         ),
         "doc_vqa_llama_vision_template": DatasetRecipe(
             card="cards.doc_vqa.lmms_eval",
-            template=doc_vqa_llama_vision_template,
+            template="templates.qa.llama_vision.with_context.doc_vqa",
             format="formats.chat_api",
         ),
         "info_vqa_llama_vision_template": DatasetRecipe(
             card="cards.info_vqa_lmms_eval",
-            template=doc_vqa_llama_vision_template,
+            template="templates.qa.llama_vision.with_context.info_vqa",
             format="formats.chat_api",
         ),
         "chart_qa_llama_vision_template": DatasetRecipe(
             card="cards.chart_qa_lmms_eval",
-            template=chart_qa_llama_vision_template,
+            template="templates.qa.llama_vision.with_context.chart_qa",
             format="formats.chat_api",
         ),
         "ai2d_llama_vision_template": DatasetRecipe(
             card="cards.ai2d",
-            template=ai2d_llama_vision_template,
+            template="templates.qa.llama_vision.multiple_choice.with_context.ai2d",
             format="formats.chat_api",
         ),
     },
|
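Note: this diff replaces the inline template objects with references to entries in the unitxt catalog. A minimal sketch of how such an entry could be registered, reusing the chart_qa template removed above; the separate catalog-preparation script and the overwrite flag are assumptions, as the registration code is not part of this commit:

from unitxt.catalog import add_to_catalog
from unitxt.templates import MultiReferenceTemplate

# Same template object that this commit removed from the benchmark file.
chart_qa_llama_vision_template = MultiReferenceTemplate(
    input_format="{context} {question}\nAnswer the question with a single word.",
    references_field="answers",
    __description__="lmms-evals default template for chartqa.",
)

# Register it under the catalog name the benchmark now references.
add_to_catalog(
    chart_qa_llama_vision_template,
    "templates.qa.llama_vision.with_context.chart_qa",
    overwrite=True,  # assumption: lets the preparation script be re-run
)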