✨ Links to links_fetch for government websites

reworkd · Dec 4, 2023 · d9ca790
1 parent f76e5f9
commit d9ca790
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -65,8 +65,9 @@ We have defined a set of test intents that an agent can be evaluated on. These i
 enum in [schemas.py](https://github.com/reworkd/bananalyzer/blob/main/bananalyzer/data/schemas.py).
 
 - **fetch**: The agent must retrieve specific JSON information from the page. This is the most common test type.
-- **links**: The agent must scrape all detail page links from a page. The agent must click a specific element on the
-  page.
+- **links**: The agent must scrape all detail page links from a page.
+- **links_fetch**: The agent must scrape all detail page links from a page and additionally extract JSON information
+  for each link.
 
 # Getting Started
 
@@ -94,9 +95,10 @@ class NullAgentRunner(AgentRunner):
         example: Example,
     ) -> AgentResult:
         page = await context.new_page()
-        await page.goto(example.get_static_url())   # example.url has the real url, example.get_static_url() returns the local mhtml file url
+        await page.goto(
+            example.get_static_url())  # example.url has the real url, example.get_static_url() returns the local mhtml file url
         await asyncio.sleep(0.5)
-        return example.evals[0].expected    # Just return expected output directly so that tests pass
+        return example.evals[0].expected  # Just return expected output directly so that tests pass
 ```
 
 - Run `bananalyze ./tests/banalyzer.py` to run the test suite

diff --git a/bananalyzer/data/schemas.py b/bananalyzer/data/schemas.py
@@ -13,6 +13,7 @@
 GoalType = Literal[
     "fetch",  # Scrape specific JSON information from a single page. Does not require navigation
     "links",  # Scrape all detail page links from a single listing page
+    "links_fetch",  # Scrape all detail page links from a single listing page along with JSON information
     "click",  # Make a single click on a page
     "navigate",  # Travel to a new page
     "search",  # Search for the answer to a specific query

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bananalyzer"
-version = "0.6.12"
+version = "0.6.13"
 
 description = "Open source AI Agent evaluation framework for web tasks 🐒🍌"
 authors = ["asim-shrestha <[email protected]>"]

diff --git a/static/examples.json b/static/examples.json
@@ -3308,7 +3308,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3336,7 +3336,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3364,7 +3364,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3400,7 +3400,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3452,7 +3452,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3474,10 +3474,6 @@
             "name": "Class Licensing & Registration (Amendment) Regulations,2016",
             "url": "https://moitt.gov.pk/SiteImage/Misc/files/Class-Licensing-registration-regulation-2016-adm.pdf"
           },
-          {
-            "name": "No name",
-            "url": "No URL"
-          },
           {
             "name": "Telecom Consumers Protection (Amendment) Regulations, 2012",
             "url": "https://moitt.gov.pk/SiteImage/Misc/files/telecom_consumer_protect_regulation_2012.pdf"
@@ -3528,7 +3524,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3564,7 +3560,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3672,7 +3668,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3872,7 +3868,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -3968,7 +3964,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {
@@ -4048,7 +4044,7 @@
     "source": "mhtml",
     "category": "government",
     "subcategory": "download",
-    "type": "links",
+    "type": "links_fetch",
     "goal": "Fetch all of the links to the pdf documents on the current page",
     "evals": [
       {