Skip to content

Commit

Permalink
[apps/browser] use pyppeteer for stability and speed
Browse files Browse the repository at this point in the history
  • Loading branch information
javierluraschi committed Sep 10, 2024
1 parent 53491fd commit 7f5bf7c
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 45 deletions.
74 changes: 39 additions & 35 deletions apps/browser/app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException
import pyppeteer
import asyncio

import json
import hal9 as h9
Expand All @@ -14,41 +10,49 @@
from sitefind import site_find
from siteuse import site_use

def take_screenshot():
time.sleep(2)
driver.save_screenshot("screenshot.png")
async def take_screenshot(page, step):
    """Capture the current page and archive a timestamped copy.

    Waits two seconds, writes the screenshot to ``screenshot.png`` in the
    working directory, then copies it into ``storage/`` under a name that
    embeds the current Unix time in whole seconds.

    Args:
        page: object exposing an awaitable ``screenshot(options)`` method
            (a pyppeteer page in the calling code).
        step: step number supplied by the caller; not used by this function.
    """
    latest = "screenshot.png"

    # Pause before capturing — presumably to let the page settle; confirm.
    await asyncio.sleep(2)
    await page.screenshot({'path': latest})

    # Keep a per-capture copy so later screenshots do not overwrite history.
    archived = f"storage/screenshot-{int(time.time())}.png"
    shutil.copy(latest, archived)

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
chrome_options.add_argument(f"user-agent={custom_user_agent}")
def wrap_in_async_function(code):
    """Wrap a Python snippet in ``async def dynamic_async_func(page)``.

    The returned source can be passed to ``exec`` to define a coroutine
    function named ``dynamic_async_func`` whose single parameter is ``page``,
    so the snippet may use ``await`` and refer to ``page`` directly.

    Args:
        code: Python source text to place in the function body.

    Returns:
        Source text of the wrapping ``async def``. Whitespace-only lines of
        ``code`` are dropped and every remaining line is indented by 4 spaces.
    """
    body_lines = ["    " + line for line in code.splitlines() if line.strip()]
    # A function body needs at least one statement. Without this no-op an
    # empty or comment-only snippet (e.g. "# No code generated") would make
    # the generated source fail to compile.
    body_lines.append("    pass")
    indented_code = "\n".join(body_lines)
    return f"async def dynamic_async_func(page):\n{indented_code}"

driver = webdriver.Chrome(options=chrome_options)
async def main():
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
browser = await pyppeteer.launch()
page = await browser.newPage()

prompt = h9.input()
site = site_find(prompt)
await page.setUserAgent(custom_user_agent)

print(f"Navigating to {site}")
driver.get(site)
# Get the input and find the site
prompt = h9.input()
site = site_find(prompt)

for i in range(1, 5):
code = "# No code generated"
try:
code = site_use(prompt, driver.current_url)
exec(code)
take_screenshot()
except WebDriverException as e:
print(f"Failed to use browser, driver details follow.\n```\n{code}\n```\n\n```\n{e}\n```\n")
print(f"Available Memory: {(psutil.virtual_memory().available/ (1024 ** 2)):.2f} MB")
except Exception as e:
print(f"Failed to use browser, details follow.\n```\n{code}\n```\n\n```\n{e}\n```\n")
print(f"Navigating to {site}")
await page.goto(site)

prompt = h9.input(f"Taking screenshot for step {i}/5, what next?")
for i in range(1, 5):
code = "# No code generated"
try:
code = site_use(prompt, page.url)
wrapped_code = wrap_in_async_function(code)
local_vars = {}
exec(wrapped_code, {}, local_vars)
await local_vars['dynamic_async_func'](page)

driver.quit()
await take_screenshot(page, i)
except Exception as e:
print(f"Failed to use browser, details follow.\n```\n{code}\n```\n\n```\n{e}\n```\n")
print(f"Available Memory: {(psutil.virtual_memory().available/ (1024 ** 2)):.2f} MB")

print("Five tasks completed, this browser session is restarting.")
print("🌐 I can browse the web, how can I help?")
prompt = h9.input(f"Taking screenshot for step {i}/5, what next?")

await browser.close()

print("Five tasks completed, this browser session is restarting.")
print("🌐 I can browse the web, how can I help?")

asyncio.get_event_loop().run_until_complete(main())
2 changes: 1 addition & 1 deletion apps/browser/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
selenium
pyppeteer
psutil
16 changes: 7 additions & 9 deletions apps/browser/siteuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
from openai import OpenAI

system_prompt = """
Only write python code using selenium to perform the user request. The code will be run dynamically with eval().
Only write python code using pyppeteer to perform the user request. The code will be run dynamically with eval().
The driver already stored as a driver variable.
The page is already stored as a page variable that you can use.
The following includes have been defined:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
The following code has already been executed:
import pyppeteer
browser = await pyppeteer.launch()
page = await browser.newPage()
At the beginning of the code, use print() to communicate what the code will do.
Only reply with a code block for python code.
Expand All @@ -20,7 +18,7 @@
def site_use(prompt, current):
messages = [
{ "role": "system", "content": system_prompt},
{ "role": "user", "content": f"Driver alredy in page {current}. User requests: {prompt}" }
{ "role": "user", "content": f"Page alredy in page {current}. User requests: {prompt}" }
]
completion = OpenAI().chat.completions.create(model = "gpt-4", messages = messages)
content = completion.choices[0].message.content
Expand Down

0 comments on commit 7f5bf7c

Please sign in to comment.