Skip to content

Commit

Permalink
[apps/browser] improve window size and element selection
Browse files Browse the repository at this point in the history
  • Loading branch information
javierluraschi committed Sep 13, 2024
1 parent ec7e3ff commit b2bd708
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 113 deletions.
8 changes: 3 additions & 5 deletions apps/browser/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@
from siteuse import site_use

async def take_screenshot(page):
    """Capture the page to ``screenshot.png`` and archive a timestamped copy.

    Waits a fixed settle delay, asks ``page`` to write ``screenshot.png`` in
    the current working directory, then copies that file to
    ``storage/screenshot-<unix-seconds>.png``.

    Args:
        page: Object exposing an awaitable ``screenshot(options)`` method
            (presumably a pyppeteer ``Page`` -- confirm against the caller).
    """
    import os  # local import: the file's top-level import block is not visible here

    # One 3 s settle delay. The commit view lists both the old 2 s and the
    # new 3 s sleeps; only the new value is meant to apply.
    await asyncio.sleep(3)
    await page.screenshot({'path': "screenshot.png"})

    # shutil.copy raises FileNotFoundError when the target directory is missing.
    os.makedirs("storage", exist_ok=True)
    shutil.copy("screenshot.png", f"storage/screenshot-{int(time.time())}.png")

async def extract_elements(page):
    """Run the ``extract.js`` script in the page and return its result.

    The script is read from ``extract.js`` relative to the current working
    directory and passed as text to ``page.evaluate``.

    Args:
        page: Object exposing an awaitable ``evaluate(script)`` method
            (presumably a pyppeteer ``Page`` -- confirm against the caller).

    Returns:
        Whatever ``page.evaluate`` yields for the script.

    Raises:
        OSError: If ``extract.js`` cannot be opened or read.
    """
    # Context manager so the file handle is closed even if read() fails.
    with open('extract.js', 'r', encoding='utf-8') as script_file:
        extract_js = script_file.read()

    return await page.evaluate(extract_js)

def wrap_in_async_function(code):
indented_code = "\n".join(" " + line for line in code.splitlines() if line.strip()) # Indent each line by 4 spaces
Expand All @@ -31,7 +29,7 @@ async def main():
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
browser = await pyppeteer.launch(
headless=True,
args=['--no-sandbox', '--disable-setuid-sandbox']
args=['--no-sandbox', '--disable-setuid-sandbox', f"--window-size=1470,832"]
)

page = await browser.newPage()
Expand Down
217 changes: 112 additions & 105 deletions apps/browser/extract.js
Original file line number Diff line number Diff line change
@@ -1,123 +1,130 @@
() => {
    // This file is a single arrow-function expression: app.py reads it as
    // text and hands it to page.evaluate(). Documentation is kept inside the
    // body so the text still begins with the function itself.
    //
    // Returns an array of { query, text } objects, one per focusable element
    // that has non-empty text, ordered by ascending tabindex value.

    const FOCUSABLE_TAGS = ['A', 'BUTTON', 'INPUT', 'SELECT', 'TEXTAREA', 'IFRAME', 'AREA', 'SUMMARY'];

    // Trimmed string, or '' for anything that is not a string (null, numbers
    // such as <progress>.value, objects).
    function trimmed(value) {
        return typeof value === 'string' ? value.trim() : '';
    }

    // Numeric tabindex of an element; 0 when the attribute is missing or is
    // not a number, so the sort comparator never sees NaN.
    function tabIndexOf(element) {
        const parsed = parseInt(element.getAttribute('tabindex'), 10);
        return Number.isNaN(parsed) ? 0 : parsed;
    }

    // True when the element has a non-negative tabindex, is an enabled
    // natively focusable tag, or is content-editable. A natively focusable
    // tag still qualifies when its tabindex is negative.
    function isFocusable(element) {
        const tabindex = element.getAttribute('tabindex');
        if (tabindex !== null && parseInt(tabindex, 10) >= 0) {
            return true;
        }

        if (FOCUSABLE_TAGS.includes(element.tagName) && !element.disabled) {
            return true;
        }

        // contenteditable="false" explicitly turns editing off.
        const editable = element.getAttribute('contenteditable');
        return editable !== null && editable.trim().toLowerCase() !== 'false';
    }

    // "tag.class1.class2" for one element. Classes are read from the
    // attribute because element.className is not a string on SVG elements,
    // and escaped so names such as "md:flex" stay valid in a selector.
    function describeElement(element) {
        const classNames = trimmed(element.getAttribute('class'))
            .split(/\s+/)
            .filter(Boolean)
            .map((name) => CSS.escape(name));
        return [element.tagName.toLowerCase(), ...classNames].join('.');
    }

    // Build a CSS selector for an element: "tag#id" when it has an id,
    // otherwise a "parent > child" chain covering at most `levels` elements
    // (default 2: the element and its parent). The chain is not guaranteed
    // to match only this element.
    function generateSelector(element, levels) {
        if (element.id) {
            return `${element.tagName.toLowerCase()}#${CSS.escape(element.id)}`;
        }

        const depth = levels ?? 2;
        const path = [];
        let current = element;
        for (let i = 0; i < depth && current && current.nodeType === Node.ELEMENT_NODE; i++) {
            path.unshift(describeElement(current));
            current = current.parentNode; // stops at the document node
        }
        return path.join(' > ');
    }

    // First non-empty text found for an element, trying in order: innerText,
    // value, aria-label, alt, title, a <label for="id"> (inputs only), and
    // finally the nearest ancestor <label> that has text. '' when none apply.
    function getElementText(el) {
        const direct =
            trimmed(el.innerText) ||
            trimmed(el.value) ||
            trimmed(el.getAttribute('aria-label')) ||
            trimmed(el.getAttribute('alt')) ||
            trimmed(el.getAttribute('title'));
        if (direct) {
            return direct;
        }

        if (el.tagName === 'INPUT' && el.id) {
            const label = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
            const labelText = label ? trimmed(label.innerText) : '';
            if (labelText) {
                return labelText;
            }
        }

        for (let parent = el.parentElement; parent; parent = parent.parentElement) {
            if (parent.tagName === 'LABEL') {
                const labelText = trimmed(parent.innerText);
                if (labelText) {
                    return labelText;
                }
            }
        }

        return '';
    }

    // Every focusable element in the document, sorted by tabindex value.
    const focusable = Array.from(document.querySelectorAll('*')).filter(isFocusable);
    focusable.sort((a, b) => tabIndexOf(a) - tabIndexOf(b));

    // Elements without any text are dropped: the consumer keys on the text.
    return focusable
        .map((el) => ({ query: generateSelector(el), text: getElementText(el) }))
        .filter((entry) => entry.text !== '');
}
9 changes: 6 additions & 3 deletions apps/browser/siteuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,24 @@
"""

def site_use(prompt, current, elements):
    """Ask the chat model how to act on a page, given its extracted elements.

    Builds a user message holding the current URL, one ``text: selector``
    line per element and the user's request, sends it together with the
    module-level ``system_prompt`` to the ``gpt-4`` chat model, and passes
    the reply through ``h9.extract``.

    Args:
        prompt: The user's request, inserted verbatim into the message.
        current: URL of the page, inserted verbatim into the message.
        elements: Iterable of mappings with ``'text'`` and ``'query'`` keys
            (the shape produced by ``extract.js``).

    Returns:
        The result of ``h9.extract(content, language="*")`` when it is
        non-empty; otherwise the model's raw reply text.
    """
    # One element per line. Whitespace runs (including newlines inside the
    # element text) are collapsed so a single element cannot span lines.
    element_lines = []
    for item in elements:
        label = " ".join(str(item['text']).split())
        element_lines.append(f"{label}: {item['query']}")
    elements_str = "\n".join(element_lines)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"""
Page is in URL: {current}.
The following lines contains all the text elements in the page and their query selectors to use, first the clickable text the selector.
{elements_str}
User requests: {prompt}
"""},
    ]

    completion = OpenAI().chat.completions.create(model="gpt-4", messages=messages)
    content = completion.choices[0].message.content
    extracted = h9.extract(content, language="*")

    # Fall back to the raw reply when nothing could be extracted from it.
    if not extracted:
        return content

    return extracted

0 comments on commit b2bd708

Please sign in to comment.