Skip to content

Commit

Permalink
Use lxml over html.parser
Browse files Browse the repository at this point in the history
  • Loading branch information
JadynHax authored Oct 20, 2020
1 parent d6b753e commit 0f586f0
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions scpscraper/scpscraper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,7 @@ def get_single_scp(scp_id: str) -> BeautifulSoup:
r = urllib.request.urlopen(url=f'http://scp-wiki.wikidot.com/scp-{scp_id}')

# Return the organized content for parsing.
- return BeautifulSoup(r, 'html.parser')
+ return BeautifulSoup(r, 'lxml')

# Error handling.
except Exception as e:
Expand All @@ -34,7 +34,7 @@ def _get_scp_name(scp_id: int) -> str:
# Grab the HTML and parse as needed.
r = urllib.request.urlopen(url=url)
try:
- soup = BeautifulSoup(r, 'html.parser')
+ soup = BeautifulSoup(r, 'lxml')
content = soup.find('div', id='page-content')
list_elements = content.find_all('li')

Expand Down

0 comments on commit 0f586f0

Please sign in to comment.