Revert "Do not retry requests in case of response with 4xx status code"
JECSand authored Jun 21, 2023
1 parent 8abb764 commit 2482bd0
Showing 4 changed files with 32 additions and 53 deletions.
CHANGES: 3 changes (1 addition & 2 deletions)
@@ -48,5 +48,4 @@
 1.13 02/14/2023 -- Added additional unit tests.
 1.14 02/21/2023 -- Fixed get_ten_day_avg_daily_volume as reported in #137.
 1.14 02/21/2023 -- Removed get_three_month_avg_daily_volume due to value now missing in Yahoo data.
-1.14 02/21/2023 -- Added unit test for get_ten_day_avg_daily_volume
-1.15 04/30/2023 -- Don't retry requests if response code is a client error (4xx).
+1.14 02/21/2023 -- Added unit test for get_ten_day_avg_daily_volume
setup.py: 2 changes (1 addition & 1 deletion)
@@ -10,7 +10,7 @@

 setup(
     name='yahoofinancials',
-    version='1.15',
+    version='1.14',
     description='A powerful financial data module used for pulling both fundamental and technical data from Yahoo Finance',
     long_description=long_description,
     url='https://github.com/JECSand/yahoofinancials',
test/__init__.py: empty file removed
yahoofinancials/etl.py: 80 changes (30 additions & 50 deletions)
@@ -181,28 +181,21 @@ def _construct_url(self, symbol, config, params, freq, request_type):
     # Private method to execute a web scrape request and decrypt the return
     def _request_handler(self, url, res_field=""):
         urlopener = UrlOpener()
-        # Try to open the URL up to 10 times sleeping random time if the server responds with a 5xx code
+        # Try to open the URL up to 10 times sleeping random time if something goes wrong
         max_retry = 10
-
         for i in range(0, max_retry):
             response = urlopener.open(url, proxy=self._get_proxy(), timeout=self.timeout)
-
-            if response.status_code == 200:
-                res_content = response.text
+            if response.status_code != 200:
+                time.sleep(random.randrange(10, 20))
+                response.close()
+            else:
+                res_content = response.text
                 response.close()
                 self._cache[url] = loads(res_content).get(res_field)
                 break
-
-            if 400 <= response.status_code < 500:
-                raise ManagedException("Server replied with client error code, HTTP " + str(response.status_code) +
-                                       " code while opening the url: " + str(url) + " . Not retrying.")
-            if response.status_code >= 500:
-                time.sleep(random.randrange(10, 20))
-                response.close()
-
             if i == max_retry - 1:
                 # Raise a custom exception if we can't get the web page within max_retry attempts
-                raise ManagedException("Server replied with server error code, HTTP " + str(response.status_code) +
+                raise ManagedException("Server replied with HTTP " + str(response.status_code) +
                                        " code while opening the url: " + str(url))

     @staticmethod
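For context, the retry behavior this hunk restores reduces to a small standalone loop. The sketch below is illustrative only: it assumes a requests-style HTTP client in place of the library's UrlOpener, and fetch_with_retry is a hypothetical helper, not part of yahoofinancials. It shows the restored strategy (retry every non-200 response) and marks where the reverted change would instead have failed fast on 4xx.

    import random
    import time

    import requests  # assumption: stands in for the library's UrlOpener


    class ManagedException(Exception):
        """Stand-in for the ManagedException used in yahoofinancials/etl.py."""


    def fetch_with_retry(url, max_retry=10, timeout=30):
        """Hypothetical helper mirroring the restored _request_handler loop."""
        for _ in range(max_retry):
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                return response.text
            # The reverted change raised immediately here when
            # 400 <= status_code < 500; the restored code retries
            # every non-200 response after a random 10-20 second sleep.
            time.sleep(random.randrange(10, 20))
        raise ManagedException("Server replied with HTTP " +
                               str(response.status_code) +
                               " code while opening the url: " + str(url))

One consequence of the restored behavior is that a persistent 404 now costs up to ten attempts and well over a minute of sleeping before the caller sees an error, which is exactly the trade-off the reverted commit had tried to remove.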
@@ -471,16 +464,6 @@ def _recursive_api_request(self, hist_obj, up_ticker, clean=True, i=0):
             return self._recursive_api_request(hist_obj, up_ticker, clean, i)
         elif clean:
             return self._clean_historical_data(re_data, True)
-
-    # Private method, acting as wrapper to call _create_dict_ent() and log exceptions as warnings
-    def _safe_create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj):
-        try:
-            return self._create_dict_ent(up_ticker, statement_type, tech_type, report_name, hist_obj)
-        except ManagedException as e:
-            logging.warning("yahoofinancials ticker: %s error getting %s - %s\n\tContinuing extraction...",
-                            str(up_ticker), str(statement_type), str(e))
-
-            return {up_ticker: None}

     # Private Method to take scrapped data and build a data dictionary with, used by get_stock_data()
     def _create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj):
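The deleted _safe_create_dict_ent was a thin fault-isolation wrapper: it caught ManagedException for a single ticker, logged a warning, and returned a {ticker: None} sentinel so that one failing symbol could not abort a whole batch. A generic sketch of that pattern, under the assumption that per-item work is done by some callable; safe_call and fetch_item are hypothetical names, not library API:

    import logging


    def fetch_item(key):
        """Hypothetical per-item worker; may raise on bad input."""
        if not key:
            raise ValueError("empty key")
        return "data for " + key


    def safe_call(func, key):
        # Mirrors the removed wrapper: isolate the failure, log it,
        # and hand back a {key: None} sentinel instead of raising.
        try:
            return {key: func(key)}
        except Exception as e:  # the original caught only ManagedException
            logging.warning("error getting %s - %s; continuing extraction...",
                            key, e)
            return {key: None}

After this revert, that isolation survives only inline in get_stock_data's sequential branch; the concurrent branch below once again lets a ManagedException from any single ticker propagate out of the worker pool.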
@@ -556,33 +539,30 @@ def get_time_code(self, time_interval):
     # Public Method to get stock data
     def get_stock_data(self, statement_type='income', tech_type='', report_name='', hist_obj={}):
         data = {}
-        tickers = [self.ticker] if isinstance(self.ticker, str) else self.ticker
-
-        if self.concurrent:
-            data = self._get_stock_data_concurrently(tickers, statement_type, tech_type, report_name, hist_obj)
+        if isinstance(self.ticker, str):
+            dict_ent = self._create_dict_ent(self.ticker, statement_type, tech_type, report_name, hist_obj)
+            data.update(dict_ent)
         else:
-            for tick in tickers:
-                dict_ent = self._safe_create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj)
-                if dict_ent[tick]:
-                    data.update(dict_ent)
-
-        return data
-
-    def _get_stock_data_concurrently(self, tickers, statement_type='income', tech_type='', report_name='', hist_obj={}):
-        data = {}
-
-        with Pool(self._get_worker_count()) as pool:
-            dict_ents = pool.map(partial(self._safe_create_dict_ent,
-                                         statement_type=statement_type,
-                                         tech_type=tech_type,
-                                         report_name=report_name,
-                                         hist_obj=hist_obj), tickers)
-            for dict_ent in dict_ents:
-                if dict_ent:
-                    data.update(dict_ent)
-
-        pool.close()
-        pool.join()
+            if self.concurrent:
+                with Pool(self._get_worker_count()) as pool:
+                    dict_ents = pool.map(partial(self._create_dict_ent,
+                                                 statement_type=statement_type,
+                                                 tech_type=tech_type,
+                                                 report_name=report_name,
+                                                 hist_obj=hist_obj), self.ticker)
+                    for dict_ent in dict_ents:
+                        data.update(dict_ent)
+                    pool.close()
+                    pool.join()
+            else:
+                for tick in self.ticker:
+                    try:
+                        dict_ent = self._create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj)
+                        data.update(dict_ent)
+                    except ManagedException:
+                        logging.warning("yahoofinancials ticker: %s error getting %s - %s\n\tContinuing extraction...",
+                                        str(tick), statement_type, str(ManagedException))
+                        continue
         return data

     # Public Method to get technical stock data
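The restored concurrent branch dispatches work with multiprocessing.Pool.map plus functools.partial, which freezes the keyword arguments so that only the ticker varies across worker processes. A minimal sketch of that dispatch pattern, with fetch_one as a hypothetical stand-in for _create_dict_ent:

    from functools import partial
    from multiprocessing import Pool


    def fetch_one(ticker, statement_type='income'):
        """Hypothetical stand-in for _create_dict_ent; returns {ticker: data}."""
        return {ticker: {"statement": statement_type}}


    if __name__ == "__main__":
        tickers = ["AAPL", "MSFT", "WFC"]
        data = {}
        with Pool(4) as pool:
            # partial pins statement_type; map feeds one ticker per call.
            results = pool.map(partial(fetch_one, statement_type="income"),
                               tickers)
        for dict_ent in results:
            data.update(dict_ent)
        print(data)

Worth noting as a design point: pool.map already blocks until every result is back, so the explicit pool.close() and pool.join() inside the with block of the restored code are redundant but harmless, since the context manager calls terminate() on exit regardless.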