-
Notifications
You must be signed in to change notification settings - Fork 215
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #112 from JECSand/development
Development
- Loading branch information
Showing
4 changed files
with
75 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,20 +10,20 @@ | |
|
||
setup( | ||
name='yahoofinancials', | ||
version='1.7', | ||
version='1.8', | ||
description='A powerful financial data module used for pulling both fundamental and technical data from Yahoo Finance', | ||
long_description=long_description, | ||
url='https://github.com/JECSand/yahoofinancials', | ||
download_url='https://github.com/JECSand/yahoofinancials/archive/1.7.tar.gz', | ||
download_url='https://github.com/JECSand/yahoofinancials/archive/1.8.tar.gz', | ||
author='Connor Sanders', | ||
author_email='[email protected]', | ||
license='MIT', | ||
keywords=['finance data', 'stocks', 'commodities', 'cryptocurrencies', 'currencies', 'forex', 'yahoo finance'], | ||
packages=['yahoofinancials'], | ||
install_requires=[ | ||
"beautifulsoup4", | ||
"beautifulsoup4>=4.11.1", | ||
"pytz", | ||
"pycryptodome" | ||
"cryptography>=3.3.2" | ||
], | ||
classifiers=[ | ||
'Development Status :: 5 - Production/Stable', | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
""" | ||
============================== | ||
The Yahoo Financials Module | ||
Version: 1.7 | ||
Version: 1.8 | ||
============================== | ||
Author: Connor Sanders | ||
Email: [email protected] | ||
Version Released: 01/01/2023 | ||
Version Released: 01/09/2023 | ||
Tested on Python 3.6, 3.7, 3.8, 3.9, and 3.10 | ||
Copyright (c) 2023 Connor Sanders | ||
|
@@ -55,13 +55,13 @@ | |
from urllib import FancyURLopener | ||
except: | ||
from urllib.request import FancyURLopener | ||
|
||
import hashlib | ||
from base64 import b64decode | ||
from Crypto.Cipher import AES | ||
from Crypto.Util.Padding import unpad | ||
from cryptography.hazmat.primitives import padding | ||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes | ||
import json | ||
|
||
|
||
# track the last get timestamp to add a minimum delay between gets - be nice! | ||
_lastget = 0 | ||
|
||
|
@@ -76,6 +76,62 @@ class UrlOpener(FancyURLopener): | |
version = 'w3m/0.5.3+git20180125' | ||
|
||
|
||
def decrypt_cryptojs_aes(data): | ||
encrypted_stores = data['context']['dispatcher']['stores'] | ||
_cs = data["_cs"] | ||
_cr = data["_cr"] | ||
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"]) | ||
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex() | ||
encrypted_stores = b64decode(encrypted_stores) | ||
assert encrypted_stores[0:8] == b"Salted__" | ||
salt = encrypted_stores[8:16] | ||
encrypted_stores = encrypted_stores[16:] | ||
|
||
def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple: | ||
"""OpenSSL EVP Key Derivation Function | ||
Args: | ||
password (Union[str, bytes, bytearray]): Password to generate key from. | ||
salt (Union[bytes, bytearray]): Salt to use. | ||
keySize (int, optional): Output key length in bytes. Defaults to 32. | ||
ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16. | ||
iterations (int, optional): Number of iterations to perform. Defaults to 1. | ||
hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'. | ||
Returns: | ||
key, iv: Derived key and Initialization Vector (IV) bytes. | ||
Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3 | ||
OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78 | ||
""" | ||
assert iterations > 0, "Iterations can not be less than 1." | ||
if isinstance(password, str): | ||
password = password.encode("utf-8") | ||
final_length = keySize + ivSize | ||
key_iv = b"" | ||
block = None | ||
while len(key_iv) < final_length: | ||
hasher = hashlib.new(hashAlgorithm) | ||
if block: | ||
hasher.update(block) | ||
hasher.update(password) | ||
hasher.update(salt) | ||
block = hasher.digest() | ||
for _ in range(1, iterations): | ||
block = hashlib.new(hashAlgorithm, block).digest() | ||
key_iv += block | ||
key, iv = key_iv[:keySize], key_iv[keySize:final_length] | ||
return key, iv | ||
|
||
key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") | ||
cipher = Cipher(algorithms.AES(key), modes.CBC(iv)) | ||
decryptor = cipher.decryptor() | ||
plaintext = decryptor.update(encrypted_stores) + decryptor.finalize() | ||
unpadder = padding.PKCS7(128).unpadder() | ||
plaintext = unpadder.update(plaintext) + unpadder.finalize() | ||
plaintext = plaintext.decode("utf-8") | ||
decoded_stores = json.loads(plaintext) | ||
return decoded_stores | ||
|
||
|
||
# Class containing Yahoo Finance ETL Functionality | ||
class YahooFinanceETL(object): | ||
|
||
|
@@ -168,7 +224,11 @@ def _scrape_data(self, url, tech_type, statement_type): | |
raise ManagedException("Server replied with HTTP " + str(response.getcode()) + | ||
" code while opening the url: " + str(url)) | ||
data = self._cache[url] | ||
data = self._decryptData(data) | ||
if "_cs" in data and "_cr" in data: | ||
data = decrypt_cryptojs_aes(data) | ||
if "context" in data and "dispatcher" in data["context"]: | ||
# Keep old code, just in case | ||
data = data['context']['dispatcher']['stores'] | ||
if tech_type == '' and statement_type != 'history': | ||
stores = data["QuoteSummaryStore"] | ||
elif tech_type != '' and statement_type != 'history': | ||
|
@@ -177,74 +237,6 @@ def _scrape_data(self, url, tech_type, statement_type): | |
stores = data["HistoricalPriceStore"] | ||
return stores | ||
|
||
def _decryptData(self,data): | ||
#function taken from another package at https://github.com/ranaroussi/yfinance/pull/1253/commits/8e5f0984af347afda6be74b27a989422e49a975b | ||
encrypted_stores = data['context']['dispatcher']['stores'] | ||
_cs = data["_cs"] | ||
_cr = data["_cr"] | ||
|
||
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"]) | ||
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex() | ||
|
||
encrypted_stores = b64decode(encrypted_stores) | ||
assert encrypted_stores[0:8] == b"Salted__" | ||
salt = encrypted_stores[8:16] | ||
encrypted_stores = encrypted_stores[16:] | ||
|
||
def EVPKDF( | ||
password, | ||
salt, | ||
keySize=32, | ||
ivSize=16, | ||
iterations=1, | ||
hashAlgorithm="md5", | ||
) -> tuple: | ||
"""OpenSSL EVP Key Derivation Function | ||
Args: | ||
password (Union[str, bytes, bytearray]): Password to generate key from. | ||
salt (Union[bytes, bytearray]): Salt to use. | ||
keySize (int, optional): Output key length in bytes. Defaults to 32. | ||
ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16. | ||
iterations (int, optional): Number of iterations to perform. Defaults to 1. | ||
hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'. | ||
Returns: | ||
key, iv: Derived key and Initialization Vector (IV) bytes. | ||
Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3 | ||
OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78 | ||
""" | ||
|
||
assert iterations > 0, "Iterations can not be less than 1." | ||
|
||
if isinstance(password, str): | ||
password = password.encode("utf-8") | ||
|
||
final_length = keySize + ivSize | ||
key_iv = b"" | ||
block = None | ||
|
||
while len(key_iv) < final_length: | ||
hasher = hashlib.new(hashAlgorithm) | ||
if block: | ||
hasher.update(block) | ||
hasher.update(password) | ||
hasher.update(salt) | ||
block = hasher.digest() | ||
for _ in range(1, iterations): | ||
block = hashlib.new(hashAlgorithm, block).digest() | ||
key_iv += block | ||
|
||
key, iv = key_iv[:keySize], key_iv[keySize:final_length] | ||
return key, iv | ||
|
||
key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") | ||
|
||
cipher = AES.new(key, AES.MODE_CBC, iv=iv) | ||
plaintext = cipher.decrypt(encrypted_stores) | ||
plaintext = unpad(plaintext, 16, style="pkcs7") | ||
decoded_stores = json.loads(plaintext) | ||
|
||
return decoded_stores | ||
|
||
# Private static method to determine if a numerical value is in the data object being cleaned | ||
@staticmethod | ||
def _determine_numeric_value(value_dict): | ||
|
@@ -260,7 +252,7 @@ def _format_time(self, in_time): | |
utc_dt = self._convert_to_utc(form_date_time) | ||
return utc_dt | ||
|
||
# Private method to return the a sub dictionary entry for the earning report cleaning | ||
# Private method to return a sub dictionary entry for the earning report cleaning | ||
def _get_cleaned_sub_dict_ent(self, key, val_list): | ||
sub_list = [] | ||
for rec in val_list: | ||
|
@@ -325,9 +317,6 @@ def _clean_reports(self, raw_data): | |
dict_ent = {k: formatted_date} | ||
elif v is None or isinstance(v, str) or isinstance(v, int) or isinstance(v, float): | ||
dict_ent = {k: v} | ||
# Python 2 and Unicode | ||
elif sys.version_info < (3, 0) and isinstance(v, unicode): | ||
dict_ent = {k: v} | ||
else: | ||
numerical_val = self._determine_numeric_value(v) | ||
dict_ent = {k: numerical_val} | ||
|