Skip to content

Commit

Permalink
Merge pull request #112 from JECSand/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
JECSand authored Jan 10, 2023
2 parents 342d6b9 + a721d81 commit 00e6e19
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 85 deletions.
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
1.6 10/18/2020 -- Merged in two pull requests with bug fixes from sylvandb.
1.7 01/01/2023 -- Merged in pull request with fixes from sedwards2000.
1.7 01/01/2023 -- Support for python 2.7 dropped.
1.8 01/09/2023 -- Improved decryption support
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ A python module that returns stock, cryptocurrency, forex, mutual fund, commodit
.. image:: https://github.com/JECSand/yahoofinancials/actions/workflows/test.yml/badge.svg?branch=master
:target: https://github.com/JECSand/yahoofinancials/actions/workflows/test.yml

Current Version: v1.7
Current Version: v1.8

Version Released: 01/01/2023
Version Released: 01/09/2023

Report any bugs by opening an issue here: https://github.com/JECSand/yahoofinancials/issues

Expand All @@ -22,7 +22,7 @@ A powerful financial data module used for pulling both fundamental and technical
Installation
-------------
- yahoofinancials runs on Python 3.6, 3.7, 3.8, 3.9, and 3.10.
- This package depends on beautifulsoup4, pytz, and pycryptodome to work.
- This package depends on beautifulsoup4, pytz, and cryptography to work.

1. Installation using pip:

Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@

setup(
name='yahoofinancials',
version='1.7',
version='1.8',
description='A powerful financial data module used for pulling both fundamental and technical data from Yahoo Finance',
long_description=long_description,
url='https://github.com/JECSand/yahoofinancials',
download_url='https://github.com/JECSand/yahoofinancials/archive/1.7.tar.gz',
download_url='https://github.com/JECSand/yahoofinancials/archive/1.8.tar.gz',
author='Connor Sanders',
author_email='[email protected]',
license='MIT',
keywords=['finance data', 'stocks', 'commodities', 'cryptocurrencies', 'currencies', 'forex', 'yahoo finance'],
packages=['yahoofinancials'],
install_requires=[
"beautifulsoup4",
"beautifulsoup4>=4.11.1",
"pytz",
"pycryptodome"
"cryptography>=3.3.2"
],
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down
145 changes: 67 additions & 78 deletions yahoofinancials/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
==============================
The Yahoo Financials Module
Version: 1.7
Version: 1.8
==============================
Author: Connor Sanders
Email: [email protected]
Version Released: 01/01/2023
Version Released: 01/09/2023
Tested on Python 3.6, 3.7, 3.8, 3.9, and 3.10
Copyright (c) 2023 Connor Sanders
Expand Down Expand Up @@ -55,13 +55,13 @@
from urllib import FancyURLopener
except:
from urllib.request import FancyURLopener

import hashlib
from base64 import b64decode
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
import json


# track the last get timestamp to add a minimum delay between gets - be nice!
_lastget = 0

Expand All @@ -76,6 +76,62 @@ class UrlOpener(FancyURLopener):
version = 'w3m/0.5.3+git20180125'


def decrypt_cryptojs_aes(data):
encrypted_stores = data['context']['dispatcher']['stores']
_cs = data["_cs"]
_cr = data["_cr"]
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
salt = encrypted_stores[8:16]
encrypted_stores = encrypted_stores[16:]

def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
"""OpenSSL EVP Key Derivation Function
Args:
password (Union[str, bytes, bytearray]): Password to generate key from.
salt (Union[bytes, bytearray]): Salt to use.
keySize (int, optional): Output key length in bytes. Defaults to 32.
ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
iterations (int, optional): Number of iterations to perform. Defaults to 1.
hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
Returns:
key, iv: Derived key and Initialization Vector (IV) bytes.
Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
"""
assert iterations > 0, "Iterations can not be less than 1."
if isinstance(password, str):
password = password.encode("utf-8")
final_length = keySize + ivSize
key_iv = b""
block = None
while len(key_iv) < final_length:
hasher = hashlib.new(hashAlgorithm)
if block:
hasher.update(block)
hasher.update(password)
hasher.update(salt)
block = hasher.digest()
for _ in range(1, iterations):
block = hashlib.new(hashAlgorithm, block).digest()
key_iv += block
key, iv = key_iv[:keySize], key_iv[keySize:final_length]
return key, iv

key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
decryptor = cipher.decryptor()
plaintext = decryptor.update(encrypted_stores) + decryptor.finalize()
unpadder = padding.PKCS7(128).unpadder()
plaintext = unpadder.update(plaintext) + unpadder.finalize()
plaintext = plaintext.decode("utf-8")
decoded_stores = json.loads(plaintext)
return decoded_stores


# Class containing Yahoo Finance ETL Functionality
class YahooFinanceETL(object):

Expand Down Expand Up @@ -168,7 +224,11 @@ def _scrape_data(self, url, tech_type, statement_type):
raise ManagedException("Server replied with HTTP " + str(response.getcode()) +
" code while opening the url: " + str(url))
data = self._cache[url]
data = self._decryptData(data)
if "_cs" in data and "_cr" in data:
data = decrypt_cryptojs_aes(data)
if "context" in data and "dispatcher" in data["context"]:
# Keep old code, just in case
data = data['context']['dispatcher']['stores']
if tech_type == '' and statement_type != 'history':
stores = data["QuoteSummaryStore"]
elif tech_type != '' and statement_type != 'history':
Expand All @@ -177,74 +237,6 @@ def _scrape_data(self, url, tech_type, statement_type):
stores = data["HistoricalPriceStore"]
return stores

def _decryptData(self,data):
#function taken from another package at https://github.com/ranaroussi/yfinance/pull/1253/commits/8e5f0984af347afda6be74b27a989422e49a975b
encrypted_stores = data['context']['dispatcher']['stores']
_cs = data["_cs"]
_cr = data["_cr"]

_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
salt = encrypted_stores[8:16]
encrypted_stores = encrypted_stores[16:]

def EVPKDF(
password,
salt,
keySize=32,
ivSize=16,
iterations=1,
hashAlgorithm="md5",
) -> tuple:
"""OpenSSL EVP Key Derivation Function
Args:
password (Union[str, bytes, bytearray]): Password to generate key from.
salt (Union[bytes, bytearray]): Salt to use.
keySize (int, optional): Output key length in bytes. Defaults to 32.
ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
iterations (int, optional): Number of iterations to perform. Defaults to 1.
hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
Returns:
key, iv: Derived key and Initialization Vector (IV) bytes.
Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
"""

assert iterations > 0, "Iterations can not be less than 1."

if isinstance(password, str):
password = password.encode("utf-8")

final_length = keySize + ivSize
key_iv = b""
block = None

while len(key_iv) < final_length:
hasher = hashlib.new(hashAlgorithm)
if block:
hasher.update(block)
hasher.update(password)
hasher.update(salt)
block = hasher.digest()
for _ in range(1, iterations):
block = hashlib.new(hashAlgorithm, block).digest()
key_iv += block

key, iv = key_iv[:keySize], key_iv[keySize:final_length]
return key, iv

key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")

cipher = AES.new(key, AES.MODE_CBC, iv=iv)
plaintext = cipher.decrypt(encrypted_stores)
plaintext = unpad(plaintext, 16, style="pkcs7")
decoded_stores = json.loads(plaintext)

return decoded_stores

# Private static method to determine if a numerical value is in the data object being cleaned
@staticmethod
def _determine_numeric_value(value_dict):
Expand All @@ -260,7 +252,7 @@ def _format_time(self, in_time):
utc_dt = self._convert_to_utc(form_date_time)
return utc_dt

# Private method to return the a sub dictionary entry for the earning report cleaning
# Private method to return a sub dictionary entry for the earning report cleaning
def _get_cleaned_sub_dict_ent(self, key, val_list):
sub_list = []
for rec in val_list:
Expand Down Expand Up @@ -325,9 +317,6 @@ def _clean_reports(self, raw_data):
dict_ent = {k: formatted_date}
elif v is None or isinstance(v, str) or isinstance(v, int) or isinstance(v, float):
dict_ent = {k: v}
# Python 2 and Unicode
elif sys.version_info < (3, 0) and isinstance(v, unicode):
dict_ent = {k: v}
else:
numerical_val = self._determine_numeric_value(v)
dict_ent = {k: numerical_val}
Expand Down

0 comments on commit 00e6e19

Please sign in to comment.