Skip to content

Commit

Permalink
Update fast_upload.py
Browse files Browse the repository at this point in the history
Added a class to upload large DataFrames to SQL in chunks
  • Loading branch information
jafetcc02 committed Dec 19, 2023
1 parent 020f3c4 commit cee3855
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion pydbsmgr/fast_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,51 @@ def _infer_schema_query(self, datatype: str) -> str:
else:
raise ValueError("Data type could not be inferred!")

class DFChunksToSQL(DataFrameToSQL):
    """Upload a large DataFrame to a SQL table in several smaller pieces.

    The first piece is sent through ``import_table`` and every following
    piece through ``upload_table``; both methods are inherited from
    ``DataFrameToSQL``.
    """

    def __init__(self, connection_string: str) -> None:
        """Set up the database connection through ``DataFrameToSQL``.

        Args:
            connection_string: Passed unchanged to ``DataFrameToSQL.__init__``.
        """
        super().__init__(connection_string)

    @staticmethod
    def _split_rows(df: DataFrame, sections: int) -> list:
        """Split ``df`` row-wise into ``sections`` consecutive, nearly equal pieces."""
        base, extra = divmod(len(df), sections)
        pieces = []
        start = 0
        for position in range(sections):
            # The first ``extra`` pieces carry one additional row, which is
            # the same layout ``numpy.array_split`` produces.
            stop = start + base + (1 if position < extra else 0)
            pieces.append(df.iloc[start:stop])
            start = stop
        return pieces

    def upload_chunks(
        self,
        df: DataFrame,
        table_name: str,
        chunk_size: int,
        overwrite: bool = True,
        char_length: int = 512,
        override_length: bool = True,
        close_cursor: bool = True,
        auto_resolve: bool = True,
    ) -> None:
        """Split ``df`` into chunks and send them to ``table_name`` one by one.

        Args:
            df: DataFrame to upload.
            table_name: Target table, forwarded to ``import_table`` and
                ``upload_table``.
            chunk_size: Despite its name, this is the NUMBER of chunks the
                DataFrame is divided into. It is ignored when ``auto_resolve``
                is true and ``df`` has at least 500,000 rows.
            overwrite: Forwarded to ``import_table`` for the first chunk only.
            char_length: Forwarded to ``import_table``.
            override_length: Forwarded to ``import_table``.
            close_cursor: Forwarded to ``import_table``.
            auto_resolve: When true and ``df`` has at least 500,000 rows, the
                chunks are sized at 1% of the rows instead of using
                ``chunk_size``.

        Raises:
            ValueError: If ``chunk_size`` exceeds the number of rows, or if it
                is smaller than 1 on the path where it is actually used.
        """
        total_rows = len(df)

        # Explicit raise instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable this check.
        if chunk_size > total_rows:
            raise ValueError("chunk_size cant be bigger than length of df, change chunk size")

        if auto_resolve and total_rows >= 0.5e6:
            # Large frame: fixed-size slices of 1% of the rows each.
            rows_per_chunk = int(total_rows * 0.01)
            df_chunks = [
                df.iloc[start : start + rows_per_chunk]
                for start in range(0, total_rows, rows_per_chunk)
            ]
        else:
            if chunk_size < 1:
                raise ValueError("chunk_size must be a positive integer")
            # Positional slicing avoids ``np.array_split`` on a DataFrame,
            # which depends on the deprecated ``DataFrame.swapaxes``.
            df_chunks = self._split_rows(df, chunk_size)

        first_chunk, *remaining_chunks = df_chunks

        # The first chunk goes through import_table with the caller's options.
        # NOTE(review): if ``close_cursor`` makes import_table close a cursor
        # that upload_table still needs, the following uploads could fail;
        # confirm against DataFrameToSQL.
        self.import_table(
            first_chunk, table_name, overwrite, char_length, override_length, close_cursor
        )

        # Later chunks must be added to the table created above, so the
        # overwrite flag is forced to False here. Forwarding the caller's
        # ``overwrite=True`` would presumably replace the table on every
        # iteration and keep only the last chunk.
        # NOTE(review): assumes upload_table's third argument means
        # "replace existing table"; confirm against DataFrameToSQL.
        for chunk in remaining_chunks:
            self.upload_table(chunk, table_name, False)




########################################################################################

Expand All @@ -176,4 +221,4 @@ def _infer_schema_query(self, datatype: str) -> str:
data = {"Name": ["Alexis", "Ivan", "Cordero"], "Age": [27, 27, 28]}
df = pd.DataFrame(data)

upload_from_df.upload_table(df, table_name)
upload_from_df.upload_table(df, table_name)

0 comments on commit cee3855

Please sign in to comment.