Skip to content

Commit

Permalink
Update fast_upload.py
Browse files Browse the repository at this point in the history
* Add `_infer_schema_query` method
  • Loading branch information
jzsmoreno committed Nov 15, 2023
1 parent d8da101 commit 4a9f399
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 26 deletions.
47 changes: 22 additions & 25 deletions pydbsmgr/fast_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,6 @@
class DataFrameToSQL(ColumnsCheck):
"""Allows you to create a table from a dataframe"""

sql_types = [
"FLOAT",
"INT",
"BIGINT",
"DATE",
"VARCHAR(MAX)",
"BIT",
"VARCHAR(MAX)",
"INT",
"BIGINT",
]
pandas_types = [
"float64",
"int32",
"int64",
"datetime64[ns]",
"object",
"bool",
"category",
"Int32",
"Int64",
]
datatype_dict = dict(zip(pandas_types, sql_types))

def __init__(self, connection_string: str) -> None:
"""Set the connection with the database"""
self._connection_string = connection_string
Expand Down Expand Up @@ -134,7 +110,7 @@ def _create_table_query(
query = "CREATE TABLE " + table_name + "("
for j, column in enumerate(df.columns):
matches = re.findall(r"([^']*)", str(df.iloc[:, j].dtype))
dtype = self.datatype_dict[matches[0]]
dtype = self._infer_schema_query(matches[0])
if dtype == "VARCHAR(MAX)":
element = max(list(df[column].astype(str)), key=len)
max_string_length = len(element)
Expand All @@ -153,6 +129,27 @@ def _insert_table_query(self, table_name: str, df: DataFrame) -> str:
query = query.format(",".join(df.columns), ",".join("?" * len(df.columns)))
return query

def _infer_schema_query(self, datatype: str) -> str:
"""Infer schema from a given datatype string"""
datatype = datatype.lower()
if datatype.find("float") != -1:
return "FLOAT"
elif datatype.find("int") != -1:
if datatype.find("64") != -1:
return "BIGINT"
else:
return "INT"
elif datatype.find("datetime") != -1:
return "DATE"
elif datatype.find("object") != -1:
return "VARCHAR(MAX)"
elif datatype.find("category") != -1:
return "VARCHAR(MAX)"
elif datatype.find("bool") != -1:
return "BIT"
else:
raise ValueError("Data type could not be inferred!")


########################################################################################

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="pydbsmgr",
version="0.7.2",
version="0.7.3",
author="J. A. Moreno-Guerra",
author_email="[email protected]",
description="Testing installation of Package",
Expand Down

0 comments on commit 4a9f399

Please sign in to comment.