Skip to content

Commit

Permalink
Update tools.py
Browse files Browse the repository at this point in the history
* Improved date logic
  • Loading branch information
jzsmoreno committed Oct 21, 2023
1 parent cf373db commit e5c1f3c
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 8 deletions.
16 changes: 13 additions & 3 deletions pydbsmgr/lightest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,20 @@ def clean_frame(self) -> DataFrame:
for column_index, datatype in enumerate(table.dtypes):
if datatype == "object" or datatype == "datetime64[ns]":
x = (table[cols[column_index]].values)[0]
datetype_column = True
if isinstance(x, str):
if (x.find("/") != -1 or x.find("-")) != -1 and not (
x.find("//") or x.find("\\")
) != -1:
if (
x == ""
or x.find("/") != -1
or x.find("-") != -1
or x == np.datetime64("NaT")
):
datetype_column = (
(table[cols[column_index]].apply(check_if_contains_dates))
.isin([True])
.any()
)
if not (x.find("//") or x.find("\\")) != -1 and datetype_column:
with concurrent.futures.ThreadPoolExecutor() as executor:
table[cols[column_index]] = list(
executor.map(clean_and_convert_to, table[cols[column_index]])
Expand Down
11 changes: 11 additions & 0 deletions pydbsmgr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@
########################################################################################


def check_if_contains_dates(input_string: str) -> bool:
    """Return whether ``input_string`` contains a year-first date.

    A date here means four digits, a ``-`` or ``/`` separator, one or two
    digits, another ``-`` or ``/`` separator, and one or two digits, e.g.
    ``2023-10-21`` or ``2023/1/5``. The two separators are not required to
    match and no calendar validation is performed.

    Args:
        input_string: Value to inspect. Non-string values are converted
            with ``str`` before matching.

    Returns:
        ``True`` if the pattern occurs anywhere in the text, ``False``
        otherwise (including for the empty string).
    """
    # Convert before doing anything else: comparing a raw cell value with ""
    # and using the result as a condition raises for values whose ``==`` does
    # not return a plain bool (e.g. ``pandas.NA`` or array-likes).
    text = str(input_string)
    # An empty string cannot match the pattern, so no special case is needed.
    return bool(re.search(r"\d{4}(-|/)\d{1,2}(-|/)\d{1,2}", text))


def remove_numeric_char(input_string: str) -> str:
"""Remove all numeric characters from a string.
Expand Down
16 changes: 12 additions & 4 deletions pydbsmgr/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pandas.errors import IntCastingNaNError
from pyarrow import Table

from pydbsmgr.main import is_number_regex
from pydbsmgr.main import is_number_regex, check_if_contains_dates


class ColumnsCheck:
Expand Down Expand Up @@ -280,10 +280,18 @@ def _check_datetime(self) -> None:
col = cols[column_index]
if datatype == "object":
x = (df_[col].values)[0]
datetype_column = True
if isinstance(x, str):
if (x.find("/") != -1 or x.find("-")) != -1 and not (
x.find("//") or x.find("\\")
) != -1:
if (
x == ""
or x.find("/") != -1
or x.find("-") != -1
or x == np.datetime64("NaT")
):
datetype_column = (
(df_[col].apply(check_if_contains_dates)).isin([True]).any()
)
if not (x.find("//") or x.find("\\")) != -1 and datetype_column:
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
df_[col] = list(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="pydbsmgr",
version="0.5.7",
version="0.5.8",
author="J. A. Moreno-Guerra",
author_email="[email protected]",
description="Testing installation of Package",
Expand Down

0 comments on commit e5c1f3c

Please sign in to comment.