Skip to content

Commit

Permalink
[FIX] process_dates function
Browse files (browse the repository at this point in the history)
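* add an `errors` parameter to `process_dates` (default `"ignore"`; `"raise"` raises `ValueError` when a date does not match the expected format) and pass it through from `clean_frame`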
  • Loading branch information
jzsmoreno committed Apr 22, 2024
1 parent acbb8be commit 74d7f11
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pydbsmgr/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.4
0.9.5
10 changes: 7 additions & 3 deletions pydbsmgr/lightest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydbsmgr.utils.tools import coerce_datetime, most_repeated_item


def process_dates(x: str, format_type: str, auxiliary_type: str) -> str:
def process_dates(x: str, format_type: str, auxiliary_type: str, errors: str = "ignore") -> str:
"""Auxiliary function in date type string processing
Parameters
Expand Down Expand Up @@ -43,7 +43,7 @@ def process_dates(x: str, format_type: str, auxiliary_type: str) -> str:
except:
if auxiliary_type is not None:
x = str(pd.to_datetime(x, format=auxiliary_type, errors="ignore"))[:10]
else:
elif errors == "raise":
raise ValueError("Date value does not match the expected format.")
else:
if str(x).find(":") != -1:
Expand All @@ -52,7 +52,7 @@ def process_dates(x: str, format_type: str, auxiliary_type: str) -> str:
except:
if auxiliary_type is not None:
x = str(pd.to_datetime(x[:8], format=auxiliary_type, errors="ignore"))[:10]
else:
elif errors == "raise":
raise ValueError("Date value does not match the expected format.")
return x

Expand Down Expand Up @@ -92,6 +92,7 @@ def clean_frame(
table = (self.df).copy()
cols = table.columns
table_sample = table.sample(frac=sample_frac)
errors = kwargs["errors"] if "errors" in kwargs else "ignore"
for column_index, datatype in enumerate(table.dtypes):
if datatype == "object":
datetype_column = (
Expand All @@ -116,6 +117,7 @@ def clean_frame(
process_dates,
format_type=format_type,
auxiliary_type=None,
errors=errors,
)
vpartial_dates = np.vectorize(partial_dates)
table[cols[column_index]] = vpartial_dates(table[cols[column_index]])
Expand All @@ -125,6 +127,7 @@ def clean_frame(
process_dates,
format_type=format_type,
auxiliary_type=None,
errors=errors,
)
vpartial_dates = np.vectorize(partial_dates)
table[cols[column_index]] = vpartial_dates(table[cols[column_index]])
Expand All @@ -134,6 +137,7 @@ def clean_frame(
process_dates,
format_type=format_type,
auxiliary_type=None,
errors=errors,
)
vpartial_dates = np.vectorize(partial_dates)
table[cols[column_index]] = vpartial_dates(table[cols[column_index]])
Expand Down
2 changes: 1 addition & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def lightest_with_data() -> Callable:
}
)
handler = LightCleaner(df)
df = handler.clean_frame(sample_frac=1.0, fast_execution=False)
df = handler.clean_frame(sample_frac=1.0, fast_execution=False, errors="raise")

return (
df["fecha"].astype(str).to_list(),
Expand Down

0 comments on commit 74d7f11

Please sign in to comment.