diff --git a/doc/_templates/demo_tabular_pipeline.html b/doc/_templates/demo_tabular_pipeline.html index b52b60a84..13f2c732b 100644 --- a/doc/_templates/demo_tabular_pipeline.html +++ b/doc/_templates/demo_tabular_pipeline.html @@ -47,26 +47,26 @@ - 0 - F - POL - Department of Police - MSB Information Mgmt and... + 1 + M + FRS + Fire and Rescue Services + Third Battalion - Administration Fulltime-Regular - Office Services Coordinator - 09/22/1986 - 1986 + Fire/Rescue Lieutenant + 06/07/2004 + 2004 - 1 + 2 M - POL - Department of Police - ISB Major Crimes... + HHS + Department of Health and Human Services + Environmental Health and Regulatory Services Fulltime-Regular - Master Police Officer - 09/12/1988 - 1988 + Environmental Health Specialist III + 02/20/2007 + 2007 ... @@ -82,24 +82,24 @@ 9226 M - CCL - County Council - Council Central Staff + DGS + Department of General Services + Facilities Maintenance Fulltime-Regular - Manager II - 09/05/2006 - 2006 + Master Plumber + 03/26/2001 + 2001 9227 - M - DLC - Department of Liquor Control - Licensure, Regulation... 
+ F + HHS + Department of Health and Human Services + Infants and Toddlers Fulltime-Regular - Alcohol/Tobacco Enforcement Specialist II - 01/30/2012 - 2012 + Program Specialist II + 03/25/2013 + 2013 diff --git a/doc/table_report.py b/doc/table_report.py index 639af7e35..01476db12 100644 --- a/doc/table_report.py +++ b/doc/table_report.py @@ -4,6 +4,8 @@ def generate_demo(): X = fetch_employee_salaries().X + X = X.sample(frac=1, random_state=145).reset_index(drop=True) + with open( "_templates/demo_table_report_generated.html", "w", encoding="utf-8" ) as f: diff --git a/skrub/_to_datetime.py b/skrub/_to_datetime.py index 81bbeb577..2d3b0356f 100644 --- a/skrub/_to_datetime.py +++ b/skrub/_to_datetime.py @@ -91,7 +91,7 @@ class ToDatetime(SingleColumnTransformer): Parameters ---------- - format : str or None, optional, default=None + format : str, list of str, or None, optional, default=None Format to use for parsing dates that are stored as strings, e.g. ``"%Y-%m-%dT%H:%M%S"``. If not specified, the format is inferred from the data when possible. @@ -424,7 +424,7 @@ def transform(self, column): return sbd.cast(column, self.output_dtype_) def _get_datetime_format(self, column): - if self.format is not None: + if isinstance(self.format, str): return self.format not_null = sbd.drop_nulls(column) sample = sbd.sample( @@ -432,6 +432,14 @@ def _get_datetime_format(self, column): ) if not sbd.is_string(sample): return None + if isinstance(self.format, list): + # Try the candidate formats in order; the first one that does not raise is used. + for format_option in self.format: + try: + sbd.to_datetime(column, format=format_option, strict=False) + return format_option + except Exception: + continue return _guess_datetime_format(sample)