Update schema_config.py

jzsmoreno · Apr 10, 2024 · 0e90ea8 · 0e90ea8
1 parent d967dfd
commit 0e90ea8
Show file tree

Hide file tree

Showing 5 changed files with 64 additions and 21 deletions.
diff --git a/merge_by_lev/VERSION b/merge_by_lev/VERSION
@@ -0,0 +1 @@
+0.2.8
diff --git a/merge_by_lev/__init__.py b/merge_by_lev/__init__.py
@@ -1,6 +1,6 @@
 """
 Authors: J. A. Moreno-Guerra
-Last modification: 05/23/2022
+Last modification: 04/09/2024
 Corresponding author: [email protected] 
 """
 

diff --git a/merge_by_lev/schema_config.py b/merge_by_lev/schema_config.py
@@ -8,30 +8,32 @@
 from pydbsmgr.main import *
 from pydbsmgr.main import DataFrame
 from pydbsmgr.utils.azure_sdk import *
+import pandas as pd
 
 
 class StandardColumns:
-    """Allows generic column renaming and creates a `.json` file with the equivalent names.
-    Allows columns containing questions to be transformed according to `SQL` standards."""
+    """Allows columns to be transformed according to `SQL` standards
+    or creates a `.json` file with the obfuscated columns."""
 
     def __init__(self, df: DataFrame) -> None:
         self.df = df.copy()
 
     def get_frame(
         self,
         json_name: str = "output.json",
-        write_to_cloud: bool = True,
+        write_to_cloud: bool = False,
         connection_string: str = "",
         container_name: str = "",
         overwrite: bool = True,
         encoding: str = "utf-8",
         get_standard: bool = True,
+        **kwargs,
     ) -> DataFrame:
-        """Returns the `DataFrame` with the obfuscated columns.
+        """Returns the `DataFrame` with either obfuscated column names or column names in SQL standard format.
 
         Args:
             json_name (`str`, optional): name of the dictionary `.json` file. By default it is set to `output.json`.
-            write_to_cloud (`bool`, optional): boolean variable to write to an Azure storage account. By default it is set to `True`.
+            write_to_cloud (`bool`, optional): boolean variable to write to an Azure storage account. By default it is set to `False`.
             connection_string (`str`, optional): the connection string to storage account. By default it is set to "".
             container_name (`str`, optional): Azure container name. By default it is set to "".
             overwrite (`bool`, optional): boolean variable that indicates whether to overwrite. By default it is set to `True`.
@@ -40,27 +42,56 @@ def get_frame(
 
         Returns:
             `DataFrame`: `DataFrame` with changed columns
+
+        Keyword Arguments:
+        ----------
+        - snake_case (`bool`, optional): If `True`, column names are converted to snake
+            case; otherwise the title-cased (upper camel case) names are kept. Default is `True`.
+        - sort (`bool`, optional): If `True`, the columns are reordered using
+            `_sort_columns_by_length`. Default is `False`.
+        - surrounding (`bool`, optional): If `True`, the first and last character of each column
+            name (expected to be surrounding brackets) are stripped before the transformation and
+            the result is wrapped in `[...]` again. Default is `True`.
         """
         self._generate_dict(encoding)
         self._writer(json_name, write_to_cloud, connection_string, container_name, overwrite)
         if get_standard:
-            df_renamed = self._sql_standards()
+            df_renamed = self._sql_standards(**kwargs)
         else:
             df_renamed = (self.df).rename(columns=self.obfuscator)
         return df_renamed
 
-    def _sql_standards(self) -> DataFrame:
+    def _sql_standards(
+        self, snake_case: bool = True, sort: bool = False, surrounding: bool = True
+    ) -> DataFrame:
+        """Transforms all column names into SQL standard format.
+
+        Args:
+            snake_case (`bool`, optional): If `True`, column names are converted to snake
+                case; otherwise the title-cased (upper camel case) names are kept. Default is `True`.
+            sort (`bool`, optional): If `True`, the columns are reordered using
+                `_sort_columns_by_length`. Default is `False`.
+            surrounding (`bool`, optional): If `True`, the first and last character of each column
+                name (expected to be surrounding brackets) are stripped before the transformation
+                and the result is wrapped in `[...]` again. Default is `True`.
+        Returns:
+            `DataFrame`: `DataFrame` with transformed columns.
+
+        """
         df = (self.df).copy()
+        if surrounding:
+            df.columns = [col[1:-1] for col in df.columns]
         df.columns = df.columns.str.lower()
-        df.columns = df.columns.str.replace("_", " ")
-        df.columns = df.columns.str.replace("__", " ")
+        df.columns = df.columns.str.replace("_+", " ", regex=True)
         df.columns = df.columns.str.title()
         df.columns = df.columns.str.strip()
         df.columns = df.columns.str.replace(" ", "")
         df.columns = df.columns.str.replace("\n", "_")
-        df.columns = [self._camel_to_snake(col) for col in df.columns]
+        if snake_case:
+            df.columns = [self._camel_to_snake(col) for col in df.columns]
         df.columns = [self._truncate(col) for col in df.columns]
-        # df = self._sort_columns_by_length(df)
+        if sort:
+            df = self._sort_columns_by_length(df)
+        if surrounding:
+            df.columns = [f"[{col}]" for col in df.columns]
         return df
 
     def _truncate(self, column_name: str) -> str:
@@ -327,5 +358,5 @@ def get_table(self) -> Table:
     schema = data_handler.get_schema()
     table = data_handler.get_table()
     column_handler = StandardColumns(df)
-    df = column_handler.get_frame(write_to_cloud=False)
+    df = column_handler.get_frame(surrounding=False)
     breakpoint()
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 numpy
 pandas
 tabulate
-git+https://github.com/jzsmoreno/pydbsmgr
+pydbsmgr
 pyarrow
diff --git a/setup.py b/setup.py
@@ -1,11 +1,27 @@
+from pathlib import Path
+
 import setuptools
+from pip._internal.req import parse_requirements
+
+# Parse the requirements.txt file
+requirements = parse_requirements("requirements.txt", session="hack")
+
+# Get the list of requirements as strings
+install_requires = [str(req.requirement) for req in requirements]
 
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 
+about = {}
+ROOT_DIR = Path(__file__).resolve().parent
+PACKAGE_DIR = ROOT_DIR / "merge_by_lev"
+with open(PACKAGE_DIR / "VERSION") as f:
+    _version = f.read().strip()
+    about["__version__"] = _version
+
 setuptools.setup(
     name="merge_by_lev",
-    version="0.2.7",
+    version=about["__version__"],
     author="J. A. Moreno-Guerra",
     author_email="[email protected]",
     description="Testing installation of Package",
@@ -15,12 +31,7 @@
     project_urls={"Bug Tracker": "https://github.com/jzsmoreno/merge_by_lev"},
     license="MIT",
     packages=["merge_by_lev"],
-    install_requires=[
-        "numpy",
-        "pandas",
-        "tabulate",
-        "pyarrow",
-    ],
+    install_requires=install_requires,
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",