Update documentation (#25)
jzsmoreno authored May 10, 2024
1 parent 8c0763c commit 1da7d92
Showing 10 changed files with 159 additions and 103 deletions.
5 changes: 5 additions & 0 deletions .github/CODEOWNERS
@@ -0,0 +1,5 @@
# This is a CODEOWNERS file
# Each line specifies a file pattern followed by one or more GitHub usernames or team names

# Owners for the entire repository
* @jzsmoreno
24 changes: 17 additions & 7 deletions .github/workflows/generate-docs.yml
@@ -3,7 +3,12 @@ name: Auto-documentation Generation
on:
push:
branches:
- main
- 'main'
paths:
- 'pydbsmgr/**'

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

jobs:
generate-docs:
@@ -24,6 +29,17 @@ jobs:
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install pdoc3
- name: Set up Git
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
GITHUB_NAME: ${{ secrets.NAME }}
GITHUB_EMAIL: ${{ secrets.EMAIL }}
run: |
git config user.email "${GITHUB_EMAIL}"
git config user.name "${GITHUB_NAME}"
git config credential.helper "store --file=.git/credentials"
echo "https://${{ secrets.TOKEN }}@github.com/${{ github.repository }}" > .git/credentials
- name: Remove existing documentation files
run: rm -rf docs/*

@@ -43,13 +59,7 @@ jobs:
rm -rf docs/${{ steps.get_package_name.outputs.name }}
- name: Commit documentation changes
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
GITHUB_ACTOR: ${{ github.actor }}
GITHUB_EMAIL: [email protected]
run: |
git config user.email "${GITHUB_EMAIL}"
git config user.name "${GITHUB_ACTOR}"
if git status --porcelain | grep .; then
echo "Changes detected, proceeding with workflow steps..."
git add docs/
2 changes: 1 addition & 1 deletion .github/workflows/jekyll-gh-pages.yml
@@ -7,7 +7,7 @@ on:
branches:
- 'main'
paths:
- 'docs/*'
- 'docs/**'

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
2 changes: 1 addition & 1 deletion pydbsmgr/VERSION
@@ -1 +1 @@
0.9.5
0.9.6
32 changes: 20 additions & 12 deletions pydbsmgr/health.py
@@ -33,10 +33,12 @@ def __init__(self, _df: DataFrame | List[DataFrame], df_names: str | List[str] =
def fix(self, cols_upper_case: bool = False, drop_empty_cols: bool = True) -> None:
"""Performs the clean of the data and validation
Args:
-----
cols_upper_case (`bool`, optional): Indicates whether to convert column names to uppercase. Defaults to `False`.
drop_empty_cols (`bool`, optional): Variable indicating whether columns with all their values empty should be removed. Defaults to `True`.
Parameters
----------
cols_upper_case : `bool`, `optional`
Indicates whether to convert column names to uppercase. Defaults to `False`.
drop_empty_cols : `bool`, `optional`
Variable indicating whether columns with all their values empty should be removed. Defaults to `True`.
"""
if drop_empty_cols:
for count, df in enumerate(self._dfs):
@@ -60,14 +62,20 @@ def generate_report(
) -> None:
"""Generate a `.html` health check report.
Args:
-----
report_name (`str`, optional): Name of the quality assessment report. Defaults to `./report.html`.
yaml_name (`str`, optional): Indicates the name of the `.yaml` file that will serve as a template for the creation of the SQL table. Defaults to `./output.yaml`.
database_name (`str`, optional): The header of the `.yaml` file. Default value is `database`
directory_name (`str`, optional): Folder in which the reports will be saved. Defaults to `summary`.
concat_vertically: (`bool`, optional), Variable indicating whether the list of dataframes should be vertically concatenated into a single one. Default value is `False`.
encoding (`str`, optional): The encoding of dataframes. Defaults to `utf-8`.
Parameters
----------
report_name : `str`, `optional`
Name of the quality assessment report. Defaults to `./report.html`.
yaml_name : `str`, `optional`
Indicates the name of the `.yaml` file that will serve as a template for the creation of the SQL table. Defaults to `./output.yaml`.
database_name : `str`, `optional`
The header of the `.yaml` file. Defaults to `database`.
directory_name : `str`, `optional`
Folder in which the reports will be saved. Defaults to `summary`.
concat_vertically : `bool`, `optional`
Variable indicating whether the list of dataframes should be vertically concatenated into a single one. Default value is `False`.
encoding : `str`, `optional`
The encoding of dataframes. Defaults to `utf-8`.
"""
self.df_files_info = pd.DataFrame()
self.yaml_name = yaml_name
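For context on the reworked docstrings above, a minimal usage sketch of `fix` and `generate_report`; the `FrameCheck` class name and the sample data are assumptions for illustration, while the parameters come from the docstrings:

```python
# Illustrative sketch only: the class name `FrameCheck` and its import are assumed.
import pandas as pd
from pydbsmgr.health import FrameCheck  # assumed owner of fix() and generate_report()

df = pd.DataFrame({"name": ["Alice", "Bob"], "signup date": ["2024-01-05", None]})
checker = FrameCheck(df, df_names="users")

# Clean and validate: keep column-name casing, drop columns that are entirely empty.
checker.fix(cols_upper_case=False, drop_empty_cols=True)

# Write the HTML quality report and the .yaml template for the SQL table.
checker.generate_report(
    report_name="./report.html",
    yaml_name="./output.yaml",
    database_name="database",
    directory_name="summary",
    concat_vertically=False,
    encoding="utf-8",
)
```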
23 changes: 12 additions & 11 deletions pydbsmgr/lightest.py
@@ -9,13 +9,13 @@ def process_dates(x: str, format_type: str, auxiliary_type: str, errors: str = "
Parameters
----------
x : `str`
character of type date.
x : `str`
character of type date.
Returns
----------
x : `str`
character after processing with format `YYYY-MM-DD`.
x : `str`
character after processing with format `YYYY-MM-DD`.
"""
# performing data type conversion
x = str(x)
@@ -78,16 +78,17 @@ def clean_frame(
Parameters
----------
- sample_frac (`float`): The fraction of rows to use for date type inference. Default is 0.1 i.e., 10%.
- fast_execution (`bool`): If `False` use `applymap` pandas for extra text cleanup. Default is `True`.
sample_frac : `float`
The fraction of rows to use for date-type inference. Defaults to 0.1, i.e., 10%.
fast_execution : `bool`
If `False`, uses pandas `applymap` for extra text cleanup. Defaults to `True`.
Keyword Arguments:
----------
- no_emoji: (`bool`): By default it is set to `False`.
If `True`, removes all emojis from text data. Works only when `fast_execution` = `False`.
- title_mode: (`bool`): By default it is set to `True`.
If `False`, converts the text to lowercase. Works only when `fast_execution` = `False`.
By default, converts everything to `title`.
no_emoji : `bool`
By default it is set to `False`. If `True`, removes all emojis from text data. Works only when `fast_execution` = `False`.
title_mode : `bool`
By default it is set to `True`. If `False`, converts the text to lowercase. Works only when `fast_execution` = `False`. By default, converts everything to `title`.
"""
table = (self.df).copy()
cols = table.columns
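Similarly, a hedged sketch of the `clean_frame` keyword arguments documented above; the `LightCleaner` class name and its constructor are assumptions:

```python
# Illustrative sketch only: `LightCleaner` and its constructor signature are assumed.
import pandas as pd
from pydbsmgr.lightest import LightCleaner  # assumed owner of clean_frame()

df = pd.DataFrame({"fecha": ["01/02/2023", "2023-03-04"], "comentario": ["Hola 😀", " adiós "]})
cleaner = LightCleaner(df, "ventas")  # constructor arguments assumed

cleaner.clean_frame(
    sample_frac=0.1,       # fraction of rows sampled for date-type inference
    fast_execution=False,  # run the extra applymap-based text cleanup
    no_emoji=True,         # strip emojis (honored only when fast_execution=False)
    title_mode=True,       # title-case text; set to False to lowercase instead
)
```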
12 changes: 8 additions & 4 deletions pydbsmgr/main.py
@@ -55,11 +55,15 @@ def check_if_contains_dates(input_string: str) -> bool:
def remove_numeric_char(input_string: str) -> str:
"""Remove all numeric characters from a string.
Args:
input_string (`str`): character string to be cleaned of numeric characters
Parameters
----------
input_string : `str`
character string to be cleaned of numeric characters
Returns:
`str`: clean character string
Returns
-------
`str`
clean character string
"""
return re.sub(r"\d", "", input_string)

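`remove_numeric_char` reduces to the single `re.sub` call shown above, so its behavior is easy to illustrate:

```python
from pydbsmgr.main import remove_numeric_char  # module path taken from the file above

print(remove_numeric_char("order42_item7"))  # -> "order_item"
print(remove_numeric_char("2024 report"))    # -> " report"
```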
8 changes: 4 additions & 4 deletions pydbsmgr/utils/config.py
@@ -6,12 +6,12 @@ def load_config(config_file):
Parameters
----------
config_file : str
config_file : `str`
The path to the configuration file.
Returns
-------
config : ConfigParser
config : `ConfigParser`
A configuration object loaded from file.
"""

@@ -26,12 +26,12 @@ def parse_config(config):
Parameters
----------
config : ConfigParser
config : `ConfigParser`
A configuration object loaded from file.
Returns
-------
parsed_config : dict
parsed_config : `dict`
A dictionary of parsed configuration values.
"""

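A short sketch of the two config helpers as documented above; `settings.ini` is a placeholder path:

```python
from pydbsmgr.utils.config import load_config, parse_config

config = load_config("settings.ini")  # ConfigParser loaded from the given file
settings = parse_config(config)       # dict of parsed configuration values
print(settings)
```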
103 changes: 64 additions & 39 deletions pydbsmgr/utils/sql_functions.py
@@ -28,18 +28,22 @@ def insert_data(
) -> None:
"""Insert data into SQL Server.
Parameters:
Parameters
----------
df (`Dataframe` or `str`): The pandas dataframe that will be inserted into sql server
table_name (`str`): Name of the table in which the data is being inserted
overwrite (`bool`): If `True` it will delete and recreate the table before inserting new data
if `False` it will append the new data onto the end of the existing table
char_length (`int`): Length of varchar fields for text columns
override_length (`bool`): Override length of varchar fields for text columns.
Returns:
----------
`None`
df : `DataFrame` | `str`
The pandas DataFrame that will be inserted into SQL Server.
table_name : `str`
Name of the table in which the data is being inserted
overwrite : `bool`
If `True`, the table is deleted and recreated before inserting the new data; if `False`, the new data is appended to the end of the existing table.
char_length : `int`
Length of varchar fields for text columns.
override_length : `bool`
Override length of varchar fields for text columns.
Returns
-------
`None`
"""

self.file_type = None
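For orientation, a hedged sketch of calling `insert_data` with the parameters documented above; the `SQLDataHandler` class name, its constructor, and the connection string are assumptions not shown in this diff:

```python
# Illustrative sketch only: the class name and connection setup are assumed;
# only the insert_data() parameters come from the docstring above.
import pandas as pd
from pydbsmgr.utils.sql_functions import SQLDataHandler  # assumed class name

handler = SQLDataHandler("<odbc-connection-string>")  # constructor assumed

df = pd.DataFrame({"id": [1, 2], "name": ["Alice", "Bob"]})
handler.insert_data(
    df,
    table_name="customers",
    overwrite=True,         # drop and recreate the table before inserting
    char_length=512,        # varchar length for text columns
    override_length=False,  # do not override the computed varchar length
)
```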
@@ -131,22 +135,33 @@ def bulk_insert_from_csv(
) -> bool:
"""Insert data from csv files in Azure Blob Storage into SQL Server with Bulk command
Parameters:
----------
file_path (`str`): Path to the file in Azure Blob Storage
db_table_name (`str`): Name of the table in which the data is being inserted
sas_str (`str`): SAS string to the storage account
storage_connection_string (`str`): Connection string to the storage account
storage_account (`str`): Name of the storage account
container_name (`str`): Name of the container in which the data is being inserted
credential_name (`str`): Name of the credentials
data_source_name (`str`): Name of the data source
char_length (`int`): Length of varchar fields for text columns
overwrite (`bool`): If `True` it will delete and recreate the table before inserting new data
if `False` it will append the new data onto the end of the existing table
Returns:
Parameters
----------
`bool`: True if the data was inserted successfully
file_path : `str`
Path to the file in Azure Blob Storage
db_table_name : `str`
Name of the table in which the data is being inserted
sas_str : `str`
SAS string to the storage account
storage_connection_string : `str`
Connection string to the storage account
storage_account : `str`
Name of the storage account
container_name : `str`
Name of the container in which the data is being inserted
credential_name : `str`
Name of the credentials
data_source_name : `str`
Name of the data source
char_length : `int`
Length of varchar fields for text columns
overwrite : `bool`
If `True`, the table is deleted and recreated before inserting the new data; if `False`, the new data is appended to the end of the existing table.
Returns
-------
`bool`
True if the data was inserted successfully
"""
# Get all the files in the container or file individually
filter_condition = ""
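A hedged call sketch for `bulk_insert_from_csv` using the parameter names documented above; every Azure and SQL value is a placeholder, and `handler` is the same assumed instance as in the previous sketch:

```python
# Illustrative sketch only: all connection values below are placeholders.
ok = handler.bulk_insert_from_csv(
    file_path="landing/customers.csv",
    db_table_name="customers",
    sas_str="<sas-token>",
    storage_connection_string="<storage-connection-string>",
    storage_account="mystorageaccount",
    container_name="landing",
    credential_name="blob_scoped_credential",
    data_source_name="blob_data_source",
    char_length=512,
    overwrite=True,
)
print(ok)  # True when the data was inserted successfully
```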
@@ -268,13 +283,17 @@ def drop_dropables(
) -> bool:
"""Drop dropable objects
Parameters:
----------
data_source_name (`str`): Name of the data source
masterkey (`bool`): If `True` it will drop the master key
Returns:
Parameters
----------
`Bool`: True if the data was inserted successfully
data_source_name : `str`
Name of the data source
masterkey : `bool`
If `True` it will drop the master key
Returns
-------
`bool`
`True` if the objects were dropped successfully
"""
print("DROPPING EXTERNAL DATA SOURCE")
self._cur.execute(f"DROP EXTERNAL DATA SOURCE {data_source_name}")
@@ -339,14 +358,20 @@ def write_csv_from_parquet(
write_to_csv: bool = True,
) -> None:
"""Write a csv file from parquet files in a container
Parameters:
----------
connection_string (`str`): Connection string to the storage account
container_name (`str`): Name of the container in which the data is being inserted
directory (`str`): Directory in which the parquet files are located
Returns:
Parameters
----------
`bool`: True if the file was created successfully
connection_string : `str`
Connection string to the storage account
container_name : `str`
Name of the container in which the data is being inserted
directory : `str`
Directory in which the parquet files are located
Returns
-------
`bool`
`True` if the file was created successfully
"""
# Write the csv files
if write_to_csv:
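And a last sketch for `write_csv_from_parquet` with the documented parameters; the connection string, container, and directory are placeholders, and `handler` is the same assumed instance:

```python
# Illustrative sketch only: connection string, container, and directory are placeholders.
handler.write_csv_from_parquet(
    connection_string="<storage-connection-string>",
    container_name="processed",
    directory="exports/2024",
    write_to_csv=True,  # defaults to True per the signature shown above
)
```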