diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..dd90351 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,50 @@ +# Security Policy + +- [Security Response Team](#security-response-team) +- [Supported Versions](#supported-versions) +- [Reporting a Vulnerability](#reporting-a-vulnerability) +- [Acknowledgments](#acknowledgments) +- [Disclosure Policy](#disclosure-policy) +- [CVSS v3.0](#cvss-v30-summary) + +## Security Response Team + +Our security response team is available to handle security issues. You can contact us at [jzs.gm27@gmail.com](mailto:jzs.gm27@gmail.com). + +## Supported Versions + +We release patches for security vulnerabilities. Which versions are eligible for receiving such patches depends on the severity of the vulnerability: + +| CVSS v3.0 | Supported Versions | +| --------- | ----------------------------------------- | +| 9.0-10.0 | Releases within the previous three months | +| 4.0-8.9 | Most recent release | +| < 4.0 | 0.0.3 release | + +The following versions of Python packages are currently supported: + +- numpy: >=1.19.0, <2.0.0 +- pandas: >=1.5.3 +- azure-storage-blob: >=12.16.0 +- python-dotenv: >=1.0.0 +- openpyxl: >=3.1.2 +- pyarrow: >=15.0.0 +- fastparquet: >=2024.2.0 + +## Reporting a Vulnerability + +Please report any suspected security vulnerabilities to **[jzs.gm27@gmail.com](mailto:jzs.gm27@gmail.com)**. You will receive a response from us within 48 hours. If the issue is confirmed, we will release a patch as soon as possible, typically within a few days depending on complexity. + +We appreciate your help in keeping our project secure. + +## Acknowledgments + +We acknowledge and credit individuals who report security vulnerabilities responsibly. If you report a security issue, we will include your name or alias in our acknowledgments, unless you request to remain anonymous. + +## Disclosure Policy + +We follow a responsible disclosure policy. 
After receiving a report, we will work with the reporter to address the issue and disclose it publicly once a fix is available. We may also coordinate with package maintainers if necessary. + +## CVSS v3.0 Summary + +The Common Vulnerability Scoring System (CVSS) v3.0 is an open standard for assessing the severity of security vulnerabilities. It provides a numerical score from 0.0 to 10.0, with higher scores indicating more severe vulnerabilities. Organizations use CVSS scores to prioritize their response to security vulnerabilities based on their severity. \ No newline at end of file diff --git a/pydbsmgr/VERSION b/pydbsmgr/VERSION index 9cf0386..bae256f 100644 --- a/pydbsmgr/VERSION +++ b/pydbsmgr/VERSION @@ -1 +1 @@ -0.9.6 \ No newline at end of file +0.9.7 \ No newline at end of file diff --git a/pydbsmgr/__init__.py b/pydbsmgr/__init__.py index 50e96d8..3c55d81 100644 --- a/pydbsmgr/__init__.py +++ b/pydbsmgr/__init__.py @@ -1,7 +1,15 @@ """ -Authors: J. A. Moreno-Guerra -Last modification: 05/15/2023 -Corresponding author: jzs.gm27@gmail.com +pydbsmgr: Initialize the Package +===================================== + +This is the entry point of pydbsmgr, your comprehensive database management companion. It initializes all necessary modules and provides a central hub for accessing various tools and functions. + +Main Modules: +- pydbsmgr.main: Provides access to core functionality, including data manipulation, query execution, and database operations. +- pydbsmgr.utils.azure_sdk: Offers integration with Azure SDK for seamless interaction with Microsoft's cloud-based services. +- pydbsmgr.utils.tools: Contains utility functions for data processing, normalization, and visualization. + +By importing the main modules directly or accessing them through this central entry point (i.e., from pydbsmgr import *), you can leverage the full range of pydbsmgr's capabilities to streamline your database management workflow. 
""" from pydbsmgr.main import * diff --git a/pydbsmgr/main.py b/pydbsmgr/main.py index 8401434..cf89648 100644 --- a/pydbsmgr/main.py +++ b/pydbsmgr/main.py @@ -244,9 +244,9 @@ def convert_date(date_string: str) -> str: return proper_date -def is_number_regex(s): - """Returns True if string is a number.""" - if re.match("^\d+?\.\d+?$", s) is None: +def is_number_regex(s: str) -> bool: + """Returns `True` if string is a number.""" + if re.match(r"^\d+?\.\d+?$", s) is None: return s.isdigit() return True @@ -337,7 +337,7 @@ def clean_and_convert_to(x: str) -> str: def correct_nan(check_missing: str) -> str: """ - Corrects the format of missing values in a `str` to the correct `empty str`. + Corrects the format of missing values in a `str` to the correct empty `str`. Parameters ---------- @@ -347,7 +347,7 @@ def correct_nan(check_missing: str) -> str: Returns ------- check_missing : `str` - The corrected string format or `empty str`. + The corrected string format or empty `str`. """ if str(check_missing).lower() == "nan": return "" @@ -412,12 +412,12 @@ def intersection_cols(dfs_: List[DataFrame]) -> DataFrame: Parameters ---------- - dfs_ : List[`DataFrame`] + dfs_ : List[DataFrame] The `list` of dataframes with columns to be resolves. Returns ------- - dfs_ : List[`DataFrame`] + dfs_ : List[DataFrame] The `list` of dataframes with the corrections in their columns (intersection). """ min_cols = [] diff --git a/pydbsmgr/utils/azure_sdk.py b/pydbsmgr/utils/azure_sdk.py index b93c749..1ead030 100644 --- a/pydbsmgr/utils/azure_sdk.py +++ b/pydbsmgr/utils/azure_sdk.py @@ -15,7 +15,7 @@ def get_connection_string() -> str: - """Get connection string. Load env variables from .env""" + """Get connection string. 
Load env variables from `.env`""" load_dotenv() return os.getenv("CONNECTION_STRING") diff --git a/pydbsmgr/utils/sql_functions.py b/pydbsmgr/utils/sql_functions.py index cc8a2d8..5f86a3f 100644 --- a/pydbsmgr/utils/sql_functions.py +++ b/pydbsmgr/utils/sql_functions.py @@ -161,7 +161,7 @@ def bulk_insert_from_csv( Returns ------- `bool` - True if the data was inserted successfully + `True` if the data was inserted successfully """ # Get all the files in the container or file individually filter_condition = "" @@ -293,7 +293,7 @@ def drop_dropables( Returns ------- `bool` - True if the data was inserted successfully + `True` if the data was inserted successfully """ print("DROPPING EXTERNAL DATA SOURCE") self._cur.execute(f"DROP EXTERNAL DATA SOURCE {data_source_name}") diff --git a/pydbsmgr/utils/tools/tools.py b/pydbsmgr/utils/tools.py similarity index 97% rename from pydbsmgr/utils/tools/tools.py rename to pydbsmgr/utils/tools.py index f74ffa6..3aab621 100644 --- a/pydbsmgr/utils/tools/tools.py +++ b/pydbsmgr/utils/tools.py @@ -230,7 +230,8 @@ def merge_by_coincidence(df1: DataFrame, df2: DataFrame, tol: float = 0.9) -> Da return df -def terminate_process_holding_file(file_path): +def terminate_process(file_path: str) -> None: + """Terminate the process holding the file.""" for proc in psutil.process_iter(["pid", "open_files"]): try: if any(file_path in file_info.path for file_info in proc.open_files()): @@ -241,11 +242,12 @@ def terminate_process_holding_file(file_path): def erase_files(format: str = "log") -> None: + """Erase all files with the given format.""" for filename in glob.glob("*." 
+ format): try: os.remove(filename) except: - terminate_process_holding_file(filename) + terminate_process(filename) os.remove(filename) @@ -256,10 +258,10 @@ def get_extraction_date( Parameters ---------- - filename : `str` | List[`str`] + filename : Union[str, List[str]] file path inside the storage account REGEX_PATTERN : `str`, `optional` - regular expression pattern to extract the date. Defaults to `r"\d{4}-\d{2}-\d{2}"`. + regular expression pattern to extract the date. Returns ------- @@ -401,8 +403,3 @@ def create_directories_from_yaml(yaml_file): with open(yaml_file, "r") as file: data = yaml.safe_load(file) create_directory(data) - - -if __name__ == "__main__": - yaml_file = "directories.yaml" - create_directories_from_yaml(yaml_file) diff --git a/pydbsmgr/utils/tools/__init__.py b/pydbsmgr/utils/tools/__init__.py deleted file mode 100644 index 1193035..0000000 --- a/pydbsmgr/utils/tools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .tools import * diff --git a/requirements.txt b/requirements.txt index e054531..4816245 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ pyarrow fastparquet loguru psutil -Unidecode +Unidecode \ No newline at end of file