diff --git a/.gitignore b/.gitignore index 9c61f10..d3b7fee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ gitleaks-report.json gitleaks-report-detailed.json -temp.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 9699603..9b5e085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file. +## [1.1.0] - 2024-07-24 + +[1.1.0]: https://github.com/abdullahkhawer/find-and-report-secrets-in-code/releases/tag/v1.1.0 + +### 🚀 Features + +- Update shell script to prepare and add URL for each finding in the JSON report. +- Update python script to improve logging, comments, pylint score from 1.44 to 9.25/10 by refactoring code, HTML content template to add link to the file reference where secret is detected and Slack notification message along with its format in case of both no secrets found and 1 or more secrets found. + +### 📚 Documentation + +- Update READMEs to add 2 new ENVs, add 1 new JSON field and fix some existing commands and descriptions mentioned. + +### ⚙️ Miscellaneous Tasks + +- Remove unnecessary file from .gitignore. 
+- Add 2 new variables and use image 1.1.0 +- Update version to v1.1.0 + ## [1.0.1] - 2024-07-03 [1.0.1]: https://github.com/abdullahkhawer/find-and-report-secrets-in-code/releases/tag/v1.0.1 diff --git a/README.md b/README.md index ee90cdc..d677f0c 100644 --- a/README.md +++ b/README.md @@ -15,21 +15,24 @@ Below you can find an example of the JSON report generated: ```json [ { - "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", - "File": "./code/main.py", + "Description": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", + "File": "scripts/main.py", "Line No.": "11", - "Secret Type": "hashicorp-tf-password", + "Link": "https://gitlab.com/my-projects/my-repo/-/blob/master/scripts/main.py#L11", + "Secret Type": "generic-api-key", "Commit": "__REDACTED__", "Author": "__REDACTED__" }, { "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", - "File": "./code/main.conf", - "Line No.": "30", + "File": "configurations/main.tf", + "Line No.": "6", + "Link": "https://gitlab.com/my-projects/my-repo/-/blob/master/configurations/main.tf#L6", "Secret Type": "hashicorp-tf-password", "Commit": "__REDACTED__", "Author": "__REDACTED__" } + ... ] ``` @@ -82,8 +85,16 @@ Following are the prerequisites to be met once before you begin: ### Execution Instructions Once all the prerequisites are met, set the following environment variables: - - `PATH_TO_GIT_REPO` - - Description: To keep the size of the git repository to be cloned lower to make the job faster. + - `LOCAL_PATH_TO_GIT_REPO` + - Description: Local path to the Git repository. + - Example: `/Users/Abdullah.Khawer/Desktop/my-projects/my-repo` + - Requirement: REQUIRED + - `REMOTE_PATH_TO_GIT_REPO` + - Description: Remote path to the Git repository. 
+ - Example: `https://gitlab.com/my-projects/my-repo` + - Requirement: REQUIRED + - `BRANCH_NAME` + - Description: Name of the branch in the Git repository against which secrets detection tool will be executed. - Example: `/Users/Abdullah.Khawer/Desktop/myrepo` - Requirement: REQUIRED - `CONFLUENCE_ENABLED` @@ -120,10 +131,10 @@ Once all the prerequisites are met, set the following environment variables: - Example: `[https://mydomain.atlassian.net](https://hooks.slack.com/services/__REDACTED__/__REDACTED__/__REDACTED__)` - Requirement: REQUIRED (if `SLACK_ENABLED` is set to `1`) -And then simply run the following 3 commands in the correct order: +And then simply run the following 2 commands: - `bash gitleaks.sh` - `python3 main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]` - - Example: `python3 main.py Europe/Amsterdam myproj/myrepo master` + - Example: `python3 main.py Europe/Amsterdam my-projects/my-repo master` - Note: Details about supported time zones and their constant names can be found here: [pypi.org > project > pytz > Helpers](https://pypi.org/project/pytz/#:~:text=through%20multiple%20timezones.-,Helpers,-There%20are%20two) ## Automatically via CI/CD Pipeline diff --git a/VERSION b/VERSION index b18d465..795460f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v1.0.1 +v1.1.0 diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 2198517..ffc4fe4 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -25,7 +25,7 @@ stage: scan extends: - .find-secrets:variables - image: abdullahkhawer/find-and-report-secrets-in-code:1.0.0 + image: abdullahkhawer/find-and-report-secrets-in-code:1.1.0 before_script: - | if [ -n "$CONFLUENCE_ENABLED" ] && [ "$CONFLUENCE_ENABLED" -eq 1 ]; then @@ -63,13 +63,15 @@ fi - git fetch origin $CI_COMMIT_BRANCH script: + - export LOCAL_PATH_TO_GIT_REPO=$(pwd) + - export REMOTE_PATH_TO_GIT_REPO=$CI_PROJECT_URL + - export BRANCH_NAME=$CI_COMMIT_BRANCH + - export REPO_NAME=$CI_PROJECT_PATH - export 
PATH=$PATH:/usr/local/gitleaks - - export PATH_TO_GIT_REPO=$(pwd) - - export REPO_NAME=$(echo "$CI_PROJECT_DIR" | sed 's|/builds/||') - cd /find-and-report-secrets-in-code/ - bash ./gitleaks.sh - - python3 main.py "Europe/Amsterdam" $REPO_NAME $CI_COMMIT_BRANCH $CI_JOB_URL/artifacts/raw/gitleaks-report.json - - cp ./gitleaks-report.json $PATH_TO_GIT_REPO/gitleaks-report.json + - python3 main.py "Europe/Amsterdam" $REPO_NAME $BRANCH_NAME $CI_JOB_URL/artifacts/raw/gitleaks-report.json + - cp ./gitleaks-report.json $LOCAL_PATH_TO_GIT_REPO/gitleaks-report.json artifacts: paths: - gitleaks-report.json diff --git a/docker/README.md b/docker/README.md index 48bc37e..552d0bf 100644 --- a/docker/README.md +++ b/docker/README.md @@ -15,21 +15,24 @@ Below you can find an example of the JSON report generated: ```json [ { - "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", - "File": "./code/main.py", + "Description": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", + "File": "scripts/main.py", "Line No.": "11", - "Secret Type": "hashicorp-tf-password", + "Link": "https://gitlab.com/my-projects/my-repo/-/blob/master/scripts/main.py#L11", + "Secret Type": "generic-api-key", "Commit": "__REDACTED__", "Author": "__REDACTED__" }, { "Description": "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches.", - "File": "./code/main.conf", - "Line No.": "30", + "File": "configurations/main.tf", + "Line No.": "6", + "Link": "https://gitlab.com/my-projects/my-repo/-/blob/master/configurations/main.tf#L6", "Secret Type": "hashicorp-tf-password", "Commit": "__REDACTED__", "Author": "__REDACTED__" } + ... 
] ``` @@ -46,8 +49,16 @@ Below you can find an example of the Slack notification: ### Execution Instructions Set the following environment variables: - - `PATH_TO_GIT_REPO` - - Description: To keep the size of the git repository to be cloned lower to make the job faster. + - `LOCAL_PATH_TO_GIT_REPO` + - Description: Local path to the Git repository. + - Example: `/Users/Abdullah.Khawer/Desktop/my-projects/my-repo` + - Requirement: REQUIRED + - `REMOTE_PATH_TO_GIT_REPO` + - Description: Remote path to the Git repository. + - Example: `https://gitlab.com/my-projects/my-repo` + - Requirement: REQUIRED + - `BRANCH_NAME` + - Description: Name of the branch in the Git repository against which secrets detection tool will be executed. - Example: `/Users/Abdullah.Khawer/Desktop/myrepo` - Requirement: REQUIRED - `CONFLUENCE_ENABLED` @@ -85,11 +96,11 @@ Set the following environment variables: - Requirement: REQUIRED (if `SLACK_ENABLED` is set to `1`) And then simply run the following 4 commands: -- `docker run --platform linux/amd64 -it -e PATH_TO_GIT_REPO=/git_repo -e CONFLUENCE_ENABLED=1 -e CONFLUENCE_SITE=$CONFLUENCE_SITE -e CONFLUENCE_USER_EMAIL_ID=$CONFLUENCE_USER_EMAIL_ID -e CONFLUENCE_USER_TOKEN=$CONFLUENCE_USER_TOKEN -e CONFLUENCE_PAGE_TITLE=$CONFLUENCE_PAGE_TITLE -e CONFLUENCE_PAGE_SPACE=$CONFLUENCE_PAGE_SPACE -e SLACK_ENABLED=1 -e SLACK_WEBHOOK_URL=$SLACK_WEBHOOK_URL -v $PATH_TO_GIT_REPO:/git_repo abdullahkhawer/find-and-report-secrets-in-code:latest` +- `docker run --platform linux/amd64 -it -e LOCAL_PATH_TO_GIT_REPO=$LOCAL_PATH_TO_GIT_REPO -e REMOTE_PATH_TO_GIT_REPO=$REMOTE_PATH_TO_GIT_REPO -e BRANCH_NAME=$BRANCH_NAME -e CONFLUENCE_ENABLED=$CONFLUENCE_ENABLED -e CONFLUENCE_SITE=$CONFLUENCE_SITE -e CONFLUENCE_USER_EMAIL_ID=$CONFLUENCE_USER_EMAIL_ID -e CONFLUENCE_USER_TOKEN=$CONFLUENCE_USER_TOKEN -e CONFLUENCE_PAGE_TITLE=$CONFLUENCE_PAGE_TITLE -e CONFLUENCE_PAGE_SPACE=$CONFLUENCE_PAGE_SPACE -e SLACK_ENABLED=$SLACK_ENABLED -e SLACK_WEBHOOK_URL=$SLACK_WEBHOOK_URL -v 
$LOCAL_PATH_TO_GIT_REPO:$LOCAL_PATH_TO_GIT_REPO abdullahkhawer/find-and-report-secrets-in-code:latest` - `export PATH=$PATH:/usr/local/gitleaks` - `bash /find-and-report-secrets-in-code/gitleaks.sh` - `python3 /find-and-report-secrets-in-code/main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]` - - Example: `python3 /find-and-report-secrets-in-code/main.py Europe/Amsterdam myproj/myrepo master` + - Example: `python3 /find-and-report-secrets-in-code/main.py Europe/Amsterdam my-projects/my-repo master` - Note: Details about supported time zones and their constant names can be found here: [pypi.org > project > pytz > Helpers](https://pypi.org/project/pytz/#:~:text=through%20multiple%20timezones.-,Helpers,-There%20are%20two) ## Automatically via CI/CD Pipeline diff --git a/gitleaks.sh b/gitleaks.sh index 0b35043..4036511 100644 --- a/gitleaks.sh +++ b/gitleaks.sh @@ -4,25 +4,25 @@ echo "Script Execution Started!" # remove Gitleaks reports if they exist already echo "Removing Gitleaks reports if they exist already..." -rm -rf ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json +rm -rf ${LOCAL_PATH_TO_GIT_REPO}/gitleaks-report-detailed.json rm -rf ./gitleaks-report.json # run Gitleaks to find secrets and generate a detailed report in JSON for the secrets found echo "Running Gitleaks to find secrets and generating a detailed report in JSON for the secrets found..." -gitleaks detect -r ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json -f json -s ${PATH_TO_GIT_REPO} --redact --no-git +gitleaks detect -r ${LOCAL_PATH_TO_GIT_REPO}/gitleaks-report-detailed.json -f json -s ${LOCAL_PATH_TO_GIT_REPO} --redact --no-git # create a final report in JSON using the detailed report having relevant information only echo "Creating a final report in JSON using the detailed report having relevant information only..." 
echo "[" > ./gitleaks-report.json -cat ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json | jq -c '.[]' | while read -r line; do +cat ${LOCAL_PATH_TO_GIT_REPO}/gitleaks-report-detailed.json | jq -c '.[]' | while read -r line; do description=$(jq -r '.Description' <<< "$line") start_line=$(jq -r '.StartLine' <<< "$line") file=$(jq -r '.File' <<< "$line") - file=$(echo "$file" | sed "s|^${PATH_TO_GIT_REPO}|.|") + file=$(echo "$file" | sed "s|^${LOCAL_PATH_TO_GIT_REPO}/||") secret_type=$(jq -r '.RuleID' <<< "$line") # use 'git blame' to find the commit id and author for each finding - blame=$(cd ${PATH_TO_GIT_REPO} && git blame -L "$start_line","$start_line" "$file" --porcelain) + blame=$(cd ${LOCAL_PATH_TO_GIT_REPO} && git blame -L "$start_line","$start_line" ./"$file" --porcelain) commit_id=$(echo "$blame" | awk 'NR==1' | awk -F ' ' '{print $1}') author=$(echo "$blame" | awk 'NR==2' | awk -F 'author ' '{print $2}') @@ -31,10 +31,11 @@ cat ${PATH_TO_GIT_REPO}/gitleaks-report-detailed.json | jq -c '.[]' | while read --arg desc "$description" \ --arg file "$file" \ --arg line_no "$start_line" \ + --arg url "${REMOTE_PATH_TO_GIT_REPO}/-/blob/${BRANCH_NAME}/${file}#L${start_line}" \ --arg type "$secret_type" \ --arg commit "$commit_id" \ --arg author "$author" \ - '{"Description": $desc, "File": $file, "Line No.": $line_no, "Secret Type": $type, "Commit": $commit, "Author": $author}' >> ./gitleaks-report.json + '{"Description": $desc, "File": $file, "Line No.": $line_no, "Link": $url, "Secret Type": $type, "Commit": $commit, "Author": $author}' >> ./gitleaks-report.json echo "," >> ./gitleaks-report.json done diff --git a/main.py b/main.py index cbd7a53..09998ff 100644 --- a/main.py +++ b/main.py @@ -1,26 +1,37 @@ +#!/usr/bin/python + +""" +Python script to update an Atlassian Confluence page with the secrets found +based on the report generated by the custom shell script 'gitleaks.sh' which +is using Gitleaks and to send a notification on Slack. 
+""" + +# required imports import json import os -import pytz import re -import requests import sys -from atlassian import Confluence from datetime import datetime +import pytz +import requests +from atlassian import Confluence + +print("Script Execution Started!") # get time zone, repository name and branch name from the arguments passed to the script if len(sys.argv) < 4 or len(sys.argv) > 5: - print("ERROR: Invalid arguments passed.") - print("Usage: python main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]") - print("Example: python main.py Europe/Amsterdam myproj/myrepo master") - sys.exit(1) + print("ERROR: Invalid arguments passed.") + print("Usage: python main.py TIME_ZONE REPOSITORY_NAME BRANCH_NAME [JSON_REPORT_URL]") + print("Example: python main.py Europe/Amsterdam my-projects/my-repo master") + sys.exit(1) time_zone = sys.argv[1] repo_name = sys.argv[2] branch_name = sys.argv[3] json_report_url = "" if len(sys.argv) == 5: - json_report_url = sys.argv[4] + json_report_url = sys.argv[4] -# Get the current time in UTC and convert it into the desired time zone's time +# get the current time in UTC and convert it into the desired time zone's time time_now = datetime.now() target_timezone = pytz.timezone(time_zone) time_now = time_now.astimezone(target_timezone) @@ -29,195 +40,273 @@ # get environment variables related to Confluence confluence_enabled = os.getenv("CONFLUENCE_ENABLED") if confluence_enabled is None: - print("ERROR: CONFLUENCE_ENABLED environment variable is not set.") - sys.exit(1) + print("ERROR: CONFLUENCE_ENABLED environment variable is not set.") + sys.exit(1) elif confluence_enabled == "1": - confluence_site = os.getenv("CONFLUENCE_SITE") - confluence_user = os.getenv("CONFLUENCE_USER_EMAIL_ID") - confluence_pass = os.getenv("CONFLUENCE_USER_TOKEN") - page_title = os.getenv("CONFLUENCE_PAGE_TITLE") - page_space = os.getenv("CONFLUENCE_PAGE_SPACE") - if confluence_site is None: - print("ERROR: CONFLUENCE_SITE environment 
variable is not set.") - sys.exit(1) - if confluence_user is None: - print("ERROR: CONFLUENCE_USER_EMAIL_ID environment variable is not set.") - sys.exit(1) - if confluence_pass is None: - print("ERROR: CONFLUENCE_USER_TOKEN environment variable is not set.") - sys.exit(1) - if page_title is None: - print("ERROR: CONFLUENCE_PAGE_TITLE environment variable is not set.") - sys.exit(1) - if page_space is None: - print("ERROR: CONFLUENCE_PAGE_SPACE environment variable is not set.") - sys.exit(1) + confluence_site = os.getenv("CONFLUENCE_SITE") + confluence_user = os.getenv("CONFLUENCE_USER_EMAIL_ID") + confluence_pass = os.getenv("CONFLUENCE_USER_TOKEN") + page_title = os.getenv("CONFLUENCE_PAGE_TITLE") + page_space = os.getenv("CONFLUENCE_PAGE_SPACE") + if confluence_site is None: + print("ERROR: CONFLUENCE_SITE environment variable is not set.") + sys.exit(1) + if confluence_user is None: + print("ERROR: CONFLUENCE_USER_EMAIL_ID environment variable is not set.") + sys.exit(1) + if confluence_pass is None: + print("ERROR: CONFLUENCE_USER_TOKEN environment variable is not set.") + sys.exit(1) + if page_title is None: + print("ERROR: CONFLUENCE_PAGE_TITLE environment variable is not set.") + sys.exit(1) + if page_space is None: + print("ERROR: CONFLUENCE_PAGE_SPACE environment variable is not set.") + sys.exit(1) # get environment variables related to Slack slack_enabled = os.getenv("SLACK_ENABLED") if slack_enabled is None: - print("ERROR: SLACK_ENABLED environment variable is not set.") - sys.exit(1) + print("ERROR: SLACK_ENABLED environment variable is not set.") + sys.exit(1) elif slack_enabled == "1": - slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL") - if slack_webhook_url is None: - print("ERROR: SLACK_WEBHOOK_URL environment variable is not set.") - sys.exit(1) + slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL") + if slack_webhook_url is None: + print("ERROR: SLACK_WEBHOOK_URL environment variable is not set.") + sys.exit(1) # define HTML page template if 
confluence_enabled == "1": - html_template = """ -
- |
- Description - |
-
- File - |
-
- Line No. - |
-
- Secret Type - |
-
- Commit ID - |
-
- Commit Author - |
-
---|
+ |
+ Description + |
+
+ File Reference + |
+
+ Secret Type + |
+
+ Commit ID + |
+
+ Commit Author + |
+
---|
{}
-{}
-{}
-{}
-{}
-{}
-{}
+{}
+{}
+{}
+{}
+