diff --git a/README.md b/README.md index 7876241..aa2e068 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,26 @@ by clicking "Use latest/xxx version" button. - arguments: [Flawfinder command arguments](ttps://github.com/david-a-wheeler/flawfinder/blob/master/README.md#usage) - output: Flawfinder output file name. Can be uploaded to GitHub. +# SonarQube Integration + +Flawfinder integrates with SonarQube's cxx plugin. Add the Flawfinder rules in +SonarQube under "Administration > CXX External Analyzers > Other Rule Definitions" +by running Flawfinder once using the following options. + +~~~~ +flawfinder --listrules --sonar [...] +~~~~ + +Then in your CI/CD pipeline, run Flawfinder like this to generate output that SonarQube +can understand. + +~~~~ +flawfinder -CDQ --sonar [...] +~~~~ + +Redirect the output to a file that you can then load into +SonarQube via the `sonar.cxx.other.reportPaths` option. + # Contributions We love contributions! For more information on contributing, see diff --git a/flawfinder.py b/flawfinder.py index f8c82a4..4db0516 100755 --- a/flawfinder.py +++ b/flawfinder.py @@ -41,6 +41,7 @@ from __future__ import division from __future__ import print_function +from xml.sax.saxutils import quoteattr import functools import sys import re @@ -88,6 +89,7 @@ csv_output = 0 # 1 = Generate CSV csv_writer = None sarif_output = 0 # 1 = Generate SARIF report +sonar_output = 0 # 1 = Generate SonarQube report omit_time = 0 # 1 = omit time-to-run (needed for testing) required_regex = None # If non-None, regex that must be met to report required_regex_compiled = None @@ -130,6 +132,68 @@ def to_json(o): return json.dumps(o, default=lambda o: o.__dict__, sort_keys=False, indent=2) +class SonarLogger(object): + _hitlist = None + + def __init__ (self, hits): + self._hitlist = hits + + def output_sonar(self): + str = '\n' + str += '\n' + for hit in self._hitlist: + file = os.path.realpath(hit.filename) + msg = quoteattr(hit.warning) + str += '\t\n' % \ + 
(hit.name, file, hit.line, hit.column, msg) + str += '' + return str + +class SonarRulesLogger(object): + _ruleset = None + + def __init__(self, rules): + self._ruleset = rules + + def output_rules(self): + SONAR_SEVERITIES = ["INFO", "INFO", "MINOR", "MAJOR", "CRITICAL", "BLOCKER"] + RULE_NAMES = { + 'access': 'Unsafe privileges could occur using function "%s"', + 'buffer': 'Buffer overflow using function "%s"', + 'crypto': 'Insecure cryptography using function "%s"', + 'format': 'Format string vulnerability using function "%s"', + 'input': 'Input from outside program using function "%s"', + 'integer': 'Integer overflow could occur using function "%s"', + 'misc': 'Miscellaneous finding using function "%s"', + 'obsolete': 'Obsolete function "%s"', + 'race': 'Race condition using function "%s"', + 'random': 'Insecure random function "%s"', + 'shell': 'Program execution using function "%s"', + 'tmpfile': 'Temporary file vulnerability using function "%s"', + 'free': 'Avoid usage of function "%s"' + } + str = '\n' + str += '\n' + for key in self._ruleset.keys(): + name = RULE_NAMES[self._ruleset[key][4]] % (key) + str += '\t\n' + str += '\t\tflawfinder.%s\n' % (key) + str += '\t\t%s\n' % (name) + str += '\t\t\n' + str += '\t\tflawfinder/%s\n' % (key) + str += '\t\t%s\n' % (SONAR_SEVERITIES[self._ruleset[key][1]]) + str += '\t\tVULNERABILITY\n' + str += '\t\tcwe\n' + str += '\t\tflawfinder\n' + str += '\t\tCONSTANT_ISSUE\n' + str += '\t\t2min\n' + str += '\t\n' + str += '' + return str + # The following implements the SarifLogger. # We intentionally merge all of flawfinder's functionality into 1 file # so it's trivial to copy & use elsewhere. @@ -609,6 +673,8 @@ def show(self): return if sarif_output: return + if sonar_output: + return if output_format: print("
  • ", end='') sys.stdout.write(h(self.filename)) @@ -1291,7 +1357,7 @@ def found_system(hit): "access": # ???: TODO: analyze TOCTOU more carefully. (normal, 4, - "This usually indicates a security flaw. If an " + "This usually indicates a security flaw. If an " "attacker can change anything along the path between the " "call to access() and the file's actual use (e.g., by moving " "files), the attacker can exploit the race condition (CWE-362/CWE-367!)", @@ -1332,7 +1398,7 @@ def found_system(hit): (normal, 5, "This accepts filename arguments; if an attacker " "can move those files or change the link content, " - "a race condition results. " + "a race condition results. " "Also, it does not terminate with ASCII NUL. (CWE-362, CWE-20)", # This is often just a bad idea, and it's hard to suggest a # simple alternative: @@ -1360,7 +1426,7 @@ def found_system(hit): "mkstemp": (normal, 2, - "Potential for temporary file vulnerability in some circumstances. Some older Unix-like systems create temp files with permission to write by all by default, so be sure to set the umask to override this. Also, some older Unix systems might fail to use O_EXCL when opening the file, so make sure that O_EXCL is used by the library (CWE-377)", + "Potential for temporary file vulnerability in some circumstances. Some older Unix-like systems create temp files with permission to write by all by default, so be sure to set the umask to override this. Also, some older Unix systems might fail to use O_EXCL when opening the file, so make sure that O_EXCL is used by the library (CWE-377)", "", "tmpfile", "avoid-race", {}, "FF1039"), @@ -1469,7 +1535,7 @@ def found_system(hit): "getenv|curl_getenv": (normal, 3, "Environment variables are untrustable input if they can be" - " set by an attacker. They can have any content and" + " set by an attacker. 
They can have any content and" " length, and the same variable can be set more than once (CWE-807, CWE-20)", "Check environment variables carefully before using them", "buffer", "", {'input': 1}, "FF1053"), @@ -1477,7 +1543,7 @@ def found_system(hit): "g_get_home_dir": (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';" "it returns untrustable input if the environment can be" - "set by an attacker. It can have any content and length, " + "set by an attacker. It can have any content and length, " "and the same variable can be set more than once (CWE-807, CWE-20)", "Check environment variables carefully before using them", "buffer", "", {'input': 1}, "FF1054"), @@ -1485,7 +1551,7 @@ def found_system(hit): "g_get_tmp_dir": (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';" "it returns untrustable input if the environment can be" - "set by an attacker. It can have any content and length, " + "set by an attacker. It can have any content and length, " "and the same variable can be set more than once (CWE-807, CWE-20)", "Check environment variables carefully before using them", "buffer", "", {'input': 1}, "FF1055"), @@ -1544,25 +1610,25 @@ def found_system(hit): "getlogin": (normal, 4, - "It's often easy to fool getlogin. Sometimes it does not work at all, because some program messed up the utmp file. Often, it gives only the first 8 characters of the login name. The user currently logged in on the controlling tty of our program need not be the user who started it. Avoid getlogin() for security-related purposes (CWE-807)", + "It's often easy to fool getlogin. Sometimes it does not work at all, because some program messed up the utmp file. Often, it gives only the first 8 characters of the login name. The user currently logged in on the controlling tty of our program need not be the user who started it. 
Avoid getlogin() for security-related purposes (CWE-807)", "Use getpwuid(geteuid()) and extract the desired information instead", "misc", "", {}, "FF1062"), "cuserid": (normal, 4, - "Exactly what cuserid() does is poorly defined (e.g., some systems use the effective uid, like Linux, while others like System V use the real uid). Thus, you can't trust what it does. It's certainly not portable (The cuserid function was included in the 1988 version of POSIX, but removed from the 1990 version). Also, if passed a non-null parameter, there's a risk of a buffer overflow if the passed-in buffer is not at least L_cuserid characters long (CWE-120)", + "Exactly what cuserid() does is poorly defined (e.g., some systems use the effective uid, like Linux, while others like System V use the real uid). Thus, you can't trust what it does. It's certainly not portable (The cuserid function was included in the 1988 version of POSIX, but removed from the 1990 version). Also, if passed a non-null parameter, there's a risk of a buffer overflow if the passed-in buffer is not at least L_cuserid characters long (CWE-120)", "Use getpwuid(geteuid()) and extract the desired information instead", "misc", "", {}, "FF1063"), "getpw": (normal, 4, - "This function is dangerous; it may overflow the provided buffer. It extracts data from a 'protected' area, but most systems have many commands to let users modify the protected area, and it's not always clear what their limits are. Best to avoid using this function altogether (CWE-676, CWE-120)", + "This function is dangerous; it may overflow the provided buffer. It extracts data from a 'protected' area, but most systems have many commands to let users modify the protected area, and it's not always clear what their limits are. Best to avoid using this function altogether (CWE-676, CWE-120)", "Use getpwuid() instead", "buffer", "", {}, "FF1064"), "getpass": (normal, 4, - "This function is obsolete and not portable. It was in SUSv2 but removed by POSIX.2. 
What it does exactly varies considerably between systems, particularly in where its prompt is displayed and where it gets its data (e.g., /dev/tty, stdin, stderr, etc.). In addition, some implementations overflow buffers. (CWE-676, CWE-120, CWE-20)", + "This function is obsolete and not portable. It was in SUSv2 but removed by POSIX.2. What it does exactly varies considerably between systems, particularly in where its prompt is displayed and where it gets its data (e.g., /dev/tty, stdin, stderr, etc.). In addition, some implementations overflow buffers. (CWE-676, CWE-120, CWE-20)", "Make the specific calls to do exactly what you want. If you continue to use it, or write your own, be sure to zero the password as soon as possible to avoid leaving the cleartext password visible in the process' address space", "misc", "", {'input': 1}, "FF1065"), @@ -1574,7 +1640,7 @@ def found_system(hit): "memalign": (normal, 1, - "On some systems (though not Linux-based systems) an attempt to free() results from memalign() may fail. This may, on a few systems, be exploitable. Also note that memalign() may not check that the boundary parameter is correct (CWE-676)", + "On some systems (though not Linux-based systems) an attempt to free() results from memalign() may fail. This may, on a few systems, be exploitable. Also note that memalign() may not check that the boundary parameter is correct (CWE-676)", "Use posix_memalign instead (defined in POSIX's 1003.1d). Don't switch to valloc(); it is marked as obsolete in BSD 4.3, as legacy in SUSv2, and is no longer defined in SUSv3. In some cases, malloc()'s alignment may be sufficient", "free", "", {}, "FF1067"), @@ -1586,7 +1652,7 @@ def found_system(hit): "usleep": (normal, 1, - "This C routine is considered obsolete (as opposed to the shell command by the same name). 
The interaction of this function with SIGALRM and other timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is unspecified (CWE-676)", + "This C routine is considered obsolete (as opposed to the shell command by the same name). The interaction of this function with SIGALRM and other timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is unspecified (CWE-676)", "Use nanosleep(2) or setitimer(2) instead", "obsolete", "", {}, "FF1069"), @@ -1955,7 +2021,10 @@ def initialize_ruleset(): if output_format: print("

    ") if list_rules: - display_ruleset(c_ruleset) + if sonar_output: + print(SonarRulesLogger(c_ruleset).output_rules()) + else: + display_ruleset(c_ruleset) sys.exit(0) @@ -1970,6 +2039,8 @@ def display_header(): return if sarif_output: return + if sonar_output: + return if not showheading: return if not displayed_header: @@ -2131,8 +2202,8 @@ def usage(): [--inputs | -I] [--minlevel X | -m X] [--falsepositive | -F] [--neverignore | -n] [--context | -c] [--columns | -C] [--dataonly | -D] - [--html | -H] [--immediate | -i] [--singleline | -S] - [--omittime] [--quiet | -Q] + [--html | -H] [--immediate | -i] [--sarif | --sonar] + [--singleline | -S] [--omittime] [--quiet | -Q] [--loadhitlist F] [--savehitlist F] [--diffhitlist F] [--] [source code file or source root directory]+ @@ -2193,6 +2264,7 @@ def usage(): --immediate | -i Immediately display hits (don't just wait until the end). --sarif Generate output in SARIF format. + --sonar Generate output in SonarQube format. --singleline | -S Single-line output. --omittime Omit time to run. 
@@ -2225,7 +2297,7 @@ def usage(): def process_options(): global show_context, show_inputs, allowlink, skipdotdir, omit_time global output_format, minimum_level, show_immediately, single_line - global csv_output, csv_writer, sarif_output + global csv_output, csv_writer, sarif_output, sonar_output global error_level global required_regex, required_regex_compiled global falsepositive @@ -2239,7 +2311,7 @@ def process_options(): "falsepositive", "falsepositives", "columns", "listrules", "omittime", "allowlink", "patch=", "followdotdir", "neverignore", "regex=", "quiet", "dataonly", "html", "singleline", "csv", - "error-level=", "sarif", + "error-level=", "sarif", "sonar", "loadhitlist=", "savehitlist=", "diffhitlist=", "version", "help" ]) for (opt, value) in optlist: @@ -2282,6 +2354,12 @@ def process_options(): sarif_output = 1 quiet = 1 showheading = 0 + sonar_output = 0 + elif opt == "--sonar": + sarif_output = 0 + sonar_output = 1 + quiet = 1 + showheading = 0 elif opt == "--error-level": error_level = int(value) elif opt in ("--immediate", "-i"): @@ -2538,6 +2616,8 @@ def flawfind(): if process_files(): if sarif_output: print(SarifLogger(hitlist).output_sarif()) + elif sonar_output: + print(SonarLogger(hitlist).output_sonar()) else: show_final_results() save_if_desired()