diff --git a/.travis.yml b/.travis.yml index 44ff3ad4a1..5c476df244 100644 --- a/.travis.yml +++ b/.travis.yml @@ -174,7 +174,9 @@ script: BUILD_DIR=$TRAVIS_BUILD_DIR/build make -C analyzer \ test_unit \ test_functional \ - test_tu_collector && + test_tu_collector \ + test_merge_clang_extdef_mappings \ + test_statistics_collector && if [[ "$TRAVIS_OS_NAME" = "linux" ]]; then make -C analyzer test_build_logger fi diff --git a/Makefile b/Makefile index 67520b0c43..84c10e198f 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ CC_CLIENT = $(CC_WEB)/client/ CC_ANALYZER = $(CURRENT_DIR)/analyzer CC_TOOLS = $(CURRENT_DIR)/tools +CC_ANALYZER_TOOLS = $(CC_ANALYZER)/tools # Root of the repository. ROOT = $(CURRENT_DIR) @@ -69,7 +70,27 @@ build_report_hash: package_report_hash: build_report_hash package_dir_structure cp -r $(CC_TOOLS)/codechecker_report_hash/build/codechecker_report_hash/codechecker_report_hash $(CC_BUILD_LIB_DIR) -package: package_dir_structure set_git_commit_template package_plist_to_html package_tu_collector package_report_converter package_report_hash +build_merge_clang_extdef_mappings: + $(MAKE) -C $(CC_ANALYZER)/tools/merge_clang_extdef_mappings build + +package_merge_clang_extdef_mappings: build_merge_clang_extdef_mappings package_dir_structure + # Copy merge-clang-extdef-mappings files. + cp -r $(CC_ANALYZER)/tools/merge_clang_extdef_mappings/build/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings $(CC_BUILD_LIB_DIR) && \ + chmod u+x $(CC_BUILD_LIB_DIR)/codechecker_merge_clang_extdef_mappings/cli.py && \ + cd $(CC_BUILD_DIR) && \ + ln -sf ../lib/python3/codechecker_merge_clang_extdef_mappings/cli.py bin/merge-clang-extdef-mappings + +build_statistics_collector: + $(MAKE) -C $(CC_ANALYZER_TOOLS)/statistics_collector build + +package_statistics_collector: build_statistics_collector package_dir_structure + # Copy statistics-collector files. + cp -r $(CC_ANALYZER_TOOLS)/statistics_collector/build/statistics_collector/codechecker_statistics_collector $(CC_BUILD_LIB_DIR) && \ + chmod u+x $(CC_BUILD_LIB_DIR)/codechecker_statistics_collector/cli.py && \ + cd $(CC_BUILD_DIR) && \ + ln -sf ../lib/python3/codechecker_statistics_collector/cli.py bin/post-process-stats + +package: package_dir_structure set_git_commit_template package_plist_to_html package_tu_collector package_report_converter package_report_hash package_merge_clang_extdef_mappings package_statistics_collector BUILD_DIR=$(BUILD_DIR) BUILD_LOGGER_64_BIT_ONLY=$(BUILD_LOGGER_64_BIT_ONLY) $(MAKE) -C $(CC_ANALYZER) package_analyzer BUILD_DIR=$(BUILD_DIR) $(MAKE) -C $(CC_WEB) package_web @@ -154,7 +175,7 @@ clean_venv_dev: clean: clean_package $(MAKE) -C $(CC_WEB) clean -clean_package: clean_plist_to_html clean_tu_collector clean_report_converter clean_report_hash +clean_package: clean_plist_to_html clean_tu_collector clean_report_converter clean_report_hash clean_statistics_collector rm -rf $(BUILD_DIR) find . -name "*.pyc" -delete @@ -170,6 +191,9 @@ clean_report_converter: clean_report_hash: $(MAKE) -C $(CC_TOOLS)/codechecker_report_hash clean +clean_statistics_collector: + $(MAKE) -C $(CC_ANALYZER_TOOLS)/statistics_collector clean + clean_travis: # Clean CodeChecker config files stored in the users home directory. 
rm -rf ~/.codechecker* diff --git a/analyzer/Makefile b/analyzer/Makefile index 8f522b283e..4f3f0d3e58 100644 --- a/analyzer/Makefile +++ b/analyzer/Makefile @@ -54,12 +54,32 @@ package_tu_collector: build_tu_collector package_dir_structure cd $(CC_BUILD_DIR) && \ ln -sf ../lib/python3/tu_collector/tu_collector.py bin/tu_collector +build_merge_clang_extdef_mappings: + $(MAKE) -C tools/merge_clang_extdef_mappings build + +package_merge_clang_extdef_mappings: build_merge_clang_extdef_mappings package_dir_structure + # Copy merge-clang-extdef-mappings files. + cp -r tools/merge_clang_extdef_mappings/build/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings $(CC_BUILD_LIB_DIR) && \ + chmod u+x $(CC_BUILD_LIB_DIR)/codechecker_merge_clang_extdef_mappings/cli.py && \ + cd $(CC_BUILD_DIR) && \ + ln -sf ../lib/python3/codechecker_merge_clang_extdef_mappings/cli.py bin/merge-clang-extdef-mappings + +build_statistics_collector: + $(MAKE) -C $(CC_ANALYZER_TOOLS)/statistics_collector build + +package_statistics_collector: build_statistics_collector package_dir_structure + # Copy statistics-collector files. + cp -r tools/statistics_collector/build/statistics_collector/codechecker_statistics_collector $(CC_BUILD_LIB_DIR) && \ + chmod u+x $(CC_BUILD_LIB_DIR)/codechecker_statistics_collector/cli.py && \ + cd $(CC_BUILD_DIR) && \ + ln -sf ../lib/python3/codechecker_statistics_collector/cli.py bin/post-process-stats + # This target should be used from the top level Makefile to build the package # together with the web part. This way we will not build plist-to-html # multiple times. package_analyzer: package_dir_structure -package: package_plist_to_html package_tu_collector package_analyzer +package: package_plist_to_html package_tu_collector package_analyzer package_merge_clang_extdef_mappings package_statistics_collector # Copy libraries. cp -r $(ROOT)/codechecker_common $(CC_BUILD_LIB_DIR) && \ cp -r $(CURRENT_DIR)/codechecker_analyzer $(CC_BUILD_LIB_DIR) diff --git a/analyzer/codechecker_analyzer/analysis_manager.py b/analyzer/codechecker_analyzer/analysis_manager.py index de5fd0fefa..31c392f709 100644 --- a/analyzer/codechecker_analyzer/analysis_manager.py +++ b/analyzer/codechecker_analyzer/analysis_manager.py @@ -28,12 +28,14 @@ from codechecker_common import plist_parser from codechecker_common.logger import get_logger +from codechecker_statistics_collector.collectors.special_return_value import \ + SpecialReturnValueCollector + from . import gcc_toolchain from .analyzers import analyzer_types from .analyzers.config_handler import CheckerState from .analyzers.clangsa.analyzer import ClangSA -from .analyzers.clangsa.statistics_collector import SpecialReturnValueCollector LOG = get_logger('analyzer') @@ -166,15 +168,9 @@ def is_ctu_active(source_analyzer): source_analyzer.is_ctu_enabled() -def prepare_check(action, analyzer_config, output_dir, - severity_map, skip_handler, statistics_data, - disable_ctu=False): - """ - Construct the source analyzer build the analysis command - and result handler for the analysis. - """ - reanalyzed = False - +def prepare_check(action, analyzer_config, output_dir, severity_map, + skip_handler, statistics_data, disable_ctu=False): + """ Construct the source analyzer and result handler. """ # Create a source analyzer.
source_analyzer = \ analyzer_types.construct_analyzer(action, @@ -198,7 +194,11 @@ def prepare_check(action, analyzer_config, output_dir, stats_cfg = \ SpecialReturnValueCollector.checker_analyze_cfg(stats_dir) - source_analyzer.add_checker_config(stats_cfg) + if os.path.exists(SpecialReturnValueCollector.stats_file(stats_dir)): + source_analyzer.add_checker_config(stats_cfg) + else: + LOG.debug('No checker statistics file was found for %s', + SpecialReturnValueCollector.checker_analyze) # Source is the currently analyzed source file # there can be more in one buildaction. @@ -217,14 +217,7 @@ def prepare_check(action, analyzer_config, output_dir, # The analyzer output file is based on the currently # analyzed source. rh.analyzed_source_file = action.source - - if os.path.exists(rh.analyzer_result_file): - reanalyzed = True - - # Construct the analyzer cmd. - analyzer_cmd = source_analyzer.construct_analyzer_cmd(rh) - - return source_analyzer, analyzer_cmd, rh, reanalyzed + return source_analyzer, rh def handle_success(rh, result_file, result_base, skip_handler, @@ -477,10 +470,14 @@ def check(check_data): if analyzer_config is None: raise Exception("Analyzer configuration is missing.") - source_analyzer, analyzer_cmd, rh, reanalyzed = \ - prepare_check(action, analyzer_config, - output_dir, context.severity_map, - skip_handler, statistics_data) + source_analyzer, rh = prepare_check(action, analyzer_config, + output_dir, context.severity_map, + skip_handler, statistics_data) + + reanalyzed = os.path.exists(rh.analyzer_result_file) + + # Construct the analyzer cmd. + analyzer_cmd = source_analyzer.construct_analyzer_cmd(rh) # The analyzer invocation calls __create_timeout as a callback # when the analyzer starts. This callback creates the timeout @@ -594,14 +591,15 @@ def __create_timeout(analyzer_process): if ctu_active and ctu_reanalyze_on_failure: LOG.error("Try to reanalyze without CTU") # Try to reanalyze with CTU disabled. - source_analyzer, analyzer_cmd, rh, reanalyzed = \ - prepare_check(action, - analyzer_config, - output_dir, - context.severity_map, - skip_handler, - statistics_data, + source_analyzer, rh = \ + prepare_check(action, analyzer_config, + output_dir, context.severity_map, + skip_handler, statistics_data, True) + reanalyzed = os.path.exists(rh.analyzer_result_file) + + # Construct the analyzer cmd. + analyzer_cmd = source_analyzer.construct_analyzer_cmd(rh) # Fills up the result handler with # the analyzer information. diff --git a/analyzer/codechecker_analyzer/analyzer.py b/analyzer/codechecker_analyzer/analyzer.py index 419f68c012..86fd92c2b8 100644 --- a/analyzer/codechecker_analyzer/analyzer.py +++ b/analyzer/codechecker_analyzer/analyzer.py @@ -20,14 +20,17 @@ from codechecker_common.logger import get_logger +from codechecker_statistics_collector.collectors.special_return_value import \ + SpecialReturnValueCollector +from codechecker_statistics_collector.collectors.return_value import \ + ReturnValueCollector + from . 
import analysis_manager, pre_analysis_manager, env, checkers from .analyzers import analyzer_types from .analyzers.config_handler import CheckerState from .analyzers.clangsa.analyzer import ClangSA -from .analyzers.clangsa.statistics_collector import \ - SpecialReturnValueCollector -from .analyzers.clangsa.statistics_collector import ReturnValueCollector +from .makefile import MakeFileCreator LOG = get_logger('analyzer') @@ -110,16 +113,20 @@ def __mgr_init(): signal.signal(signal.SIGINT, signal.SIG_IGN) -def __get_statistics_data(args, manager): +def __get_statistics_data(args): + """ Get statistics data. """ statistics_data = None if 'stats_enabled' in args and args.stats_enabled: - statistics_data = manager.dict({ - 'stats_out_dir': os.path.join(args.output_path, "stats")}) + statistics_data = { + 'stats_out_dir': os.path.join(args.output_path, "stats")} if 'stats_output' in args and args.stats_output: - statistics_data = manager.dict({'stats_out_dir': - args.stats_output}) + statistics_data = {'stats_out_dir': args.stats_output} + + if statistics_data: + statistics_data['stat_tmp_dir'] = \ + os.path.join(statistics_data.get('stats_out_dir'), 'tmp') if 'stats_min_sample_count' in args and statistics_data: if args.stats_min_sample_count > 1: @@ -142,6 +149,15 @@ def __get_statistics_data(args, manager): return statistics_data +def __get_ctu_data(config_map, ctu_dir): + """ Get CTU data. """ + ctu_capability = config_map[ClangSA.ANALYZER_NAME].ctu_capability + return {'ctu_dir': ctu_dir, + 'ctu_func_map_cmd': ctu_capability.mapping_tool_path, + 'ctu_func_map_file': ctu_capability.mapping_file_name, + 'ctu_temp_fnmap_folder': 'tmpExternalFnMaps'} + + def perform_analysis(args, skip_handler, context, actions, metadata_tool, compile_cmd_count): """ @@ -237,6 +253,20 @@ def perform_analysis(args, skip_handler, context, actions, metadata_tool, metadata_tool['analyzers'][analyzer] = metadata_info + if 'makefile' in args and args.makefile: + statistics_data = __get_statistics_data(args) + + ctu_data = None + if ctu_collect or statistics_data: + ctu_data = __get_ctu_data(config_map, ctu_dir) + + makefile_creator = MakeFileCreator(analyzers, args.output_path, + config_map, context, skip_handler, + ctu_collect, statistics_data, + ctu_data) + makefile_creator.create(actions) + return + if ctu_collect: shutil.rmtree(ctu_dir, ignore_errors=True) elif ctu_analyze and not os.path.exists(ctu_dir): @@ -253,19 +283,14 @@ def perform_analysis(args, skip_handler, context, actions, metadata_tool, actions_map = create_actions_map(actions, manager) # Setting to not None value will enable statistical analysis features. 
- statistics_data = __get_statistics_data(args, manager) + statistics_data = __get_statistics_data(args) + if statistics_data: + statistics_data = manager.dict(statistics_data) if ctu_collect or statistics_data: ctu_data = None if ctu_collect or ctu_analyze: - ctu_capability = config_map[ClangSA.ANALYZER_NAME].ctu_capability - ctu_data = manager.dict({'ctu_dir': ctu_dir, - 'ctu_func_map_cmd': - ctu_capability.mapping_tool_path, - 'ctu_func_map_file': - ctu_capability.mapping_file_name, - 'ctu_temp_fnmap_folder': - 'tmpExternalFnMaps'}) + ctu_data = manager.dict(__get_ctu_data(config_map, ctu_dir)) pre_analyze = [a for a in actions if a.analyzer_type == ClangSA.ANALYZER_NAME] diff --git a/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py b/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py index e4ac2d5bbd..e3d8fcb16e 100644 --- a/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py +++ b/analyzer/codechecker_analyzer/analyzers/clangsa/ctu_manager.py @@ -17,101 +17,61 @@ from codechecker_common.logger import get_logger +from codechecker_merge_clang_extdef_mappings.merge_clang_extdef_mappings \ + import merge + from .. import analyzer_base from . import ctu_triple_arch LOG = get_logger('analyzer') -def generate_func_map_lines(fnmap_dir): - """ Iterate over all lines of input files in random order. """ - - files = glob.glob(os.path.join(fnmap_dir, '*')) - for filename in files: - with open(filename, 'r', encoding='utf-8', - errors="ignore") as in_file: - for line in in_file: - yield line - - -def create_global_ctu_function_map(func_map_lines): - """ Takes iterator of individual function maps and creates a global map - keeping only unique names. We leave conflicting names out of CTU. - A function map contains the id of a function (mangled name) and the - originating source (the corresponding AST file) name.""" - - mangled_to_asts = {} - - for line in func_map_lines: - mangled_name, ast_file = line.strip().split(' ', 1) - # We collect all occurences of a function name into a list - if mangled_name not in mangled_to_asts: - mangled_to_asts[mangled_name] = {ast_file} - else: - mangled_to_asts[mangled_name].add(ast_file) - - mangled_ast_pairs = [] - - for mangled_name, ast_files in mangled_to_asts.items(): - if len(ast_files) == 1: - mangled_ast_pairs.append((mangled_name, ast_files.pop())) - - return mangled_ast_pairs +def merge_clang_extdef_mappings(ctu_dir, ctu_func_map_file, + ctu_temp_fnmap_folder): + """ Merge individual function maps into a global one.""" + triple_arches = glob.glob(os.path.join(ctu_dir, '*')) + for triple_path in triple_arches: + if not os.path.isdir(triple_path): + continue -def write_global_map(ctu_dir, arch, ctu_func_map_file, mangled_ast_pairs): - """ Write (mangled function name, ast file) pairs into final file. """ + triple_arch = os.path.basename(triple_path) + fnmap_dir = os.path.join(ctu_dir, triple_arch, + ctu_temp_fnmap_folder) - extern_fns_map_file = os.path.join(ctu_dir, arch, ctu_func_map_file) - with open(extern_fns_map_file, 'w', - encoding='utf-8', errors='ignore') as out_file: - for mangled_name, ast_file in mangled_ast_pairs: - out_file.write('%s %s\n' % (mangled_name, ast_file)) + merged_fn_map = os.path.join(ctu_dir, triple_arch, + ctu_func_map_file) + merge(fnmap_dir, merged_fn_map) + # Remove all temporary files. + shutil.rmtree(fnmap_dir, ignore_errors=True) -def merge_ctu_func_maps(ctu_dir, ctu_func_map_file, ctu_temp_fnmap_folder): - """ Merge individual function maps into a global one. 
- As the collect phase runs parallel on multiple threads, all compilation - units are separately mapped into a temporary file in ctu_temp_fnmap_folder. - These function maps contain the mangled names of functions and the source - (AST generated from the source) which had them. - These files should be merged at the end into a global map file: - ctu_func_map_file.""" +def generate_ast_cmd(action, config, triple_arch, source): + """ Command to generate AST (or PCH) file. """ + ast_joined_path = os.path.join(config.ctu_dir, triple_arch, 'ast', + os.path.realpath(source)[1:] + '.ast') + ast_path = os.path.abspath(ast_joined_path) + ast_dir = os.path.dirname(ast_path) - triple_arches = glob.glob(os.path.join(ctu_dir, '*')) - for triple_path in triple_arches: - if os.path.isdir(triple_path): - triple_arch = os.path.basename(triple_path) - fnmap_dir = os.path.join(ctu_dir, triple_arch, - ctu_temp_fnmap_folder) + cmd = ctu_triple_arch.get_compile_command(action, config, source) - func_map_lines = generate_func_map_lines(fnmap_dir) - mangled_ast_pairs = create_global_ctu_function_map(func_map_lines) - write_global_map(ctu_dir, triple_arch, ctu_func_map_file, - mangled_ast_pairs) + # The __clang_analyzer__ macro needs to be set in the imported TUs too. + cmd.extend(['-emit-ast', '-D__clang_analyzer__', '-w', '-o', ast_path]) - # Remove all temporary files - shutil.rmtree(fnmap_dir, ignore_errors=True) + return cmd, ast_dir def generate_ast(triple_arch, action, source, config, env): """ Generates ASTs for the current compilation command. """ + cmd, ast_dir = generate_ast_cmd(action, config, triple_arch, source) - ast_joined_path = os.path.join(config.ctu_dir, triple_arch, 'ast', - os.path.realpath(source)[1:] + '.ast') - ast_path = os.path.abspath(ast_joined_path) - ast_dir = os.path.dirname(ast_path) if not os.path.isdir(ast_dir): try: os.makedirs(ast_dir) except OSError: pass - cmd = ctu_triple_arch.get_compile_command(action, config, source) - # __clang__analyzer__ macro needs to be set in the imported TUs too. - cmd.extend(['-emit-ast', '-D__clang_analyzer__', '-w', '-o', ast_path]) - cmdstr = ' '.join(cmd) LOG.debug_analyzer("Generating AST using '%s'", cmdstr) ret_code, _, err = analyzer_base.SourceAnalyzer.run_proc(cmd, @@ -140,14 +100,19 @@ def func_map_list_src_to_ast(func_src_list): return func_ast_list -def map_functions(triple_arch, action, source, config, env, - func_map_cmd, temp_fnmap_folder): - """ Generate function map file for the current source. """ - +def get_extdef_mapping_cmd(action, config, source, func_map_cmd): + """ Get command to create CTU index file. """ cmd = ctu_triple_arch.get_compile_command(action, config) cmd[0] = func_map_cmd cmd.insert(1, source) cmd.insert(2, '--') + return cmd + + +def map_functions(triple_arch, action, source, config, env, - func_map_cmd, temp_fnmap_folder): + func_map_cmd, temp_fnmap_folder): + """ Generate function map file for the current source. """ + cmd = get_extdef_mapping_cmd(action, config, source, func_map_cmd) cmdstr = ' '.join(cmd) LOG.debug_analyzer("Generating function map using '%s'", cmdstr) diff --git a/analyzer/codechecker_analyzer/analyzers/clangsa/statistics.py b/analyzer/codechecker_analyzer/analyzers/clangsa/statistics.py new file mode 100644 index 0000000000..35ae27e730 --- /dev/null +++ b/analyzer/codechecker_analyzer/analyzers/clangsa/statistics.py @@ -0,0 +1,82 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions.
See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +""" +Calculates call statistics from analysis output +""" + +from codechecker_common.logger import get_logger + +from codechecker_statistics_collector.collectors.special_return_value import \ + SpecialReturnValueCollector +from codechecker_statistics_collector.collectors.return_value import \ + ReturnValueCollector + +from ..flag import has_flag +from ..flag import prepend_all + +LOG = get_logger('analyzer') + + +def build_stat_coll_cmd(action, config, source): + """ + Build the statistics collector analysis command. + """ + + cmd = [config.analyzer_binary, '-c', '-x', action.lang, '--analyze', + # Do not warn about the unused gcc/g++ arguments. + '-Qunused-arguments', + '--analyzer-output', 'text'] + + for plugin in config.analyzer_plugins: + cmd.extend(["-Xclang", "-plugin", + "-Xclang", "checkercfg", + "-Xclang", "-load", + "-Xclang", plugin]) + + cmd.extend(['-Xclang', + '-analyzer-opt-analyze-headers']) + + cmd.extend(config.analyzer_extra_arguments) + cmd.extend(action.analyzer_options) + + # Enable the statistics collector checkers only. + collector_checkers = [] + for checker_name, _ in config.checks().items(): + if SpecialReturnValueCollector.checker_collect in checker_name: + collector_checkers.append(checker_name) + + if ReturnValueCollector.checker_collect in checker_name: + collector_checkers.append(checker_name) + + if not collector_checkers: + LOG.debug('No available statistics collector checkers were found') + return [], False + + for coll_check in collector_checkers: + cmd.extend(['-Xclang', + '-analyzer-checker=' + coll_check]) + + compile_lang = action.lang + if not has_flag('-x', cmd): + cmd.extend(['-x', compile_lang]) + + if not has_flag('--target', cmd) and \ + action.target.get(compile_lang, "") != "": + cmd.append("--target=" + action.target[compile_lang]) + + if not has_flag('-std', cmd) and not has_flag('--std', cmd): + cmd.append(action.compiler_standard.get(compile_lang, "")) + + cmd.extend(prepend_all( + '-isystem', + action.compiler_includes.get(compile_lang, []))) + + if source: + cmd.append(source) + return cmd, True diff --git a/analyzer/codechecker_analyzer/analyzers/clangsa/statistics_collector.py b/analyzer/codechecker_analyzer/analyzers/clangsa/statistics_collector.py deleted file mode 100644 index 87c23233f3..0000000000 --- a/analyzer/codechecker_analyzer/analyzers/clangsa/statistics_collector.py +++ /dev/null @@ -1,362 +0,0 @@ -# ------------------------------------------------------------------------- -# -# Part of the CodeChecker project, under the Apache License v2.0 with -# LLVM Exceptions. See LICENSE for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ------------------------------------------------------------------------- - -""" -Calculates call statistics from analysis output -""" - - -from io import StringIO - -from collections import defaultdict -import os -import re - -from codechecker_common.logger import get_logger - -from ..flag import has_flag -from ..flag import prepend_all - -LOG = get_logger('analyzer') - - -def build_stat_coll_cmd(action, config, source): - """ - Build the statistics collector analysis command. - """ - - cmd = [config.analyzer_binary, '-c', '-x', action.lang, '--analyze', - # Do not warn about the unused gcc/g++ arguments. 
- '-Qunused-arguments', - '--analyzer-output', 'text'] - - for plugin in config.analyzer_plugins: - cmd.extend(["-Xclang", "-plugin", - "-Xclang", "checkercfg", - "-Xclang", "-load", - "-Xclang", plugin]) - - cmd.extend(['-Xclang', - '-analyzer-opt-analyze-headers']) - - cmd.extend(config.analyzer_extra_arguments) - cmd.extend(action.analyzer_options) - - # Enable the statistics collector checkers only. - collector_checkers = [] - for checker_name, _ in config.checks().items(): - if SpecialReturnValueCollector.checker_collect in checker_name: - collector_checkers.append(checker_name) - - if ReturnValueCollector.checker_collect in checker_name: - collector_checkers.append(checker_name) - - if not collector_checkers: - LOG.debug('No available statistics collector checkers were found') - return [], False - - for coll_check in collector_checkers: - cmd.extend(['-Xclang', - '-analyzer-checker=' + coll_check]) - - compile_lang = action.lang - if not has_flag('-x', cmd): - cmd.extend(['-x', compile_lang]) - - if not has_flag('--target', cmd) and \ - action.target.get(compile_lang, "") != "": - cmd.append("--target=" + action.target[compile_lang]) - - if not has_flag('-std', cmd) and not has_flag('--std', cmd): - cmd.append(action.compiler_standard.get(compile_lang, "")) - - cmd.extend(prepend_all( - '-isystem', - action.compiler_includes.get(compile_lang, []))) - - if source: - cmd.append(source) - return cmd, True - - -class SpecialReturnValueCollector(object): - """ - Collect special return value statistics. - - This script lists functions of which the return - - value is checked for negative (integers) or null (pointers). - """ - - # Checker name used for pre analysis. - checker_collect = 'statisticscollector.SpecialReturnValue' - - # Checker name which runs the analysis. - checker_analyze = 'statisticsbased.SpecialReturnValue' - - def __init__(self, stats_min_sample_count, - stats_relevance_threshold): - - self.stats_min_sample_count = stats_min_sample_count - self.stats_relevance_threshold = stats_relevance_threshold - # Matching these lines - """"/.../x.c:551:12: warning: - Special Return Value:/.../x.c:551:12,parsedate,0,0 - """ - ptrn = \ - r'.*warning: Special Return Value:'\ - '.*:[0-9]*:[0-9]*.*,(.*),([0,1]),([0,1])' - self.special_ret_val_regexp = re.compile(ptrn) - - # collected statistics - self.stats = { - 'total': defaultdict(int), - 'nof_negative': defaultdict(int), - 'nof_null': defaultdict(int) - } - - @staticmethod - def stats_file(path): - return os.path.join(path, 'SpecialReturn.yaml') - - @staticmethod - def checker_analyze_cfg(path): - """ - Return the checker config parameter for the analyzer checker. 
- """ - if not os.path.exists(SpecialReturnValueCollector.stats_file(path)): - LOG.debug('No checker statistics file was found for %s', - SpecialReturnValueCollector.checker_analyze) - return [] - else: - return ['-Xclang', '-analyzer-config', - '-Xclang', - 'alpha.ericsson.statisticsbased:APIMetadataPath=' + path] - - def total(self): - return self.stats.get('total') - - def nof_null(self): - return self.stats.get('nof_null') - - def nof_negative(self): - return self.stats.get('nof_negative') - - def process_line(self, line): - """ - Match regex on the line - """ - m = self.special_ret_val_regexp.match(line) - if m: - func = m.group(1) - ret_negative = m.group(2) - ret_null = m.group(3) - - self.stats['total'][func] += 1 - self.stats['nof_negative'][func] += int(ret_negative) - self.stats['nof_null'][func] += int(ret_null) - - def filter_stats(self): - - neg = [] - null = [] - stats = self.stats - total = stats.get('total') - - for key in sorted(stats.get('total').keys()): - negative_ratio = stats['nof_negative'][key]/stats['total'][key] - if (self.stats_relevance_threshold < negative_ratio < 1 and - total[key] >= self.stats_min_sample_count): - neg.append(key) - - null_ratio = stats['nof_null'][key]/stats['total'][key] - if (self.stats_relevance_threshold < null_ratio < 1 and - total[key] >= self.stats_min_sample_count): - null.append(key) - return neg, null - - def get_yaml(self): - """ - FIXME proper yaml generation. - """ - stats_yaml = StringIO() - - stats_yaml.write("#\n") - stats_yaml.write("# SpecialReturn metadata format 1.0\n") - neg, null = self.filter_stats() - - for n in neg: - stats_yaml.write( - "{name: " + n + ", relation: LT, value: 0}\n") - for n in null: - stats_yaml.write( - "{name: " + n + ", relation: EQ, value: 0}\n") - - return stats_yaml.getvalue() - - -class ReturnValueCollector(object): - """ - Collect return value statistics. - This script lists functions of which the return value is mostly checked. - """ - - # Checker name used for pre analysis. - checker_collect = 'statisticscollector.ReturnValueCheck' - - # Checker name which runs the analysis. - checker_analyze = 'statisticsbased.UncheckedReturnValue' - - def __init__(self, stats_min_sample_count, - stats_relevance_threshold): - - self.stats_min_sample_count = stats_min_sample_count - self.stats_relevance_threshold = stats_relevance_threshold - # Matching these lines - """ - /.../x.c:551:12: - warning: Return Value Check:/.../x.c:551:12,parsedate,0 - """ - - self.ret_val_regexp = \ - re.compile(r'.*warning: Return Value Check:' - '.*:[0-9]*:[0-9]*.*,(.*),([0,1])') - - self.stats = {'total': defaultdict(int), - 'nof_unchecked': defaultdict(int)} - - @staticmethod - def stats_file(path): - return os.path.join(path, 'UncheckedReturn.yaml') - - @staticmethod - def checker_analyze_cfg(path): - """ - Return the checker config parameter for the analyzer checker. 
- """ - if not os.path.exists(ReturnValueCollector.stats_file(path)): - LOG.debug('No checker statistics file was found for %s', - ReturnValueCollector.checker_analyze) - return [] - else: - return ['-Xclang', '-analyzer-config', - '-Xclang', - 'alpha.ericsson.statisticsbased:APIMetadataPath=' + path] - - def total(self): - return self.stats.get('total') - - def nof_unchecked(self): - return self.stats.get('nof_unchecked') - - def unchecked(self): - return self.stats.get('unchecked') - - def process_line(self, line): - """ - Match regex on the line - """ - m = self.ret_val_regexp.match(line) - if m: - func = m.group(1) - checked = m.group(2) - self.stats['total'][func] += 1 - self.stats['nof_unchecked'][func] += int(checked) - - def filter_stats(self): - """ - Filter the collected statistics based on the threshold. - Return a lisf of function names where the return value - was unchecked above the threshold. - """ - unchecked_functions = [] - total = self.stats.get('total') - for key in sorted(total): - checked_ratio = 1 - \ - self.stats['nof_unchecked'][key]/self.stats['total'][key] - if (self.stats_relevance_threshold < checked_ratio < 1 and - self.stats['total'][key] >= self.stats_min_sample_count): - unchecked_functions.append(key) - return unchecked_functions - - def get_yaml(self): - """ - FIXME proper yaml generation. - """ - stats_yaml = StringIO() - - stats_yaml.write("#\n") - stats_yaml.write("# UncheckedReturn metadata format 1.0\n") - for function_name in self.filter_stats(): - stats_yaml.write("- " + function_name + '\n') - - return stats_yaml.getvalue() - - -def postprocess_stats(clang_output_dir, stats_dir, stats_min_sample_count, - stats_relevance_threshold): - """ - Read the clang analyzer outputs where the statistics emitter checkers - were enabled and collect the statistics. - - After the statistics collection cleanup the output files. - """ - - # Statistics yaml files will be stored in stats_dir - try: - os.stat(stats_dir) - except Exception as ex: - LOG.debug(ex) - os.mkdir(stats_dir) - - if not os.path.exists(clang_output_dir): - LOG.debug("No statistics directory was found") - return - - clang_outs = [] - try: - for f in os.listdir(clang_output_dir): - if os.path.isfile(os.path.join(clang_output_dir, f)): - clang_outs.append(os.path.join(clang_output_dir, f)) - except OSError as oerr: - LOG.debug(oerr) - LOG.debug("Statistics can not be collected.") - LOG.debug("Analyzer output error.") - return - - if not clang_outs: - LOG.warning("No output files were found to collect statistics.") - return - ret_collector = ReturnValueCollector(stats_min_sample_count, - stats_relevance_threshold) - special_ret_collector =\ - SpecialReturnValueCollector(stats_min_sample_count, - stats_relevance_threshold) - - for clang_output in clang_outs: - with open(clang_output, 'r', - encoding='utf-8', errors='ignore') as out: - clang_output = "" - for line in out: - clang_output += line + "\n" - ret_collector.process_line(line) - special_ret_collector.process_line(line) - LOG.debug("Collecting statistics finished.") - - # Write out statistics. 
- unchecked_yaml = ReturnValueCollector.stats_file(stats_dir) - LOG.debug("Writing out statistics to %s", unchecked_yaml) - with open(unchecked_yaml, 'w', - encoding='utf-8', errors='ignore') as uyaml: - uyaml.write(ret_collector.get_yaml()) - - special_ret_yaml = SpecialReturnValueCollector.stats_file(stats_dir) - LOG.debug("Writing out statistics to %s", special_ret_yaml) - with open(special_ret_yaml, 'w', - encoding='utf-8', errors='ignore') as uyaml: - uyaml.write(special_ret_collector.get_yaml()) diff --git a/analyzer/codechecker_analyzer/analyzers/clangtidy/analyzer.py b/analyzer/codechecker_analyzer/analyzers/clangtidy/analyzer.py index a2900996fa..6b882a2629 100644 --- a/analyzer/codechecker_analyzer/analyzers/clangtidy/analyzer.py +++ b/analyzer/codechecker_analyzer/analyzers/clangtidy/analyzer.py @@ -172,7 +172,7 @@ def construct_analyzer_cmd(self, result_handler): analyzer_cmd.extend(config.analyzer_extra_arguments) if config.checker_config and config.checker_config != '{}': - analyzer_cmd.append('-config=' + config.checker_config) + analyzer_cmd.append("-config=" + config.checker_config) analyzer_cmd.append(self.source_file) diff --git a/analyzer/codechecker_analyzer/cmd/analyze.py b/analyzer/codechecker_analyzer/cmd/analyze.py index e8a310169e..f3b9ff4b64 100644 --- a/analyzer/codechecker_analyzer/cmd/analyze.py +++ b/analyzer/codechecker_analyzer/cmd/analyze.py @@ -197,6 +197,19 @@ def add_arguments_to_parser(parser): help="Specify the format the analysis results should " "use.") + parser.add_argument('--makefile', + dest="makefile", + required=False, + action='store_true', + default=False, + help="Generate a Makefile in the given output " + "directory from the analyzer commands and do not " + "execute the analysis. The analysis can be " + "executed by calling the make command like " + "'make -f output_dir/Makefile'. You can ignore " + "errors with the -i/--ignore-errors option: " + "'make -f output_dir/Makefile -i'.") + parser.add_argument('-q', '--quiet', dest="quiet", action='store_true', diff --git a/analyzer/codechecker_analyzer/makefile.py b/analyzer/codechecker_analyzer/makefile.py new file mode 100644 index 0000000000..133f6cd36f --- /dev/null +++ b/analyzer/codechecker_analyzer/makefile.py @@ -0,0 +1,315 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" Creates a Makefile from analyzer actions. """ + +import hashlib +import os +import shlex +import uuid + +from codechecker_common.logger import get_logger + +from codechecker_statistics_collector.collectors.special_return_value import \ + SpecialReturnValueCollector + +from . import analysis_manager, env +from .analyzers.clangsa.analyzer import ClangSA +from .analyzers.clangsa.ctu_manager import generate_ast_cmd, \ + get_extdef_mapping_cmd +from .analyzers.clangsa.ctu_triple_arch import get_triple_arch +from .analyzers.clangsa.statistics import build_stat_coll_cmd +from .analyzers.clangtidy.analyzer import ClangTidy + + +LOG = get_logger('analyzer') + + +class MakeFileCreator(object): + """ Creates a Makefile from analyzer actions.
""" + + def __init__(self, analyzers, output_path, config_map, context, + skip_handler, pre_analysis, statistics_data, ctu_data): + self.__analyzers = analyzers + self.__output_path = output_path + self.__config_map = config_map + self.__context = context + self.__skip_handler = skip_handler + self.__pre_analysis = pre_analysis + self.__log_info = "[`date +'%Y-%m-%d %H:%M:%S'`] -" + + self.__ctu_data = ctu_data + self.__ctu_dir = None + self.__ctu_temp_fnmap_folder = None + if ctu_data: + self.__ctu_dir = ctu_data['ctu_dir'] if ctu_data else None + self.__ctu_temp_fnmap_folder = ctu_data['ctu_temp_fnmap_folder'] + + self.__statistics_data = statistics_data + self.__stat_tmp_dir = None + self.__stats_dir = None + if statistics_data: + self.__stat_tmp_dir = statistics_data['stat_tmp_dir'] + self.__stats_dir = statistics_data['stats_out_dir'] + + self.__makefile = os.path.join(output_path, 'Makefile') + + self.__analyzer_env = env.extend(context.path_env_extra, + context.ld_lib_path_extra) + + self.__config = None + self.__func_map_cmd = None + if ClangSA.ANALYZER_NAME in config_map: + self.__config = config_map[ClangSA.ANALYZER_NAME] + + ctu_capability = config_map[ClangSA.ANALYZER_NAME].ctu_capability + self.__func_map_cmd = ctu_capability.mapping_tool_path + + def __format_analyzer_type(self, analyzer_type): + """ Format the given analyzer type. """ + return analyzer_type.replace('-', '') + + def __get_target_name(self, action): + """ Get target name for the given action. """ + analyzer_name = self.__format_analyzer_type(action.analyzer_type) + target_name = analyzer_name + '_' + action.source + '_' + \ + action.original_command + return hashlib.md5(target_name.encode('utf-8')).hexdigest() + + def __write_header(self, mfile): + """ Write header section to the given file. + + Write a header section to this file which tells the user that this + file is auto generated by CodeChecker and print out the exact + CodeChecker version. + """ + mfile.write("#\n# Autogenerated by CodeChecker v{0}.\n#\n" + "# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT " + "YOU ARE DOING.\n#\n\n".format(self.__context.version)) + + def __write_env_exports(self, mfile): + """ Exports environment variables. """ + package_root = os.getenv("CC_PACKAGE_ROOT") + if package_root: + bin_dir = os.path.join(package_root, 'bin') + python3_bin = os.path.join(package_root, 'python3', 'bin') + + mfile.write('export PATH := {0}:{1}:${{PATH}}\n'.format( + bin_dir, python3_bin)) + + for env_var in ["LD_LIBRARY_PATH", "PYTHONPATH", "PYTHONHOME"]: + value = os.getenv(env_var) + if value: + mfile.write('export {0} := {1}\n'.format(env_var, value)) + + mfile.write('\n') + + def __write_default_targets(self, mfile): + """ Write default targets to the given file. + + This will add 'all' target to be the default target and creates an + 'all_' target to run all analysis of the specified + analyzer. + """ + mfile.write("# Default target to run all analysis.\n" + "default: all\n\n") + + for analyzer in self.__analyzers: + analyzer_name = self.__format_analyzer_type(analyzer) + mfile.write("# Target to run only '{0}' analysis.\n" + "all: all_{0}\n\n".format(analyzer_name)) + + def __get_ctu_pre_analysis_cmds(self, action): + """ Get CTU pre-analysis commands. """ + cmds = [] + + # Get architecture part of the target triple. + triple_arch = get_triple_arch(action, action.source, + self.__config, self.__analyzer_env) + + # Get command to generate PCH file. 
+ cmd, ast_dir = generate_ast_cmd(action, self.__config, + triple_arch, action.source) + cmds.append('mkdir -p {0}'.format(ast_dir)) + cmds.append(' '.join(cmd)) + + # Get command to create CTU index file. + cmd = get_extdef_mapping_cmd(action, self.__config, + action.source, self.__func_map_cmd) + + fnmap_tmp_dir = os.path.join(self.__ctu_dir, triple_arch, + self.__ctu_temp_fnmap_folder) + cmds.append('mkdir -p {0}'.format(fnmap_tmp_dir)) + + func_def_map = os.path.join(fnmap_tmp_dir, str(uuid.uuid4())) + cmds.append('{0} > {1} 2>/dev/null'.format(' '.join(cmd), + func_def_map)) + + # Modify externalDefMap.txt file to contain relative paths and + # modify the extension to '.cpp.ast'. + # The sed command is a bit different on Mac OS X: the '-i' option + # requires a parameter to tell what extension to add for the backup + # file. For this reason we do not use this option; instead we redirect + # the sed output to a temporary file and overwrite the original file + # with it. + tmp_func_def_map = func_def_map + '_tmp' + cmds.append('sed -E "s|/(.*)|ast/\\1.ast|" {0} > {1}'.format( + func_def_map, tmp_func_def_map)) + cmds.append('mv -f {0} {1}'.format(tmp_func_def_map, func_def_map)) + + return cmds + + def __get_stats_pre_analysis_cmds(self, action): + """ Get statistics pre-analysis commands. """ + cmds = [] + + stats_cmd, can_collect = build_stat_coll_cmd(action, self.__config, + action.source) + if can_collect: + cmds.append('mkdir -p ' + self.__stat_tmp_dir) + _, source_filename = os.path.split(action.source) + output_id = source_filename + str(uuid.uuid4()) + '.stat' + + stat_for_source = os.path.join(self.__stat_tmp_dir, output_id) + cmds.append('{0} > {1} 2>&1'.format(' '.join(stats_cmd), + stat_for_source)) + + return cmds + + def __write_pre_analysis_targets(self, mfile, action, pre_all_target): + """ Creates the pre-analysis targets. """ + pre_all_cmds = [] + + # Get CTU pre-analysis commands. + if self.__ctu_data: + pre_all_cmds.extend(self.__get_ctu_pre_analysis_cmds(action)) + + # Get statistics pre-analysis commands. + if self.__statistics_data: + pre_all_cmds.extend(self.__get_stats_pre_analysis_cmds(action)) + + commands = '\n'.join(['\t@' + c for c in pre_all_cmds]) + + target = self.__get_target_name(action) + mfile.write('{0}:\n' + '\t@echo "{4} Pre-analysis of {3}."\n' + '{1}\n' + '{2}: {0}\n\n'.format('pre_' + target, + commands, + pre_all_target, + action.source, + self.__log_info)) + + def __write_post_pre_analysis_targets(self, mfile, pre_all_target): + """ Creates targets to post-process pre-analysis results. """ + # Get post-processing commands. + post_all_cmds = [] + + if self.__ctu_data: + # Merge individual function maps into a global one. + post_all_cmds.append("find {0} -maxdepth 1 -mindepth 1 -type d " + "-exec merge-clang-extdef-mappings " + "-i {{}}/{1} -o {{}}/externalDefMap.txt " + "\\;".format( + self.__ctu_dir, + self.__ctu_temp_fnmap_folder)) + + if self.__statistics_data: + # Collect statistics from the clang analyzer output. + post_all_cmds.append("post-process-stats -i {0} {1}".format( + self.__stat_tmp_dir, + self.__stats_dir)) + + commands = '\n'.join(['\t@' + c for c in post_all_cmds]) + + mfile.write('post_{0}: {0}\n' + '{1}\n\n'.format(pre_all_target, + commands)) + + def __write_analysis_targets(self, mfile, action, post_pre_all_target): + """ Creates normal analysis targets.
""" + source_analyzer, rh = analysis_manager.prepare_check( + action, self.__config_map.get(action.analyzer_type), + self.__output_path, self.__context.severity_map, + self.__skip_handler, self.__statistics_data) + + if self.__statistics_data and post_pre_all_target: + stats_cfg = SpecialReturnValueCollector.checker_analyze_cfg( + self.__stats_dir) + + source_analyzer.add_checker_config(stats_cfg) + + analyzer_cmd = source_analyzer.construct_analyzer_cmd(rh) + + # Escape elements before join theme into one string. + analyzer_cmd = map(shlex.quote, analyzer_cmd) + + target = self.__get_target_name(action) + analyzer_name = self.__format_analyzer_type(action.analyzer_type) + + if action.analyzer_type == ClangTidy.ANALYZER_NAME: + analyzer_output_file = rh.analyzer_result_file + ".output" + file_name = "{source_file}_{analyzer}_" + target + report_converter_cmd = ["report-converter", + "-t", "clang-tidy", + "-o", self.__output_path, + "--filename", file_name, + analyzer_output_file] + + command = "@{0} > {1}\n" \ + "\t@{2} 1>/dev/null\n" \ + "\t@rm -rf {1}\n".format(' '.join(analyzer_cmd), + analyzer_output_file, + ' '.join(report_converter_cmd)) + else: + command = "@{0} 1>/dev/null".format(' '.join(analyzer_cmd)) + + mfile.write('{0}: {1}\n' + '\t@echo "{6} {4} analyze {5}."\n' + '\t{2}\n' + 'all_{3}: {0}\n\n'.format(target, + post_pre_all_target, + command, + analyzer_name, + action.analyzer_type, + action.source, + self.__log_info)) + + def create(self, actions): + """ Creates a Makefile from the given actions. """ + LOG.info("Creating Makefile from the analyzer commands: '%s'...", + self.__makefile) + + with open(self.__makefile, 'w+', + encoding='utf-8', errors='ignore') as mfile: + self.__write_header(mfile) + self.__write_env_exports(mfile) + self.__write_default_targets(mfile) + + clangsa_analyzer_name = \ + self.__format_analyzer_type(ClangSA.ANALYZER_NAME) + pre_all_target = 'pre_all_' + clangsa_analyzer_name + + for action in actions: + need_pre_analysis_targets = self.__pre_analysis and \ + action.analyzer_type == ClangSA.ANALYZER_NAME + + post_pre_all_target = '' + if need_pre_analysis_targets: + self.__write_pre_analysis_targets(mfile, action, + pre_all_target) + post_pre_all_target = 'post_' + pre_all_target + + self.__write_analysis_targets(mfile, action, + post_pre_all_target) + + # Write targets which will be run after pre-analysis phases + # to post-process the results. + if self.__pre_analysis: + self.__write_post_pre_analysis_targets(mfile, + pre_all_target) + LOG.info("Done.") diff --git a/analyzer/codechecker_analyzer/pre_analysis_manager.py b/analyzer/codechecker_analyzer/pre_analysis_manager.py index 5d07b46ad7..ced2d6c5ab 100644 --- a/analyzer/codechecker_analyzer/pre_analysis_manager.py +++ b/analyzer/codechecker_analyzer/pre_analysis_manager.py @@ -21,9 +21,11 @@ from codechecker_analyzer import env from codechecker_common.logger import get_logger +from codechecker_statistics_collector import post_process_stats + from .analyzers import analyzer_base from .analyzers.clangsa import ctu_manager, ctu_triple_arch -from .analyzers.clangsa import statistics_collector +from .analyzers.clangsa import statistics from .analyzers.clangsa.analyzer import ClangSA @@ -36,9 +38,8 @@ def collect_statistics(action, source, clangsa_config, Run the statistics collection command and save the stdout and stderr to a file. 
""" - cmd, can_collect = statistics_collector.build_stat_coll_cmd(action, - clangsa_config, - source) + cmd, can_collect = statistics.build_stat_coll_cmd(action, clangsa_config, + source) if not can_collect: LOG.debug('Can not collect statistical data.') @@ -172,8 +173,7 @@ def signal_handler(signum, frame): if statistics_data: # Statistics collection is enabled setup temporary # directories. - stat_tmp_dir = os.path.join(statistics_data.get('stats_out_dir'), - 'tmp') + stat_tmp_dir = statistics_data['stat_tmp_dir'] # Cleaning previous outputs. if os.path.exists(stat_tmp_dir): @@ -181,8 +181,6 @@ def signal_handler(signum, frame): os.makedirs(stat_tmp_dir) - statistics_data['stat_tmp_dir'] = stat_tmp_dir - try: collect_actions = [(build_action, context, @@ -202,7 +200,7 @@ def signal_handler(signum, frame): # Postprocessing the pre analysis results. if ctu_data: - ctu_manager.merge_ctu_func_maps( + ctu_manager.merge_clang_extdef_mappings( ctu_data.get('ctu_dir'), ctu_data.get('ctu_func_map_file'), ctu_data.get('ctu_temp_fnmap_folder')) @@ -212,12 +210,11 @@ def signal_handler(signum, frame): stats_in = statistics_data.get('stat_tmp_dir') stats_out = statistics_data.get('stats_out_dir') - statistics_collector.postprocess_stats(stats_in, stats_out, - statistics_data.get( - 'stats_min_sample_count'), - statistics_data.get( - 'stats_relevance_threshold') - ) + post_process_stats.process(stats_in, stats_out, + statistics_data.get( + 'stats_min_sample_count'), + statistics_data.get( + 'stats_relevance_threshold')) if os.path.exists(stats_in): LOG.debug('Cleaning up temporary statistics directory') diff --git a/analyzer/tests/Makefile b/analyzer/tests/Makefile index b099c90794..a9bf0edec9 100644 --- a/analyzer/tests/Makefile +++ b/analyzer/tests/Makefile @@ -20,8 +20,9 @@ pycodestyle: pycodestyle_in_env: venv_dev $(ACTIVATE_DEV_VENV) && $(PYCODESTYLE_TEST_CMD) -PYLINT_TEST_CMD = PYLINTRC=$(ROOT)/.pylintrc \ - pylint ./bin/** ./codechecker_analyzer ./tests/** +PYLINT_TEST_CMD = $(MAKE) -C $(CURRENT_DIR)/tools/merge_clang_extdef_mappings pylint && \ + $(MAKE) -C $(CURRENT_DIR)/tools/statistics_collector pylint && \ + PYLINTRC=$(ROOT)/.pylintrc pylint ./bin/** ./codechecker_analyzer ./tests/** pylint: $(PYLINT_TEST_CMD) @@ -65,3 +66,17 @@ test_tu_collector: test_tu_collector_in_env: $(ACTIVATE_DEV_VENV) && \ $(REPO_ROOT) make -C $(ROOT)/tools/tu_collector test + +test_merge_clang_extdef_mappings: + make -C tools/merge_clang_extdef_mappings test + +test_merge_clang_extdef_mappings_in_env: + $(ACTIVATE_DEV_VENV) && \ + make -C tools/merge_clang_extdef_mappings test + +test_statistics_collector: + make -C tools/statistics_collector test + +test_statistics_collector_in_env: + $(ACTIVATE_DEV_VENV) && \ + make -C tools/statistics_collector test diff --git a/analyzer/tests/functional/analyze/test_analyze.py b/analyzer/tests/functional/analyze/test_analyze.py index fb4cbce2a1..2d436aa450 100644 --- a/analyzer/tests/functional/analyze/test_analyze.py +++ b/analyzer/tests/functional/analyze/test_analyze.py @@ -11,7 +11,7 @@ Test case for the CodeChecker analyze command's direct functionality. """ - +import glob import json import os import re @@ -809,8 +809,60 @@ def test_multiple_invalid_checker_names(self): self.assertTrue("other.missing.checker" in out) errcode = process.returncode + self.assertEqual(errcode, 0) + def test_makefile_generation(self): + """ Test makefile generation. 
""" + build_json = os.path.join(self.test_workspace, "build_extra_args.json") + analyze_cmd = [self._codechecker_cmd, "analyze", build_json, + "-o", self.report_dir, '--makefile'] + + source_file = os.path.join(self.test_dir, "extra_args.c") + build_log = [{"directory": self.test_workspace, + "command": "gcc -DTIDYARGS -c " + source_file, + "file": source_file + }, + {"directory": self.test_workspace, + "command": "gcc -DSAARGS -DTIDYARGS -c " + source_file, + "file": source_file + }] + + with open(build_json, 'w', + encoding="utf-8", errors="ignore") as outfile: + json.dump(build_log, outfile) + + process = subprocess.Popen( + analyze_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=self.test_dir, + encoding="utf-8", + errors="ignore") + process.communicate() + + errcode = process.returncode + self.assertEqual(errcode, 0) + + # Check the existence of the Makefile. + makefile = os.path.join(self.report_dir, 'Makefile') + self.assertTrue(os.path.exists(makefile)) + + # Run the generated Makefile and check the return code of it. + process = subprocess.Popen(["make"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=self.report_dir, + encoding="utf-8", + errors="ignore") + process.communicate() + + errcode = process.returncode + self.assertEqual(errcode, 0) + + plist_files = glob.glob(os.path.join(self.report_dir, '*.plist')) + self.assertEqual(len(plist_files), 4) + def test_analyzer_and_checker_config(self): """Test analyzer configuration through command line flags.""" build_json = os.path.join(self.test_workspace, "build_success.json") diff --git a/analyzer/tests/functional/ctu/test_ctu.py b/analyzer/tests/functional/ctu/test_ctu.py index fc4a12a0a2..ac9d02abf7 100644 --- a/analyzer/tests/functional/ctu/test_ctu.py +++ b/analyzer/tests/functional/ctu/test_ctu.py @@ -158,3 +158,22 @@ def __check_ctu_analyze(self, output): self.assertIn("no defects in main.c", output) self.assertIn("lib.c:3:", output) self.assertIn("[core.NullDereference]", output) + + def test_ctu_makefile_generation(self): + """ Test makefile generation in CTU mode. """ + if not self.ctu_capable: + self.skipTest(NO_CTU_MESSAGE) + + cmd = [self._codechecker_cmd, 'analyze', '-o', self.report_dir, + '--analyzers', 'clangsa', '--ctu', '--makefile'] + cmd.append(self.buildlog) + call_command(cmd, cwd=self.test_dir, env=self.env) + + call_command(["make"], cwd=self.report_dir, env=self.env) + + # Check the output. 
+ cmd = [self._codechecker_cmd, 'parse', self.report_dir] + output, _ = call_command(cmd, cwd=self.test_dir, env=self.env) + self.assertIn("defect(s) in lib.c", output) + self.assertIn("lib.c:3:", output) + self.assertIn("[core.NullDereference]", output) diff --git a/analyzer/tests/unit/__init__.py b/analyzer/tests/unit/__init__.py index 50969a75db..5e173885e2 100644 --- a/analyzer/tests/unit/__init__.py +++ b/analyzer/tests/unit/__init__.py @@ -18,3 +18,5 @@ sys.path.append(REPO_ROOT) sys.path.append(os.path.join(REPO_ROOT, 'tools', 'codechecker_report_hash')) +sys.path.append(os.path.join(REPO_ROOT, 'analyzer', 'tools', + 'statistics_collector')) diff --git a/analyzer/tools/merge_clang_extdef_mappings/.gitignore b/analyzer/tools/merge_clang_extdef_mappings/.gitignore new file mode 100644 index 0000000000..e195e845f1 --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/.gitignore @@ -0,0 +1,3 @@ +build/ +dist/ +merge_clang_extdef_mappings.egg-info diff --git a/analyzer/tools/merge_clang_extdef_mappings/.noserc b/analyzer/tools/merge_clang_extdef_mappings/.noserc new file mode 100644 index 0000000000..512f4e1a08 --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/.noserc @@ -0,0 +1,13 @@ +[nosetests] + +# increase verbosity level +verbosity=3 + +# more detailed error messages on failed asserts +detailed-errors=1 + +# stop running tests on first error +stop=1 + +# do not capture stdout +#nocapture=1 diff --git a/analyzer/tools/merge_clang_extdef_mappings/.pylintrc b/analyzer/tools/merge_clang_extdef_mappings/.pylintrc new file mode 100644 index 0000000000..4822958e6c --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/.pylintrc @@ -0,0 +1,377 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. +optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Disable the message, report, category or checker with the given id(s). 
You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=all + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. See also the "--disable" option for examples. +enable=logging-format-interpolation,old-style-class,unused-import,unused-variable,len-as-condition,bad-indentation,unpacking-in-except,import-star-module-level,parameter-unpacking,long-suffix,old-octal-literal,old-ne-operator,backtick,old-raise-syntax,print-statement,unpacking-in-except,import-star-module-level,parameter-unpacking,long-suffix,old-octal-literal,old-ne-operator,backtick,old-raise-syntax,print-statement,not-in-loop,function-redefined,continue-in-finally,abstract-class-instantiated,sstar-needs-assignment-target,duplicate-argument-name,too-many-star-expressions,nonlocal-and-global,return-outside-function,return-arg-in-generator,invalid-star-assignment-target,bad-reversed-sequence,nonexistent-operator,yield-outside-function,init-is-generator,nonlocal-without-binding,invalid-unary-operand-type,unsupported-binary-operation,no-member,not-callable,redundant-keyword-arg,assignment-from-no-return,assignment-from-none,not-context-manager,repeated-keyword,missing-kwoa,no-value-for-parameter,invalid-sequence-index,invalid-slice-index,too-many-function-args,unexpected-keyword-arg,unsupported-membership-test,unsubscriptable-object,unpacking-non-sequence,invalid-all-object,no-name-in-module,unbalanced-tuple-unpacking,undefined-variable,undefined-all-variable,used-before-assignment,format-needs-mapping,truncated-format-string,missing-format-string-key,mixed-format-string,too-few-format-args,bad-str-strip-call,too-many-format-args,bad-format-character,access-member-before-definition,method-hidden,assigning-non-slot,duplicate-bases,inconsistent-mro,inherit-non-class,invalid-slots,invalid-slots-object,no-method-argument,no-self-argument,unexpected-special-method-signature,non-iterator-returned,invalid-length-returned + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. 
This is a python new-style format string +# used to format the message information. See doc for all details +msg-template=[{msg_id}] {path}:{line:3d}:{column}: {msg} + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +#notes=FIXME,XXX,TODO + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + + +[BASIC] + +# List of builtins function names that should not be used, separated by a comma +bad-functions= + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. 
+name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=50 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). This supports can work +# with qualified names. +ignored-classes= + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. 
+ignore-imports=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=8 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=20 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/analyzer/tools/merge_clang_extdef_mappings/.pypirc b/analyzer/tools/merge_clang_extdef_mappings/.pypirc new file mode 100644 index 0000000000..52d57ec25f --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/.pypirc @@ -0,0 +1,10 @@ +[distutils] +index-servers = + pypi + testpypi + +[pypi] +repository: https://upload.pypi.org/legacy/ + +[testpypi] +repository: https://test.pypi.org/legacy/ diff --git a/analyzer/tools/merge_clang_extdef_mappings/LICENSE.txt b/analyzer/tools/merge_clang_extdef_mappings/LICENSE.txt new file mode 100644 index 0000000000..bd8b243dfa --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/LICENSE.txt @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/analyzer/tools/merge_clang_extdef_mappings/MANIFEST.in b/analyzer/tools/merge_clang_extdef_mappings/MANIFEST.in new file mode 100644 index 0000000000..c1ebcaeab4 --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md +include *.txt diff --git a/analyzer/tools/merge_clang_extdef_mappings/Makefile b/analyzer/tools/merge_clang_extdef_mappings/Makefile new file mode 100644 index 0000000000..b6633c2a8f --- /dev/null +++ b/analyzer/tools/merge_clang_extdef_mappings/Makefile @@ -0,0 +1,64 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +CURRENT_DIR = $(shell pwd) +ROOT = $(CURRENT_DIR) + +BUILD_DIR = $(CURRENT_DIR)/build +MERGE_CLANG_EXTDEF_MAPS_DIR = $(BUILD_DIR)/merge_clang_extdef_mappings + +ACTIVATE_DEV_VENV ?= . venv_dev/bin/activate +ACTIVATE_RUNTIME_VENV ?= . venv/bin/activate + +VENV_DEV_REQ_FILE ?= requirements_py/dev/requirements.txt + +default: all + +all: package + +venv: + # Create a virtual environment which can be used to run the build package. + virtualenv -p python3 venv && $(ACTIVATE_RUNTIME_VENV) + +venv_dev: + # Create a virtual environment for development. + virtualenv -p python3 venv_dev && \ + $(ACTIVATE_DEV_VENV) && pip install -r $(VENV_DEV_REQ_FILE) + +clean_venv_dev: + rm -rf venv_dev + +include tests/Makefile + +package: + # Install package in 'development mode'. + python setup.py develop + +build: + python setup.py build --build-purelib $(MERGE_CLANG_EXTDEF_MAPS_DIR) + +dist: + # Create a source distribution. + python setup.py sdist + +upload_test: dist + # Upload package to the TestPyPI repository. + $(eval PKG_NAME := $(shell python setup.py --name)) + $(eval PKG_VERSION := $(shell python setup.py --version)) + twine upload -r testpypi dist/$(PKG_NAME)-$(PKG_VERSION).tar.gz + +upload: dist + # Upload package to the PyPI repository. 
+	$(eval PKG_NAME := $(shell python setup.py --name))
+	$(eval PKG_VERSION := $(shell python setup.py --version))
+	twine upload -r pypi dist/$(PKG_NAME)-$(PKG_VERSION).tar.gz
+
+clean:
+	rm -rf $(BUILD_DIR)
+	rm -rf dist
+	rm -rf merge_clang_extdef_mappings.egg-info
diff --git a/analyzer/tools/merge_clang_extdef_mappings/README.md b/analyzer/tools/merge_clang_extdef_mappings/README.md
new file mode 100644
index 0000000000..e7ad45b10e
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/README.md
@@ -0,0 +1,47 @@
+# merge-clang-extdef-mappings
+As the collect phase runs in parallel on multiple threads, all compilation
+units are separately mapped into a temporary file in a temporary folder.
+These function maps contain the mangled names of functions and the source
+(the AST generated from the source) that contains them. These files should
+be merged at the end into a global map file.
+
+`merge-clang-extdef-mappings` is a Python tool which can be used to merge
+individual function maps created by the
+[clang-extdef-mapping](https://github.com/llvm/llvm-project/blob/master/clang/tools/clang-extdef-mapping/ClangExtDefMapGen.cpp)
+tool into a global one.
+
+
+## Install guide
+```sh
+# Create a Python virtualenv and set it as your environment.
+make venv
+source $PWD/venv/bin/activate
+
+# Build and install the merge-clang-extdef-mappings package.
+make package
+```
+
+## Usage
+```sh
+usage: merge-clang-extdef-mappings [-h] -i input -o output
+
+Merge individual clang extdef mapping files into one mapping file.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i input, --input input
+                        Folder which contains multiple outputs of the
+                        'clang-extdef-mapping' tool.
+  -o output, --output output
+                        Output file where the merged function maps will be
+                        stored into.
+
+Example:
+  merge-clang-extdef-mappings -i /path/to/fn_map_folder -o
+  /path/to/externalDefMap.txt
+```
+
+## License
+
+The project is licensed under the Apache License v2.0 with LLVM Exceptions.
+See LICENSE.txt for details.
\ No newline at end of file
diff --git a/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/__init__.py b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/__init__.py
new file mode 100644
index 0000000000..4259749345
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/__init__.py
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
diff --git a/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/cli.py b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/cli.py
new file mode 100644
index 0000000000..cf3189d21a
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/cli.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+
+import argparse
+import logging
+import os
+import sys
+
+# If we run this script in an environment where
+# 'codechecker_merge_clang_extdef_mappings' module is not available we should
+# add the grandparent directory of this file to the system path.
+# TODO: This section will not be needed when CodeChecker will be delivered as
+# a python package and will be installed in a virtual environment with all the
+# dependencies.
+if __name__ == '__main__':
+    current_dir = os.path.dirname(os.path.realpath(__file__))
+    os.sys.path.append(os.path.dirname(current_dir))
+
+from codechecker_merge_clang_extdef_mappings import \
+    merge_clang_extdef_mappings  # noqa
+
+
+LOG = logging.getLogger('MergeClangExtdefMappings')
+
+msg_formatter = logging.Formatter('[%(levelname)s] - %(message)s')
+log_handler = logging.StreamHandler(sys.stdout)
+log_handler.setFormatter(msg_formatter)
+LOG.setLevel(logging.INFO)
+LOG.addHandler(log_handler)
+
+
+def __add_arguments_to_parser(parser):
+    """ Add arguments to the given parser. """
+    parser.add_argument('-i', '--input',
+                        type=str,
+                        metavar='input',
+                        required=True,
+                        help="Folder which contains multiple outputs of the "
+                             "'clang-extdef-mapping' tool.")
+
+    parser.add_argument('-o', '--output',
+                        type=str,
+                        metavar='output',
+                        required=True,
+                        help="Output file where the merged function maps will "
+                             "be stored into.")
+
+
+def main():
+    """ Entry point of the merge-clang-extdef-mappings command line tool. """
+    parser = argparse.ArgumentParser(
+        prog="merge-clang-extdef-mappings",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Merge individual clang extdef mapping files into one "
+                    "mapping file.",
+        epilog="""Example:
+  merge-clang-extdef-mappings -i /path/to/fn_map_folder -o
+  /path/to/externalDefMap.txt""")
+
+    __add_arguments_to_parser(parser)
+
+    args = parser.parse_args()
+
+    merge_clang_extdef_mappings.merge(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/merge_clang_extdef_mappings.py b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/merge_clang_extdef_mappings.py
new file mode 100755
index 0000000000..482f050596
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/codechecker_merge_clang_extdef_mappings/merge_clang_extdef_mappings.py
@@ -0,0 +1,66 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+import glob
+import os
+
+
+def _generate_func_map_lines(func_map_dir):
+    """ Iterate over all lines of input files in random order. """
+    files = glob.glob(os.path.join(func_map_dir, '*'))
+    for func_map_file in files:
+        with open(func_map_file, 'r',
+                  encoding='utf-8', errors="ignore") as func_map:
+            for line in func_map:
+                yield line
+
+
+def _create_global_ctu_function_map(func_map_lines):
+    """ Takes an iterator of individual function maps and creates a global map.
+
+    It keeps only unique names. We leave conflicting names out of CTU.
+    A function map contains the id of a function (mangled name) and the
+    originating source (the corresponding AST file) name.
+    """
+    mangled_to_asts = {}
+
+    # We collect all occurrences of a function name into a set.
+    for line in func_map_lines:
+        mangled_name, ast_file = line.strip().split(' ', 1)
+        if mangled_name not in mangled_to_asts:
+            mangled_to_asts[mangled_name] = {ast_file}
+        else:
+            mangled_to_asts[mangled_name].add(ast_file)
+
+    mangled_ast_pairs = []
+
+    for mangled_name, ast_files in mangled_to_asts.items():
+        if len(ast_files) == 1:
+            mangled_ast_pairs.append((mangled_name, ast_files.pop()))
+
+    return mangled_ast_pairs
+
+
+def merge(func_map_dir, output_file):
+    """ Merge individual function maps into a global one.
+
+    As the collect phase runs in parallel on multiple threads, all compilation
+    units are separately mapped into a temporary file in ctu_temp_fnmap_folder.
+    These function maps contain the mangled names of functions and the source
+    (the AST generated from the source) that contains them.
+    These files should be merged at the end into a global map file:
+    ctu_func_map_file.
+    """
+    func_map_lines = _generate_func_map_lines(func_map_dir)
+    mangled_ast_pairs = _create_global_ctu_function_map(func_map_lines)
+
+    # Write (mangled function name, ast file) pairs into final file.
+    with open(output_file, 'w',
+              encoding='utf-8', errors='ignore') as out_file:
+        for mangled_name, ast_file in mangled_ast_pairs:
+            out_file.write('%s %s\n' % (mangled_name, ast_file))
diff --git a/analyzer/tools/merge_clang_extdef_mappings/requirements_py/dev/requirements.txt b/analyzer/tools/merge_clang_extdef_mappings/requirements_py/dev/requirements.txt
new file mode 100644
index 0000000000..bf35683890
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/requirements_py/dev/requirements.txt
@@ -0,0 +1,3 @@
+nose==1.3.7
+pycodestyle==2.4.0
+pylint==1.9.4
diff --git a/analyzer/tools/merge_clang_extdef_mappings/setup.py b/analyzer/tools/merge_clang_extdef_mappings/setup.py
new file mode 100644
index 0000000000..a61ab76b3e
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/setup.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import setuptools
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="merge-clang-extdef-mappings",
+    version="0.1.0",
+    author='CodeChecker Team (Ericsson)',
+    description="Merge individual function maps into a global one.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/Ericsson/CodeChecker",
+    keywords=['clang', 'ctu', 'merge', 'func-map', 'static-analysis',
+              'analysis'],
+    license='LICENSE.txt',
+    packages=setuptools.find_packages(),
+    include_package_data=True,
+    classifiers=[
+        "Environment :: Console",
+        "Intended Audience :: Developers",
+        "Operating System :: POSIX",
+        "Programming Language :: Python :: 3"
+    ],
+    entry_points={
+        'console_scripts': [
+            'merge-clang-extdef-mappings = codechecker_merge_clang_extdef_mappings.cli:main'
+        ]
+    },
+)
diff --git a/analyzer/tools/merge_clang_extdef_mappings/tests/Makefile b/analyzer/tools/merge_clang_extdef_mappings/tests/Makefile
new file mode 100644
index 0000000000..02e9452bde
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/tests/Makefile
@@ -0,0 +1,36 @@
+# Environment variables to run tests.
+
+REPO_ROOT ?= REPO_ROOT=$(ROOT)
+
+# Nose test runner configuration options.
+NOSECFG = --config .noserc
+
+test: pycodestyle pylint test_unit
+
+test_in_env: pycodestyle_in_env pylint_in_env test_unit_in_env
+
+PYCODESTYLE_TEST_CMD = pycodestyle codechecker_merge_clang_extdef_mappings tests
+
+pycodestyle:
+	$(PYCODESTYLE_TEST_CMD)
+
+pycodestyle_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(PYCODESTYLE_TEST_CMD)
+
+PYLINT_TEST_CMD = PYLINTRC=$(ROOT)/.pylintrc \
+  pylint ./codechecker_merge_clang_extdef_mappings ./tests/**
+
+pylint:
+	$(PYLINT_TEST_CMD)
+
+pylint_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(PYLINT_TEST_CMD)
+
+UNIT_TEST_CMD = $(REPO_ROOT) \
+  nosetests $(NOSECFG) tests/unit
+
+test_unit:
+	$(UNIT_TEST_CMD)
+
+test_unit_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(UNIT_TEST_CMD)
diff --git a/analyzer/tools/merge_clang_extdef_mappings/tests/unit/__init__.py b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/__init__.py
new file mode 100644
index 0000000000..4259749345
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/__init__.py
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
diff --git a/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/__init__.py b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/__init__.py
new file mode 100644
index 0000000000..b3a2aa1f9d
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/__init__.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+""" Setup for the merge_clang_extdef_mappings test package. """
+
+import os
+import shutil
+import tempfile
+
+
+def get_workspace(test_id='test'):
+    """ Return a temporary workspace for the tests. """
+    workspace_root = os.environ.get("MERGE_CTU_FUNC_MAPS_TEST_WORKSPACE_ROOT")
+    if not workspace_root:
+        # if no external workspace is set create under the build dir
+        workspace_root = os.path.join(os.environ['REPO_ROOT'], 'build',
+                                      'workspace')
+
+    if not os.path.exists(workspace_root):
+        os.makedirs(workspace_root)
+
+    if test_id:
+        return tempfile.mkdtemp(prefix=test_id + "-", dir=workspace_root)
+    else:
+        return workspace_root
+
+
+# Test workspace should be initialized in this module.
+TEST_WORKSPACE = None
+
+
+def setup_package():
+    """ Setup the environment for the tests. """
+
+    global TEST_WORKSPACE
+    TEST_WORKSPACE = get_workspace('merge_clang_extdef_mappings')
+
+    os.environ['TEST_WORKSPACE'] = TEST_WORKSPACE
+
+
+def teardown_package():
+    """ Delete the workspace associated with this test. """
+
+    # TODO: If environment variable is set keep the workspace
+    # and print out the path.
+    global TEST_WORKSPACE
+
+    print("Removing: " + TEST_WORKSPACE)
+    shutil.rmtree(TEST_WORKSPACE)
diff --git a/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/merge_clang_extdef_mappings.py b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/merge_clang_extdef_mappings.py
new file mode 100644
index 0000000000..dd0fc9ddd2
--- /dev/null
+++ b/analyzer/tools/merge_clang_extdef_mappings/tests/unit/merge_clang_extdef_mappings/merge_clang_extdef_mappings.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+import os
+import unittest
+
+from codechecker_merge_clang_extdef_mappings import merge_clang_extdef_mappings
+
+
+class MergeClangExtdefMappingsTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """ Initialize test files. """
+        cls.test_workspace = os.environ['TEST_WORKSPACE']
+
+        cls.extdef_maps_dir = os.path.join(cls.test_workspace,
+                                           "extdef_maps")
+
+        if not os.path.exists(cls.extdef_maps_dir):
+            os.makedirs(cls.extdef_maps_dir)
+
+        cls.extdef_map_1_lines = ["c:@F@f# path/to/file.cpp.ast",
+                                  "c:@F@g# path/to/file.cpp.ast",
+                                  "c:@F@both# path/to/file.cpp.ast"]
+
+        extdef_map_file_1 = os.path.join(cls.extdef_maps_dir,
+                                         'externalDefMap1.txt')
+        with open(extdef_map_file_1, 'w',
+                  encoding='utf-8', errors='ignore') as map_f:
+            map_f.write('\n'.join(cls.extdef_map_1_lines))
+
+        cls.extdef_map_2_lines = ["c:@F@main# path/to/file2.cpp.ast",
+                                  "c:@F@h# path/to/file2.cpp.ast",
+                                  "c:@F@both# path/to/file2.cpp.ast"]
+
+        extdef_map_file_2 = os.path.join(cls.extdef_maps_dir,
+                                         'externalDefMap2.txt')
+        with open(extdef_map_file_2, 'w',
+                  encoding='utf-8', errors='ignore') as map_f:
+            map_f.write('\n'.join(cls.extdef_map_2_lines))
+
+    def test_merge_clang_extdef_mappings(self):
+        """ Test merging multiple func map files.
""" + + output_file = os.path.join(self.test_workspace, 'externalDefMap.txt') + merge_clang_extdef_mappings.merge(self.extdef_maps_dir, output_file) + + with open(output_file, 'r', + encoding='utf-8', errors='ignore') as o_file: + lines = o_file.read().split('\n') + + expected_lines = ["c:@F@f# path/to/file.cpp.ast", + "c:@F@g# path/to/file.cpp.ast", + "c:@F@main# path/to/file2.cpp.ast", + "c:@F@h# path/to/file2.cpp.ast"] + for expected_line in expected_lines: + self.assertTrue(expected_line in lines) diff --git a/analyzer/tools/statistics_collector/.gitignore b/analyzer/tools/statistics_collector/.gitignore new file mode 100644 index 0000000000..1fe91996c1 --- /dev/null +++ b/analyzer/tools/statistics_collector/.gitignore @@ -0,0 +1,3 @@ +build/ +dist/ +statistics_collector.egg-info diff --git a/analyzer/tools/statistics_collector/.noserc b/analyzer/tools/statistics_collector/.noserc new file mode 100644 index 0000000000..512f4e1a08 --- /dev/null +++ b/analyzer/tools/statistics_collector/.noserc @@ -0,0 +1,13 @@ +[nosetests] + +# increase verbosity level +verbosity=3 + +# more detailed error messages on failed asserts +detailed-errors=1 + +# stop running tests on first error +stop=1 + +# do not capture stdout +#nocapture=1 diff --git a/analyzer/tools/statistics_collector/.pylintrc b/analyzer/tools/statistics_collector/.pylintrc new file mode 100644 index 0000000000..4822958e6c --- /dev/null +++ b/analyzer/tools/statistics_collector/.pylintrc @@ -0,0 +1,377 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. +optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". 
+# If you want to run only the classes checker, but have no Warning level
+# messages displayed, use "--disable=all --enable=classes --disable=W"
+disable=all
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time. See also the "--disable" option for examples.
+enable=logging-format-interpolation,old-style-class,unused-import,unused-variable,len-as-condition,bad-indentation,unpacking-in-except,import-star-module-level,parameter-unpacking,long-suffix,old-octal-literal,old-ne-operator,backtick,old-raise-syntax,print-statement,unpacking-in-except,import-star-module-level,parameter-unpacking,long-suffix,old-octal-literal,old-ne-operator,backtick,old-raise-syntax,print-statement,not-in-loop,function-redefined,continue-in-finally,abstract-class-instantiated,star-needs-assignment-target,duplicate-argument-name,too-many-star-expressions,nonlocal-and-global,return-outside-function,return-arg-in-generator,invalid-star-assignment-target,bad-reversed-sequence,nonexistent-operator,yield-outside-function,init-is-generator,nonlocal-without-binding,invalid-unary-operand-type,unsupported-binary-operation,no-member,not-callable,redundant-keyword-arg,assignment-from-no-return,assignment-from-none,not-context-manager,repeated-keyword,missing-kwoa,no-value-for-parameter,invalid-sequence-index,invalid-slice-index,too-many-function-args,unexpected-keyword-arg,unsupported-membership-test,unsubscriptable-object,unpacking-non-sequence,invalid-all-object,no-name-in-module,unbalanced-tuple-unpacking,undefined-variable,undefined-all-variable,used-before-assignment,format-needs-mapping,truncated-format-string,missing-format-string-key,mixed-format-string,too-few-format-args,bad-str-strip-call,too-many-format-args,bad-format-character,access-member-before-definition,method-hidden,assigning-non-slot,duplicate-bases,inconsistent-mro,inherit-non-class,invalid-slots,invalid-slots-object,no-method-argument,no-self-argument,unexpected-special-method-signature,non-iterator-returned,invalid-length-returned
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors warning, statement which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+msg-template=[{msg_id}] {path}:{line:3d}:{column}: {msg}
+
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make it working
+# install python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +#notes=FIXME,XXX,TODO + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + + +[BASIC] + +# List of builtins function names that should not be used, separated by a comma +bad-functions= + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. 
+name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=50 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). This supports can work +# with qualified names. +ignored-classes= + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. 
+ignore-imports=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=8 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=20 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/analyzer/tools/statistics_collector/.pypirc b/analyzer/tools/statistics_collector/.pypirc new file mode 100644 index 0000000000..52d57ec25f --- /dev/null +++ b/analyzer/tools/statistics_collector/.pypirc @@ -0,0 +1,10 @@ +[distutils] +index-servers = + pypi + testpypi + +[pypi] +repository: https://upload.pypi.org/legacy/ + +[testpypi] +repository: https://test.pypi.org/legacy/ diff --git a/analyzer/tools/statistics_collector/LICENSE.txt b/analyzer/tools/statistics_collector/LICENSE.txt new file mode 100644 index 0000000000..bd8b243dfa --- /dev/null +++ b/analyzer/tools/statistics_collector/LICENSE.txt @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
diff --git a/analyzer/tools/statistics_collector/MANIFEST.in b/analyzer/tools/statistics_collector/MANIFEST.in
new file mode 100644
index 0000000000..c1ebcaeab4
--- /dev/null
+++ b/analyzer/tools/statistics_collector/MANIFEST.in
@@ -0,0 +1,2 @@
+include README.md
+include *.txt
diff --git a/analyzer/tools/statistics_collector/Makefile b/analyzer/tools/statistics_collector/Makefile
new file mode 100644
index 0000000000..fd697466aa
--- /dev/null
+++ b/analyzer/tools/statistics_collector/Makefile
@@ -0,0 +1,64 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+CURRENT_DIR = $(shell pwd)
+ROOT = $(CURRENT_DIR)
+
+BUILD_DIR = $(CURRENT_DIR)/build
+STATISTICS_COLLECTOR_DIR = $(BUILD_DIR)/statistics_collector
+
+ACTIVATE_DEV_VENV ?= . venv_dev/bin/activate
+ACTIVATE_RUNTIME_VENV ?= . venv/bin/activate
+
+VENV_DEV_REQ_FILE ?= requirements_py/dev/requirements.txt
+
+default: all
+
+all: package
+
+venv:
+	# Create a virtual environment which can be used to run the built package.
+	virtualenv -p python3 venv && $(ACTIVATE_RUNTIME_VENV)
+
+venv_dev:
+	# Create a virtual environment for development.
+	virtualenv -p python3 venv_dev && \
+		$(ACTIVATE_DEV_VENV) && pip install -r $(VENV_DEV_REQ_FILE)
+
+clean_venv_dev:
+	rm -rf venv_dev
+
+include tests/Makefile
+
+package:
+	# Install package in 'development mode'.
+	python setup.py develop
+
+build:
+	python setup.py build --build-purelib $(STATISTICS_COLLECTOR_DIR)
+
+dist:
+	# Create a source distribution.
+	python setup.py sdist
+
+upload_test: dist
+	# Upload package to the TestPyPI repository.
+	$(eval PKG_NAME := $(shell python setup.py --name))
+	$(eval PKG_VERSION := $(shell python setup.py --version))
+	twine upload -r testpypi dist/$(PKG_NAME)-$(PKG_VERSION).tar.gz
+
+upload: dist
+	# Upload package to the PyPI repository.
+	$(eval PKG_NAME := $(shell python setup.py --name))
+	$(eval PKG_VERSION := $(shell python setup.py --version))
+	twine upload -r pypi dist/$(PKG_NAME)-$(PKG_VERSION).tar.gz
+
+clean:
+	rm -rf $(BUILD_DIR)
+	rm -rf dist
+	rm -rf statistics_collector.egg-info
diff --git a/analyzer/tools/statistics_collector/README.md b/analyzer/tools/statistics_collector/README.md
new file mode 100644
index 0000000000..03518f906b
--- /dev/null
+++ b/analyzer/tools/statistics_collector/README.md
@@ -0,0 +1,57 @@
+# statistics-collector
+`statistics-collector` is a Python tool which helps you process the
+statistical results of the Clang analyzer. It contains a script called
+`post-process-stats` which reads the Clang analyzer outputs where the
+statistics emitter checkers were enabled and collects the statistics into
+special yaml files which can be parsed by the statistics checkers.
+
+## Install guide
+```sh
+# Create a Python virtualenv and set it as your environment.
+make venv
+source $PWD/venv/bin/activate
+
+# Build and install the statistics-collector package.
+make package
+```
+
+## Usage
+```sh
+usage: post-process-stats [-h] -i folder
+                          [--stats-min-sample-count STATS_MIN_SAMPLE_COUNT]
+                          [--stats-relevance-threshold STATS_RELEVANCE_THRESHOLD]
+                          [-v]
+                          output_dir
+
+Collect statistics from the clang analyzer output.
+
+positional arguments:
+  output_dir            Output directory where the statistics yaml files
+                        will be stored.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i folder, --input folder
+                        Folder which contains statistical results of clang to
+                        collect statistics.
+  --stats-min-sample-count STATS_MIN_SAMPLE_COUNT
+                        Minimum number of samples (function call occurrences)
+                        to be collected for a statistic to be relevant.
+  --stats-relevance-threshold STATS_RELEVANCE_THRESHOLD
+                        The minimum ratio of calls of function f that
+                        must have a certain property to consider it true
+                        for that function (calculated as calls with a
+                        property / all calls). CodeChecker will warn
+                        for calls of f that do not have that
+                        property.
+  -v, --verbose         Set verbosity level.
+
+Example:
+  post-process-stats -i /path/to/pre_processed_stats /path/to/stats
+```
+
+## License
+
+The project is licensed under the Apache License v2.0 with LLVM Exceptions.
+See LICENSE.txt for details.
\ No newline at end of file
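To make the two thresholds documented above concrete, here is a small, illustrative calculation that mirrors the filtering performed by the collectors added later in this patch (the function name and the call counts are made up):

```python
# Illustrative only: how the two post-process-stats thresholds interact.
# This mirrors the filter_stats() logic of the collectors in this patch.
stats_min_sample_count = 10       # Default of --stats-min-sample-count.
stats_relevance_threshold = 0.85  # Default of --stats-relevance-threshold.

total_calls = 20       # parsedate() was called 20 times in the analyzed code
unchecked_calls = 2    # and its return value was ignored twice.

checked_ratio = 1 - unchecked_calls / total_calls  # 0.9
relevant = (stats_relevance_threshold < checked_ratio < 1
            and total_calls >= stats_min_sample_count)
print(relevant)  # True -> parsedate ends up in the generated yaml file.
```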
diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/__init__.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/__init__.py
new file mode 100644
index 0000000000..4259749345
--- /dev/null
+++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/__init__.py
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/cli.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/cli.py
new file mode 100644
index 0000000000..c068a83d4c
--- /dev/null
+++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/cli.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+
+import argparse
+import logging
+import os
+
+# If we run this script in an environment where the
+# 'codechecker_statistics_collector' module is not available, we should add
+# the grandparent directory of this file to the system path.
+# TODO: This section will not be needed when CodeChecker is delivered as
+# a Python package and installed in a virtual environment with all of its
+# dependencies.
+if __name__ == '__main__':
+    current_dir = os.path.dirname(os.path.realpath(__file__))
+    os.sys.path.append(os.path.dirname(current_dir))
+
+from codechecker_statistics_collector import post_process_stats  # noqa
+
+
+LOG = logging.getLogger('StatisticsCollector')
+
+msg_formatter = logging.Formatter('[%(levelname)s] - %(message)s')
+log_handler = logging.StreamHandler()
+log_handler.setFormatter(msg_formatter)
+LOG.setLevel(logging.INFO)
+LOG.addHandler(log_handler)
+
+
+def __add_arguments_to_parser(parser):
+    """ Add arguments to the given parser. """
+    parser.add_argument('-i', '--input',
+                        type=str,
+                        metavar='folder',
+                        required=True,
+                        help="Folder which contains statistical results of "
+                             "clang to collect statistics.")
+
+    parser.add_argument('output_dir',
+                        type=str,
+                        help="Output directory where the statistics yaml "
+                             "files will be stored.")
+
+    parser.add_argument('--stats-min-sample-count',
+                        action='store',
+                        default=10,
+                        type=int,
+                        dest='stats_min_sample_count',
+                        help="Minimum number of samples (function call "
+                             "occurrences) to be collected for a statistic "
+                             "to be relevant.")
+
+    parser.add_argument('--stats-relevance-threshold',
+                        action='store',
+                        default=0.85,
+                        type=float,
+                        dest='stats_relevance_threshold',
+                        help="The minimum ratio of calls of function f "
+                             "that must have a certain property to "
+                             "consider it true for that function "
+                             "(calculated as calls with a property / "
+                             "all calls). CodeChecker will warn for "
+                             "calls of f that do not have that "
+                             "property.")
+
+    parser.add_argument('-v', '--verbose',
+                        action='store_true',
+                        dest='verbose',
+                        help="Set verbosity level.")
+
+
+def main():
+    """ Statistics collector main command line. """
""" + parser = argparse.ArgumentParser( + prog="post-process-stats", + formatter_class=argparse.RawDescriptionHelpFormatter, + description="Collect statistics from the clang analyzer output.", + epilog="""Example: + post-process-stats -i /path/to/pre_processed_stats /path/to/stats""") + + __add_arguments_to_parser(parser) + + args = parser.parse_args() + + if 'verbose' in args and args.verbose: + LOG.setLevel(logging.DEBUG) + + LOG.info("Starting to post-process statistical results...") + + post_process_stats.process(args.input, args.output_dir, + args.stats_min_sample_count, + args.stats_relevance_threshold) + + LOG.info("Done.") + + +if __name__ == "__main__": + main() diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/__init__.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/__init__.py new file mode 100644 index 0000000000..4259749345 --- /dev/null +++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/__init__.py @@ -0,0 +1,7 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/return_value.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/return_value.py new file mode 100644 index 0000000000..60d2564e39 --- /dev/null +++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/return_value.py @@ -0,0 +1,101 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +""" Return value statistics collector. """ + +from io import StringIO +from collections import defaultdict +import os +import re + + +class ReturnValueCollector(object): + """ Collect return value statistics. + + This script lists functions of which the return value is mostly checked. + """ + + # Checker name used for pre analysis. + checker_collect = 'statisticscollector.ReturnValueCheck' + + # Checker name which runs the analysis. + checker_analyze = 'statisticsbased.UncheckedReturnValue' + + def __init__(self, stats_min_sample_count, stats_relevance_threshold): + self.stats_min_sample_count = stats_min_sample_count + self.stats_relevance_threshold = stats_relevance_threshold + + # Matching these lines: + # /.../x.c:551:12: + # warning: Return Value Check:/.../x.c:551:12,parsedate,0 + + self.ret_val_regexp = \ + re.compile(r'.*warning: Return Value Check:' + '.*:[0-9]*:[0-9]*.*,(.*),([0,1])') + + self.stats = {'total': defaultdict(int), + 'nof_unchecked': defaultdict(int)} + + @staticmethod + def stats_file(path): + return os.path.join(path, 'UncheckedReturn.yaml') + + @staticmethod + def checker_analyze_cfg(path): + """ Return the checker config parameter for the analyzer checker. 
""" + return ['-Xclang', '-analyzer-config', + '-Xclang', + 'alpha.ericsson.statisticsbased:APIMetadataPath=' + path] + + def total(self): + return self.stats.get('total') + + def nof_unchecked(self): + return self.stats.get('nof_unchecked') + + def unchecked(self): + return self.stats.get('unchecked') + + def process_line(self, line): + """ Match regex on the line """ + m = self.ret_val_regexp.match(line) + if m: + func = m.group(1) + checked = m.group(2) + self.stats['total'][func] += 1 + self.stats['nof_unchecked'][func] += int(checked) + + def filter_stats(self): + """ Filter the collected statistics based on the threshold. + + Return a lisf of function names where the return value + was unchecked above the threshold. + """ + unchecked_functions = [] + total = self.stats.get('total') + for key in sorted(total): + checked_ratio = 1 - \ + self.stats['nof_unchecked'][key]/self.stats['total'][key] + if (self.stats_relevance_threshold < checked_ratio < 1 and + self.stats['total'][key] >= self.stats_min_sample_count): + unchecked_functions.append(key) + return unchecked_functions + + def get_yaml(self): + """ Get statistics in yaml format. + + FIXME proper yaml generation. + """ + stats_yaml = StringIO() + + stats_yaml.write("#\n") + stats_yaml.write("# UncheckedReturn metadata format 1.0\n") + for function_name in self.filter_stats(): + stats_yaml.write("- " + function_name + '\n') + + return stats_yaml.getvalue() diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/special_return_value.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/special_return_value.py new file mode 100644 index 0000000000..22216e96ac --- /dev/null +++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/collectors/special_return_value.py @@ -0,0 +1,117 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +""" Special return value statistics collector. """ + +from io import StringIO + +from collections import defaultdict +import os +import re + + +class SpecialReturnValueCollector(object): + """ Collect special return value statistics. + + This script lists functions of which the return value is checked for + negative (integers) or null (pointers). + """ + + # Checker name used for pre analysis. + checker_collect = 'statisticscollector.SpecialReturnValue' + + # Checker name which runs the analysis. + checker_analyze = 'statisticsbased.SpecialReturnValue' + + def __init__(self, stats_min_sample_count, stats_relevance_threshold): + self.stats_min_sample_count = stats_min_sample_count + self.stats_relevance_threshold = stats_relevance_threshold + + # Matching these lines: + # /.../x.c:551:12: warning: + # Special Return Value:/.../x.c:551:12,parsedate,0,0 + + ptrn = \ + r'.*warning: Special Return Value:'\ + '.*:[0-9]*:[0-9]*.*,(.*),([0,1]),([0,1])' + self.special_ret_val_regexp = re.compile(ptrn) + + self.stats = {'total': defaultdict(int), + 'nof_negative': defaultdict(int), + 'nof_null': defaultdict(int)} + + @staticmethod + def stats_file(path): + return os.path.join(path, 'SpecialReturn.yaml') + + @staticmethod + def checker_analyze_cfg(path): + """ Return the checker config parameter for the analyzer checker. 
""" + return ['-Xclang', '-analyzer-config', + '-Xclang', + 'alpha.ericsson.statisticsbased:APIMetadataPath=' + path] + + def total(self): + return self.stats.get('total') + + def nof_null(self): + return self.stats.get('nof_null') + + def nof_negative(self): + return self.stats.get('nof_negative') + + def process_line(self, line): + """ Match regex on the line. """ + m = self.special_ret_val_regexp.match(line) + if m: + func = m.group(1) + ret_negative = m.group(2) + ret_null = m.group(3) + + self.stats['total'][func] += 1 + self.stats['nof_negative'][func] += int(ret_negative) + self.stats['nof_null'][func] += int(ret_null) + + def filter_stats(self): + """ Filter the collected statistics based on the threshold. """ + neg = [] + null = [] + stats = self.stats + total = stats.get('total') + + for key in sorted(stats.get('total').keys()): + negative_ratio = stats['nof_negative'][key]/stats['total'][key] + if (self.stats_relevance_threshold < negative_ratio < 1 and + total[key] >= self.stats_min_sample_count): + neg.append(key) + + null_ratio = stats['nof_null'][key]/stats['total'][key] + if (self.stats_relevance_threshold < null_ratio < 1 and + total[key] >= self.stats_min_sample_count): + null.append(key) + return neg, null + + def get_yaml(self): + """ Get statistics in yaml format. + + FIXME proper yaml generation. + """ + stats_yaml = StringIO() + + stats_yaml.write("#\n") + stats_yaml.write("# SpecialReturn metadata format 1.0\n") + neg, null = self.filter_stats() + + for n in neg: + stats_yaml.write( + "{name: " + n + ", relation: LT, value: 0}\n") + for n in null: + stats_yaml.write( + "{name: " + n + ", relation: EQ, value: 0}\n") + + return stats_yaml.getvalue() diff --git a/analyzer/tools/statistics_collector/codechecker_statistics_collector/post_process_stats.py b/analyzer/tools/statistics_collector/codechecker_statistics_collector/post_process_stats.py new file mode 100644 index 0000000000..4b85c11deb --- /dev/null +++ b/analyzer/tools/statistics_collector/codechecker_statistics_collector/post_process_stats.py @@ -0,0 +1,87 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +""" +Post-process statistical results. + +Clang output files will be parsed for outputs by the statistics collector +checkers and converted into a special yml file which can be parsed back by the +statistics checkers. +""" + +import logging +import os + +from .collectors.return_value import ReturnValueCollector +from .collectors.special_return_value import SpecialReturnValueCollector + +LOG = logging.getLogger('StatisticsCollector') + + +def process(input_dir, output_dir, + stats_min_sample_count, stats_relevance_threshold): + """ + Read the clang analyzer outputs where the statistics emitter checkers + were enabled and collect the statistics. + + After the statistics collection cleanup the output files. 
+ """ + + try: + os.stat(output_dir) + except Exception as ex: + LOG.debug(ex) + os.mkdir(output_dir) + + if not os.path.exists(input_dir): + LOG.debug("No statistics directory was found") + return + + clang_outs = [] + try: + for f in os.listdir(input_dir): + if os.path.isfile(os.path.join(input_dir, f)): + clang_outs.append(os.path.join(input_dir, f)) + except OSError as oerr: + LOG.debug(oerr) + LOG.debug("Statistics can not be collected.") + LOG.debug("Analyzer output error.") + return + + if not clang_outs: + LOG.warning("No output files were found to collect statistics.") + return + + ret_collector = ReturnValueCollector(stats_min_sample_count, + stats_relevance_threshold) + special_ret_collector =\ + SpecialReturnValueCollector(stats_min_sample_count, + stats_relevance_threshold) + + for clang_output in clang_outs: + with open(clang_output, 'r', + encoding='utf-8', errors='ignore') as out: + clang_output = "" + for line in out: + clang_output += line + "\n" + ret_collector.process_line(line) + special_ret_collector.process_line(line) + LOG.debug("Collecting statistics finished.") + + # Write out statistics. + unchecked_yaml = ReturnValueCollector.stats_file(output_dir) + LOG.debug("Writing out statistics to %s", unchecked_yaml) + with open(unchecked_yaml, 'w', + encoding='utf-8', errors='ignore') as uyaml: + uyaml.write(ret_collector.get_yaml()) + + special_ret_yaml = SpecialReturnValueCollector.stats_file(output_dir) + LOG.debug("Writing out statistics to %s", special_ret_yaml) + with open(special_ret_yaml, 'w', + encoding='utf-8', errors='ignore') as uyaml: + uyaml.write(special_ret_collector.get_yaml()) diff --git a/analyzer/tools/statistics_collector/requirements_py/dev/requirements.txt b/analyzer/tools/statistics_collector/requirements_py/dev/requirements.txt new file mode 100644 index 0000000000..bf35683890 --- /dev/null +++ b/analyzer/tools/statistics_collector/requirements_py/dev/requirements.txt @@ -0,0 +1,3 @@ +nose==1.3.7 +pycodestyle==2.4.0 +pylint==1.9.4 diff --git a/analyzer/tools/statistics_collector/setup.py b/analyzer/tools/statistics_collector/setup.py new file mode 100644 index 0000000000..d89ffc9f38 --- /dev/null +++ b/analyzer/tools/statistics_collector/setup.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="statistics-collector", + version="0.1.0", + author='CodeChecker Team (Ericsson)', + description="", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/Ericsson/CodeChecker", + keywords=['clang', 'statistics', 'merge', 'static-analysis', 'analysis'], + license='LICENSE.txt', + packages=setuptools.find_packages(), + include_package_data=True, + classifiers=[ + "Environment :: Console", + "Intended Audience :: Developers", + "Operating System :: POSIX", + "Programming Language :: Python :: 3" + ], + entry_points={ + 'console_scripts': [ + 'post-process-stats = codechecker_statistics_collector.cli:main' + ] + }, +) diff --git a/analyzer/tools/statistics_collector/tests/Makefile b/analyzer/tools/statistics_collector/tests/Makefile new file mode 100644 index 0000000000..9cb3e00177 --- /dev/null +++ b/analyzer/tools/statistics_collector/tests/Makefile @@ -0,0 +1,36 @@ +# Environment variables to run tests. + +REPO_ROOT ?= REPO_ROOT=$(ROOT) + +# Nose test runner configuration options. 
diff --git a/analyzer/tools/statistics_collector/tests/Makefile b/analyzer/tools/statistics_collector/tests/Makefile
new file mode 100644
index 0000000000..9cb3e00177
--- /dev/null
+++ b/analyzer/tools/statistics_collector/tests/Makefile
@@ -0,0 +1,36 @@
+# Environment variables to run tests.
+
+REPO_ROOT ?= REPO_ROOT=$(ROOT)
+
+# Nose test runner configuration options.
+NOSECFG = --config .noserc
+
+test: pycodestyle pylint test_unit
+
+test_in_env: pycodestyle_in_env pylint_in_env test_unit_in_env
+
+PYCODESTYLE_TEST_CMD = pycodestyle codechecker_statistics_collector tests
+
+pycodestyle:
+	$(PYCODESTYLE_TEST_CMD)
+
+pycodestyle_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(PYCODESTYLE_TEST_CMD)
+
+PYLINT_TEST_CMD = PYLINTRC=$(ROOT)/.pylintrc \
+  pylint ./codechecker_statistics_collector ./tests/**
+
+pylint:
+	$(PYLINT_TEST_CMD)
+
+pylint_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(PYLINT_TEST_CMD)
+
+UNIT_TEST_CMD = $(REPO_ROOT) \
+  nosetests $(NOSECFG) tests/unit
+
+test_unit:
+	$(UNIT_TEST_CMD)
+
+test_unit_in_env: venv_dev
+	$(ACTIVATE_DEV_VENV) && $(UNIT_TEST_CMD)
diff --git a/analyzer/tools/statistics_collector/tests/unit/__init__.py b/analyzer/tools/statistics_collector/tests/unit/__init__.py
new file mode 100644
index 0000000000..4259749345
--- /dev/null
+++ b/analyzer/tools/statistics_collector/tests/unit/__init__.py
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
diff --git a/analyzer/tools/statistics_collector/tests/unit/statistics_collector/__init__.py b/analyzer/tools/statistics_collector/tests/unit/statistics_collector/__init__.py
new file mode 100644
index 0000000000..2ec58db228
--- /dev/null
+++ b/analyzer/tools/statistics_collector/tests/unit/statistics_collector/__init__.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+""" Setup for the statistics collector test package. """
+
+import os
+import shutil
+import tempfile
+
+
+def get_workspace(test_id='test'):
+    """ Return a temporary workspace for the tests. """
+    workspace_root = os.environ.get("STATISTICS_COLLECTOR_TEST_WORKSPACE_ROOT")
+    if not workspace_root:
+        # If no external workspace is set, create it under the build dir.
+        workspace_root = os.path.join(os.environ['REPO_ROOT'], 'build',
+                                      'workspace')
+
+    if not os.path.exists(workspace_root):
+        os.makedirs(workspace_root)
+
+    if test_id:
+        return tempfile.mkdtemp(prefix=test_id + "-", dir=workspace_root)
+    else:
+        return workspace_root
+
+
+# Test workspace should be initialized in this module.
+TEST_WORKSPACE = None
+
+
+def setup_package():
+    """ Set up the environment for the tests. """
+
+    global TEST_WORKSPACE
+    TEST_WORKSPACE = get_workspace('statistics_collector')
+
+    os.environ['TEST_WORKSPACE'] = TEST_WORKSPACE
+
+
+def teardown_package():
+    """ Delete the workspace associated with this test. """
+
+    # TODO: If the environment variable is set, keep the workspace
+    # and print out the path.
+ global TEST_WORKSPACE + + print("Removing: " + TEST_WORKSPACE) + shutil.rmtree(TEST_WORKSPACE) diff --git a/analyzer/tests/unit/test_statistics_collectors.py b/analyzer/tools/statistics_collector/tests/unit/statistics_collector/statistics_collector_test.py similarity index 92% rename from analyzer/tests/unit/test_statistics_collectors.py rename to analyzer/tools/statistics_collector/tests/unit/statistics_collector/statistics_collector_test.py index 04871b2a4e..c53e38944f 100644 --- a/analyzer/tests/unit/test_statistics_collectors.py +++ b/analyzer/tools/statistics_collector/tests/unit/statistics_collector/statistics_collector_test.py @@ -11,13 +11,14 @@ import unittest -from codechecker_analyzer.analyzers.clangsa import statistics_collector +from codechecker_statistics_collector.collectors.special_return_value import \ + SpecialReturnValueCollector +from codechecker_statistics_collector.collectors.return_value import \ + ReturnValueCollector -class statistics_collectorsTest(unittest.TestCase): - """ - Testing the statistics collectors output parsing. - """ +class statisticsCollectorTest(unittest.TestCase): + """ Testing the statistics collectors output parsing. """ def test_spec_ret_val_coll(self): """ @@ -33,8 +34,7 @@ def test_spec_ret_val_coll(self): " Special Return Value:/.../x.c:551:12,parsedate,0,0" ] - special_ret_collector = \ - statistics_collector.SpecialReturnValueCollector(10, 0.85) + special_ret_collector = SpecialReturnValueCollector(10, 0.85) for line in test_input: special_ret_collector.process_line(line) @@ -86,8 +86,7 @@ def test_spec_ret_val_coll_neg_filter(self): " Special Return Value:/.../x.c:551:12,myfunc,0,0", ] - special_ret_collector = \ - statistics_collector.SpecialReturnValueCollector(10, 0.85) + special_ret_collector = SpecialReturnValueCollector(10, 0.85) for line in test_ret_neg: special_ret_collector.process_line(line) @@ -119,7 +118,7 @@ def test_ret_val_coll_neg(self): " Return Value Check:/.../x.c:551:12,parsedate,0", ] - ret_val_collector = statistics_collector.ReturnValueCollector(10, 0.85) + ret_val_collector = ReturnValueCollector(10, 0.85) for line in test_ret_neg: ret_val_collector.process_line(line) @@ -154,7 +153,7 @@ def test_ret_val_coll_filtering(self): " Return Value Check:/.../x.c:551:12,parsedate,1", ] - ret_val_collector = statistics_collector.ReturnValueCollector(10, 0.85) + ret_val_collector = ReturnValueCollector(10, 0.85) for line in test_ret_neg: ret_val_collector.process_line(line) diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md index f617bee2b8..4a2bf5dccc 100644 --- a/docs/analyzer/user_guide.md +++ b/docs/analyzer/user_guide.md @@ -695,6 +695,12 @@ optional arguments: -t {plist}, --type {plist}, --output-format {plist} Specify the format the analysis results should use. (default: plist) + --makefile Generate a Makefile in the given output directory from + the analyzer commands and do not execute the analysis. + The analysis can be executed by calling the make + command like 'make -f output_dir/Makefile'. You can + ignore errors with the -i/--ignore-errors options: + 'make -f output_dir/Makefile -i'. (default: False) -q, --quiet Do not print the output or error of the analyzers to the standard output of CodeChecker. 
 -c, --clean           Delete analysis reports stored in the output
diff --git a/tools/report-converter/README.md b/tools/report-converter/README.md
index 7730bb9278..7c0078ae92 100644
--- a/tools/report-converter/README.md
+++ b/tools/report-converter/README.md
@@ -34,7 +34,7 @@ make package
 ## Usage
 ```sh
 usage: report-converter [-h] -o OUTPUT_DIR -t TYPE [--meta [META [META ...]]]
-                        [-c] [-v]
+                        [--filename FILENAME] [-c] [-v]
                         file
 
 Creates a CodeChecker report directory from the given code analyzer output
@@ -59,6 +59,17 @@ optional arguments:
                        to a running CodeChecker server. It has the following
                        format: key=value. Valid key values are:
                        analyzer_command, analyzer_version.
+  --filename FILENAME  This option can be used to override the default plist
+                       file name output of this tool. This tool can produce
+                       multiple plist files from the given code analyzer
+                       output result file. If we run this tool multiple
+                       times on the same directory, it may overwrite some
+                       plist files. To prevent this we can generate a unique
+                       hash into the plist file names with this option. For
+                       example: '{source_file}_{analyzer}_xxxxx'.
+                       {source_file} and {analyzer} are special values which
+                       will be replaced with the name of the source file
+                       where the bug was found and the analyzer name.
   -c, --clean          Delete files stored in the output directory.
   -v, --verbose        Set verbosity level.
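A hedged sketch of what the README's `--filename` option maps to at the API level, mirroring the cli.py change that follows; the `'cppcheck'` converter key and the paths are assumptions made for the example:

```python
# Illustrative only: use the converter API directly instead of the
# report-converter CLI. supported_converters and transform() are shown in
# the cli.py and analyzer_result.py changes below.
from codechecker_report_converter.cli import supported_converters

parser = supported_converters['cppcheck']()  # Assumed converter type key.
parser.transform("./cppcheck_output.xml", "./reports",
                 file_name="{source_file}_{analyzer}_1a2b3c")
```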
""" output_dir = os.path.abspath(output_dir) for plist_data in plist_objs: - file_name = os.path.basename(plist_data['files'][0]) - out_file_name = '{0}_{1}.plist'.format(file_name, self.TOOL_NAME) + source_file = os.path.basename(plist_data['files'][0]) + + out_file_name = file_name \ + .replace("{source_file}", source_file) \ + .replace("{analyzer}", self.TOOL_NAME) + out_file_name = '{0}.plist'.format(out_file_name) out_file = os.path.join(output_dir, out_file_name) LOG.info("Create/modify plist file: '%s'.", out_file) diff --git a/tools/report-converter/codechecker_report_converter/cli.py b/tools/report-converter/codechecker_report_converter/cli.py index 2ec74ea88a..9a91437cd9 100755 --- a/tools/report-converter/codechecker_report_converter/cli.py +++ b/tools/report-converter/codechecker_report_converter/cli.py @@ -55,7 +55,7 @@ LOG = logging.getLogger('ReportConverter') msg_formatter = logging.Formatter('[%(levelname)s] - %(message)s') -log_handler = logging.StreamHandler() +log_handler = logging.StreamHandler(sys.stdout) log_handler.setFormatter(msg_formatter) LOG.setLevel(logging.INFO) LOG.addHandler(log_handler) @@ -80,8 +80,8 @@ supported_metadata_keys = ["analyzer_command", "analyzer_version"] -def output_to_plist(analyzer_result, parser_type, output_dir, clean=False, - metadata=None): +def output_to_plist(analyzer_result, parser_type, output_dir, file_name, + clean=False, metadata=None): """ Creates .plist files from the given output to the given output dir. """ if clean and os.path.isdir(output_dir): LOG.info("Previous analysis results in '%s' have been removed, " @@ -92,7 +92,7 @@ def output_to_plist(analyzer_result, parser_type, output_dir, clean=False, os.makedirs(output_dir) parser = supported_converters[parser_type]() - parser.transform(analyzer_result, output_dir, metadata) + parser.transform(analyzer_result, output_dir, file_name, metadata) def process_metadata(metadata): @@ -151,6 +151,25 @@ def __add_arguments_to_parser(parser): "format: key=value. Valid key values are: " "{0}.".format(', '.join(supported_metadata_keys))) + parser.add_argument('--filename', + type=str, + dest='filename', + metavar='FILENAME', + default="{source_file}_{analyzer}", + help="This option can be used to override the default " + "plist file name output of this tool. This tool " + "can produce multiple plist files on the given " + "code analyzer output result file. The problem " + "is if we run this tool multiple times on the " + "same directory, it may override some plist " + "files. To prevent this we can generate a unique " + "hash into the plist file names with this " + "option. For example: " + "'{source_file}_{analyzer}_xxxxx'. 
{source_file} " + "and {analyzer} are special values which will " + "be replaced with the current analyzer and " + "source file name where the bug was found.") + parser.add_argument('-c', '--clean', dest="clean", required=False, @@ -197,7 +216,7 @@ def main(): sys.exit(1) return output_to_plist(args.input, args.type, args.output_dir, - args.clean, valid_metadata_values) + args.filename, args.clean, valid_metadata_values) if __name__ == "__main__": diff --git a/tools/report-converter/codechecker_report_converter/cppcheck/analyzer_result.py b/tools/report-converter/codechecker_report_converter/cppcheck/analyzer_result.py index a1be2bfd93..d6cd48adb8 100644 --- a/tools/report-converter/codechecker_report_converter/cppcheck/analyzer_result.py +++ b/tools/report-converter/codechecker_report_converter/cppcheck/analyzer_result.py @@ -76,7 +76,7 @@ def _add_report_hash(self, plist_data): files[diag['location']['file']]) diag['issue_hash_content_of_line_in_context'] = report_hash - def _write(self, file_to_plist_data, output_dir): + def _write(self, file_to_plist_data, output_dir, file_name): """ Creates plist files from the parse result to the given output. """ output_dir = os.path.abspath(output_dir) for file_name, plist_data in file_to_plist_data.items():