Error code return feature. Version up to v1.10.6 (#678)

* Error code return feature * skip args test for python3.11
Samsung · Feb 17, 2025 · aafc50e · aafc50e
1 parent ff602e0
commit aafc50e
Show file tree

Hide file tree

Showing 16 changed files with 219 additions and 108 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,13 +1,6 @@
 repos:
--   repo: https://github.com/pre-commit/mirrors-yapf
-    rev: v0.30.0
+-   repo: https://github.com/google/yapf
+    rev: v0.43.0
     hooks:
     - id: yapf
       args: ['--style=.style.yapf', '--parallel', '--in-place']
--   repo: https://github.com/pycqa/pydocstyle
-    rev: 6.1.1
-    hooks:
-    - id: pydocstyle
-      args:
-      - --convention=google
-      - --add-ignore=D1
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
@@ -1,6 +1,6 @@
 -   id: CredSweeper
     name: CredSweeper
     description: Advanced credential scanner
-    entry: python -m credsweeper --path
+    entry: python -m credsweeper --banner --error --color --no-stdout --path
     language: python
     types: [text]
diff --git a/credsweeper/__init__.py b/credsweeper/__init__.py
@@ -18,4 +18,4 @@
     '__version__'
 ]
 
-__version__ = "1.10.5"
+__version__ = "1.10.6"
diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py
@@ -3,7 +3,7 @@
 import os
 import sys
 import time
-from argparse import ArgumentParser, ArgumentTypeError, Namespace
+from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
 from typing import Any, Union, Dict
 
 from credsweeper import __version__
@@ -211,6 +211,10 @@ def get_arguments() -> Namespace:
                         help="parse .gitignore files and skip credentials from ignored objects",
                         dest="skip_ignored",
                         action="store_true")
+    parser.add_argument("--error",
+                        help="produce error code if credentials are found",
+                        action=BooleanOptionalAction,
+                        default=False)
     parser.add_argument("--save-json",
                         nargs="?",
                         help="save result to json file (default: output.json)",
@@ -223,16 +227,21 @@ def get_arguments() -> Namespace:
                         const="output.xlsx",
                         dest="xlsx_filename",
                         metavar="PATH")
-    parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True)
+    parser.add_argument("--stdout", help="print results to stdout", action=BooleanOptionalAction, default=True)
+    parser.add_argument("--color", help="print results with colorization", action=BooleanOptionalAction, default=False)
     parser.add_argument("--hashed",
                         help="line, variable, value will be hashed in output",
-                        action="store_const",
-                        const=True)
+                        action=BooleanOptionalAction,
+                        default=False)
     parser.add_argument("--subtext",
                         help=f"line text will be stripped in {2 * ML_HUNK} symbols but value and variable are kept",
-                        action="store_const",
-                        const=True)
-    parser.add_argument("--sort", help="enable output sorting", dest="sort_output", action="store_true")
+                        action=BooleanOptionalAction,
+                        default=False)
+    parser.add_argument("--sort",
+                        help="enable output sorting",
+                        dest="sort_output",
+                        action=BooleanOptionalAction,
+                        default=False)
     parser.add_argument("--log",
                         "-l",
                         help=f"provide logging level of {list(Logger.LEVELS.keys())}"
@@ -281,6 +290,7 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int:
                                   config_path=args.config_path,
                                   json_filename=args.json_filename,
                                   xlsx_filename=args.xlsx_filename,
+                                  stdout=args.stdout,
                                   color=args.color,
                                   hashed=args.hashed,
                                   subtext=args.subtext,
@@ -310,6 +320,7 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int:
 def main() -> int:
     """Main function"""
     result = EXIT_FAILURE
+    credentials_number = 0
     start_time = time.time()
     args = get_arguments()
     if args.banner:
@@ -336,15 +347,20 @@ def main() -> int:
         del_credentials_number = scan(args, content_provider)
         summary["Deleted File Credentials"] = del_credentials_number
         if 0 <= add_credentials_number and 0 <= del_credentials_number:
+            # both counts are non-negative, so the scan completed successfully
             result = EXIT_SUCCESS
+            # collect the total number of found credentials to produce an error code when necessary
+            credentials_number = add_credentials_number + del_credentials_number
     elif args.export_config:
         logging.info(f"Exporting default config to file: {args.export_config}")
         config_dict = Util.json_load(APP_PATH / "secret" / "config.json")
         Util.json_dump(config_dict, args.export_config)
+        result = EXIT_SUCCESS
     elif args.export_log_config:
         logging.info(f"Exporting default logger config to file: {args.export_log_config}")
         config_dict = Util.yaml_load(APP_PATH / "secret" / "log.yaml")
         Util.yaml_dump(config_dict, args.export_log_config)
+        result = EXIT_SUCCESS
     elif args.banner and 2 == len(sys.argv):
         # only extend version invocation
         result = EXIT_SUCCESS
@@ -357,6 +373,10 @@ def main() -> int:
         end_time = time.time()
         print(f"Time Elapsed: {end_time - start_time}s")
 
+    if args.error and EXIT_SUCCESS == result and 0 < credentials_number:
+        # --error was requested and credentials were found: override the successful result
+        result = EXIT_FAILURE
+
     return result
 
 

diff --git a/credsweeper/app.py b/credsweeper/app.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import multiprocessing
 import signal
@@ -41,6 +42,7 @@ def __init__(self,
                  config_path: Optional[str] = None,
                  json_filename: Union[None, str, Path] = None,
                  xlsx_filename: Union[None, str, Path] = None,
+                 stdout: bool = False,
                  color: bool = False,
                  hashed: bool = False,
                  subtext: bool = False,
@@ -70,7 +72,8 @@ def __init__(self,
                 default built-in config is used if None
             json_filename: optional string variable, path to save result to json
             xlsx_filename: optional string variable, path to save result to xlsx
-            color: print results to stdout with colorization
+            stdout: print results to stdout
+            color: print concise results to stdout with colorization
             hashed: use hash of line, value and variable instead of plain text
             subtext: use subtext of line near variable-value as it is done in ML
             use_filters: boolean variable, specifying the need of rule filters
@@ -110,6 +113,7 @@ def __init__(self,
         self.credential_manager = CredentialManager()
         self.json_filename: Union[None, str, Path] = json_filename
         self.xlsx_filename: Union[None, str, Path] = xlsx_filename
+        self.stdout = stdout
         self.color = color
         self.hashed = hashed
         self.subtext = subtext
@@ -245,8 +249,7 @@ def run(self, content_provider: AbstractProvider) -> int:
         # PatchesProvider has the attribute. Circular import error appears with using the isinstance
         change_type = content_provider.change_type if hasattr(content_provider, "change_type") else None
         self.export_results(change_type)
-
-        return len(self.credential_manager.get_credentials())
+        return self.credential_manager.len_credentials()
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
@@ -392,7 +395,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
         Args:
             change_type: flag to know which file should be created for a patch
         """
-        is_exported = False
 
         credentials = self.credential_manager.get_credentials()
 
@@ -410,15 +412,22 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
 
         if self.json_filename:
             json_path = Path(self.json_filename)
-            is_exported = True
             if isinstance(change_type, DiffRowType):
                 # add suffix for appropriated reports to create two files for the patch scan
                 json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
-            Util.json_dump([credential.to_json(hashed=self.hashed, subtext=self.subtext) for credential in credentials],
-                           file_path=json_path)
+            with open(json_path, 'w') as f:
+                # write items one at a time to reduce peak memory usage in case of huge data
+                first_item = True
+                f.write('[\n')
+                for credential in credentials:
+                    if first_item:
+                        first_item = False
+                    else:
+                        f.write(",\n")
+                    f.write(json.dumps(credential.to_json(hashed=self.hashed, subtext=self.subtext), indent=4))
+                f.write("\n]")
 
         if self.xlsx_filename:
-            is_exported = True
             data_list = []
             for credential in credentials:
                 data_list.extend(credential.to_dict_list(hashed=self.hashed, subtext=self.subtext))
@@ -434,7 +443,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
                 df.to_excel(self.xlsx_filename, sheet_name="report", index=False)
 
         if self.color:
-            is_exported = True
             for credential in credentials:
                 for line_data in credential.line_data_list:
                     # bright rule name and path or info
@@ -443,6 +451,6 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
                           Style.RESET_ALL)
                     print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
 
-        if is_exported is False:
+        if self.stdout:
             for credential in credentials:
                 print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
diff --git a/credsweeper/credentials/credential_manager.py b/credsweeper/credentials/credential_manager.py
@@ -9,16 +9,24 @@
 
 
 class CredentialManager:
-    """The manager allows you to store, add and delete separate credit candidates.
-
-    Parameters:
-        candidates: list of credential candidates
-
-    """
+    """The manager allows you to store, add and delete separate credential candidates."""
 
     def __init__(self) -> None:
         self.candidates: List[Candidate] = list(Manager().list())
 
+    def clear_credentials(self) -> None:
+        """Clear credential candidates stored in the manager."""
+        self.candidates.clear()
+
+    def len_credentials(self) -> int:
+        """Get number of credential candidates stored in the manager.
+
+        Return:
+            Non-negative integer
+
+        """
+        return len(self.candidates)
+
     def get_credentials(self) -> List[Candidate]:
         """Get all credential candidates stored in the manager.
 

diff --git a/credsweeper/filters/value_token_base_check.py b/credsweeper/filters/value_token_base_check.py
@@ -37,10 +37,9 @@ def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]
 
     @staticmethod
     def get_ppf(n: int) -> float:
-        """
-        from scipy.stats import t
-        print('\n'.join(f'{n}: {t.ppf(0.9827, n-1):.8f},' for n in [8,10,15,16,20,24,25,32,40,50,64]))
-        """
+        """Return the MUL_DICT value for n. The commented code below was used to produce the values."""
+        # from scipy.stats import t
+        # print('\n'.join(f'{n}: {t.ppf(0.9827, n-1):.8f},' for n in [8,10,15,16,20,24,25,32,40,50,64]))
         return ValueTokenBaseCheck.MUL_DICT[n]
 
     def run(self, line_data: LineData, target: AnalysisTarget) -> bool:

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -17,7 +17,7 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
 project = 'CredSweeper'
-copyright = '2024, Samsung CredTeam'
+copyright = '2025, Samsung CredTeam'
 author = 'CredTeam'
 
 from credsweeper import __version__ as credsweeper_version

diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst
@@ -348,6 +348,14 @@ credsweeper.filters.value\_token\_base64\_check module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.filters.value\_token\_base\_check module
+----------------------------------------------------
+
+.. automodule:: credsweeper.filters.value_token_base_check
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.filters.value\_token\_check module
 ----------------------------------------------
 

diff --git a/docs/source/credsweeper.utils.rst b/docs/source/credsweeper.utils.rst
@@ -12,6 +12,14 @@ credsweeper.utils.entropy\_validator module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.utils.hop\_stat module
+----------------------------------
+
+.. automodule:: credsweeper.utils.hop_stat
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.utils.pem\_key\_detector module
 -------------------------------------------