Skip to content

Commit

Permalink
Add conda-based pre-commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
xhochy committed Feb 14, 2022
1 parent 30bffc4 commit eeaea0b
Show file tree
Hide file tree
Showing 35 changed files with 1,385 additions and 965 deletions.
11 changes: 11 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Taken almost directly from https://github.com/psf/black/blob/main/.flake8
# (formerly ambv/black; the repository was renamed)
[flake8]
ignore = B950, C901, D100, D104, E203, E266, E501, W503
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9,D
enable-extensions = flake8-docstrings
per-file-ignores =
benchmarks/**:D101,D102,D103
tests/**:D101,D102,D103
docstring-convention = numpy
26 changes: 26 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
repos:
- repo: https://github.com/Quantco/pre-commit-mirrors-black
rev: 21.5b1
hooks:
- id: black-conda
- repo: https://github.com/Quantco/pre-commit-mirrors-flake8
rev: v3.9.2
hooks:
- id: flake8-conda
- repo: https://github.com/Quantco/pre-commit-mirrors-isort
rev: 5.8.0
hooks:
- id: isort-conda
additional_dependencies: [toml]
args: ["--profile", "black"]
- repo: https://github.com/Quantco/pre-commit-mirrors-mypy
rev: "0.931"
hooks:
- id: mypy-conda
additional_dependencies: [-c, conda-forge, types-setuptools]
- repo: https://github.com/Quantco/pre-commit-mirrors-pyupgrade
rev: 2.31.0
hooks:
- id: pyupgrade-conda
args:
- --py38-plus
67 changes: 33 additions & 34 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
#
# turbodbc documentation build configuration file, created by
# sphinx-quickstart on Sun Apr 9 09:48:15 2017.
Expand All @@ -8,9 +7,11 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sphinx_rtd_theme
import sys
sys.path.insert(0, os.path.abspath(os.path.join('..', 'python')))

import sphinx_rtd_theme

sys.path.insert(0, os.path.abspath(os.path.join("..", "python")))


# -- General configuration ------------------------------------------------
Expand All @@ -19,24 +20,24 @@
#
# needs_sphinx = '1.0'

# Sphinx extensions to enable; autodoc pulls API docs from docstrings.
extensions = ["sphinx.ext.autodoc"]

# Paths (relative to this file) that contain custom templates.
templates_path = ["_templates"]

# Source files are reStructuredText.
source_suffix = ".rst"

# The document that contains the root toctree.
master_doc = "index"

# Project identity shown throughout the generated documentation.
project = "turbodbc"
copyright = "2017, Michael König"
author = "Michael König"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version: Read the Docs injects the active version name via
# the READTHEDOCS_VERSION environment variable; fall back to "latest" locally.
version = os.environ.get("READTHEDOCS_VERSION", "latest")
# The full version, including alpha/beta/rc tags.
release = version

Expand All @@ -50,10 +51,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
Expand All @@ -64,14 +65,16 @@
# Mock out the compiled extension module so autodoc can import turbodbc on
# Read the Docs, where the native C++ extension is not built.  The original
# try/except fell back to the same unittest.mock import in both branches
# (the Python 2 `mock` backport is no longer needed), so import directly.
from unittest.mock import MagicMock


class Mock(MagicMock):
    """MagicMock subclass whose attribute lookups always yield fresh mocks."""

    @classmethod
    def __getattr__(cls, name):
        # Resolve any attribute access on the mocked module to another mock.
        return MagicMock()


# Modules that must appear importable during the docs build but are absent.
MOCK_MODULES = ["turbodbc_intern"]
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)


Expand All @@ -80,7 +83,7 @@ def __getattr__(cls, name):
# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Resolve the theme's on-disk location so Sphinx can find its templates.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand All @@ -91,30 +94,27 @@ def __getattr__(cls, name):
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "turbodbcdoc"


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
latex_elements = { # type: ignore
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
Expand All @@ -124,19 +124,15 @@ def __getattr__(cls, name):
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "turbodbc.tex", "turbodbc Documentation", "Michael König", "manual"),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "turbodbc", "turbodbc Documentation", [author], 1)]


# -- Options for Texinfo output -------------------------------------------
Expand All @@ -145,10 +141,13 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    # (source start file, target name, title, author,
    #  dir menu entry, description, category)
    (
        master_doc,
        "turbodbc",
        "turbodbc Documentation",
        author,
        "turbodbc",
        "One line description of project.",
        "Miscellaneous",
    ),
]



132 changes: 79 additions & 53 deletions performance_scripts/measure_performance_exasol.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,63 @@
import json
from datetime import date, datetime
from typing import Any

import numpy
import pyodbc

import turbodbc
import numpy
import json
from datetime import datetime, date


def connect(api, dsn):
    """Open a database connection through the requested driver API.

    Parameters
    ----------
    api : str
        ``"pyodbc"`` selects pyodbc; any other value selects turbodbc.
    dsn : str
        ODBC data source name to connect to.

    Returns
    -------
    A DB-API 2.0 connection from the selected driver.
    """
    if api == "pyodbc":
        return pyodbc.connect(dsn=dsn)
    # Large buffers plus async I/O so turbodbc is benchmarked at its fastest.
    return turbodbc.connect(
        dsn,
        parameter_sets_to_buffer=100000,
        rows_to_buffer=100000,
        use_async_io=True,
    )


def _column_data(column_type):
    """Return one representative sample value for the given SQL column type.

    Raises
    ------
    RuntimeError
        If ``column_type`` is not one of the supported type names.
    """
    if column_type == "INTEGER":
        return 42
    if column_type == "DOUBLE":
        return 3.14
    if column_type == "DATE":
        return date(2016, 1, 2)
    # Any VARCHAR(n) variant gets the same short string payload.
    if "VARCHAR" in column_type:
        return "test data"
    if column_type == "TIMESTAMP":
        return datetime(2016, 1, 2, 3, 4, 5)
    raise RuntimeError(f"Unknown column type {column_type}")


def prepare_test_data(cursor, column_types, powers_of_two_lines):
    """Create and fill ``test_performance`` with 2**powers_of_two_lines rows.

    A single seed row (one sample value per column, via ``_column_data``) is
    inserted, then the table is doubled ``powers_of_two_lines`` times with
    INSERT ... SELECT statements.
    """
    columns = [f"col{i} {col_type}" for i, col_type in enumerate(column_types)]
    cursor.execute(
        "CREATE OR REPLACE TABLE test_performance ({})".format(", ".join(columns))
    )

    # Seed row: one representative value per column.
    data = [_column_data(col_type) for col_type in column_types]
    cursor.execute(
        "INSERT INTO test_performance VALUES ({})".format(
            ", ".join("?" for _ in columns)
        ),
        data,
    )

    # Each pass doubles the row count by re-inserting the whole table.
    for _ in range(powers_of_two_lines):
        cursor.execute("INSERT INTO test_performance SELECT * FROM test_performance")


def _fetchallnumpy(cursor):
    """Drain the result set in one bulk NumPy fetch (turbodbc-specific)."""
    cursor.fetchallnumpy()


def _stream_to_ignore(cursor):
    """Iterate over every result row, discarding each one."""
    for _row in cursor:
        pass
Expand All @@ -48,55 +68,61 @@ def _stream_to_list(cursor):


def measure(cursor, extraction_method):
    """Time one full extraction of the ``test_performance`` table.

    The SELECT is issued before the clock starts, so only the row
    extraction performed by ``extraction_method`` is measured.

    Returns
    -------
    float
        Elapsed wall-clock time in seconds.
    """
    cursor.execute("SELECT * FROM test_performance")
    start = datetime.now()
    extraction_method(cursor)
    stop = datetime.now()
    return (stop - start).total_seconds()


# ---------------------------------------------------------------------------
# Benchmark driver: connect, build the test table, time repeated extractions,
# and dump the timing/rate statistics as JSON.
# ---------------------------------------------------------------------------
powers_of_two = 21
n_rows = 2 ** powers_of_two
n_runs = 10
column_types = [
    "INTEGER"
]  # , 'INTEGER', 'DOUBLE', 'DOUBLE'] #, 'VARCHAR(20)', 'DATE', 'TIMESTAMP']
api = "pyodbc"
# extraction_method = _stream_to_ignore
extraction_method = _stream_to_list
# extraction_method = _fetchallnumpy
database = "Exasol"

connection = connect(api, database)
cursor = connection.cursor()

print(f"Performing benchmark with {n_rows} rows")
prepare_test_data(cursor, column_types, powers_of_two)

runs_list = []
for r in range(n_runs):
    print(f"Run #{r + 1}")
    runs_list.append(measure(cursor, extraction_method))

runs = numpy.array(runs_list)
results = {
    "number_of_runs": n_runs,
    "rows_per_run": n_rows,
    "column_types": column_types,
    "api": api,
    "extraction_method": extraction_method.__name__,
    "database": database,
    # Wall-clock seconds per run.
    "timings": {
        "best": runs.min(),
        "worst": runs.max(),
        "mean": runs.mean(),
        "standard_deviation": runs.std(),
    },
    # Rows per second; the mean rate averages reciprocals of the run times.
    "rates": {
        "best": n_rows / runs.min(),
        "worst": n_rows / runs.max(),
        "mean": n_rows * numpy.reciprocal(runs).mean(),
        "standard_deviation": n_rows * numpy.reciprocal(runs).std(),
    },
}

print(json.dumps(results, indent=4, separators=(",", ": ")))
# NOTE(review): no separator between api and method name — kept to match the
# historical result-file naming.
file_name = f"results_{database}_{api}{extraction_method.__name__}.json"
with open(file_name, "w") as file:
    json.dump(results, file, indent=4, separators=(",", ": "))

print(f"Wrote results to file {file_name}")
Loading

0 comments on commit eeaea0b

Please sign in to comment.