Skip to content

Commit

Permalink
Add conda-based pre-commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
xhochy committed Feb 14, 2022
1 parent 30bffc4 commit eeaea0b
Show file tree
Hide file tree
Showing 35 changed files with 1,385 additions and 965 deletions.
11 changes: 11 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Taken almost directly from https://github.com/psf/black/blob/main/.flake8
# (formerly ambv/black; the repository was renamed)
[flake8]
ignore = B950, C901, D100, D104, E203, E266, E501, W503
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9,D
enable-extensions = flake8-docstrings
per-file-ignores =
benchmarks/**:D101,D102,D103
tests/**:D101,D102,D103
docstring-convention = numpy
26 changes: 26 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
repos:
- repo: https://github.com/Quantco/pre-commit-mirrors-black
rev: 21.5b1
hooks:
- id: black-conda
- repo: https://github.com/Quantco/pre-commit-mirrors-flake8
rev: v3.9.2
hooks:
- id: flake8-conda
- repo: https://github.com/Quantco/pre-commit-mirrors-isort
rev: 5.8.0
hooks:
- id: isort-conda
additional_dependencies: [toml]
args: ["--profile", "black"]
- repo: https://github.com/Quantco/pre-commit-mirrors-mypy
rev: "0.931"
hooks:
- id: mypy-conda
additional_dependencies: [-c, conda-forge, types-setuptools]
- repo: https://github.com/Quantco/pre-commit-mirrors-pyupgrade
rev: 2.31.0
hooks:
- id: pyupgrade-conda
args:
- --py38-plus
67 changes: 33 additions & 34 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
#
# turbodbc documentation build configuration file, created by
# sphinx-quickstart on Sun Apr 9 09:48:15 2017.
Expand All @@ -8,9 +7,11 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sphinx_rtd_theme
import sys
sys.path.insert(0, os.path.abspath(os.path.join('..', 'python')))

import sphinx_rtd_theme

sys.path.insert(0, os.path.abspath(os.path.join("..", "python")))


# -- General configuration ------------------------------------------------
Expand All @@ -19,24 +20,24 @@
#
# needs_sphinx = '1.0'

# Sphinx extensions to enable; autodoc pulls API docs from docstrings.
extensions = ["sphinx.ext.autodoc"]

# Paths (relative to this file) that contain custom templates.
templates_path = ["_templates"]

# Source files are reStructuredText.
source_suffix = ".rst"

# The document that contains the root toctree.
master_doc = "index"

# Project identity shown throughout the generated documentation.
project = "turbodbc"
copyright = "2017, Michael König"
author = "Michael König"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version: Read the Docs injects the active version name via
# the READTHEDOCS_VERSION environment variable; fall back to "latest" locally.
version = os.environ.get("READTHEDOCS_VERSION", "latest")
# The full version, including alpha/beta/rc tags.
release = version

Expand All @@ -50,10 +51,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
Expand All @@ -64,14 +65,16 @@
# Mock out the compiled extension module so autodoc can import turbodbc on
# Read the Docs, where the native C++ extension is not built.  The original
# try/except fell back to the same unittest.mock import in both branches
# (the Python 2 `mock` backport is no longer needed), so import directly.
from unittest.mock import MagicMock


class Mock(MagicMock):
    """MagicMock subclass whose attribute lookups always yield fresh mocks."""

    @classmethod
    def __getattr__(cls, name):
        # Resolve any attribute access on the mocked module to another mock.
        return MagicMock()


# Modules that must appear importable during the docs build but are absent.
MOCK_MODULES = ["turbodbc_intern"]
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)


Expand All @@ -80,7 +83,7 @@ def __getattr__(cls, name):
# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Resolve the theme's on-disk location so Sphinx can find its templates.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand All @@ -91,30 +94,27 @@ def __getattr__(cls, name):
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "turbodbcdoc"


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
latex_elements = { # type: ignore
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
Expand All @@ -124,19 +124,15 @@ def __getattr__(cls, name):
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "turbodbc.tex", "turbodbc Documentation", "Michael König", "manual"),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "turbodbc", "turbodbc Documentation", [author], 1)]


# -- Options for Texinfo output -------------------------------------------
Expand All @@ -145,10 +141,13 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    # (source start file, target name, title, author,
    #  dir menu entry, description, category)
    (
        master_doc,
        "turbodbc",
        "turbodbc Documentation",
        author,
        "turbodbc",
        "One line description of project.",
        "Miscellaneous",
    ),
]



132 changes: 79 additions & 53 deletions performance_scripts/measure_performance_exasol.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,63 @@
import json
from datetime import date, datetime
from typing import Any

import numpy
import pyodbc

import turbodbc
import numpy
import json
from datetime import datetime, date


def connect(api, dsn):
    """Open a database connection through the requested driver API.

    Parameters
    ----------
    api : str
        ``"pyodbc"`` selects pyodbc; any other value selects turbodbc.
    dsn : str
        ODBC data source name to connect to.

    Returns
    -------
    A DB-API 2.0 connection from the selected driver.
    """
    if api == "pyodbc":
        return pyodbc.connect(dsn=dsn)
    # Large buffers plus async I/O so turbodbc is benchmarked at its fastest.
    return turbodbc.connect(
        dsn,
        parameter_sets_to_buffer=100000,
        rows_to_buffer=100000,
        use_async_io=True,
    )


def _column_data(column_type):
    """Return one representative sample value for the given SQL column type.

    Raises
    ------
    RuntimeError
        If ``column_type`` is not one of the supported type names.
    """
    if column_type == "INTEGER":
        return 42
    if column_type == "DOUBLE":
        return 3.14
    if column_type == "DATE":
        return date(2016, 1, 2)
    # Any VARCHAR(n) variant gets the same short string payload.
    if "VARCHAR" in column_type:
        return "test data"
    if column_type == "TIMESTAMP":
        return datetime(2016, 1, 2, 3, 4, 5)
    raise RuntimeError(f"Unknown column type {column_type}")


def prepare_test_data(cursor, column_types, powers_of_two_lines):
    """Create and fill ``test_performance`` with 2**powers_of_two_lines rows.

    A single seed row (one sample value per column, via ``_column_data``) is
    inserted, then the table is doubled ``powers_of_two_lines`` times with
    INSERT ... SELECT statements.
    """
    columns = [f"col{i} {col_type}" for i, col_type in enumerate(column_types)]
    cursor.execute(
        "CREATE OR REPLACE TABLE test_performance ({})".format(", ".join(columns))
    )

    # Seed row: one representative value per column.
    data = [_column_data(col_type) for col_type in column_types]
    cursor.execute(
        "INSERT INTO test_performance VALUES ({})".format(
            ", ".join("?" for _ in columns)
        ),
        data,
    )

    # Each pass doubles the row count by re-inserting the whole table.
    for _ in range(powers_of_two_lines):
        cursor.execute("INSERT INTO test_performance SELECT * FROM test_performance")


def _fetchallnumpy(cursor):
    """Drain the result set in one bulk NumPy fetch (turbodbc-specific)."""
    cursor.fetchallnumpy()


def _stream_to_ignore(cursor):
    """Iterate over every result row, discarding each one."""
    for _row in cursor:
        pass
Expand All @@ -48,55 +68,61 @@ def _stream_to_list(cursor):


def measure(cursor, extraction_method):
    """Time one full extraction of the ``test_performance`` table.

    The SELECT is issued before the clock starts, so only the row
    extraction performed by ``extraction_method`` is measured.

    Returns
    -------
    float
        Elapsed wall-clock time in seconds.
    """
    cursor.execute("SELECT * FROM test_performance")
    start = datetime.now()
    extraction_method(cursor)
    stop = datetime.now()
    return (stop - start).total_seconds()


# ---------------------------------------------------------------------------
# Benchmark driver: connect, build the test table, time repeated extractions,
# and dump the timing/rate statistics as JSON.
# ---------------------------------------------------------------------------
powers_of_two = 21
n_rows = 2 ** powers_of_two
n_runs = 10
column_types = [
    "INTEGER"
]  # , 'INTEGER', 'DOUBLE', 'DOUBLE'] #, 'VARCHAR(20)', 'DATE', 'TIMESTAMP']
api = "pyodbc"
# extraction_method = _stream_to_ignore
extraction_method = _stream_to_list
# extraction_method = _fetchallnumpy
database = "Exasol"

connection = connect(api, database)
cursor = connection.cursor()

print(f"Performing benchmark with {n_rows} rows")
prepare_test_data(cursor, column_types, powers_of_two)

runs_list = []
for r in range(n_runs):
    print(f"Run #{r + 1}")
    runs_list.append(measure(cursor, extraction_method))

runs = numpy.array(runs_list)
results = {
    "number_of_runs": n_runs,
    "rows_per_run": n_rows,
    "column_types": column_types,
    "api": api,
    "extraction_method": extraction_method.__name__,
    "database": database,
    # Wall-clock seconds per run.
    "timings": {
        "best": runs.min(),
        "worst": runs.max(),
        "mean": runs.mean(),
        "standard_deviation": runs.std(),
    },
    # Rows per second; the mean rate averages reciprocals of the run times.
    "rates": {
        "best": n_rows / runs.min(),
        "worst": n_rows / runs.max(),
        "mean": n_rows * numpy.reciprocal(runs).mean(),
        "standard_deviation": n_rows * numpy.reciprocal(runs).std(),
    },
}

print(json.dumps(results, indent=4, separators=(",", ": ")))
# NOTE(review): no separator between api and method name — kept to match the
# historical result-file naming.
file_name = f"results_{database}_{api}{extraction_method.__name__}.json"
with open(file_name, "w") as file:
    json.dump(results, file, indent=4, separators=(",", ": "))

print(f"Wrote results to file {file_name}")
Loading

0 comments on commit eeaea0b

Please sign in to comment.