Skip to content
This repository has been archived by the owner on Sep 22, 2022. It is now read-only.

Commit

Permalink
t1ha: Merge branch 'devel'.
Browse files Browse the repository at this point in the history
  • Loading branch information
erthink committed Mar 11, 2018
2 parents 58c63b1 + ae29cfc commit bad8ebd
Show file tree
Hide file tree
Showing 27 changed files with 5,551 additions and 743 deletions.
10 changes: 9 additions & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 1.1.3.{build}
version: 1.2.0.{build}

environment:
matrix:
Expand All @@ -21,6 +21,14 @@ platform:
- x86
- x64
- ARM
- ARM64

matrix:
exclude:
- platform: ARM64
TOOLSET: v140
- platform: ARM64
TOOLSET: v120

build_script:
- ps: >
Expand Down
22 changes: 13 additions & 9 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
.vs/
ARM/
ARM64/
Win32/
libt1ha.a
libt1ha.so
t1ha-dll.VC.db
t1ha-static.VC.db
t1ha-test.VC.db
t1ha.VC.VC.opendb
t1ha.VC.db
t1ha.config
t1ha.creator.user
t1ha.includes
t1ha0.o
t1ha0_aes_avx.o
t1ha0_aes_noavx.o
t1ha0_ia32aes_avx.bc
Expand All @@ -8,15 +21,6 @@ t1ha0_ia32aes_avx.s
t1ha0_ia32aes_noavx.bc
t1ha0_ia32aes_noavx.i
t1ha0_ia32aes_noavx.s
t1ha0.o
t1ha1.o
t1ha-dll.VC.db
t1ha-static.VC.db
t1ha-test.VC.db
t1ha.VC.db
t1ha.VC.VC.opendb
test
.vs/
ARM/
Win32/
x64/
20 changes: 6 additions & 14 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,20 @@ os:
- linux
- osx

script: if [ "${COVERITY_SCAN_BRANCH}" != 1 ]; then make all check; fi

env:
global:
- secure: "fQvkBkC9lpnhl6L9DsfXQWBaHR4UMR1wtLjPrSn7Sl8cwKfJ5MAR+K1Khz7BDkKV7MCN6no7QJ1MYbwSSp67XnvwWL10yt/KbaAZCMNRWgwenfiCvdN67sw4rliX3q3X6PllI2jUSJy8gFmHDZzqEH4ark6uq3Gb30gt2lSlxXd/vF4xDOjy/IyLhzbFQIhJScHLApxeOYuoHLEmta5gsXjwhK6dsftDAHGc6alyPu19h5p4wvv+bP2WcREvMQHK6u3vfeUrUGdNYboEHyJmM9qHnR7JPmGMEhuF/lWjc7DtWrvb5qAVSayeKbTOQWnk5sINN46mTEDfGXkHsjlFIau6mdIGDNIAhdge6ODuVyCTS4pTd0LQNTJG5pL0dFDTtiqaSkDU4M0/ofsO2O535dwFhdJz7qqqWacTYDJCPGvku1TmhfxNWyrFlp/I8shtYdMKmNtHQrtEBlLYCdnypow2VB+3tyvwve5LpwSY2BD7gY2NrP6fp7vHqfoan5PXfsxXuBt7LJDmHDBOvTils2RPbqiF0jG1Xk1YYWTUopLqEl2iYUqnOeg4XcS7wEwOpgbEqfrvHJ4BPUI+Rz1TMN19P9sgWS+hWuUMg+hka3ZVvzAI619eqzMnYVNTJjbDHZya3kSxfBuyO7RLjj+3UN88QClLoIKJ7Aa/50xZGY8="

before_install: >
if [ "${COVERITY_SCAN_BRANCH}" = 1 ]; then
before_script: >
if [ "${TRAVIS_BRANCH}" = "coverity_scan" ]; then
# implement Coverity Scan with before_script instead of addons.coverity_scan
if grep -q '[0-9]\+\.1$' <<< "${TRAVIS_JOB_NUMBER}"; then
export COVERITY_SCAN_BRANCH=1
echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca-
curl -s 'https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh' | COVERITY_SCAN_PROJECT_NAME="$TRAVIS_REPO_SLUG" COVERITY_SCAN_NOTIFICATION_EMAIL="[email protected]" COVERITY_SCAN_BUILD_COMMAND="make" COVERITY_SCAN_BUILD_COMMAND_PREPEND="" COVERITY_SCAN_BRANCH_PATTERN="$TRAVIS_BRANCH" bash
else
echo "Skip CoverityScan for unrelated os/compiler"
exit 0
fi
fi
addons:
coverity_scan:
project:
name: "leo-yuriev/t1ha"
description: "Build submitted via Travis CI"
version: 1.0
notification_email: [email protected]
build_command: make
branch_pattern: coverity_scan
script: if [ "${COVERITY_SCAN_BRANCH}" != 1 ]; then make all check; fi
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2016-2017 Positive Technologies, https://www.ptsecurity.com,
Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
Fast Positive Hash.

Portions Copyright (c) 2010-2013 Leonid Yuriev <[email protected]>,
Expand Down
99 changes: 79 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,60 +5,119 @@
# So, define it to 0 for calmness if in doubt.
T1HA_USE_FAST_ONESHOT_READ ?=1

CFLAGS ?= -std=c99
CFLAGS ?= -std=c99 -O3 -DNDEBUG -D_DEFAULT_SOURCE
CC ?= gcc

TARGET_ARCHx86 = $(shell (export LC_ALL=C; ($(CC) --version 2>&1; $(CC) -v 2>&1) | grep -q -i -e '^Target: \(x86_64\)\|\([iI][3-6]86\)-.*' && echo yes || echo no))

OBJ_LIST := t1ha0.o t1ha1.o
BENCH_EXTRA :=
ifeq ($(TARGET_ARCHx86),yes)
TARGET_ARCH_e2k = $(shell (export LC_ALL=C; ($(CC) --version 2>&1; $(CC) -v 2>&1) | grep -q -i 'e2k' && echo yes || echo no))
TARGET_ARCH_ia32 = $(shell (export LC_ALL=C; ($(CC) --version 2>&1; $(CC) -v 2>&1) | grep -q -i -e '^Target: \(x86_64\)\|\([iI][3-6]86\)-.*' && echo yes || echo no))

OBJ_LIST := t1ha0.o t1ha1.o t1ha2.o
BENCH_EXTRA := bench.o mera.o test.o 4bench_xxhash.o
ifeq ($(TARGET_ARCH_e2k),yes)
TARGET_ARCH := e2k
CFLAGS += -mtune=native
OBJ_LIST += t1ha0_aes_noavx.o t1ha0_aes_avx.o
BENCH_EXTRA += 4bench_t1ha0_aes_noavx.o 4bench_t1ha0_aes_avx.o
else ifeq ($(TARGET_ARCH_ia32),yes)
TARGET_ARCH := ia32
CFLAGS += -mtune=native
OBJ_LIST += t1ha0_aes_noavx.o t1ha0_aes_avx.o t1ha0_aes_avx2.o
BENCH_EXTRA += 4bench_t1ha0_aes_noavx.o 4bench_t1ha0_aes_avx.o 4bench_t1ha0_aes_avx2.o
else
TARGET_ARCH := portable
endif

CFLAGS_TEST ?= -Wextra -Werror -O -g $(CFLAGS)
CFLAGS_LIB ?= -Wall -ffunction-sections -O3 -fPIC -g $(CFLAGS) -fvisibility=hidden -Dt1ha_EXPORTS
CFLAGS_TEST ?= -Wextra -Werror $(CFLAGS)
CFLAGS_LIB ?= -Wall -ffunction-sections -fPIC $(CFLAGS) -fvisibility=hidden -Dt1ha_EXPORTS

# `all` and `clean` name actions, not files: declare them phony so that a
# stray file called "all" or "clean" cannot make them look up to date.
.PHONY: all clean

# Default build: the test/benchmark executable plus both library flavours.
all: test libt1ha.a libt1ha.so

# Remove the test executables and every intermediate/compiler by-product
# matched by the globs below.
clean:
	rm -f test test32 test64 *.i *.bc *.s *.o *.a *.so

# Compile src/t1ha0.c into t1ha0.o with the library flags (CFLAGS_LIB).
# The source is listed first so the recipe can refer to it as $<.
t1ha0.o: src/t1ha0.c t1ha.h src/t1ha_bits.h Makefile
	$(CC) $(CFLAGS_LIB) -c -o $@ $<

t1ha0_aes_noavx.o_ARCH_ia32_CFLAGS = -mno-avx2 -mno-avx -maes
t1ha0_aes_noavx.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h src/t1ha0_ia32aes_noavx.c Makefile
$(CC) $(CFLAGS_LIB) -save-temps -mno-avx2 -mno-avx -maes -c -o $@ src/t1ha0_ia32aes_noavx.c
$(CC) $(CFLAGS_LIB) -save-temps $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ src/t1ha0_ia32aes_noavx.c

t1ha0_aes_avx.o_ARCH_ia32_CFLAGS = -mno-avx2 -mavx -maes
t1ha0_aes_avx.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h src/t1ha0_ia32aes_avx.c Makefile
$(CC) $(CFLAGS_LIB) -save-temps -mno-avx2 -mavx -maes -c -o $@ src/t1ha0_ia32aes_avx.c
$(CC) $(CFLAGS_LIB) -save-temps $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ src/t1ha0_ia32aes_avx.c

t1ha0_aes_avx2.o_ARCH_ia32_CFLAGS = -mavx2 -mavx -maes
t1ha0_aes_avx2.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h src/t1ha0_ia32aes_avx2.c Makefile
$(CC) $(CFLAGS_LIB) -save-temps -mavx2 -mavx -maes -c -o $@ src/t1ha0_ia32aes_avx2.c
$(CC) $(CFLAGS_LIB) -save-temps $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ src/t1ha0_ia32aes_avx2.c

4bench_t1ha0_aes_noavx.o_ARCH_ia32_CFLAGS = -mno-avx2 -mno-avx -maes
4bench_t1ha0_aes_noavx.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h tests/4bench_t1ha0_ia32aes_noavx.c Makefile
$(CC) $(CFLAGS_LIB) -mno-avx2 -mno-avx -maes -c -o $@ tests/4bench_t1ha0_ia32aes_noavx.c
$(CC) $(CFLAGS_LIB) $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ tests/4bench_t1ha0_ia32aes_noavx.c

4bench_t1ha0_aes_avx.o_ARCH_ia32_CFLAGS = -mno-avx2 -mavx -maes
4bench_t1ha0_aes_avx.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h tests/4bench_t1ha0_ia32aes_avx.c Makefile
$(CC) $(CFLAGS_LIB) -mno-avx2 -mavx -maes -c -o $@ tests/4bench_t1ha0_ia32aes_avx.c
$(CC) $(CFLAGS_LIB) $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ tests/4bench_t1ha0_ia32aes_avx.c

4bench_t1ha0_aes_avx2.o_ARCH_ia32_CFLAGS = -mavx2 -mavx -maes
4bench_t1ha0_aes_avx2.o: t1ha.h src/t1ha_bits.h src/t1ha0_ia32aes_a.h src/t1ha0_ia32aes_b.h tests/4bench_t1ha0_ia32aes_avx2.c Makefile
$(CC) $(CFLAGS_LIB) -mavx2 -mavx -maes -c -o $@ tests/4bench_t1ha0_ia32aes_avx2.c
$(CC) $(CFLAGS_LIB) $($(@)_ARCH_$(TARGET_ARCH)_CFLAGS) -c -o $@ tests/4bench_t1ha0_ia32aes_avx2.c

# Compile src/t1ha1.c into t1ha1.o with the library flags (CFLAGS_LIB).
# The source is listed first so the recipe can refer to it as $<.
t1ha1.o: src/t1ha1.c t1ha.h src/t1ha_bits.h Makefile
	$(CC) $(CFLAGS_LIB) -c -o $@ $<

libt1ha.a: $(OBJ_LIST) test Makefile
# Compile src/t1ha2.c into t1ha2.o with the library flags (CFLAGS_LIB).
# The source is listed first so the recipe can refer to it as $<.
t1ha2.o: src/t1ha2.c t1ha.h src/t1ha_bits.h Makefile
	$(CC) $(CFLAGS_LIB) -c -o $@ $<

libt1ha.a: $(OBJ_LIST) Makefile
$(AR) rs $@ $(OBJ_LIST)

libt1ha.so: $(OBJ_LIST) test Makefile
libt1ha.so: $(OBJ_LIST) Makefile
$(CC) $(CFLAGS) -shared -s -o $@ $(OBJ_LIST)

test: $(OBJ_LIST) $(BENCH_EXTRA) tests/main.c Makefile
@echo "Target-ARCHx86: $(TARGET_ARCHx86)" || true
###############################################################################

# Compile tests/mera.c with the test flags; -save-temps keeps the
# intermediate compiler outputs next to the object file.
mera.o: tests/mera.c t1ha.h tests/mera.h Makefile
	$(CC) $(CFLAGS_TEST) -save-temps -c -o $@ $<

# Compile tests/bench.c with the test flags (CFLAGS_TEST).
bench.o: tests/bench.c t1ha.h tests/common.h tests/mera.h Makefile
	$(CC) $(CFLAGS_TEST) -c -o $@ $<

# Compile tests/test.c with the test flags (CFLAGS_TEST).
test.o: tests/test.c t1ha.h tests/common.h tests/mera.h Makefile
	$(CC) $(CFLAGS_TEST) -c -o $@ $<

# Compile the bundled xxhash source used by the benchmark. -Wno-error is
# appended after CFLAGS_TEST so its warnings do not fail the build.
4bench_xxhash.o: tests/xxhash/xxhash.c tests/xxhash/xxhash.h Makefile
	$(CC) $(CFLAGS_TEST) -Wno-error -c -o $@ $<

# Link the `test` executable from tests/main.c together with the objects in
# $(OBJ_LIST) and $(BENCH_EXTRA). tests/main.c is the first prerequisite, so
# the recipe refers to it as $<. The detected target architecture is printed
# before linking.
test: tests/main.c $(OBJ_LIST) $(BENCH_EXTRA) \
    mera.o bench.o test.o \
    t1ha.h tests/common.h tests/mera.h Makefile
	@echo "Target-ARCH: $(TARGET_ARCH)" || true
	$(CC) $(CFLAGS_TEST) -o $@ $< $(OBJ_LIST) $(BENCH_EXTRA)

.PHONY: check

# Run the self-test executable. If it fails, the built libraries are removed
# and the test's own exit status is propagated. A bare `./test || rm ...`
# would report rm's (successful) status instead, so `make check` could never
# fail.
check: test
	./test || { rc=$$?; rm -rf libt1ha.a libt1ha.so; exit $$rc; }

clean:
rm -f test test32 test64 *.i *.bc *.s *.o *.a *.so
.PHONY: bench-verbose

# Run the test executable with --bench-verbose. If it fails, the built
# libraries are removed and the run's own exit status is propagated, so that
# callers of this target see the failure instead of rm's successful status.
bench-verbose: test
	./test --bench-verbose || { rc=$$?; rm -rf libt1ha.a libt1ha.so; exit $$rc; }

###############################################################################

# Toolchains that are not part of CROSS_LIST, with the reason for each:
#   sparc64-linux-gnu-gcc - qemu troubles (sigaction, etc...)
#   hppa-linux-gnu-gcc    - not supported by qemu
#   hppa64-linux-gnu-gcc  - gcc is unable to cross-compile
#   s390x-linux-gnu-gcc   - qemu troubles (hang)

CROSS_LIST = sh4-linux-gnu-gcc alpha-linux-gnu-gcc \
    powerpc64-linux-gnu-gcc powerpc-linux-gnu-gcc \
    mips64-linux-gnuabi64-gcc mips-linux-gnu-gcc \
    arm-linux-gnueabihf-gcc aarch64-linux-gnu-gcc

.PHONY: cross-gcc cross-qemu

# Do a clean build of `all` with every compiler in CROSS_LIST, stopping at the
# first failure. $(MAKE) is used instead of a literal `make` so the sub-makes
# run the same make program and inherit its flags and jobserver.
cross-gcc:
	for CC in $(CROSS_LIST); do $(MAKE) clean && CC=$$CC $(MAKE) all || exit $$?; done

# Same loop, but builds and runs `bench-verbose` with CFLAGS_TEST overridden to
# produce a static binary.
# NOTE(review): presumably the static link is what lets the foreign binaries
# run under qemu user-mode emulation — confirm.
cross-qemu:
	for CC in $(CROSS_LIST); do $(MAKE) clean && CC=$$CC CFLAGS_TEST="-std=c99 -static" $(MAKE) bench-verbose || exit $$?; done
104 changes: 87 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@ Fast Positive Hash, aka "Позитивный Хэш"
by [Positive Technologies](https://www.ptsecurity.com).

*The Future will Positive. Всё будет хорошо.*
[![Build Status](https://travis-ci.org/leo-yuriev/t1ha.svg?branch=master)](https://travis-ci.org/leo-yuriev/t1ha)
[![Build status](https://ci.appveyor.com/api/projects/status/ptug5fl2ouxdo68h/branch/master?svg=true)](https://ci.appveyor.com/project/leo-yuriev/t1ha/branch/master)
[![CircleCI](https://circleci.com/gh/leo-yuriev/t1ha/tree/master.svg?style=svg)](https://circleci.com/gh/leo-yuriev/t1ha/tree/master)
[![Build Status](https://travis-ci.org/leo-yuriev/t1ha.svg?branch=devel)](https://travis-ci.org/leo-yuriev/t1ha)
[![Build status](https://ci.appveyor.com/api/projects/status/ptug5fl2ouxdo68h/branch/devel?svg=true)](https://ci.appveyor.com/project/leo-yuriev/t1ha/branch/devel)
[![CircleCI](https://circleci.com/gh/leo-yuriev/t1ha/tree/devel.svg?style=svg)](https://circleci.com/gh/leo-yuriev/t1ha/tree/devel)
[![Coverity Scan Status](https://scan.coverity.com/projects/12918/badge.svg)](https://scan.coverity.com/projects/leo-yuriev-t1ha)

## Briefly, it is a portable 64-bit hash function:
1. Intended for 64-bit little-endian platforms, predominantly for x86_64,
1. Intended for 64-bit little-endian platforms, predominantly for Elbrus and x86_64,
but portable and without penalties it can run on any 64-bit CPU.
2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
2. In most cases up to 15% faster than City, xxHash, mum-hash, metro-hash, etc.
and all other portable hash functions (which do not use specific hardware tricks).
3. Currently not suitable for cryptography.
3. Provides a set of _terraced_ hash functions.
4. Currently not suitable for cryptography.
5. Licensed under [Zlib License](https://en.wikipedia.org/wiki/Zlib_License).

Also pay attention to [Erlang](https://github.com/lemenkov/erlang-t1ha)
and [Golang](https://github.com/dgryski/go-t1ha) implementations.
Expand All @@ -24,9 +26,11 @@ and [Golang](https://github.com/dgryski/go-t1ha) implementations.
# Usage
The `t1ha` library provides several terraced hash functions
with dissimilar properties and for different cases.

These functions are briefly described below; see [t1ha.h](t1ha.h) for more API details.

To use in your own project you may link with the t1ha-library,
or just add to your project corresponding source files from `/src` directory.

Please feel free to file an issue or make a pull request.


Expand All @@ -48,8 +52,8 @@ Please, feel free to fill an issue or make pull request.

Also should be noted, the quality of t1ha0() hashing is a subject
for tradeoffs with performance. Therefore the quality and strength
of t1ha0() may be lower than t1ha1(), especially on 32-bit targets,
but then much faster.
of `t1ha0()` may be lower than `t1ha1()` and `t1ha2()`,
especially on 32-bit targets, but then much faster.
However, it is guaranteed to pass all SMHasher tests.

Internally t1ha0() selects the fastest implementation for the current CPU,
Expand All @@ -66,29 +70,44 @@ Please, feel free to fill an issue or make pull request.
| `t1ha1_be()` | 64-bit big-endian |


`t1ha1` = 64 bits, fast portable hash
`t1ha1` = 64 bits, baseline fast portable hash
-------------------------------------

The main generic version of "Fast Positive Hash" with reasonable quality
The first version of "Fast Positive Hash" with reasonable quality
for checksum, hash tables and thin fingerprinting. It is stable, e.g.
returns same result on all architectures and CPUs.

1. Speed with the reasonable quality of hashing.
2. Efficiency on modern 64-bit CPUs, but not in a hardware.
3. Strong as possible, until no penalties on performance.

The main version is intended for little-endian systems and will run
Unfortunately, [Yves Orton](https://github.com/demerphq/smhasher) discovered
that `t1ha1()` fails the strict avalanche criteria in some cases.
This flaw is insignificant for the purposes of `t1ha1()` and imperceptible
from a practical point of view.
However, this issue has since been resolved in the next function, `t1ha2()`,
which was initially planned to provide a bit more quality.

The basic version of `t1ha1()` is intended for little-endian systems and will run
slowly on big-endian. Therefore a dedicated big-endian version is also
provided, but returns the different result than the main version.
provided, but returns the different result than the basic version.


`t1ha2` = 64 bits, little more attention for quality and strength
`t1ha2` = 64 and 128 bits, slightly more attention for quality and strength
-----------------------------------------------------------------
The next-step version of "Fast Positive Hash",
but not yet finished and therefore not available.
The recommended version of "Fast Positive Hash" with good quality
for checksum, hash tables and fingerprinting. It is stable, e.g.
returns same result on all architectures and CPUs.

1. Portable and extremely efficient on modern 64-bit CPUs.
2. Great quality of hashing and still faster than other non-t1ha hashes.
3. Provides streaming mode and 128-bit result.

`t1ha3` = 128 bits, fast non-cryptographic fingerprinting
`t1ha2()` is intended for little-endian systems and will run
slightly slower on big-endian systems.


`t1ha3` = 128 and 256 bits, fast non-cryptographic fingerprinting
---------------------------------------------------------
The next-step version of "Fast Positive Hash",
but not yet finished and therefore not available.
Expand Down Expand Up @@ -124,6 +143,57 @@ for _The 1Hippeus project - zerocopy messaging in the spirit of Sparta!_
********************************************************************************

## Benchmarking and Testing

The current version of the t1ha library includes a tool for basic testing and benchmarking.
Just try `make check` from the t1ha directory.

For comparison, the benchmark also includes the 32- and 64-bit versions of the `xxhash()` function.
For example:
```
$ CC=clang-5.0 make all && sudo make check
...
Preparing to benchmarking...
- suggest enable rdpmc for usermode (echo 2 | sudo tee /sys/devices/cpu/rdpmc)
- running on CPU#3
- use RDPMC_perf as clock source for benchmarking
- assume it cheap and stable
- measure granularity and overhead: 53 cycle, 0.0188679 iteration/cycle
Bench for tiny keys (5 bytes):
t1ha2_atonce : 13.070 cycle/hash, 2.614 cycle/byte, 0.383 byte/cycle, 1.148 Gb/s @3GHz
t1ha1_64le : 14.055 cycle/hash, 2.811 cycle/byte, 0.356 byte/cycle, 1.067 Gb/s @3GHz
t1ha0 : 14.070 cycle/hash, 2.814 cycle/byte, 0.355 byte/cycle, 1.066 Gb/s @3GHz
xxhash64 : 17.203 cycle/hash, 3.441 cycle/byte, 0.291 byte/cycle, 0.872 Gb/s @3GHz
Bench for medium keys (1024 bytes):
t1ha2_atonce : 266.500 cycle/hash, 0.260 cycle/byte, 3.842 byte/cycle, 11.527 Gb/s @3GHz
t1ha1_64le : 245.750 cycle/hash, 0.240 cycle/byte, 4.167 byte/cycle, 12.501 Gb/s @3GHz
t1ha0 : 86.625 cycle/hash, 0.085 cycle/byte, 11.821 byte/cycle, 35.463 Gb/s @3GHz
xxhash64 : 283.000 cycle/hash, 0.276 cycle/byte, 3.618 byte/cycle, 10.855 Gb/s @3GHz
```

The `test` tool supports a set of command-line options for selecting the functions and key sizes to benchmark.
For more info please run `./test --help`.


One notable option is `--hash-stdin-strings`, which is intended for estimating hash collisions on your own data.
With this option the `test` tool hashes each line from standard input and prints its hash to standard output.

For instance, you could count collisions for the lines of some `words.list` file with the following bash command:
```
./t1ha/test --hash-stdin-strings < words.list | sort | uniq -c -d | wc -l
```

A more complex example: count `xxhash()` collisions for the lines of `words.list` plus the numbers 0...10000,
comparing only 32 bits of each hash value:
```
(cat words.list && seq 0 10000) | \
./t1ha/test --xxhash --hash-stdin-strings | \
cut --bytes=-8 | sort | uniq -c -d | wc -l
```


### SMHasher
[_SMHasher_](https://github.com/aappleby/smhasher/wiki) is a well-known
test suite designed to test the distribution, collision,
and performance properties of non-cryptographic hash functions.
Expand Down
Loading

0 comments on commit bad8ebd

Please sign in to comment.