From 9a89fb1c9dfeda4640780111f9e9437f08cfad88 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 12 Oct 2020 10:02:37 -0300 Subject: [PATCH] Hash always uses all characters in a long string Hashes for long strings are computed only when they are used as keys in a table, which is not a very common case. And, in that case, it is too easy to force collisions by changing only the characters which are not part of the hash. --- lstate.c | 2 +- lstring.c | 20 ++++---------------- lstring.h | 3 +-- ltests.c | 1 - 4 files changed, 6 insertions(+), 20 deletions(-) diff --git a/lstate.c b/lstate.c index 76df6a20d..422742924 100644 --- a/lstate.c +++ b/lstate.c @@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) { addbuff(buff, p, &h); /* local variable */ addbuff(buff, p, &lua_newstate); /* public function */ lua_assert(p == sizeof(buff)); - return luaS_hash(buff, p, h, 1); + return luaS_hash(buff, p, h); } #endif diff --git a/lstring.c b/lstring.c index 6f1574731..138871c70 100644 --- a/lstring.c +++ b/lstring.c @@ -22,16 +22,6 @@ #include "lstring.h" -/* -** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to -** compute its hash -*/ -#if !defined(LUAI_HASHLIMIT) -#define LUAI_HASHLIMIT 5 -#endif - - - /* ** Maximum size for string table. */ @@ -50,10 +40,9 @@ int luaS_eqlngstr (TString *a, TString *b) { } -unsigned int luaS_hash (const char *str, size_t l, unsigned int seed, - size_t step) { +unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { unsigned int h = seed ^ cast_uint(l); - for (; l >= step; l -= step) + for (; l > 0; l--) h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1])); return h; } @@ -63,8 +52,7 @@ unsigned int luaS_hashlongstr (TString *ts) { lua_assert(ts->tt == LUA_VLNGSTR); if (ts->extra == 0) { /* no hash? 
*/ size_t len = ts->u.lnglen; - size_t step = (len >> LUAI_HASHLIMIT) + 1; - ts->hash = luaS_hash(getstr(ts), len, ts->hash, step); + ts->hash = luaS_hash(getstr(ts), len, ts->hash); ts->extra = 1; /* now it has its hash */ } return ts->hash; @@ -201,7 +189,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { TString *ts; global_State *g = G(L); stringtable *tb = &g->strt; - unsigned int h = luaS_hash(str, l, g->seed, 1); + unsigned int h = luaS_hash(str, l, g->seed); TString **list = &tb->hash[lmod(h, tb->size)]; lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ for (ts = *list; ts != NULL; ts = ts->u.hnext) { diff --git a/lstring.h b/lstring.h index a413a9d3a..450c2390d 100644 --- a/lstring.h +++ b/lstring.h @@ -41,8 +41,7 @@ #define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b)) -LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, - unsigned int seed, size_t step); +LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed); LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts); LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); LUAI_FUNC void luaS_resize (lua_State *L, int newsize); diff --git a/ltests.c b/ltests.c index 994561599..7e3a389a7 100644 --- a/ltests.c +++ b/ltests.c @@ -523,7 +523,6 @@ static lu_mem checkgraylist (global_State *g, GCObject *o) { ((void)g); /* better to keep it available if we need to print an object */ while (o) { lua_assert(!!isgray(o) ^ (getage(o) == G_TOUCHED2)); - //lua_assert(isgray(o) || getage(o) == G_TOUCHED2); lua_assert(!testbit(o->marked, TESTBIT)); if (keepinvariant(g)) l_setbit(o->marked, TESTBIT); /* mark that object is in a gray list */