Yzena
/
Yc
2
1
Fork 0

Add back SpookyHash and the countwords map test

I decided to add them back because using a public domain hashing
function should be pretty safe, and the countwords test is now mostly my
own code, so I don't think it will be a problem.

Signed-off-by: Gavin Howard <gavin@yzena.com>
afl_persistent_mode
Gavin Howard 11 months ago
parent 922e1374dc
commit 0f7d167c83
Signed by: gavin
GPG Key ID: C08038BDF280D33E
  1. 34
      NOTICE.md
  2. 29
      src/hash/CMakeLists.txt
  3. 920
      src/hash/hash.c
  4. 114
      src/hash/hash.h
  5. 37
      tests/hash/CMakeLists.txt
  6. 78
      tests/hash/hash_alignment.c
  7. 136
      tests/hash/hash_deltas.c
  8. 106
      tests/hash/hash_pieces.c
  9. 170
      tests/hash/hash_results.c
  10. 88
      tests/hash/hash_tests.c
  11. 64
      tests/hash/hash_tests.h
  12. 336
      tests/map/map_countwords.c

@ -1,3 +1,37 @@
# Notice
Copyright (c) 2017-2021 Yzena Tech.
With the exception of the files listed below, all files in this repository are
under the [Yzena Network License, Version 0.1][1], and all such files are
considered part of the "source code" as defined by that license.
[1]: https://yzena.com/yzena-network-license/
## `tests/map/map_countwords.c`
The file `tests/map/map_countwords.c` is under the following license and
copyright:
> MIT License
>
> Copyright (c) 2021 Ben Hoyt
> Copyright (c) 2021 Yzena Tech
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in
> all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.

@ -0,0 +1,29 @@
# ***** BEGIN LICENSE BLOCK *****
#
# Copyright 2017-2020 Yzena Tech
#
# Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
# License" or "YNL"). You may not use this file except in compliance with the
# Yzena Network License.
#
# You may obtain a copy of the Yzena Network License at
#
# https://yzena.com/yzena-network-license/
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Yzena Network License is distributed under the
# following disclaimer:
#
# As far as the law allows, this software comes as is, without any
# warranty or condition, and no contributor will be liable to anyone for
# any damages related to this software or this license, under any kind of
# legal claim.
#
# ****** END LICENSE BLOCK ******
# Source files that make up the hash module.
set(YC_HASH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/hash.c"
)
# Re-export the list so the parent directory's CMakeLists.txt can see it.
set(YC_HASH_SRC "${YC_HASH_SRC}" PARENT_SCOPE)

@ -0,0 +1,920 @@
/*
* ***** BEGIN LICENSE BLOCK *****
*
* Copyright 2017-2020 Yzena Tech
*
* Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
* License" or "YNL"). You may not use this file except in compliance with the
* Yzena Network License.
*
* You may obtain a copy of the Yzena Network License at
*
* https://yzena.com/yzena-network-license/
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Yzena Network License is distributed under the
* following disclaimer:
*
* As far as the law allows, this software comes as is, without any
* warranty or condition, and no contributor will be liable to anyone for
* any damages related to this software or this license, under any kind of
* legal claim.
*
* ****** END LICENSE BLOCK ******
*
* *****************************************************************
*
* ******* BEGIN FILE DESCRIPTION *******
*
* Source file for the general hash function that Yc provides. The hash
* function is an almost exact reproduction of Bob Jenkins' SpookyHash
* (http://burtleburtle.net/bob/hash/spooky.html), which is in the public
* domain.
*
* ******** END FILE DESCRIPTION ********
*/
#include <yc/assert.h>
#include <yc/hash.h>
#include <yc/yc.h>
#include "hash.h"
#include "../yc.h"
#include <stdlib.h>
#include <string.h>
/**
* Corresponds to SpookyHash::Rot64().
*
* Left rotate 64-bit x by k bits.
* @param x 64-bit value to rotate.
* @param k Number of bits to rotate by. The callers in this file pass
* compile-time constants below 64.
* @return The rotated 64-bit value.
*/
static uint64_t
yhash_rot64(uint64_t x, int k) yinline
{
// (-k & 63) is the complementary right-shift count, kept inside 0..63.
return (x << k) | (x >> (-k & 63));
}
/**
* Corresponds to SpookyHash::Mix().
*
* This is used if the input is 96 bytes long or longer.
*
* The internal state is fully overwritten every 96 bytes.
* Every input bit appears to cause at least 128 bits of entropy
* before 96 other bytes are combined, when run forward or backward
* * For every input bit,
* * Two inputs differing in just that input bit
* * Where "differ" means xor or subtraction
* * And the base value is random
* * When run forward or backwards one yhash_mix().
* I (Bob Jenkins) tried 3 pairs of each; they all differed by at
* least 212 bits.
* @param data Twelve uint64_t's that will be mixed.
* @param s0 One of twelve vars that help the mix.
* @param s1 One of twelve vars that help the mix.
* @param s2 One of twelve vars that help the mix.
* @param s3 One of twelve vars that help the mix.
* @param s4 One of twelve vars that help the mix.
* @param s5 One of twelve vars that help the mix.
* @param s6 One of twelve vars that help the mix.
* @param s7 One of twelve vars that help the mix.
* @param s8 One of twelve vars that help the mix.
* @param s9 One of twelve vars that help the mix.
* @param s10 One of twelve vars that help the mix.
* @param s11 One of twelve vars that help the mix.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_mix(const uint64_t* data, uint64_t* s0, uint64_t* s1, uint64_t* s2,
          uint64_t* s3, uint64_t* s4, uint64_t* s5, uint64_t* s6, uint64_t* s7,
          uint64_t* s8, uint64_t* s9, uint64_t* s10,
          uint64_t* s11) yinline
{
	// Twelve rounds, one per input word. Round i adds data[i] into s[i],
	// XORs two pairs of state words, rotates s[i], and finishes with one
	// more addition. The statement order is significant: later rounds read
	// values written by earlier ones.
	//
	// This function is pure arithmetic on its arguments; it must not
	// perform I/O (a stray fflush(stdout) used to sit in round 1, costing a
	// library call per 96-byte block and relying on <stdio.h>, which this
	// file does not include).

	// Round 0.
	*s0 += data[0];
	*s2 ^= *s10;
	*s11 ^= *s0;
	*s0 = yhash_rot64(*s0, 11);
	*s11 += *s1;

	// Round 1.
	*s1 += data[1];
	*s3 ^= *s11;
	*s0 ^= *s1;
	*s1 = yhash_rot64(*s1, 32);
	*s0 += *s2;

	// Round 2.
	*s2 += data[2];
	*s4 ^= *s0;
	*s1 ^= *s2;
	*s2 = yhash_rot64(*s2, 43);
	*s1 += *s3;

	// Round 3.
	*s3 += data[3];
	*s5 ^= *s1;
	*s2 ^= *s3;
	*s3 = yhash_rot64(*s3, 31);
	*s2 += *s4;

	// Round 4.
	*s4 += data[4];
	*s6 ^= *s2;
	*s3 ^= *s4;
	*s4 = yhash_rot64(*s4, 17);
	*s3 += *s5;

	// Round 5.
	*s5 += data[5];
	*s7 ^= *s3;
	*s4 ^= *s5;
	*s5 = yhash_rot64(*s5, 28);
	*s4 += *s6;

	// Round 6.
	*s6 += data[6];
	*s8 ^= *s4;
	*s5 ^= *s6;
	*s6 = yhash_rot64(*s6, 39);
	*s5 += *s7;

	// Round 7.
	*s7 += data[7];
	*s9 ^= *s5;
	*s6 ^= *s7;
	*s7 = yhash_rot64(*s7, 57);
	*s6 += *s8;

	// Round 8.
	*s8 += data[8];
	*s10 ^= *s6;
	*s7 ^= *s8;
	*s8 = yhash_rot64(*s8, 55);
	*s7 += *s9;

	// Round 9.
	*s9 += data[9];
	*s11 ^= *s7;
	*s8 ^= *s9;
	*s9 = yhash_rot64(*s9, 54);
	*s8 += *s10;

	// Round 10.
	*s10 += data[10];
	*s0 ^= *s8;
	*s9 ^= *s10;
	*s10 = yhash_rot64(*s10, 22);
	*s9 += *s11;

	// Round 11.
	*s11 += data[11];
	*s1 ^= *s9;
	*s10 ^= *s11;
	*s11 = yhash_rot64(*s11, 46);
	*s10 += *s0;
}
/**
* Corresponds to SpookyHash::EndPartial().
*
* One iteration for yhash_end().
* @param h0 One of 12 variables to mix.
* @param h1 One of 12 variables to mix.
* @param h2 One of 12 variables to mix.
* @param h3 One of 12 variables to mix.
* @param h4 One of 12 variables to mix.
* @param h5 One of 12 variables to mix.
* @param h6 One of 12 variables to mix.
* @param h7 One of 12 variables to mix.
* @param h8 One of 12 variables to mix.
* @param h9 One of 12 variables to mix.
* @param h10 One of 12 variables to mix.
* @param h11 One of 12 variables to mix.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_endPartial(uint64_t* h0, uint64_t* h1, uint64_t* h2, uint64_t* h3,
                 uint64_t* h4, uint64_t* h5, uint64_t* h6, uint64_t* h7,
                 uint64_t* h8, uint64_t* h9, uint64_t* h10,
                 uint64_t* h11) yinline
{
	// Rotation amount used by each of the twelve rounds, in round order.
	static const int rotations[] = { 44, 15, 34, 21, 38, 33,
	                                 10, 13, 38, 53, 42, 54 };

	// The twelve state words, addressable by index so the rounds can be
	// expressed as one loop. Pointers (not copies) are kept so every write
	// lands in the caller's variable at the same moment as before.
	uint64_t* const vars[] = { h0, h1, h2, h3, h4,  h5,
	                           h6, h7, h8, h9, h10, h11 };
	const size_t count = sizeof(vars) / sizeof(vars[0]);

	// Round r: add the word after r into the word before r, XOR that sum
	// into the word two after r, then rotate the word after r.
	for (size_t r = 0; r < count; ++r)
	{
		uint64_t* const sum = vars[(r + count - 1) % count];
		uint64_t* const src = vars[(r + 1) % count];
		uint64_t* const dst = vars[(r + 2) % count];

		*sum += *src;
		*dst ^= *sum;
		*src = yhash_rot64(*src, rotations[r]);
	}
}
/**
* Corresponds to SpookyHash::End().
*
* Mix all 12 inputs together so that h0, h1 are a hash of them all.
*
* For two inputs differing in just the input bits
* Where "differ" means xor or subtraction
* And the base value is random, or a counting value starting at that bit
* The final result will have each bit of h0, h1 flip
* For every input bit,
* with probability 50 +- .3%
* For every pair of input bits,
* with probability 50 +- 3%
*
* This does not rely on the last yhash_mix() call having already mixed some.
* Two iterations was almost good enough for a 64-bit result, but a
* 128-bit result is reported, so yhash_end() does three iterations.
* @param data Twelve uint64_t's to mix.
* @param h0 One of 12 vars to help mix.
* @param h1 One of 12 vars to help mix.
* @param h2 One of 12 vars to help mix.
* @param h3 One of 12 vars to help mix.
* @param h4 One of 12 vars to help mix.
* @param h5 One of 12 vars to help mix.
* @param h6 One of 12 vars to help mix.
* @param h7 One of 12 vars to help mix.
* @param h8 One of 12 vars to help mix.
* @param h9 One of 12 vars to help mix.
* @param h10 One of 12 vars to help mix.
* @param h11 One of 12 vars to help mix.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_end(const uint64_t* data, uint64_t* h0, uint64_t* h1, uint64_t* h2,
          uint64_t* h3, uint64_t* h4, uint64_t* h5, uint64_t* h6, uint64_t* h7,
          uint64_t* h8, uint64_t* h9, uint64_t* h10,
          uint64_t* h11) yinline
{
	uint64_t* const vars[] = { h0, h1, h2, h3, h4,  h5,
	                           h6, h7, h8, h9, h10, h11 };
	const size_t count = sizeof(vars) / sizeof(vars[0]);

	// Fold the final block into the state, word i into variable i.
	for (size_t i = 0; i < count; ++i)
	{
		*vars[i] += data[i];
	}

	// Three finishing passes over the whole state.
	for (int pass = 0; pass < 3; ++pass)
	{
		yhash_endPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
	}
}
/**
* Corresponds to SpookyHash::ShortMix().
*
* The goal is for each bit of the input to expand into 128 bits of
* apparent entropy before it is fully overwritten.
* n trials both set and cleared at least m bits of h0 h1 h2 h3
* n: 2 m: 29
* n: 3 m: 46
* n: 4 m: 57
* n: 5 m: 107
* n: 6 m: 146
* n: 7 m: 152
* when run forwards or backwards
* for all 1-bit and 2-bit diffs
* with diffs defined by either xor or subtraction
* with a base of all zeros plus a counter, or plus another bit, or random
* @param h0 One of four vars to mix.
* @param h1 One of four vars to mix.
* @param h2 One of four vars to mix.
* @param h3 One of four vars to mix.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_shortMix(uint64_t* h0, uint64_t* h1, uint64_t* h2,
uint64_t* h3) yinline
{
// Twelve rounds of three statements each: rotate one variable, add its
// successor into it, then XOR the result into a third variable. The
// variable being rotated cycles h2, h3, h0, h1. Statement order matters
// because each round reads values written by the previous one.
*h2 = yhash_rot64(*h2, 50);
*h2 += *h3;
*h0 ^= *h2;
*h3 = yhash_rot64(*h3, 52);
*h3 += *h0;
*h1 ^= *h3;
*h0 = yhash_rot64(*h0, 30);
*h0 += *h1;
*h2 ^= *h0;
*h1 = yhash_rot64(*h1, 41);
*h1 += *h2;
*h3 ^= *h1;
*h2 = yhash_rot64(*h2, 54);
*h2 += *h3;
*h0 ^= *h2;
*h3 = yhash_rot64(*h3, 48);
*h3 += *h0;
*h1 ^= *h3;
*h0 = yhash_rot64(*h0, 38);
*h0 += *h1;
*h2 ^= *h0;
*h1 = yhash_rot64(*h1, 37);
*h1 += *h2;
*h3 ^= *h1;
*h2 = yhash_rot64(*h2, 62);
*h2 += *h3;
*h0 ^= *h2;
*h3 = yhash_rot64(*h3, 34);
*h3 += *h0;
*h1 ^= *h3;
*h0 = yhash_rot64(*h0, 5);
*h0 += *h1;
*h2 ^= *h0;
*h1 = yhash_rot64(*h1, 36);
*h1 += *h2;
*h3 ^= *h1;
}
/**
* Corresponds to SpookyHash::ShortEnd().
*
* Mix all 4 inputs together so that h0, h1 are a hash of them all.
*
* For two inputs differing in just the input bits
* Where "differ" means xor or subtraction
* And the base value is random, or a counting value starting at that bit
* The final result will have each bit of h0, h1 flip
* For every input bit,
* with probability 50 +- .3% (it is probably better than that)
* For every pair of input bits,
* with probability 50 +- .75% (the worst case is approximately that)
* @param h0 One of four vars to mix.
* @param h1 One of four vars to mix.
* @param h2 One of four vars to mix.
* @param h3 One of four vars to mix.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_shortEnd(uint64_t* h0, uint64_t* h1, uint64_t* h2,
uint64_t* h3) yinline
{
// Eleven rounds of three statements each: XOR a variable into its
// successor, rotate the source variable, then add the rotated value into
// the same successor. The successor cycles h3, h0, h1, h2. Statement order
// matters because each round reads values written by the previous one.
*h3 ^= *h2;
*h2 = yhash_rot64(*h2, 15);
*h3 += *h2;
*h0 ^= *h3;
*h3 = yhash_rot64(*h3, 52);
*h0 += *h3;
*h1 ^= *h0;
*h0 = yhash_rot64(*h0, 26);
*h1 += *h0;
*h2 ^= *h1;
*h1 = yhash_rot64(*h1, 51);
*h2 += *h1;
*h3 ^= *h2;
*h2 = yhash_rot64(*h2, 28);
*h3 += *h2;
*h0 ^= *h3;
*h3 = yhash_rot64(*h3, 9);
*h0 += *h3;
*h1 ^= *h0;
*h0 = yhash_rot64(*h0, 47);
*h1 += *h0;
*h2 ^= *h1;
*h1 = yhash_rot64(*h1, 54);
*h2 += *h1;
*h3 ^= *h2;
*h2 = yhash_rot64(*h2, 32);
*h3 += *h2;
*h0 ^= *h3;
*h3 = yhash_rot64(*h3, 25);
*h0 += *h3;
*h1 ^= *h0;
*h0 = yhash_rot64(*h0, 63);
*h1 += *h0;
}
/**
* Corresponds to SpookyHash::Short().
*
* Short hash. It could be used on any message, but it's used by
* Spooky just for short messages.
* @param message The message to hash.
* @param length The length of the message to hash.
* @param hash1 A pointer that will be given the first 64 bits
* of the 128-bit hash.
* @param hash2 A pointer that will be given the second 64 bits
* of the 128-bit hash.
* @pre All pointer parameters must not be NULL.
*/
yallnonnull
static void
yhash_short(const void* message, size_t length, uint64_t* hash1,
uint64_t* hash2)
{
// Aligned scratch copy of the message, used only when the input pointer is
// not 8-byte aligned. It holds 2 * YHASH_NUM_VARS 64-bit words, so length
// must not exceed that many bytes; the callers in this file only pass
// lengths below YHASH_BUFFER_SIZE.
uint64_t buf[2 * YHASH_NUM_VARS];
// One read cursor viewed as bytes, 32-bit words, 64-bit words, and as an
// integer for the alignment test.
union
{
const uint8_t* p8;
uint32_t* p32;
uint64_t* p64;
size_t i;
} u;
u.p8 = (const uint8_t*) message;
// Misaligned input: read from the aligned copy instead.
if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
{
memcpy(buf, message, length);
u.p64 = buf;
}
size_t remainder = length % 32;
// a and b start from the caller's seeds; c and d from the fixed constant.
uint64_t a = *hash1;
uint64_t b = *hash2;
uint64_t c = YHASH_CONST;
uint64_t d = YHASH_CONST;
if (length > 15)
{
const uint64_t* end = u.p64 + (length / 32) * 4;
// Handle all complete sets of 32 bytes.
for (; u.p64 < end; u.p64 += 4)
{
c += u.p64[0];
d += u.p64[1];
yhash_shortMix(&a, &b, &c, &d);
a += u.p64[2];
b += u.p64[3];
}
// Handle the case of 16+ remaining bytes.
if (remainder >= 16)
{
c += u.p64[0];
d += u.p64[1];
yhash_shortMix(&a, &b, &c, &d);
u.p64 += 2;
remainder -= 16;
}
}
// Handle the last 0..15 bytes, and its length.
// The length goes into the top byte of d (only its low 8 bits survive the
// shift).
d += ((uint64_t) length) << 56;
// Each case folds in one trailing byte and falls through to the next
// smaller case, until a case that reads the remaining bytes as whole 32- or
// 64-bit words and breaks.
switch (remainder)
{
case 15:
{
d += ((uint64_t) u.p8[14]) << 48;
}
// Fallthrough.
yfallthrough;
case 14:
{
d += ((uint64_t) u.p8[13]) << 40;
}
// Fallthrough.
yfallthrough;
case 13:
{
d += ((uint64_t) u.p8[12]) << 32;
}
// Fallthrough.
yfallthrough;
case 12:
{
// Bytes 8..11 as one 32-bit word, bytes 0..7 as one 64-bit word.
d += u.p32[2];
c += u.p64[0];
break;
}
case 11:
{
d += ((uint64_t) u.p8[10]) << 16;
}
// Fallthrough.
yfallthrough;
case 10:
{
d += ((uint64_t) u.p8[9]) << 8;
}
// Fallthrough.
yfallthrough;
case 9:
{
d += (uint64_t) u.p8[8];
}
// Fallthrough.
yfallthrough;
case 8:
{
c += u.p64[0];
break;
}
case 7:
{
c += ((uint64_t) u.p8[6]) << 48;
}
// Fallthrough.
yfallthrough;
case 6:
{
c += ((uint64_t) u.p8[5]) << 40;
}
// Fallthrough.
yfallthrough;
case 5:
{
c += ((uint64_t) u.p8[4]) << 32;
}
// Fallthrough.
yfallthrough;
case 4:
{
c += u.p32[0];
break;
}
case 3:
{
c += ((uint64_t) u.p8[2]) << 16;
}
// Fallthrough.
yfallthrough;
case 2:
{
c += ((uint64_t) u.p8[1]) << 8;
}
// Fallthrough.
yfallthrough;
case 1:
{
c += (uint64_t) u.p8[0];
break;
}
case 0:
{
// No trailing bytes: mix in the constant instead.
c += YHASH_CONST;
d += YHASH_CONST;
break;
}
}
yhash_shortEnd(&a, &b, &c, &d);
// Only a and b are reported; c and d are discarded.
*hash1 = a;
*hash2 = b;
}
void
yhash128(const void* message, size_t length, uint64_t* hash1, uint64_t* hash2)
{
	// Inputs smaller than the two-block buffer take the short-hash path.
	if (length < YHASH_BUFFER_SIZE)
	{
		yhash_short(message, length, hash1, hash2);
		return;
	}

	// Read cursor over the message, viewed as bytes, as 64-bit words, and
	// as an integer for the alignment test.
	union
	{
		const uint8_t* p8;
		uint64_t* p64;
		size_t i;
	} cursor;

	// Aligned scratch block: bounce buffer for misaligned input and home of
	// the padded final block.
	uint64_t block[YHASH_NUM_VARS];

	// The twelve state words repeat the pattern (seed1, seed2, constant).
	const uint64_t seed1 = *hash1;
	const uint64_t seed2 = *hash2;
	uint64_t s0 = seed1, s1 = seed2, s2 = YHASH_CONST;
	uint64_t s3 = seed1, s4 = seed2, s5 = YHASH_CONST;
	uint64_t s6 = seed1, s7 = seed2, s8 = YHASH_CONST;
	uint64_t s9 = seed1, s10 = seed2, s11 = YHASH_CONST;

	cursor.p8 = (const uint8_t*) message;

	const size_t whole = length / YHASH_BLOCK_SIZE;
	uint64_t* const stop = cursor.p64 + whole * YHASH_NUM_VARS;

	// Words may be read straight from the message only if that is allowed
	// or the message happens to be 8-byte aligned.
	const int direct = ALLOW_UNALIGNED_READS || ((cursor.i & 0x7) == 0);

	// Mix every whole block.
	for (; cursor.p64 < stop; cursor.p64 += YHASH_NUM_VARS)
	{
		const uint64_t* src = cursor.p64;

		if (!direct)
		{
			memcpy(block, cursor.p64, YHASH_BLOCK_SIZE);
			src = block;
		}

		yhash_mix(src, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9,
		          &s10, &s11);
	}

	// Build the final block: leftover bytes, zero padding, and the leftover
	// count in the very last byte.
	const size_t tail = length - whole * YHASH_BLOCK_SIZE;
	uint8_t* const bytes = (uint8_t*) block;

	memcpy(block, stop, tail);
	memset(bytes + tail, 0, YHASH_BLOCK_SIZE - tail);
	bytes[YHASH_BLOCK_SIZE - 1] = (uint8_t) tail;

	// Final mixing; the first two state words are the result.
	yhash_end(block, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9, &s10,
	          &s11);

	*hash1 = s0;
	*hash2 = s1;
}
uint64_t
yhash64(const void* message, size_t length, uint64_t seed)
{
	// Both halves of the 128-bit hash are seeded with the same value; only
	// the first half is returned.
	uint64_t first = seed;
	uint64_t second = seed;

	yhash128(message, length, &first, &second);

	return first;
}
uint32_t
yhash32(const void* message, size_t length, uint32_t seed)
{
	// Widen the seed and use it for both halves of the 128-bit hash.
	uint64_t halves[2] = { seed, seed };

	yhash128(message, length, &halves[0], &halves[1]);

	// The result is the low 32 bits of the first half.
	return (uint32_t) halves[0];
}
YHashState
yhash_init(uint64_t seed1, uint64_t seed2)
{
	yhash* state = ycalloc(1, sizeof(yhash));

	// Allocation failure is reported to the caller as NULL.
	if (state == NULL)
	{
		return NULL;
	}

	// The seeds live in the first two state words until enough input has
	// arrived to start mixing.
	state->state[0] = seed1;
	state->state[1] = seed2;

	// Nothing consumed and nothing buffered yet.
	state->remainder = 0;
	state->length = 0;

	return state;
}
// Feeds len more bytes of msg into the incremental hash state h. Input is
// buffered in h->data until at least YHASH_BUFFER_SIZE bytes are available;
// from then on whole blocks are mixed into h->state and only the leftover
// bytes stay buffered.
void
yhash_add(YHashState h, const void* msg, size_t len)
{
yc_assert(h, YC_ASSERT_HASH_STATE_NULL);
yc_assert(msg, YC_ASSERT_MSG_NULL);
uint64_t h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
// Bytes available once the new fragment is appended to the buffered ones.
size_t newLength = len + h->remainder;
uint8_t remainder;
// Read cursor viewed as bytes, 64-bit words, and as an integer for the
// alignment test.
union
{
const uint8_t* p8;
uint64_t* p64;
size_t i;
} u;
const uint64_t* end;
// Is this message fragment too short? If it is, stuff it away.
if (newLength < YHASH_BUFFER_SIZE)
{
memcpy(&((uint8_t*) h->data)[h->remainder], msg, len);
h->length = len + h->length;
h->remainder = (uint8_t) newLength;
return;
}
// Init the variables.
// Until YHASH_BUFFER_SIZE bytes have been seen, h->state only holds the two
// seeds, so the twelve working variables are built from them.
if (h->length < YHASH_BUFFER_SIZE)
{
h0 = h3 = h6 = h9 = h->state[0];
h1 = h4 = h7 = h10 = h->state[1];
h2 = h5 = h8 = h11 = YHASH_CONST;
}
else
{
h0 = h->state[0];
h1 = h->state[1];
h2 = h->state[2];
h3 = h->state[3];
h4 = h->state[4];
h5 = h->state[5];
h6 = h->state[6];
h7 = h->state[7];
h8 = h->state[8];
h9 = h->state[9];
h10 = h->state[10];
h11 = h->state[11];
}
h->length = len + h->length;
// if we've got anything stuffed away, use it now.
if (h->remainder)
{
// Number of bytes of msg needed to fill the buffer completely.
uint8_t prefix = YHASH_BUFFER_SIZE - h->remainder;
memcpy(&(((uint8_t*) h->data)[h->remainder]), msg, prefix);
u.p64 = h->data;
// The full buffer is two blocks; mix both.
yhash_mix(u.p64, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10,
&h11);
yhash_mix(&u.p64[YHASH_NUM_VARS], &h0, &h1, &h2, &h3, &h4, &h5, &h6,
&h7, &h8, &h9, &h10, &h11);
// Continue with the part of msg that was not copied into the buffer.
u.p8 = ((const uint8_t*) msg) + prefix;
len -= prefix;
}
else
{
u.p8 = (const uint8_t*) msg;
}
// Handle all whole blocks of YHASH_BLOCK_SIZE bytes.
end = u.p64 + (len / YHASH_BLOCK_SIZE) * YHASH_NUM_VARS;
// Bytes left over after the last whole block, computed from the distance
// between the cursor and end.
unsigned long long endv = (unsigned long long) (const uint8_t*) end;
remainder =
(uint8_t)(len - (endv - ((unsigned long long) (const uint8_t*) u.p8)));
if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
{
while (u.p64 < end)
{
yhash_mix(u.p64, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9,
&h10, &h11);
u.p64 += YHASH_NUM_VARS;
}
}
else
{
// Misaligned input: copy each block into h->data (whose contents have
// already been consumed above) and mix from there.
while (u.p64 < end)
{
memcpy(h->data, u.p8, YHASH_BLOCK_SIZE);
yhash_mix(h->data, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9,
&h10, &h11);
u.p64 += YHASH_NUM_VARS;
}
}
// Stuff away the last few bytes.
h->remainder = remainder;
memcpy(h->data, end, remainder);
// Stuff away the variables.
h->state[0] = h0;
h->state[1] = h1;
h->state[2] = h2;
h->state[3] = h3;
h->state[4] = h4;
h->state[5] = h5;
h->state[6] = h6;
h->state[7] = h7;
h->state[8] = h8;
h->state[9] = h9;
h->state[10] = h10;
h->state[11] = h11;
}
// Computes the 128-bit hash of everything added to h so far and stores the
// two halves in *hash1 and *hash2. The values previously held in *hash1 and
// *hash2 are not used; the seeds come from h.
// NOTE(review): hash1 and hash2 are dereferenced without a NULL assert.
void
yhash_hash(const YHashState h, uint64_t* hash1, uint64_t* hash2)
{
yc_assert(h, YC_ASSERT_HASH_STATE_NULL);
// Do the short if we are small enough.
// In this case nothing has been mixed yet: h->state[0..1] are still the
// seeds and h->data holds the whole message.
if (h->length < YHASH_BUFFER_SIZE)
{
*hash1 = h->state[0];
*hash2 = h->state[1];
yhash_short(h->data, h->length, hash1, hash2);
return;
}
uint64_t* data = (uint64_t*) h->data;
uint8_t remainder = h->remainder;
// Work on copies so h->state itself is left untouched.
uint64_t h0 = h->state[0];
uint64_t h1 = h->state[1];
uint64_t h2 = h->state[2];
uint64_t h3 = h->state[3];
uint64_t h4 = h->state[4];
uint64_t h5 = h->state[5];
uint64_t h6 = h->state[6];
uint64_t h7 = h->state[7];
uint64_t h8 = h->state[8];
uint64_t h9 = h->state[9];
uint64_t h10 = h->state[10];
uint64_t h11 = h->state[11];
if (remainder >= YHASH_BLOCK_SIZE)
{
// h->data can contain two blocks; handle any whole first block.
yhash_mix(data, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10,
&h11);
data += YHASH_NUM_VARS;
remainder -= YHASH_BLOCK_SIZE;
}
// Mix in the last partial block, and the length mod YHASH_BLOCK_SIZE.
// Note that the padding is written into h->data itself, even though h is
// declared const here.
memset(&((uint8_t*) data)[remainder], 0, (YHASH_BLOCK_SIZE - remainder));
((uint8_t*) data)[YHASH_BLOCK_SIZE - 1] = remainder;
// Do some final mixing.
yhash_end(data, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10,
&h11);
*hash1 = h0;
*hash2 = h1;
}
// Releases a hash state. h must not be NULL (asserted).
// NOTE(review): yhash_init() allocates the state with ycalloc(); confirm
// that memory obtained from ycalloc() may be released with plain free().
void
yhash_free(YHashState h)
{
yc_assert(h, YC_ASSERT_HASH_STATE_NULL);
free(h);
}

@ -0,0 +1,114 @@
/*
* ***** BEGIN LICENSE BLOCK *****
*
* Copyright 2017-2020 Yzena Tech
*
* Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
* License" or "YNL"). You may not use this file except in compliance with the
* Yzena Network License.
*
* You may obtain a copy of the Yzena Network License at
*
* https://yzena.com/yzena-network-license/
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Yzena Network License is distributed under the
* following disclaimer:
*
* As far as the law allows, this software comes as is, without any
* warranty or condition, and no contributor will be liable to anyone for
* any damages related to this software or this license, under any kind of
* legal claim.
*
* ****** END LICENSE BLOCK ******
*
* *****************************************************************
*
* ******* BEGIN FILE DESCRIPTION *******
*
* Non-public header file for the general hash function that Yc provides. The
* hash function is an almost exact reproduction of Bob Jenkins' SpookyHash
* (http://burtleburtle.net/bob/hash/spooky.html), which is in the public
* domain.
*
* ******** END FILE DESCRIPTION ********
*/
#ifndef YC_HASH_PRIVATE_H
#define YC_HASH_PRIVATE_H
/* For C++ compatibility */
#ifdef __cplusplus
extern "C" {
#endif
//! @cond INTERNAL
#include <yc/hash.h>
#include <yc/yc.h>
/**
* @file src/hash/hash.h
*/
/**
* @defgroup hash_internal hash_internal
* Internal functions and data structures for working with hashes.
* @{
*/
/**
* @def YHASH_NUM_VARS
* Number of uint64_t's in the internal state.
*/
#define YHASH_NUM_VARS (12)
/**
* @def YHASH_BLOCK_SIZE
* Size of the internal state, and of one input block, in bytes
* (YHASH_NUM_VARS 8-byte words).
*/
#define YHASH_BLOCK_SIZE (YHASH_NUM_VARS * 8)
/**
* @def YHASH_BUFFER_SIZE
* Size of buffer of unhashed data, in bytes (two blocks).
*/
#define YHASH_BUFFER_SIZE (2 * YHASH_BLOCK_SIZE)
/**
* @def ALLOW_UNALIGNED_READS
* Whether unaligned reads are allowed. When this is zero, hash.c copies input
* that is not 8-byte aligned into an aligned buffer before reading it as
* 64-bit words.
*/
#define ALLOW_UNALIGNED_READS (0)
/**
* A data structure that allows a
* user to hash a message in pieces.
*/
typedef struct yhash
{
/// Unhashed data, for partial messages (room for two blocks).
uint64_t data[2 * YHASH_NUM_VARS];
/// Internal state of the hash. Until mixing starts, only the first two
/// entries are meaningful; they hold the seeds.
uint64_t state[YHASH_NUM_VARS];
/// Total length of the input so far.
size_t length;
/// Length, in bytes, of unhashed data stashed in data.
uint8_t remainder;
} yhash;
/**
* @}
*/
//! @endcond INTERNAL
#ifdef __cplusplus
}
#endif
#endif // YC_HASH_PRIVATE_H

@ -0,0 +1,37 @@
# ***** BEGIN LICENSE BLOCK *****
#
# Copyright 2017-2020 Yzena Tech
#
# Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
# License" or "YNL"). You may not use this file except in compliance with the
# Yzena Network License.
#
# You may obtain a copy of the Yzena Network License at
#
# https://yzena.com/yzena-network-license/
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Yzena Network License is distributed under the
# following disclaimer:
#
# As far as the law allows, this software comes as is, without any
# warranty or condition, and no contributor will be liable to anyone for
# any damages related to this software or this license, under any kind of
# legal claim.
#
# ****** END LICENSE BLOCK ******
# Helper code shared by the hash tests, built as its own static library.
# NOTE(review): create_static_library() and create_test() are project helpers
# defined elsewhere; confirm the meaning of the "NO" argument there.
set(HASH_TESTS_SRC "hash_tests.c")
set(HASH_TESTS_STATIC hash_tests)
create_static_library("${HASH_TESTS_STATIC}" "${HASH_TESTS_STATIC}"
"${HASH_TESTS_SRC}" "NO" "${YC_STATIC}")
# One test per source file; each is given the Yc library and the shared
# helper library.
create_test(hash_results "${YC_STATIC}" "${HASH_TESTS_STATIC}")
create_test(hash_alignment "${YC_STATIC}" "${HASH_TESTS_STATIC}")
# hash_deltas is only created when YC_ENABLE_EXTRA_LONG_TESTS is set.
if("${YC_ENABLE_EXTRA_LONG_TESTS}")
create_test(hash_deltas "${YC_STATIC}" "${HASH_TESTS_STATIC}")
endif()
create_test(hash_pieces "${YC_STATIC}" "${HASH_TESTS_STATIC}")

@ -0,0 +1,78 @@
/**
* ***** BEGIN LICENSE BLOCK *****
*
* Copyright 2017-2020 Yzena Tech
*
* Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
* License" or "YNL"). You may not use this file except in compliance with the
* Yzena Network License.
*
* You may obtain a copy of the Yzena Network License at
*
* https://yzena.com/yzena-network-license/
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Yzena Network License is distributed under the
* following disclaimer:
*
* As far as the law allows, this software comes as is, without any
* warranty or condition, and no contributor will be liable to anyone for
* any damages related to this software or this license, under any kind of
* legal claim.
*
* ****** END LICENSE BLOCK ******
*
* *****************************************************************
*
* ******* BEGIN FILE DESCRIPTION *******
*
* Part of test code copied from SpookyHash
* (http://burtleburtle.net/bob/hash/spooky.html).
*
* ******** END FILE DESCRIPTION ********
*/
#include <yc/hash.h>
#include <yc/yc.h>
#include "hash_tests.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#define BUFSIZE (1024)
static void
TestAlignment(void)
{
	char buf[BUFSIZE];
	uint64_t hash[8];

	printf("\ntesting alignment ...\n");

	// For every message length, hash the same byte sequence placed at eight
	// consecutive starting offsets; all eight hashes must agree.
	for (int len = 0; len < BUFSIZE - 16; ++len)
	{
		for (int offset = 0; offset < 8; ++offset)
		{
			// The message sits between two guard bytes that differ per
			// offset and must not influence the hash.
			char* guard = buf + offset;
			char* payload = guard + 1;

			guard[0] = (char) (len + offset);
			for (int n = 0; n < len; ++n)
			{
				payload[n] = (char) (n + 1);
			}
			payload[len] = (char) (len + offset);

			hash[offset] = yhash64((const void*) payload, (size_t) len, 0);
		}

		for (int offset = 1; offset < 8; ++offset)
		{
			if (hash[offset] != hash[0])
			{
				printf("alignment problems: %d %d\n", len, offset);
			}
		}
	}
}
int
main(int argc, const char** argv)
{
	// The command line is not used by this test.
	YUNUSED(argc);
	YUNUSED(argv);

	TestAlignment();

	return 0;
}

@ -0,0 +1,136 @@
/**
* ***** BEGIN LICENSE BLOCK *****
*
* Copyright 2017-2020 Yzena Tech
*
* Licensed under the Yzena Network License, Version 0.1 (the "Yzena Network
* License" or "YNL"). You may not use this file except in compliance with the
* Yzena Network License.
*
* You may obtain a copy of the Yzena Network License at
*
* https://yzena.com/yzena-network-license/
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Yzena Network License is distributed under the
* following disclaimer:
*
* As far as the law allows, this software comes as is, without any
* warranty or condition, and no contributor will be liable to anyone for
* any damages related to this software or this license, under any kind of
* legal claim.
*
* ****** END LICENSE BLOCK ******
*
* *****************************************************************
*
* ******* BEGIN FILE DESCRIPTION *******
*
* Part of test code copied from SpookyHash
* (http://burtleburtle.net/bob/hash/spooky.html).
*
* ******** END FILE DESCRIPTION ********
*/
#include <yc/hash.h>
#include <yc/yc.h>
#include "hash_tests.h"
#include <stdio.h>
#include <string.h>
// Test that all deltas of one or two input bits affect all output bits.
#define BUFSIZE (256)
#define TRIES (50)
#define MEASURES (6)
static void
TestDeltas(int seed)
{
printf("\nall 1 or 2 bit input deltas get %d tries to flip every output "
"bit ...\n",
TRIES);
Random random = yrand_init((uint64_t) seed);
// For messages 0..BUFSIZE - 1 bytes.
for (int h = 0; h < BUFSIZE; ++h)
{
int maxk = 0;
// First bit to set.
for (int i = 0; i < h * 8; ++i)
{
// Second bit to set, or don't have a second bit.
for (int j = 0; j <= i; ++j)
{
uint64_t measure[MEASURES][2];
uint64_t counter[MEASURES][2];
for (int l = 0; l < 2; ++l)
{
for (int m = 0; m < MEASURES; ++m) counter[m][l] = 0;
}
// try to hit every output bit TRIES times
int k;
for (k = 0; k < TRIES; ++k)
{
uint8_t buf1[BUFSIZE];
uint8_t buf2[BUFSIZE];
int done = 1;
for (int l = 0; l < h; ++l)
{
buf1[l] = buf2[l] = (uint8_t) yrand(&random);
}
buf1[i / 8] ^= (1 << (i % 8));
if (j != i) buf1[j / 8] ^= (1 << (j % 8));
yhash128(buf1, (size_t) h, &measure[0][0], &measure[0][1]);
yhash128(buf2, (size_t) h, &measure[1][0], &measure[1][1]);
for (int l = 0; l < 2; ++l)
{
measure[2][l] = measure[0][l] ^ measure[1][l];
measure[3][l] = ~(measure[0][l] ^ measure[1][l]);
measure[4][l] = measure[0][l] - measure[1][l];
measure[4][l] ^= (measure[4][l] >> 1);
measure[5][l] = measure[0][l] + measure[1][l];
measure[5][l] ^= (measure[4][l] >> 1);
}
for (int l = 0; l < 2; ++l)
{
for (int m = 0; m < MEASURES; ++m)
{
counter[m][l] |= measure[m][l];
if (~counter[m][l]) done = 0;
}
}
if (done) break;
}
if (k == TRIES)
printf("failed %d %d %d\n", h, i, j);
else if (k > maxk)
{
maxk = k;
}
}
}
printf("passed for buffer size %d max %d\n", h, maxk);