Radish alpha
H
rad:z3QDZAW2FAfuLvihrhiyDC9fAD8G9
HardenedBSD Package Manager
Radicle
Git
libucl: attempt to fix asan issue raised on debian CI
Baptiste Daroussin committed 3 years ago
commit 45d8488676eb030a6fca0c25244c56c5f2e3df54
parent 98597ca
1 file changed +74 -69
modified external/libucl/src/mum.h
@@ -1,4 +1,5 @@
-
/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
+
/* Copyright (c) 2016, 2017, 2018
+
   Vladimir Makarov <vmakarov@gcc.gnu.org>

   Permission is hereby granted, free of charge, to any person
   obtaining a copy of this software and associated documentation
@@ -35,7 +36,7 @@
   Random and Pseudorandom Number Generators for Cryptographic
   Applications (version 2.2.1) with 1000 bitstreams each containing
   1M bits.  MUM hashing is also faster Spooky64 and City64 on small
-
   strings (at least up to 512-bit) on Haswell and Power7.  The MUM bulk
+
   strings (at least upto 512-bit) on Haswell and Power7.  The MUM bulk
   speed (speed on very long data) is bigger than Spooky and City on
   Power7.  On Haswell the bulk speed is bigger than Spooky one and
   close to City speed.  */
@@ -56,6 +57,16 @@ typedef unsigned __int64 uint64_t;
#include <stdint.h>
#endif

+
#ifdef __GNUC__
+
#define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
+
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+
#else
+
#define _MUM_ATTRIBUTE_UNUSED
+
#define _MUM_OPTIMIZE(opts)
+
#define _MUM_TARGET(opts)
+
#endif
+

/* Macro saying to use 128-bit integers implemented by GCC for some
   targets.  */
#ifndef _MUM_USE_INT128
@@ -69,21 +80,6 @@ typedef unsigned __int64 uint64_t;
#endif
#endif

-
#if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
-
#define _MUM_FRESH_GCC
-
#endif
-

-
#if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
-
#define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
-
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
-
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
-
#else
-
#define _MUM_ATTRIBUTE_UNUSED
-
#define _MUM_OPTIMIZE(opts)
-
#define _MUM_TARGET(opts)
-
#endif
-

-

/* Here are different primes randomly generated with the equal
   probability of their bit values.  They are used to randomize input
   values.  */
@@ -94,7 +90,7 @@ static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
-

+
  
static uint64_t _mum_primes [] = {
  0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
  0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
@@ -113,7 +109,7 @@ _mum (uint64_t v, uint64_t p) {
     multiplication.  If we use a generic code we actually call a
     function doing 128x128->128 bit multiplication.  The function is
     very slow.  */
-
  lo = v * p, hi;
+
  lo = v * p;
  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
#else
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
@@ -130,7 +126,7 @@ _mum (uint64_t v, uint64_t p) {
  uint64_t rm_1 = hp * lv;
  uint64_t rl =  lv * lp;
  uint64_t t, carry = 0;
-

+
  
  /* We could ignore a carry bit here if we did not care about the
     same hash for 32-bit and 64-bit targets.  */
  t = rl + (rm_0 << 32);
@@ -172,7 +168,7 @@ _mum_le (uint64_t v) {
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap64 (v);
#else
-
#error "Unknown endianness"
+
#error "Unknown endianess"
#endif
}

@@ -183,7 +179,18 @@ _mum_le32 (uint32_t v) {
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return _mum_bswap32 (v);
#else
-
#error "Unknown endianness"
+
#error "Unknown endianess"
+
#endif
+
}
+

+
static inline uint64_t
+
_mum_le16 (uint16_t v) {
+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+
  return v;
+
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+
  return (v >> 8) | ((v & 0xff) << 8);
+
#else
+
#error "Unknown endianess"
#endif
}

@@ -200,8 +207,10 @@ _mum_le32 (uint32_t v) {
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
-
#else
+
#elif defined (MUM_V1) || defined (MUM_V2)
#define _MUM_UNROLL_FACTOR_POWER 2
+
#else
+
#define _MUM_UNROLL_FACTOR_POWER 3
#endif
#endif

@@ -213,21 +222,35 @@ _mum_le32 (uint32_t v) {

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)

+
/* Rotate V left by SH.  */
+
static inline uint64_t _mum_rotl (uint64_t v, int sh) {
+
  return v << sh | v >> (64 - sh);
+
}
+

static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
  uint64_t result = start;
  const unsigned char *str = (const unsigned char *) key;
  uint64_t u64;
-
  int i;
+
  size_t i;
  size_t n;
-

+
  
+
#ifndef MUM_V2
  result = _mum (result, _mum_block_start_prime);
+
#endif
  while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
    /* This loop could be vectorized when we have vector insns for
-
       64x64->128-bit multiplication.  AVX2 currently only have a
-
       vector insn for 4 32x32->64-bit multiplication.  */
+
       64x64->128-bit multiplication.  AVX2 currently only have vector
+
       insns for 4 32x32->64-bit multiplication and for 1
+
       64x64->128-bit multiplication (pclmulqdq).  */
+
#if defined (MUM_V1) || defined (MUM_V2)
    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+
#else
+
    for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2)
+
      result ^= _mum (_mum_le (((uint64_t *) str)[i]) ^ _mum_primes[i],
+
		      _mum_le (((uint64_t *) str)[i + 1]) ^ _mum_primes[i + 1]);
+
#endif
    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
    /* We will use the same prime numbers on the next iterations --
@@ -235,20 +258,18 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
    result = _mum (result, _mum_unroll_prime);
  }
  n = len / sizeof (uint64_t);
-
  for (i = 0; i < (int)n; i++)
+
  for (i = 0; i < n; i++)
    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
  switch (len) {
  case 7:
    u64 = _mum_le32 (*(uint32_t *) str);
-
    u64 |= (uint64_t) str[4] << 32;
-
    u64 |= (uint64_t) str[5] << 40;
+
    u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
    u64 |= (uint64_t) str[6] << 48;
    return result ^ _mum (u64, _mum_tail_prime);
  case 6:
    u64 = _mum_le32 (*(uint32_t *) str);
-
    u64 |= (uint64_t) str[4] << 32;
-
    u64 |= (uint64_t) str[5] << 40;
+
    u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
    return result ^ _mum (u64, _mum_tail_prime);
  case 5:
    u64 = _mum_le32 (*(uint32_t *) str);
@@ -258,13 +279,11 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
    u64 = _mum_le32 (*(uint32_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 3:
-
    u64 = str[0];
-
    u64 |= (uint64_t) str[1] << 8;
+
    u64 = _mum_le16 (*(uint16_t *) str);
    u64 |= (uint64_t) str[2] << 16;
    return result ^ _mum (u64, _mum_tail_prime);
  case 2:
-
    u64 = str[0];
-
    u64 |= (uint64_t) str[1] << 8;
+
    u64 = _mum_le16 (*(uint16_t *) str);
    return result ^ _mum (u64, _mum_tail_prime);
  case 1:
    u64 = str[0];
@@ -276,28 +295,23 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
/* Final randomization of H.  */
static inline uint64_t
_mum_final (uint64_t h) {
+
#if defined (MUM_V1)
  h ^= _mum (h, _mum_finish_prime1);
  h ^= _mum (h, _mum_finish_prime2);
+
#elif defined (MUM_V2)
+
  h ^= _mum_rotl (h, 33);
+
  h ^= _mum (h, _mum_finish_prime1);
+
#else
+
  h = _mum (h, h);
+
#endif
  return h;
}

-
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
-

-
/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
-
   it is possible.  Although on modern Intel processors MULQ takes
-
   3-cycles vs. 4 for MULX, MULX permits more freedom in insn
-
   scheduling as it uses less fixed registers.  */
-
static inline uint64_t _MUM_TARGET("arch=haswell")
-
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
-
  return _mum_final (_mum_hash_aligned (seed + len, key, len));
-
}
-
#endif
-

#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
    || defined(__s390__) || defined(__m32c__) || defined(cris)     \
    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
-
    || defined(__aarch64__)
+
    || defined(__aarch64__) || defined(_M_AMD64) || defined(_M_IX86)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
@@ -324,9 +338,9 @@ _mum_hash_default (const void *key, size_t len, uint64_t seed) {
  const unsigned char *str = (const unsigned char *) key;
  size_t block_len;
  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
-

+
  
  result = seed + len;
-
  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+
  if (((size_t) str & 0x7) == 0)
    result = _mum_hash_aligned (result, key, len);
  else {
    while (len != 0) {
@@ -344,7 +358,7 @@ static inline uint64_t
_mum_next_factor (void) {
  uint64_t start = 0;
  int i;
-

+
  
  for (i = 0; i < 8; i++)
    start = (start << 8) | rand() % 256;
  return start;
@@ -355,7 +369,7 @@ _mum_next_factor (void) {
/* Set random multiplicators depending on SEED.  */
static inline void
mum_hash_randomize (uint64_t seed) {
-
  int i;
+
  size_t i;

  srand (seed);
  _mum_hash_step_prime = _mum_next_factor ();
@@ -365,7 +379,7 @@ mum_hash_randomize (uint64_t seed) {
  _mum_block_start_prime = _mum_next_factor ();
  _mum_unroll_prime = _mum_next_factor ();
  _mum_tail_prime = _mum_next_factor ();
-
  for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
+
  for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
    _mum_primes[i] = _mum_next_factor ();
}

@@ -377,8 +391,7 @@ mum_hash_init (uint64_t seed) {

/* Process data KEY with the state H and return the updated state.  */
static inline uint64_t
-
mum_hash_step (uint64_t h, uint64_t key)
-
{
+
mum_hash_step (uint64_t h, uint64_t key) {
  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
}

@@ -396,22 +409,14 @@ mum_hash64 (uint64_t key, uint64_t seed) {
}

/* Hash data KEY of length LEN and SEED.  The hash depends on the
-
   target endianness and the unroll factor.  */
+
   target endianess and the unroll factor.  */
static inline uint64_t
mum_hash (const void *key, size_t len, uint64_t seed) {
-
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
-
  static int avx2_support = 0;
-

-
  if (avx2_support > 0)
-
    return _mum_hash_avx2 (key, len, seed);
-
  else if (! avx2_support) {
-
    __builtin_cpu_init ();
-
    avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
-
    if (avx2_support > 0)
-
      return _mum_hash_avx2 (key, len, seed);
-
  }
-
#endif
+
#if _MUM_UNALIGNED_ACCESS
+
  return _mum_final (_mum_hash_aligned (seed + len, key, len));
+
#else
  return _mum_hash_default (key, len, seed);
+
#endif
}

#endif