45d8488 libucl: attempt to fix asan issue raised on debian CI — HardenedBSD-pkg

HardenedBSD Package Manager

libucl: attempt to fix asan issue raised on debian CI

Baptiste Daroussin committed 3 years ago

commit 45d8488676eb030a6fca0c25244c56c5f2e3df54
parent 98597cae6bbb2f1f81b1d634d7fa2a8a90c1d42e

1 file changed +74 -69

modified external/libucl/src/mum.h

@@ -1,4 +1,5 @@

 /* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
 /* Copyright (c) 2016, 2017, 2018
    Vladimir Makarov <vmakarov@gcc.gnu.org>
    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation

@@ -35,7 +36,7 @@

    Random and Pseudorandom Number Generators for Cryptographic
    Applications (version 2.2.1) with 1000 bitstreams each containing
 M bits.  MUM hashing is also faster Spooky64 and City64 on small
    strings (at least up to 512-bit) on Haswell and Power7.  The MUM bulk
    strings (at least upto 512-bit) on Haswell and Power7.  The MUM bulk
    speed (speed on very long data) is bigger than Spooky and City on
    Power7.  On Haswell the bulk speed is bigger than Spooky one and
    close to City speed.  */

@@ -56,6 +57,16 @@ typedef unsigned __int64 uint64_t;

 #include <stdint.h>
 #endif
 #ifdef __GNUC__
 #define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
 #define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
 #define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
 #else
 #define _MUM_ATTRIBUTE_UNUSED
 #define _MUM_OPTIMIZE(opts)
 #define _MUM_TARGET(opts)
 #endif
 /* Macro saying to use 128-bit integers implemented by GCC for some
    targets.  */
 #ifndef _MUM_USE_INT128

@@ -69,21 +80,6 @@ typedef unsigned __int64 uint64_t;

 #endif
 #endif
 #if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
 #define _MUM_FRESH_GCC
 #endif
 #if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
 #define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
 #define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
 #define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
 #else
 #define _MUM_ATTRIBUTE_UNUSED
 #define _MUM_OPTIMIZE(opts)
 #define _MUM_TARGET(opts)
 #endif
 /* Here are different primes randomly generated with the equal
    probability of their bit values.  They are used to randomize input
    values.  */

@@ -94,7 +90,7 @@ static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;

 static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
 static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
 static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
 static uint64_t _mum_primes [] = {
 X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
 X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,

@@ -113,7 +109,7 @@ _mum (uint64_t v, uint64_t p) {

      multiplication.  If we use a generic code we actually call a
      function doing 128x128->128 bit multiplication.  The function is
      very slow.  */
   lo = v * p, hi;
   lo = v * p;
   asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
 #else
   __uint128_t r = (__uint128_t) v * (__uint128_t) p;

@@ -130,7 +126,7 @@ _mum (uint64_t v, uint64_t p) {

   uint64_t rm_1 = hp * lv;
   uint64_t rl =  lv * lp;
   uint64_t t, carry = 0;
   /* We could ignore a carry bit here if we did not care about the
      same hash for 32-bit and 64-bit targets.  */
   t = rl + (rm_0 << 32);

@@ -172,7 +168,7 @@ _mum_le (uint64_t v) {

 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   return _mum_bswap64 (v);
 #else
 #error "Unknown endianness"
 #error "Unknown endianess"
 #endif
 }

@@ -183,7 +179,18 @@ _mum_le32 (uint32_t v) {

 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   return _mum_bswap32 (v);
 #else
 #error "Unknown endianness"
 #error "Unknown endianess"
 #endif
 }
 static inline uint64_t
 _mum_le16 (uint16_t v) {
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
   return v;
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   return (v >> 8) | ((v & 0xff) << 8);
 #else
 #error "Unknown endianess"
 #endif
 }

@@ -200,8 +207,10 @@ _mum_le32 (uint32_t v) {

 #define _MUM_UNROLL_FACTOR_POWER 3
 #elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
 #define _MUM_UNROLL_FACTOR_POWER 4
 #else
 #elif defined (MUM_V1) || defined (MUM_V2)
 #define _MUM_UNROLL_FACTOR_POWER 2
 #else
 #define _MUM_UNROLL_FACTOR_POWER 3
 #endif
 #endif

@@ -213,21 +222,35 @@ _mum_le32 (uint32_t v) {

 #define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
 /* Rotate V left by SH.  */
 static inline uint64_t _mum_rotl (uint64_t v, int sh) {
   return v << sh | v >> (64 - sh);
 }
 static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
 _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
   uint64_t result = start;
   const unsigned char *str = (const unsigned char *) key;
   uint64_t u64;
   int i;
   size_t i;
   size_t n;
 #ifndef MUM_V2
   result = _mum (result, _mum_block_start_prime);
 #endif
   while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
     /* This loop could be vectorized when we have vector insns for
 x64->128-bit multiplication.  AVX2 currently only have a
        vector insn for 4 32x32->64-bit multiplication.  */
 x64->128-bit multiplication.  AVX2 currently only have vector
        insns for 4 32x32->64-bit multiplication and for 1
 x64->128-bit multiplication (pclmulqdq).  */
 #if defined (MUM_V1) || defined (MUM_V2)
     for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
       result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
 #else
     for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2)
       result ^= _mum (_mum_le (((uint64_t *) str)[i]) ^ _mum_primes[i],
 		      _mum_le (((uint64_t *) str)[i + 1]) ^ _mum_primes[i + 1]);
 #endif
     len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
     str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
     /* We will use the same prime numbers on the next iterations --

@@ -235,20 +258,18 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {

     result = _mum (result, _mum_unroll_prime);
   }
   n = len / sizeof (uint64_t);
   for (i = 0; i < (int)n; i++)
   for (i = 0; i < n; i++)
     result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
   len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
   switch (len) {
   case 7:
     u64 = _mum_le32 (*(uint32_t *) str);
     u64 |= (uint64_t) str[4] << 32;
     u64 |= (uint64_t) str[5] << 40;
     u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
     u64 |= (uint64_t) str[6] << 48;
     return result ^ _mum (u64, _mum_tail_prime);
   case 6:
     u64 = _mum_le32 (*(uint32_t *) str);
     u64 |= (uint64_t) str[4] << 32;
     u64 |= (uint64_t) str[5] << 40;
     u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
     return result ^ _mum (u64, _mum_tail_prime);
   case 5:
     u64 = _mum_le32 (*(uint32_t *) str);

@@ -258,13 +279,11 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {

     u64 = _mum_le32 (*(uint32_t *) str);
     return result ^ _mum (u64, _mum_tail_prime);
   case 3:
     u64 = str[0];
     u64 |= (uint64_t) str[1] << 8;
     u64 = _mum_le16 (*(uint16_t *) str);
     u64 |= (uint64_t) str[2] << 16;
     return result ^ _mum (u64, _mum_tail_prime);
   case 2:
     u64 = str[0];
     u64 |= (uint64_t) str[1] << 8;
     u64 = _mum_le16 (*(uint16_t *) str);
     return result ^ _mum (u64, _mum_tail_prime);
   case 1:
     u64 = str[0];

@@ -276,28 +295,23 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) {

 /* Final randomization of H.  */
 static inline uint64_t
 _mum_final (uint64_t h) {
 #if defined (MUM_V1)
   h ^= _mum (h, _mum_finish_prime1);
   h ^= _mum (h, _mum_finish_prime2);
 #elif defined (MUM_V2)
   h ^= _mum_rotl (h, 33);
   h ^= _mum (h, _mum_finish_prime1);
 #else
   h = _mum (h, h);
 #endif
   return h;
 }
 #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
 /* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
    it is possible.  Although on modern Intel processors MULQ takes
 -cycles vs. 4 for MULX, MULX permits more freedom in insn
    scheduling as it uses less fixed registers.  */
 static inline uint64_t _MUM_TARGET("arch=haswell")
 _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
   return _mum_final (_mum_hash_aligned (seed + len, key, len));
 }
 #endif
 #ifndef _MUM_UNALIGNED_ACCESS
 #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
     || defined(__s390__) || defined(__m32c__) || defined(cris)     \
     || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
     || defined(__aarch64__)
     || defined(__aarch64__) || defined(_M_AMD64) || defined(_M_IX86)
 #define _MUM_UNALIGNED_ACCESS 1
 #else
 #define _MUM_UNALIGNED_ACCESS 0

@@ -324,9 +338,9 @@ _mum_hash_default (const void *key, size_t len, uint64_t seed) {

   const unsigned char *str = (const unsigned char *) key;
   size_t block_len;
   uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
   result = seed + len;
   if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
   if (((size_t) str & 0x7) == 0)
     result = _mum_hash_aligned (result, key, len);
   else {
     while (len != 0) {

@@ -344,7 +358,7 @@ static inline uint64_t

 _mum_next_factor (void) {
   uint64_t start = 0;
   int i;
   for (i = 0; i < 8; i++)
     start = (start << 8) | rand() % 256;
   return start;

@@ -355,7 +369,7 @@ _mum_next_factor (void) {

 /* Set random multiplicators depending on SEED.  */
 static inline void
 mum_hash_randomize (uint64_t seed) {
   int i;
   size_t i;
   srand (seed);
   _mum_hash_step_prime = _mum_next_factor ();

@@ -365,7 +379,7 @@ mum_hash_randomize (uint64_t seed) {

   _mum_block_start_prime = _mum_next_factor ();
   _mum_unroll_prime = _mum_next_factor ();
   _mum_tail_prime = _mum_next_factor ();
   for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
   for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
     _mum_primes[i] = _mum_next_factor ();
 }

@@ -377,8 +391,7 @@ mum_hash_init (uint64_t seed) {

 /* Process data KEY with the state H and return the updated state.  */
 static inline uint64_t
 mum_hash_step (uint64_t h, uint64_t key)
 {
 mum_hash_step (uint64_t h, uint64_t key) {
   return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
 }

@@ -396,22 +409,14 @@ mum_hash64 (uint64_t key, uint64_t seed) {

 }
 /* Hash data KEY of length LEN and SEED.  The hash depends on the
    target endianness and the unroll factor.  */
    target endianess and the unroll factor.  */
 static inline uint64_t
 mum_hash (const void *key, size_t len, uint64_t seed) {
 #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
   static int avx2_support = 0;
   if (avx2_support > 0)
     return _mum_hash_avx2 (key, len, seed);
   else if (! avx2_support) {
     __builtin_cpu_init ();
     avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
     if (avx2_support > 0)
       return _mum_hash_avx2 (key, len, seed);
   }
 #endif
 #if _MUM_UNALIGNED_ACCESS
   return _mum_final (_mum_hash_aligned (seed + len, key, len));
 #else
   return _mum_hash_default (key, len, seed);
 #endif
 }
 #endif