97 lines
5.7 KiB
C
97 lines
5.7 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ This program is free software; you can redistribute it and/or modify │
|
|
│ it under the terms of the GNU General Public License as published by │
|
|
│ the Free Software Foundation; version 2 of the License. │
|
|
│ │
|
|
│ This program is distributed in the hope that it will be useful, but │
|
|
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
|
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
|
│ General Public License for more details. │
|
|
│ │
|
|
│ You should have received a copy of the GNU General Public License │
|
|
│ along with this program; if not, write to the Free Software │
|
|
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
|
│ 02110-1301 USA │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/str/internal.h"
|
|
|
|
/**
 * Computes CRC-32C (Castagnoli) checksum using the SSE4.2 crc32 opcode.
 *
 * The benchmark table in this file shows roughly 10GBps on large inputs.
 *
 * Checksums may be computed incrementally: passing the result of a
 * previous call as `init` yields the checksum of the concatenated data.
 *
 * @param init is checksum of any preceding data, or 0 to start fresh
 * @param data points to the bytes being hashed
 * @param size is the number of bytes at data
 * @return updated checksum
 * @note needs Nehalem+ c. 2008 or Bulldozer+ c. 2011
 */
uint32_t crc32c$sse42(uint32_t init, const void *data, size_t size) {
  /* may_alias exempts the word loads below from type-based alias
     analysis, since the caller's buffer can be of any type */
  typedef uint64_t __attribute__((__may_alias__)) crc32c_word_t;
  const unsigned char *p = (const unsigned char *)data;
  const unsigned char *pe = p + size;
  uint32_t h = init ^ 0xffffffff;
  /* 16 + 8: after consuming up to seven bytes to reach 8-byte
     alignment, at least one full 16-byte stride is left over */
  if (size >= 16 + 8) {
    while ((uintptr_t)p & 7) {
      __asm__("crc32b\t%1,%0" : "+r"(h) : "rm"(*p));
      ++p;
    }
    /* crc32q wants a 64-bit destination register */
    uint64_t hl = h;
    /* inclusive bound so a tail of exactly 16 bytes also takes the
       fast path rather than falling through to the byte loop */
    while (pe - p >= 16) {
      __asm__("crc32q\t%1,%0" : "+r"(hl) : "rm"(*(const crc32c_word_t *)p));
      p += 8;
      __asm__("crc32q\t%1,%0" : "+r"(hl) : "rm"(*(const crc32c_word_t *)p));
      p += 8;
    }
    h = (uint32_t)hl;
  }
  /* short inputs and whatever tail remains go one byte at a time */
  while (p < pe) {
    __asm__("crc32b\t%1,%0" : "+r"(h) : "rm"(*p));
    ++p;
  }
  return h ^ 0xffffffff;
}
|
|
|
|
/*
|
|
bench_crc32c$sse42 for #c per n where c ≈ 0.293ns
|
|
N x1 x8 x64 mBps
|
|
------------------------------------------------------------
|
|
1 877.000 43.375 40.359 81
|
|
1 45.000 39.625 40.484 80
|
|
2 34.500 27.562 20.461 159
|
|
3 23.000 16.708 14.245 228
|
|
4 18.250 13.094 11.449 284
|
|
7 10.429 8.339 8.185 397
|
|
8 42.125 8.734 6.850 475
|
|
15 9.400 5.375 4.884 665
|
|
16 7.312 5.070 4.882 666
|
|
31 5.258 2.923 2.680 1213
|
|
32 3.969 2.676 2.562 1269
|
|
63 3.095 1.581 1.428 2276
|
|
64 2.234 1.623 1.478 2199
|
|
127 1.205 0.901 0.900 3610
|
|
128 1.164 0.960 0.915 3552
|
|
255 0.922 0.651 0.618 5260
|
|
256 0.715 0.650 0.609 5341
|
|
511 0.558 0.482 0.477 6819
|
|
512 0.529 0.475 0.469 6932
|
|
1023 0.425 0.400 0.396 8204
|
|
1024 0.417 0.392 0.388 8383
|
|
2047 0.367 0.355 0.353 9199
|
|
2048 0.374 0.366 0.364 8929
|
|
4095 0.351 0.338 0.337 9644
|
|
4096 0.353 0.338 0.338 9624
|
|
8191 0.335 0.338 0.337 9641
|
|
8192 0.335 0.329 0.329 9870
|
|
16383 0.336 0.325 0.325 10011
|
|
16384 0.336 0.326 0.375 8666
|
|
32767 0.329 0.323 0.323 10070
|
|
32768 0.327 0.324 0.323 10062
|
|
65535 0.322 0.322 0.322 10103
|
|
65536 0.321 0.322 0.322 10102
|
|
131071 0.322 0.321 0.321 10125
|
|
131072 0.321 0.321 0.321 10124
|
|
262143 0.322 0.321 0.335 9699
|
|
262144 0.321 0.321 0.321 10134
|
|
524287 0.321 0.321 0.499 6516
|
|
524288 0.321 0.321 0.339 9575
|
|
1048575 0.322 0.321 0.322 10095
|
|
1048576 0.320 1.001 0.323 10048
|
|
2097151 0.325 0.321 0.322 10086
|
|
2097152 0.330 0.320 0.323 10076
|
|
4194303 0.331 0.322 0.321 10128
|
|
4194304 0.332 0.321 0.325 10004
|
|
8388607 0.334 0.332 0.331 9829
|
|
8388608 0.334 0.329 0.327 9934
|
|
*/
|