公众号
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
本周内容不多
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-07-19 第211期
boost 1.83会有个 `boost::concurrent_flat_map`, 这篇文章带你了解设计思路。还是开地址法,还并发,还快
测了一下是吊锤tbb的。没测和folly::ConcurrentHashMap的比较
其实这个文章可以展开讲讲
浏览器安卓端崩溃最终怀疑是CPU的问题
总之就是利用CPU流水线先走一个快速路径,再检查条件,也就是所谓的分支预测
你可能想问了,我能不能让CPU别预测,别显得你多聪明了老老实实算就完了
__builtin_unpredictable() https://clang.llvm.org/docs/LanguageExtensions.html#builtin-unpredictable
基于async_simple的。有点意思
了解一下PGO流程
看个热闹
手把手教你写个grpc server
SIMD时间,字符转数字
常规, 一个一个比
/* Scalar path: map one base32hex character `ch` to its 5-bit value `d`,
 * testing one character range at a time. */
if (ch >= '0' && ch <= '9')
d = ch - '0'; /* digits map to 0..9 */
else if (ch >= 'A' && ch <= 'V')
d = ch - 'A' + 10; /* upper-case letters map to 10..31 */
else if (ch >= 'a' && ch <= 'v')
d = ch - 'a' + 10; /* lower-case letters map to the same 10..31 */
else
return -1; /* any other character is rejected */
进化版本,唉我会打表了
/**
 * Decode base32hex text (digits '0'-'9', letters 'A'-'V' or 'a'-'v') using a
 * 256-entry lookup table.
 *
 * Input is consumed in groups of 8 characters; each group yields 5 output
 * bytes. Decoding stops at the first byte that is not part of the alphabet
 * (for example the NUL terminator or '='). A final partial group is padded
 * with zero bits and still emitted as 5 bytes.
 *
 * @param dst Output buffer. Exactly 5 bytes are written per group, including
 *            the final (possibly empty) group, so it must hold
 *            5 * (valid_chars / 8 + 1) bytes.
 * @param src Input bytes; must contain a byte outside the alphabet, which
 *            acts as the terminator.
 * @return Number of input bytes read, INCLUDING the terminating byte.
 */
size_t base32hex_simple(uint8_t *dst, const uint8_t *src) {
  /* Maps every byte value to its 5-bit digit; 32 marks "not in alphabet". */
  static const uint8_t table[256] = {
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  32, 32, 32, 32, 32, 32,
      32, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
      25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  };
  bool valid = true;
  const uint8_t *srcinit = src;
  do {
    /* Accumulate up to 8 digits (40 bits) in the low bits of r. */
    uint64_t r = 0;
    for (size_t i = 0; i < 8; i++) {
      uint8_t x = table[*src];
      src++;
      if (x > 31) {
        /* Left-align the partial group so the bits already gathered end up
         * where they would sit in a full 40-bit group. The shift is at most
         * 40, which is well defined for a 64-bit operand. */
        r <<= (5 * (8 - i));
        valid = false;
        break;
      }
      r <<= 5;
      r |= x;
    }
    /* Emit the 40 bits most-significant byte first. Explicit shifts avoid
     * the x86-only byte-swap intrinsic, do not depend on host endianness or
     * on the width of `unsigned long`, and write exactly the 5 decoded bytes
     * instead of 8. */
    for (size_t k = 0; k < 5; k++) {
      dst[k] = (uint8_t)(r >> (8 * (4 - k)));
    }
    dst += 5;
  } while (valid);
  return (size_t)(src - srcinit);
}
超神版本 SIMD直接贴代码
/*
 * Decode base32hex text with SSE intrinsics, handling 16 input characters
 * (10 output bytes) per loop iteration.
 *
 * dst: output buffer. Each iteration stores 16 bytes but advances by only 10,
 *      so the buffer needs slack past the last decoded group.
 * src: input bytes. Every iteration loads 16 bytes regardless of where the
 *      text ends, so the input must be readable that far.
 *
 * Returns the number of input bytes accepted before the first byte flagged
 * by the range check; the flagged byte itself is not counted.
 *
 * NOTE(review): relies on `zero_masks`, which is defined outside this
 * excerpt. From the way it is indexed it is presumably 16 zero bytes
 * followed by 0xFF bytes -- confirm against its definition.
 */
size_t base32hex_simd(uint8_t *dst, const uint8_t *src) {
bool valid = true;
/* Per-high-nibble offsets: adding the selected offset to a byte sets bit 7
 * exactly when the byte lies outside '0'-'9', 'A'-'V' and 'a'-'v'. */
const __m128i delta_check =
_mm_setr_epi8(-16, -32, -48, 70, -65, 41, -97, 9, 0, 0, 0, 0, 0, 0, 0, 0);
/* Per-high-nibble offsets that turn a valid character into its 5-bit value:
 * -'0' for digits, -('A' - 10) for upper case, -('a' - 10) for lower case. */
const __m128i delta_rebase =
_mm_setr_epi8(0, 0, 0, -48, -55, -55, -87, -87, 0, 0, 0, 0, 0, 0, 0, 0);
const uint8_t *srcinit = src;
do {
__m128i v = _mm_loadu_si128((__m128i *)src);
/* High nibble of every byte; used as the index into both offset tables. */
__m128i hash_key = _mm_and_si128(_mm_srli_epi32(v, 4), _mm_set1_epi8(0x0F));
__m128i check = _mm_add_epi8(_mm_shuffle_epi8(delta_check, hash_key), v);
v = _mm_add_epi8(v, _mm_shuffle_epi8(delta_rebase, hash_key));
/* One bit per input byte, taken from bit 7 of `check`: set means invalid. */
unsigned int m = (unsigned)_mm_movemask_epi8(check);
if (m) {
/* Index of the first invalid byte == count of leading valid bytes. */
int length = __builtin_ctz(m);
if (length == 0) {
/* Nothing valid in this chunk: stop without storing anything. */
break;
}
src += length;
/* Clear the lanes from the first invalid byte onward so they decode as 0. */
__m128i zero_mask =
_mm_loadu_si128((__m128i *)(zero_masks + 16 - length));
v = _mm_andnot_si128(zero_mask, v);
valid = false;
} else { // common case
src += 16;
}
/* Merge each pair of adjacent 5-bit values into one 10-bit value
 * (first * 32 + second) held in a 16-bit lane. */
v = _mm_maddubs_epi16(v, _mm_set1_epi32(0x01200120));
/* Merge each pair of 10-bit values into a 20-bit field inside a 32-bit
 * lane; the two multiplier patterns place the fields at different bit
 * offsets, giving the layout sketched below. */
v = _mm_madd_epi16(
v, _mm_set_epi32(0x00010400, 0x00104000, 0x00010400, 0x00104000));
// ...00000000`0000eeee`efffffgg`ggghhhhh`00000000`aaaaabbb`bbcccccd`dddd0000
/* Fold bits from the upper part of each 64-bit half down into its low bits
 * so the decoded bits of that half become contiguous. */
v = _mm_or_si128(v, _mm_srli_epi64(v, 48));
/* Gather the 10 decoded bytes into lanes 0..9 in output order. */
v = _mm_shuffle_epi8(
v, _mm_set_epi8(0, 0, 0, 0, 0, 0, 12, 13, 8, 9, 10, 4, 5, 0, 1, 2));
/* Only 10 bytes are decoded output, but a full 16-byte store is issued; the
 * extra 6 bytes are overwritten by the next iteration or land in the
 * caller's slack space. */
_mm_storeu_si128((__m128i *)dst, v);
dst += 10;
} while (valid);
return (size_t)(src - srcinit);
}
还有SWAR版本,我直接贴仓库链接,不贴代码了 https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2023/07/20/src/base32.c
Raymond Chen的Windows时间,看不懂
如果有疑问,最好到上面链接的评论区里评论,这样方便搜索;微信公众号有点封闭,知乎会吞评论