C++ 中文周刊 2024-06-17 第160期

周刊项目地址

公众号

点击「查看原文」跳转到 GitHub 上对应文件,链接就可以点击了

qq群 点击进入

RSS

欢迎投稿,推荐或自荐文章/软件/资源等,评论区留言

本期文章由 HNY 赞助


资讯

标准委员会动态/ide/编译器信息放在这里

编译器信息最新动态推荐关注hellogcc公众号 本周更新 2024-06-12 第258期

llvm19 支持#embed了

文章

Why do Windows functions all begin with a pointless MOV EDI, EDI instruction?

这是2011年的文章了,最近又有讨论了,简单来说就是nop,window上可以在线热补丁把这行汇编替换成jmp xx 不过这玩意也就windows 有

其实XCHG edi, edi也是一个意思

Making a bignum library for fun

实现一个大数乘法,直接看代码吧

这里当字符串处理的,难度降低了很多, 面试题属于是

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *digits;
    int size;
} BigNum;

void bignum_init(BigNum *n, const char *str) {
    n->size = strlen(str);
    n->digits = malloc(n->size * sizeof(char));
    for (int i = 0; i < n->size; i++) {
        // Store digits in reverse. Convert from ASCII.
        n->digits[i] = str[n->size - 1 - i] - '0'; 
    }
}

void bignum_free(BigNum *n) {
    free(n->digits);
    n->digits = NULL;
    n->size = 0;
}

void bignum_print(BigNum *n) {
    printf("BigNum: ");
    for (int i = n->size - 1; i >= 0; i--) {
        printf("%d", n->digits[i]);
    }
    printf("\n");
}

int bignum_compare(const BigNum *a, const BigNum *b) {
    // Returns 1 if a is greater than b, -1 if less than, and 0 if equal to.
    if (a->size != b->size) {
        return a->size > b->size ? 1 : -1;
    }
    for (int i = a->size - 1; i >= 0; i--) {
        if (a->digits[i] != b->digits[i]) {
            return a->digits[i] > b->digits[i] ? 1 : -1;
        }
    }
    return 0;
}

void bignum_add(BigNum *result, const BigNum *a, const BigNum *b) {
    int max_size = a->size > b->size ? a->size : b->size;
    result->digits = malloc((max_size + 1) * sizeof(char));
    int carry = 0;
    int i;

    for (i = 0; i < max_size || carry; i++) {
        int sum = carry + (i < a->size ? a->digits[i] : 0) + (i < b->size ? b->digits[i] : 0);
        result->digits[i] = sum % 10; // Store the last digit of the sum.
        carry = sum / 10; // Carry any overflow.
    }
    result->size = i;
}

void bignum_multiply(BigNum *result, const BigNum *a, const BigNum *b) {
    result->digits = calloc(a->size + b->size, sizeof(char));
    result->size = a->size + b->size; // Max size we will need.

    for (int i = 0; i < a->size; i++) {
        for (int j = 0; j < b->size; j++) {
            int index = i + j;
            result->digits[index] += a->digits[i] * b->digits[j];
            result->digits[index + 1] += result->digits[index] / 10;
            result->digits[index] %= 10;
        }
    }
    
    // Trim any leading zeros.
    while (result->size > 1 && result->digits[result->size - 1] == 0) {
        result->size--;
    }
}

int main() {
    BigNum a, b, sum, product;

    bignum_init(&a, "12345678901234567890");
    bignum_init(&b, "98765432109876543210");

    bignum_print(&a);
    bignum_print(&b);

    bignum_add(&sum, &a, &b);
    bignum_print(&sum);
    bignum_free(&sum);

    bignum_multiply(&product, &a, &b);
    bignum_print(&product);
    bignum_free(&product);

    int cmp_result = bignum_compare(&a, &b);
    if (cmp_result > 0) {
        printf("a is greater than b\n");
    } else if (cmp_result < 0) {
        printf("a is less than b\n");
    } else {
        printf("a is equal to b\n");
    }

    bignum_free(&a);
    bignum_free(&b);

    return 0;
}

Using namespaces effectively

不要头文件直接使用using namespace 放在cpp文件中或者函数里

另外 namespace 版本控制,两种玩法

namespace gem {
    namespace v1 {
        struct Point {
            int x;
            int y;
        };
    }
    namespace v2 {
        struct Point {
            int y; // y goes first in v2
            int x;
        };
    }
using namespace v1; // pull everything under v1 out
}
namespace gem {
    inline namespace v1 {
        ...
    }
    namespace v2 {
        ...
    }
}

推荐用inline,少写一行

其实是c++11之前有bug,但是我觉得还是不知道这个bug比较好,就当少写一行

What’s the deal with std::type_identity?

template<typename T>
struct type_identity {
    using type = T;
};

感觉很垃圾,这是干嘛的?

举个例子

template<typename T>
T add(T a, T b) {
    return a + b;
}

auto sum = add(0.5, 1); // error: cannot deduce T

报错了,这怎么办?主要是类型匹配太多,可以用identity建立关系,消除冲突的重载

template<typename T>
T add(T a, std::type_identity_t<T> b) {
    return a + b;
}
auto sum = add(0.5, 1); // T is "double"

来个现实例子

void enqueue(std::function<void(void)> const& work);

template<typename...Args>
void enqueue(std::function<void(Args...)> const& work,
    Args...args)
{
    enqueue([=] { work(args...); });
}
enqueue([](int v) { std::cout << v; }, 42); // 编译不过

这俩enqueue明显有推导关系 为啥编译不过呢,因为lambda可能匹配两个enqueue

需要一个限制

template<typename...Args>
void enqueue(
    std::type_identity_t<
        std::function<void(Args...)>
    > const& work,
    Args...args)
{
    enqueue([=] { work(args...); });
}

当然改成这样也行

template<typename...Args>
void enqueue(
    std::function<void(
        std::type_identity_t<Args>...
    )> const& work,
    Args...args)
{
    enqueue([=] { work(args...); });
}

当然还可以这样改

template<typename...Args>
void enqueue(
    std::function<void(
        std::decay_t<Args>...
    )> const& work,
    Args&&...args)
{
    enqueue([work, ...args = std::forward<Args>(args)]
            { work(std::move(args)...); });
}

当然也可以这样改 chatgpt老师教的​

template<typename Func, typename... Args>
void enqueue(Func&& func, Args&&... args)
{
    enqueue([=]() { std::invoke(std::forward<Func>(func), std::forward<Args>(args)...); });
}

想了解更多可以看提案哈

The limits of maybe_unused

不是特别好用,void忽略也能对服用

Destructuring Lambda Expression Parameters

结构化绑定很好用

int sum(tuple<int, int, int> triple) {
  auto [a, b, c] = triple;
  return a + b + c;
}

结构化绑定的值如果能直接传给lambda是不是很帅

比如这样

extern bool is_good(string s, int v);

auto foo(map<string, int> items) {
  return views::filter(
    items,
    // DOES NOT WORK!
    [](auto [key, value]) {
      return is_good(key, value);
    });
}

写一个转发吧,其实就是

inline constexpr struct {
  template <typename F>
  constexpr auto operator%(F&& fn) const {
    return [fn](auto&& tpl) {
      return std::apply(fn, FWD(tpl));
    };
  }
} spread_args;

上面的代码就可以这样了

auto foo(map<string, int> items) {
  return views::filter(
    items,
    // Works!
    spread_args % [](auto key, auto value) {
      return is_good(key, value);
    });
}

甚至可以更简单点

auto foo(map<string, int> items) {
  return views::filter(items, spread_args % is_good);
}

Scan HTML faster with SIMD instructions: Chrome edition

常规

void NaiveAdvanceString(const char *&start, const char *end) {
  for (;start < end; start++) {
    if(*start == '<' || *start == '&' 
        || *start == '\r' || *start == '\0') {
      return;
    }
  }
}

常规sse

void AdvanceString(const char*& start, const char* end) {
    const __m128i quote_mask = _mm_set1_epi8('<');
    const __m128i escape_mask = _mm_set1_epi8('&');
    const __m128i newline_mask = _mm_set1_epi8('\r');
    const __m128i zero_mask = _mm_set1_epi8('\0');

    static constexpr auto stride = 16;
    for (; start + (stride - 1) < end; start += stride) {
        __m128i data = _mm_loadu_si128(
           reinterpret_cast<const __m128i*>(start));
        __m128i quotes = _mm_cmpeq_epi8(data, quote_mask);
        __m128i escapes = _mm_cmpeq_epi8(data, escape_mask);
        __m128i newlines = _mm_cmpeq_epi8(data, newline_mask);
        __m128i zeros = _mm_cmpeq_epi8(data, zero_mask);
        __m128i mask = _mm_or_si128(_mm_or_si128(quotes, zeros),                   
             _mm_or_si128(escapes, newlines));
        int m = _mm_movemask_epi8(mask);
        if (m != 0) {
            start += __builtin_ctz(m);
            return;
        }
    }

    // Process any remaining bytes (less than 16)
    while (start < end) {
        if (*start == '<' || *start == '&' 
             || *start == '\r' || *start == '\0') {
            return;
        }
        start++;
    }
}

查表 sse

void AdvanceStringTable(const char *&start, const char *end) {
  uint8x16_t low_nibble_mask = {0b0001, 0, 0, 0, 0, 0, 0b0100, 
          0, 0, 0, 0, 0, 0b0010, 0b1000, 0, 0};
  uint8x16_t high_nibble_mask = {0b1001, 0, 0b0100, 0b0010, 
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  uint8x16_t v0f = vmovq_n_u8(0xf);
  uint8x16_t bit_mask = {16, 15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1};
  static constexpr auto stride = 16;
  for (; start + (stride - 1) < end; start += stride) {
    uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(start));
    uint8x16_t lowpart = vqtbl1q_u8(low_nibble_mask, vandq_u8(data, v0f));
    uint8x16_t highpart = vqtbl1q_u8(high_nibble_mask,  
           vshrq_n_u8(data, 4));
    uint8x16_t classify = vandq_u8(lowpart, highpart);
    uint8x16_t matchesones = vtstq_u8(classify, vdupq_n_u8(0xFF));
    uint8x16_t matches = vandq_u8(bit_mask, matchesones);
    int m = vmaxvq_u8(matches);
    if(m != 0) {
      start += 16 - m;
      return;
    }
  }  
  for (;start < end; start++) {
    if(*start == '<' || *start == '&' || *start == '\r' 
     || *start == '\0') {
      return;
    }
  }
}

一次查表

void AdvanceStringTableSimpler(const char *&start, const char *end) {
  uint8x16_t low_nibble_mask = {0, 0, 0, 0, 0, 0, 0x26, 0, 0, 
                            0, 0, 0, 0x3c, 0xd, 0, 0};
  uint8x16_t v0f = vmovq_n_u8(0xf);
  uint8x16_t bit_mask = {16, 15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1};
  static constexpr auto stride = 16;
  for (; start + (stride - 1) < end; start += stride) {
    uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(start));
    uint8x16_t lowpart = vqtbl1q_u8(low_nibble_mask, vandq_u8(data, v0f));
    uint8x16_t matchesones = vceqq_u8(lowpart, data);
    uint8x16_t matches = vandq_u8(bit_mask, matchesones);
    int m = vmaxvq_u8(matches);
    if(m != 0) {
      start += 16 - m;
      return;
    }
  }  
  for (;start < end; start++) {
    if(*start == '<' || *start == '&' 
     || *start == '\r' || *start == '\0') {
      return;
    }
  }
}

我都看不懂

Rolling your own fast matrix multiplication: loop order and vectorization

这个挺好玩的,手动测试那种循环快,看一乐,不如openmp

开源项目介绍

互动环节

上半年过得好快啊,都不知道自己在忙啥做出啥东西来,半年就结束了。

工作还是不好找,大环境看来还是继续恶劣下去

大家再坚持坚持


上一期

看到这里或许你有建议或者疑问或者指出错误,请留言评论! 多谢! 你的评论非常重要!也可以帮忙点赞收藏转发!多谢支持! 觉得写的不错那就给点吧, 在线乞讨 微信转账