C++ 中文周刊 第114期

周刊项目地址

公众号

RSS https://github.com/wanghenshui/cppweeklynews/releases.atom

欢迎投稿,推荐或自荐文章/软件/资源等

提交 issue

感谢 不语 Concept Matrixtang 赞助

资讯

标准委员会动态/ide/编译器信息放在这里

编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-05-17 第202期

美国印第安纳大学布鲁明顿主校区Dr. Dingwen Tao招收高性能计算方向博士后和全奖博士生

有感兴趣的可以看看,不知道我的读者里有没有学生

「十城相聚」第四站:5月21日,成都

内容

文章

确实,学生很需要这些,不然都得从头学,不知道我的读者学生多不多,如果没学,这里补课 https://missing-semester-cn.github.io/

c++知识点的一本书,这里友情推荐一下

一段猥琐的SFINAE

// Trait whose `value` is true when T is string-like: either is_string<T>
// holds, or T converts to std_string_view<char>, or a T* supports the member
// calls find('a'), length() and data().
template <typename T> class is_std_string_like {
  // Preferred overload: only viable when all three member calls are
  // well-formed on a Candidate*; its declared return type is int.
  template <typename Candidate>
  static auto probe(Candidate* obj)
      -> decltype((void)obj->find('a'), obj->length(), (void)obj->data(), int());
  // Catch-all picked when the overload above drops out via SFINAE; returns void.
  template <typename> static void probe(...);

  // int when the member-call probe succeeded, void otherwise.
  using probe_result = decltype(probe<T>(nullptr));

 public:
  static constexpr const bool value =
      is_string<T>::value ||
      std::is_convertible<T, std_string_view<char>>::value ||
      !std::is_void<probe_result>::value;
};

这段代码一搜遍地都是,出自 fmt/ranges.h

d41d8c手把手带你走近这段代码的演进。。。不过还是直接看c++20的写法吧

template <typename T> constexpr bool is_std_string_like =
    is_string<T>::value ||
    is_convertible<T, std_string_view<char>>::value ||
    requires(T* p) {
      p->find('a');
      p->length();
      p->data();
    };

你看懂这个就行了

还是那些东西,循环不变量外提,循环外提等等

#include <memory_resource>
#include <cassert>

int main() {
    // Number of ints requested from (and later returned to) the allocator.
    const int kCount = 10;

    // Backing memory resource, constructed with the argument 1024.
    std::pmr::monotonic_buffer_resource arena(1024);

    // Polymorphic allocator for int that draws its memory from `arena`.
    std::pmr::polymorphic_allocator<int> alloc(&arena);

    // Ask for storage for kCount ints and check that a pointer came back.
    int* const block = alloc.allocate(kCount);
    assert(block);

    // Hand the storage back through the same allocator.
    alloc.deallocate(block, kCount);
}

用的还是挺多的

有点意思

老生常谈了。这里介绍一下thread_local,直接看代码

#include <iostream>
#include <thread>
#include <mutex>

// Locked by foo() while it writes to std::cout, so lines printed from
// different threads do not interleave.
std::mutex mutPrint;
// Namespace-scope thread_local variable; foo() and main() print its address
// to show that each thread sees a different object.
thread_local int x = 0;

// Prints the calling thread's id together with the addresses of x and of the
// function-local thread_local y, holding mutPrint for the whole line.
void foo() {
    thread_local int y = 0;
    const std::lock_guard<std::mutex> printLock(mutPrint);
    std::cout << "in thread\t" << std::this_thread::get_id() << " "
              << "&x " << &x << ", "
              << "&y " << &y << '\n';
}

int main() {
    // No worker thread exists yet, so this line is printed without the mutex.
    std::cout << "main\t" << std::this_thread::get_id();
    std::cout << " &x " << &x << '\n';

    // Alternate between starting a worker that runs foo() and calling foo()
    // directly on the main thread.
    std::jthread worker1{foo};
    foo();
    std::jthread worker2{foo};
    foo();
}
/*
main        4154632640 &x 0xf7a2a9b8
in thread   4154632640 &x 0xf7a2a9b8, &y 0xf7a2a9bc
in thread   4154628928 &x 0xf7a29b38, &y 0xf7a29b3c
in thread   4154632640 &x 0xf7a2a9b8, &y 0xf7a2a9bc
in thread   4146236224 &x 0xf7228b38, &y 0xf7228b3c
*/

注意:不同线程里 x、y 的地址各不相同(每个线程都有自己的一份 thread_local 变量),而同一个线程里多次调用看到的地址是一样的

直接贴代码吧

普通

// Returns len plus the number of bytes in c[0, len) whose top bit is set.
// NOTE(review): presumably the UTF-8 size of a Latin-1 buffer, where every
// byte >= 0x80 needs two bytes - confirm against the referenced article.
size_t scalar_utf8_length(const char * c, size_t len) {
  size_t high_bytes = 0;
  const char *const stop = c + len;
  for (const char *cursor = c; cursor != stop; ++cursor) {
    // Inspect the byte as unsigned so the check is independent of whether
    // plain char is signed.
    if (static_cast<unsigned char>(*cursor) & 0x80u) {
      ++high_bytes;
    }
  }
  return len + high_bytes;
}

复杂

uint64_t utf8_length_kvakil(const uint8_t *data, uint32_t length) {
  uint64_t result = 0;
  const int lanes = sizeof(uint8x16_t);
  uint8_t rem = length % lanes;
  const uint8_t *simd_end = data + (length / lanes) * lanes;
  const uint8x16_t threshold = vdupq_n_u8(0x80);
  for (; data < simd_end; data += lanes) {
    // load 16 bytes
    uint8x16_t input = vld1q_u8(data);
    // compare to threshold (0x80)
    uint8x16_t withhighbit = vcgeq_u8(input, threshold);
    // shift and narrow
    uint8x8_t highbits = vshrn_n_u16(vreinterpretq_u16_u8(withhighbit), 4);
    // we have 0, 4 or 8 bits per byte
    uint8x8_t bitsperbyte = vcnt_u8(highbits);
    // sum the bytes vertically to uint16_t
   result += vaddlv_u8(bitsperbyte);
  }
  result /= 4; // we overcounted by a factor of 4
  // scalar tail
  for (uint8_t j = 0; j < rem; j++) {
    result += (simd_end[j] >> 7);
  }
  return result + length;
}

还是很复杂

uint64_t utf8_length_cawley(const uint8_t *data, uint32_t length) {
  uint64_t result = 0;
  const int lanes = sizeof(uint8x16_t);
  uint8_t rem = length % lanes;
  const uint8_t *simd_end = data + (length / lanes) * lanes;
  const uint8x16_t threshold = vdupq_n_u8(0x80);
  for (; data < simd_end; data += lanes) {
    // load 16 bytes
    uint8x16_t input = vld1q_u8(data);
    // compare to threshold (0x80)
    uint8x16_t withhighbit = vcgeq_u8(input, threshold);
    // vertical addition
    result -= vaddvq_s8(withhighbit);
  }
  // scalar tail
  for (uint8_t j = 0; j < rem; j++) {
    result += (simd_end[j] >> 7);
  }
  return result + length;
}

速度对比:scalar code ~7 GB/s;NEON code (Vakil) ~27 GB/s;NEON code (Cawley) ~30 GB/s

上一期讲了无分支的二分查找,这个作者看了也很感兴趣,也写了一版,并写了推导过程。大家感兴趣的可以看看推导过程,这里直接贴代码了

// Binary search that builds the result index one bit at a time, from the
// highest power of two not exceeding the range size down to 1.
// Returns begin + k, where k is the index assembled from the probes for which
// comp(begin[i], value) was true. Uses `end - begin` and `begin[i]`, so It
// must support random access.
// Assumes the range is partitioned with respect to comp(element, value), as
// for std::lower_bound.
template<typename It, typename T, typename Cmp>
It lower_bound(It begin, It end, const T& value, Cmp comp) {
    const size_t count = end - begin;
    size_t pos = 0;
    size_t step = std::bit_floor(count);
    while (step != 0) {
        // Candidate index if this bit were added to the answer.
        const size_t probe = (pos | step) - 1;
        // Only probe inside the range; keep the bit when the element there
        // still compares before `value`.
        const bool advance = probe < count && comp(begin[probe], value);
        if (advance) {
            pos |= step;
        }
        step >>= 1;
    }
    return begin + pos;
}

还是经典的 range-for 循环中常量引用的生命周期问题。作者建议:用 & / && 引用限定符区分接口,并把右值版本标记为 delete(或者封装一层),这样就不会从临时对象上拿到悬空的常量引用了

原来的代码

// RGB triple with every channel defaulting to 0. The defaulted three-way
// comparison compares r, then g, then b, and also provides ==.
struct Pixel{
    int r{0};
    int g{0};
    int b{0};
    friend auto operator<=>(const Pixel& lhs, const Pixel& rhs) = default;
};

// Placeholder for the image's metadata; intentionally left empty here.
struct Metadata {
    /* some metadata here, width, height, etc. */
};
class MyRawImage{
    std::vector<Pixel> m_buffer;
    Metadata           m_metadata;
public:
    MyRawImage(std::vector<Pixel> src) : m_buffer(std::move(src)) {}

    const Pixel& operator[](int index) const { return m_buffer[index]; }
    Pixel&       operator[](int index)       { return m_buffer[index]; }

    const std::vector<Pixel>& data() const   { return m_buffer; }
    const Metadata& information() const      { return m_metadata; }
};


// Builds an image of i * 100 pixels, each with all three channels set to i.
MyRawImage loadImage(int i){
    const Pixel fill{i, i, i};
    const int pixelCount = i * 100;
    return MyRawImage(std::vector<Pixel>(pixelCount, fill));
}

// Deliberately broken example: returns an image built from loadImage(i)'s
// pixels with each channel capped at 0xFF, but iterates over a reference into
// a temporary that has already been destroyed (before C++23's range-for
// change). Kept as-is to demonstrate the lifetime bug.
MyRawImage problematic(int i){
    std::vector<Pixel> filtered;
    // Caps every channel of a pixel at 0xFF and returns the modified copy.
    auto filter = [](Pixel p) { 
        p.r = std::min(p.r, 0xFF); 
        p.g = std::min(p.g, 0xFF); 
        p.b = std::min(p.b, 0xFF);
        return p; 
    };

    // oops: 
    // equivalent of `auto&& ps =  loadImage(i).data(); for (Pixel p : ps) { ... }`
    // loadImage() returns a temporary, temporary.data() reference is stored, then
    // for-loop iterates over a stored reference to a property of deleted temporary 
    // this is where things go badly wrong
    // the simplest fix is to store loadImage()'s return value in a variable first
    for(Pixel p : loadImage(i).data())
        filtered.push_back(filter(p));

    return filtered;
}

// Returns the largest pixel of loadImage(i).
// Safe despite the temporary: the image lives until the end of the return
// statement's full-expression, i.e. until after the maximum has been copied.
// Note: dereferences the max_element result, so the image must be non-empty.
Pixel fine(int i) {
    const auto largest = [](auto&& pixels) -> Pixel {
        const auto first = std::begin(pixels);
        const auto last = std::end(pixels);
        return *std::max_element(first, last);
    };

    return largest(loadImage(i).data());
}

int main(int, char**) {
    // Both constants are static so the capture-less lambda below can use them.
    static constexpr int pattern = 0x12;
    static constexpr Pixel pixelPattern{pattern, pattern, pattern};

    // The safe path: the maximum pixel must equal the fill pattern.
    const Pixel maxPixel = fine(pattern);
    assert(maxPixel == pixelPattern);

    // The broken path: every pixel is expected to equal the pattern.
    MyRawImage img = problematic(pattern);
    const auto matchesPattern = [](const Pixel& px) { return px == pixelPattern; };
    assert(std::ranges::find_if_not(img.data(), matchesPattern) == img.data().end());
}

C++23 之后,range-for 循环本身就修复了这个问题(范围初始化表达式里的临时对象会存活到循环结束);在此之前,作者考虑用成员函数的引用限定符来解决这个问题

#include <iostream>
 struct S {
    // Selected when f() is called on an lvalue object.
    void f() & {
        std::cout << "左值\n";
    }
    // Selected when f() is called on an rvalue object.
    void f() && {
        std::cout << "右值\n";
    }
};
 int main() {
    S s;
    s.f();            // 打印“左值”
    std::move(s).f(); // 打印“右值”
    S().f();          // 打印“右值”
}

通过这个思路,上面的代码,实现两个data

#include <ranges>
#include <cassert>
#include <vector>
// RGB triple with every channel defaulting to 0. The defaulted three-way
// comparison compares r, then g, then b, and also provides ==.
struct Pixel {
    int r{0};
    int g{0};
    int b{0};
    friend auto operator<=>(const Pixel& lhs, const Pixel& rhs) = default;
};

// Placeholder for the image's metadata; intentionally left empty here.
struct Metadata {
    /* some metadata here, width, height, etc. */
};

class MyRawImage {
    std::vector<Pixel> m_buffer;
    Metadata           m_metadata;
public:
    class UnsafeReference {  
        std::vector<Pixel>& m_buffer;

    public:
        UnsafeReference(std::vector<Pixel>& buffer) : m_buffer(buffer) {}

        // I would like it to be a free-function rather than a member function,
        // to lower the chance that Intellisence will provide a disservice
        // to the developer by slipping an unsafe getter by auto-suggestions. 
        // it's good to require a fair bit of attention here
        friend std::vector<Pixel>& allowUnsafe(UnsafeReference&&);
    };

    MyRawImage(std::vector<Pixel> src) : m_buffer(std::move(src)) {}

    const Pixel& operator[](int index) const { return m_buffer[index]; }
    Pixel&       operator[](int index)       { return m_buffer[index]; }

    const std::vector<Pixel>& data() const & { return m_buffer; }
    // 循环临时对象,走这里,封装一层
    UnsafeReference data() &&                { return m_buffer; }

    const Metadata& information() const &    { return m_metadata; }
    const Metadata& information() && = delete;
};

// Explicit opt-in: unwraps an UnsafeReference and returns the pixel buffer it
// refers to. The caller is responsible for the buffer's owner still being alive.
std::vector<Pixel>& allowUnsafe(MyRawImage::UnsafeReference&& unsafe){
    std::vector<Pixel>& pixels = unsafe.m_buffer;
    return pixels;
}

// Builds an image of i * 100 pixels, each with all three channels set to i.
MyRawImage loadImage(int i){
    const Pixel fill{i, i, i};
    const int pixelCount = i * 100;
    return MyRawImage(std::vector<Pixel>(pixelCount, fill));
}

// Returns an image built from loadImage(i)'s pixels with every channel capped
// at 0xFF.
MyRawImage was_problematic(int i){
    // Binding the temporary to a const reference keeps it alive for the rest
    // of this function, so data() is called on a living lvalue.
    const MyRawImage& image = loadImage(i);

    std::vector<Pixel> filtered;
    for (const Pixel& source : image.data()) {
        Pixel capped = source;
        capped.r = std::min(capped.r, 0xFF);
        capped.g = std::min(capped.g, 0xFF);
        capped.b = std::min(capped.b, 0xFF);
        filtered.push_back(capped);
    }

    return filtered;
}

// Returns the largest pixel of loadImage(i), going through the explicit
// allowUnsafe() opt-in because data() is called on a temporary.
// Safe here: the temporary image lives until the end of the return
// statement's full-expression, i.e. until after the maximum has been copied.
// Note: dereferences the max_element result, so the image must be non-empty.
Pixel fine_again(int i)
{
    const auto largest = [](auto&& pixels) -> Pixel {
        const auto first = std::begin(pixels);
        const auto last = std::end(pixels);
        return *std::max_element(first, last);
    };

    return largest(allowUnsafe(loadImage(i).data()));
}

int main(int, char**){
    // Both constants are static so the capture-less lambda below can use them.
    static constexpr int pattern = 0x12;
    static constexpr Pixel pixelPattern{pattern, pattern, pattern};

    // The maximum pixel of the generated image must equal the fill pattern.
    const Pixel maxPixel = fine_again(pattern);
    assert(maxPixel == pixelPattern);

    // After filtering, every pixel must still equal the pattern.
    MyRawImage img = was_problematic(pattern);
    const auto matchesPattern = [](const Pixel& px) { return px == pixelPattern; };
    assert(std::ranges::find_if_not(img.data(), matchesPattern) == img.data().end());
}

我觉得这个解决方法不咋地

视频

协程入门demo

代码在这里 https://github.com/dietmarkuehl/co_await-all-the-things/blob/main/task-using-std.cpp

开源项目需要人手

新项目介绍/版本更新


本文永久链接

如果有疑问,最好到上面链接的评论区里评论,这样方便搜索;微信公众号有点封闭,知乎会吞评论

看到这里,如果你有建议、疑问,或者想指出错误,请留言评论!多谢!你的评论非常重要!也可以帮忙点赞收藏转发!多谢支持!觉得写得不错那就给点吧,在线乞讨 微信转账