公众号
点击「查看原文」跳转到 GitHub 上对应文件,链接就可以点击了
qq群 点击进入
欢迎投稿,推荐或自荐文章/软件/资源等,评论区留言
本期文章由 HNY lhmouse 赞助
lhmouse的项目 mcfthread/asteria 推荐大家关注
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 第268期
可以参考
看一乐
看一乐
clang ffast-math有bug
// Baseline: element-wise square root of four packed floats via the SSE intrinsic.
// Without -ffast-math this compiles to a single sqrtps (see the listing that follows).
__m128 test(const __m128 vec)
{
return _mm_sqrt_ps(vec);
}
/*
test: # @test
sqrtps xmm0, xmm0
ret
*/
正常生成的汇编没问题,但加上 -ffast-math 后生成的汇编有问题:sqrtps 会被替换成 rsqrtps,又因为精度不够还得补一轮牛顿迭代,反而慢了,我靠
.LCPI0_0:
.long 0xbf000000 # float -0.5
.long 0xbf000000 # float -0.5
.long 0xbf000000 # float -0.5
.long 0xbf000000 # float -0.5
.LCPI0_1:
.long 0xc0400000 # float -3
.long 0xc0400000 # float -3
.long 0xc0400000 # float -3
.long 0xc0400000 # float -3
test:
rsqrtps xmm1, xmm0
movaps xmm2, xmm0
mulps xmm2, xmm1
movaps xmm3, xmmword ptr [rip + .LCPI0_0] # xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
mulps xmm3, xmm2
mulps xmm2, xmm1
addps xmm2, xmmword ptr [rip + .LCPI0_1]
mulps xmm2, xmm3
xorps xmm1, xmm1
cmpneqps xmm0, xmm1
andps xmm0, xmm2
ret
为了规避只好手写汇编
// Workaround: issue sqrtps through inline assembly so the compiler cannot
// rewrite the square root into an rsqrtps-based approximation.
// Input and output both use the "x" (SSE register) constraint and are bound to
// `vec`; the template is in AT&T order (source, destination).
__m128 test(__m128 vec)
{
__asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
return vec;
}
但是手写汇编优化不彻底,该有的常量折叠没做
// Same inline-asm sqrtps as before, but force-inlined so a constant argument is
// visible at the call site. The asm statement is opaque to the optimizer, so
// the square root is still not constant-folded.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
__asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
return vec;
}
// Calls test() with the constant vector {1, 2, 3, 4} to check whether the
// square root gets folded at compile time.
__m128 call_test()
{
return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}
test:
sqrtps xmm0, xmm0
ret
.LCPI1_0:
.long 0x3f800000 # float 1
.long 0x40000000 # float 2
.long 0x40400000 # float 3
.long 0x40800000 # float 4
call_test:
movaps xmm0, xmmword ptr [rip + .LCPI1_0] # xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
sqrtps xmm0, xmm0
ret
sqrtps并没有优化掉
手动折叠, __builtin_constant_p
// Attempted manual folding: route compile-time-constant inputs to the intrinsic
// (which the optimizer can fold) and everything else to the inline-asm sqrtps.
// NOTE: __builtin_constant_p is applied to the whole vector here; per the
// assembly listing that follows, this check does not trigger folding.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
if (__builtin_constant_p(vec))
{
return _mm_sqrt_ps(vec);
}
__asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
return vec;
}
// Calls test() with the constant vector {1, 2, 3, 4} to check whether the
// square root gets folded at compile time.
__m128 call_test()
{
return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}
汇编
call_test:
movaps xmm0, xmmword ptr [rip + .LCPI11_0] # xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
sqrtps xmm0, xmm0
ret
并没有优化掉??又有个bug,__builtin_constant_p不能直接用于向量类型(__m128),得按元素手动展开
// Manual folding that works: test each of the four lanes with
// __builtin_constant_p individually. When every lane is a compile-time
// constant the intrinsic path is taken (and can be folded); otherwise fall back
// to the inline-asm sqrtps.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
if (__builtin_constant_p(vec[0]) &&
__builtin_constant_p(vec[1]) &&
__builtin_constant_p(vec[2]) &&
__builtin_constant_p(vec[3]))
{
return _mm_sqrt_ps(vec);
}
__asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
return vec;
}
// Calls test() with the constant vector {1, 2, 3, 4} to check whether the
// square root gets folded at compile time.
__m128 call_test()
{
return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}
.LCPI15_0:
.long 0x3f800000 # float 1
.long 0x3fb504f3 # float 1.41421354
.long 0x3fddb3d7 # float 1.73205078
.long 0x40000000 # float 2
call_test:
movaps xmm0, xmmword ptr [rip + .LCPI15_0] # xmm0 = [1.0E+0,1.41421354E+0,1.73205078E+0,2.0E+0]
ret
折叠了
c++20之前,文件的时间不够直观,得转一手
// Before C++20 a file_time_type cannot be printed directly, and its clock is
// not guaranteed to be system_clock, so handing it straight to
// system_clock::to_time_t does not compile on every standard library.
// Re-base the timestamp onto system_clock using the two clocks' current times
// (accurate up to the tiny gap between the two now() calls).
auto filetime = fs::last_write_time(myPath);
auto systime = std::chrono::time_point_cast<std::chrono::system_clock::duration>(
    filetime - fs::file_time_type::clock::now() + std::chrono::system_clock::now());
std::time_t convfiletime = std::chrono::system_clock::to_time_t(systime);
// std::ctime already ends its string with '\n'; the extra newline is kept from the original output.
std::cout << "Updated: " << std::ctime(&convfiletime) << '\n';
c++20可以直接用了
// Write a small file into the temp directory so there is a fresh timestamp to read.
auto temp = std::filesystem::temp_directory_path() / "example.txt";
std::ofstream(temp.c_str()) << "Hello, World!";
auto ftime = std::filesystem::last_write_time(temp);
// C++20 chrono formatting: {0:%R} and {0:%F} both refer to argument 0 and
// print its time (HH:MM) and date (YYYY-MM-DD) respectively.
std::cout << std::format("File write time is {0:%R} on {0:%F}\n", ftime);
// C++20 also lets the time point be streamed directly.
std::cout << ftime;
// Example output:
// File write time is 14:52 on 2024-06-22
// 2024-06-22 14:52:46.632061324
这个之前讲过,就是这个例子
#include <iostream>
#include <string_view>
#include <cstring>
// Demo type for temporary lifetime extension: `sv` views the object's own
// `data` buffer, and the destructor overwrites that buffer before printing, so
// the moment of destruction is visible in the program output.
struct Example {
char data[6] = "hello";
std::string_view sv = data; // view over `data`; its length is fixed at 5 ("hello") on construction
~Example() {
strcpy(data, "bye"); // overwrites the first 4 bytes ("bye" plus terminator); sv still spans 5 chars
std::cout <<"~Example: "<< sv << '\n';
}
};
// Binds a reference to a member of a temporary Example and prints through it.
int main() {
auto&& local = Example().sv; // Here we extend lifetime of entire Example
std::cout << local << '\n'; // prints hello
}
Example并不会立即析构,生命周期被local延长了,local打印hello
列举了内核里使用 __counted_by 的例子,比如
- cmd.cmd.scan_type = WMI_ACTIVE_SCAN;
- cmd.cmd.num_channels = 0;
+ cmd->scan_type = WMI_ACTIVE_SCAN;
+ cmd->num_channels = 0;
n = min(request->n_channels, 4U);
for (i = 0; i < n; i++) {
int ch = request->channels[i]->hw_value;
@@ -991,7 +988,8 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
continue;
}
/* 0-based channel indexes */
- cmd.cmd.channel_list[cmd.cmd.num_channels++].channel = ch - 1;
+ cmd->num_channels++;
+ cmd->channel_list[cmd->num_channels - 1].channel = ch - 1;
wil_dbg_misc(wil, "Scan for ch %d : %d MHz\n", ch,
request->channels[i]->center_freq);
}
...
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -474,7 +474,7 @@ struct wmi_start_scan_cmd {
struct {
u8 channel;
u8 reserved;
- } channel_list[];
+ } channel_list[] __counted_by(num_channels);
} __packed;
没啥说的。就是针对柔性数组成员(flexible array member)做的一个patch,帮助编译器分析的,类似guarded_by
就是异或链表,代码不贴了,谁这么写离我远点
字符串复制要注意\0截断问题
看不懂
介绍float特殊类型
#include <bit>
#include <concepts>
#include <cstdint>
#include <format>
#include <iomanip>
#include <iostream>
#include <stdfloat>
// Prints one 16-bit pattern reinterpreted both as std::float16_t and as
// std::bfloat16_t, together with the bit fields of each layout.
//
// val: raw 16-bit pattern to reinterpret.
//
// Output columns: binary, hex, decimal, the float16 value with its 1/5/10-bit
// split, then the bfloat16 value with its 1/8/7-bit split.
void explore_float16(std::uint16_t val) {
  const auto f16 = std::bit_cast<std::float16_t>(val);
  const auto bf16 = std::bit_cast<std::bfloat16_t>(val);
  // The width in a format spec counts the "0b"/"0x" prefix added by '#', so
  // showing all 16 bits needs width 18 and all 4 hex digits needs width 6;
  // narrower widths drop leading zeros and misalign the rows.
  std::cout << std::format(
      "{:#018b} {:#06x} {} {}f16 (0b{:01b}'{:05b}'{:010b}) {}bf16 "
      "(0b{:01b}'{:08b}'{:07b})\n",
      val, val, val, f16, (val >> 15) & 0b1, (val >> 10) & 0b11111,
      (val & 0b1111111111), bf16, (val >> 15) & 0b1, (val >> 7) & 0b11111111,
      (val & 0b1111111));
}
// Runs explore_float16 over three sample bit patterns; the digit separators
// group each literal as 1'5'10 bits.
int main() {
  constexpr std::uint16_t patterns[] = {
      0b0'01101'0101010101,
      0b0'11110'1111111111,
      0b1'11100'0011111111,
  };
  for (const std::uint16_t pattern : patterns) {
    explore_float16(pattern);
  }
}
最近的热点消息那必然是各个群里都传的过劳事件了,大家保重身体不要太拼了,起码睡够,不睡够干不了活的,起码闭眼够