Releases: wanghenshui/cppweeklynews
C++ 中文周刊 第134期
qq群 手机qq点击进入
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
感谢 不语 赞助
和群友讨论的指针乱分类
| 指针定义 九宫格 | 定义纯粹派 必须是指针 | 定义中立派 有指针的能力 | 定义自由派 没有能力也行 |
|---|---|---|---|
| 形式纯粹派 必须有* | void * | operator*() | "" 是char当然是指针 |
| 形式中立派 得有指向含义 | 智能指针 | 引用也是指针 | fd/handle也是指针 当然数组也是指针 |
| 形式自由派 有指针就行 | 表针也是指针 指南针更是指针 鼠标也是指针 | 针灸也是指针 东方不败自然也是指针 手指也是指针 | 广告也是指针 地址通讯录也是指针 酒店小卡片也是指针 |
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 OSDT Weekly 2023-10-11 第223期
brpc rpcz功能 存在xss跨站漏洞,建议尽快升级 1.6.1
如果无法升级,可以打补丁 https://github.com/apache/brpc/pull/2411/files
文章
GCC Preparing To Introduce "-fhardened" Security Hardening Option
Mick235711 投稿
gcc 14.1的新选项-fhardened会自动开启大部分相对轻量级的安全检查,包括_FORTIFY_SOURCE=3级保护(常见C库函数,比如strcpy等等的内存溢出诊断),标准库断言(这里面包括std::span和其他容器的operator[]边界检查),栈溢出检查等等
How to compare signed and unsigned integers in C++20?
介绍 std::cmp_xx的 之前也介绍过
使用 jegdb 来调试内存相关 crash
通过jemalloc meta信息反查bug,有点东西
flat_map性能调研
了解个大概
C++26静态反射提案解析
看一乐
视频
接着上期的内容
cppcon 2022
- A-Faster-Serialization-Library-Based-on-Compile-time-Reflection-and-C-20-Yu-Qi-CppCon-2022
介绍qicosmos的struct_pack的。挺有意思。资料也很多,这里就不罗列了,感兴趣的可以搜一下,标记个TODO,咱们以后有时间单独讲一下
- binary-search-cppcon
优化二分
二分谁都知道吧
// Classic branchy binary search over the global array `t` of length `n`
// (both are declared outside this snippet; `t` is expected to be sorted).
// Returns the first element that is >= x. If every element is smaller than x
// the search converges on the last slot, so t[n - 1] is returned.
// NOTE(review): assumes n >= 1 -- with n == 0 this reads t[0]; confirm callers.
int lower_bound(int x) {
    int l = 0, r = n - 1;
    while (l < r) {
        // Same midpoint as (l + r) / 2, but the sum l + r can exceed INT_MAX
        // once n grows past 2^30; this form cannot overflow.
        int m = l + (r - l) / 2;
        if (t[m] >= x)
            r = m;        // t[m] is a candidate answer, keep it in range
        else
            l = m + 1;    // t[m] is too small, discard it
    }
    return t[l];
}
不会写的深蹲十个
CPU执行基本流程还记得吧 fetch decode execute write。 然后CPU有流水线pipeline优化,整个流程15-20cycle
流水线优化能并发上面的步骤,什么能阻碍流水线优化?
- structural hazard 太多指令需要用到CPU的同一个部件(比如同一个执行单元),这个是fetch decode机器码环节,无解
- data hazard 需要等待前面的数据,这个其实是软件问题,也没啥好办法
- control hazard CPU不知道下一次该执行什么指令,存在依赖 分支miss的场景,这个是可以挽救修正的
我们回头再看二分的代码
while循环还好说,里面的if是非常重的
怎么改成无分支版本?换一种思路,我们不挪动index,我们挪动数组地址
int lower_bound(int x) {
int* base = t ,len = n;
while(len> 1) {
int half = len / 2;
if (base[half - 1] < x) {
base += half;
len = len - half; // = ceil(len / 2)
} else {
len = half; // = floor(len / 2)
}
}
return *base;
}
注意到重复代码,这样能把else去掉
int lower_bound(int x) {
int* base = t ,len = n;
while(len> 1) {
int half = len / 2;
if (base[half - 1] < x) {
base += half;
}
len -= half; // = ceil(len / 2)
}
return *base;
}
显然,这个if也能优化掉
while(len> 1) {
int half = len / 2;
base += (base[half - 1] < x) * half; // will be replaced with a "cmov"
len -= half; // = ceil(len / 2)
}
改成无分支版本,性能直接提升一大截,但是,对于大数组,性能是下降的,怎么办?prefetch
while(len > 1) {
int half = len / 2;
len -= half;
__builtin_prefetch(&base[len / 2 - 1]);
// middle of the left half
__builtin_prefetch(&base[half + len / 2 - 1]); // middle of the right half
base += (base[half - 1] < x) * half;
}
接下来要上强度了朋友们
prefetch实际上也解决不了特大数组的问题,因为二分,一开始的块必然很大,你怎么prefetch也白搭
我们需要从另一种角度解决问题,比如二叉树 堆 线段树的特性
利用树的特点,以及树的局部性友好,对于二分开头有明显的加速效果
二叉树的特点就决定了,肯定不需要手写分支
那怎么构造堆呢
int a[n];
alignas(64) int t[n + 1]; //the original sorted array and the eytzinger array we build
//^ we need one element more because of one-based indexing
void eytzinger(int k = 1) {
static int i = 0;
if (k <= n) {
eytzinger(2 * k);
t[k] = a[i++];
eytzinger(2 * k + 1);
}
}
int lower_bound(int x) {
int k = 1;
while (k <= n) {
__builtin_prefetch(&t[k * 16]);
k = 2 * k + (t[k]< x);
}
k >>= __builtin_ffs(~k);
return t[k];
}
性能好起来了,但感觉有优化空间
- prefetch感觉有点多
- 带宽bandwidth换延迟,如果内存带宽没这么富裕怎么办
考虑b树,深度更低,局部性更好,跳转更少, 降低带宽
如何构造
const int B = 16, nblocks = (n + B - 1) / B;
int btree[nblocks][B];
int go(int k, int i) { return k * (B + 1) + i + 1; }
// Recursively fills block `k` of the global `btree` from the array `a`.
// For each of the B slots it first builds the child subtree that precedes the
// slot (go(k, i)), then stores the next unread element of `a`; after the last
// slot it builds the trailing child (go(k, B)). Slots left over once `a` is
// exhausted are padded with INT_MAX.
// `nblocks`, `B`, `btree`, `a`, `n` and `go` are defined outside this snippet.
void build(int k = 0) {
    static int t = 0;  // next unread index in `a`, shared by all recursive calls
    // This must be `if`, not `while`: k is never modified in the body, so a
    // loop here would rebuild the same block forever.
    if (k < nblocks) {
        for (int i = 0; i < B; i++) {
            build(go(k, i));
            btree[k][i] = (t < n ? a[t++] : INT_MAX);
        }
        build(go(k, B));
    }
}
如何找节点的二分?
// compute the "local" lower bound in a node
int rank(int x, int *node) {
for (int i = 0; i < B; i++)
if (node[i] >= x)
return i;
return B;
}
优化if
// Branch-free "local" lower bound inside one node of B keys.
// Bit i of `mask` is set when node[i] >= x; bit B is always set as a sentinel,
// so when no key qualifies the lowest set bit is bit B and the result is B.
// `B` is defined outside this snippet.
int rank(int x, int *node) {
    int mask = (1 << B);
    for (int i = 0; i < B; i++)
        // Read through the `node` parameter; the earlier btree[k][i] referred
        // to a `k` that does not exist in this function.
        mask |= (node[i] >= x) << i;
    // __builtin_ffs is 1-based, hence the -1.
    return __builtin_ffs(mask) - 1;
}
优化for循环,SIMD
typedef __m256i reg;
// compute a 8-bit mask corresponding to "<" elements
int cmp(reg x_vec, int* y_ptr) {
reg y_vec = _mm256_load_si256((reg*) y_ptr); // load 8 sorted elements
reg mask = _mm256_cmpgt_epi32(x_vec, y_vec); // compare against the key
return _mm256_movemask_ps((__m256)mask); // extract the 8-bit mask
}
// SIMD "local" lower bound over a 16-key node, using two 8-lane cmp() calls.
// cmp() yields one bit per key that is less than the search key; the two
// 8-bit results are combined into 16 bits and inverted, so the lowest set bit
// marks the first key that is NOT less than the search key. Inverting an int
// also sets every bit above bit 15, which makes the result 16 when all keys
// are smaller.
// `reg` and `cmp` are defined outside this snippet.
int rank(reg x_vec, int *node) {
    int mask = ~(
        // Use the parameter name `x_vec`; the earlier `x` was undeclared here.
        cmp(x_vec, node) +
        (cmp(x_vec, node + 8) << 8)
    );
    return __builtin_ffs(mask) - 1; // alternative: popcount
}
最终代码
int lower_bound(int _x) {
int k = 0, res = INT_MAX;
reg x = _mm256_set1_epi32(_x);
while (k < nblocks) {
int i = rank(x,btree[k]);
if (i < B)// a local lower bound may not exist in the leaf node
res = btree[k][i];
k = go(k, i) ;
}
return res;
}
这个if很难受,怎么优化?
考虑b+树,说实话我已经汗流浃背了。就不考虑了
作者还探索了其他树,优化更彻底
代码在这 https://github.com/sslotin/amh-code/blob/main/binsearch
文章在这里 https://en.algorithmica.org/hpc/data-structures/binary-search/
他的博客咱们推荐过很多次。写的很好,就是太深了得研究半天,这里标记个TODO,后面再看
见识到思路其实是很巧妙的,换种角度考虑问题
- Fast-High-Quality-Pseudo-Random-Numbers-CPPCon2022-Roth-Michaels
简单来说,PCG32 Xoshiro128比标准库的rand以及mt19937快得多
- HPX-A-C-Standard-Library-for-Parallelism-and-Concurrency-CppCon-2022-1
介绍HPX的,基本每年都介绍
介绍c++20线程相关的组件,jthread就不说了
stop_token / stop_source
void stoppable_func(std::stop_token st){
while(!st.stop_requested()){
do_stuff();
}
}
void stopper(std::stop_source source){
while(!done()){
do_something();
}
source.request_stop();
}
也可以定制
Data read_file(std::stop_token st, std::filesystem::path filename ){
auto handle=open_file(filename);
std::stop_callback cb(st,[&]{ cancel_io(handle);});
return read_data(handle); // blocking
}
latch
void foo(){
unsigned const thread_count=...;
std::latch done(thread_count);
std::vector<std::optional<my_data>> data(thread_count);
std::vector<std::jthread> threads;
for(unsigned i=0;i<thread_count;++i)
threads.push_back(std::jthread([&,i]{
data[i]=make_data(i);
done.count_down();
do_more_stuff();
}));
done.wait();
process_data(data);
}
barrier,感觉就是latch加上callback了
unsigned const num_threads=...;
void finish_task();
std::barrier<std::function<void()>> b(num_threads,finish_task);
void worker_thread(std::stop_token st,unsigned i){
while(!st.stop_requested()){
do_stuff(i);
b.arrive_and_wait();
}
}
mutex 一种死锁场景
class account {
std::mutex m;
currency_value balance;
public:
friend void transfer(account& from,account& to, currency_value amount) {
std::scoped_lock lock_from(from.m);
std::scoped_lock lock_to(to.m);
from.balance -= amount;
to.balance += amount;
}
};
相信各位也看出来什么场景会死锁 (同时发生互相转账)
c++17之后 scoped_lock可以同时锁多个锁
friend void transfer(account& from,account& to, currency_value amount)
{
std::scoped_lock locks(from.m,to.m);
from.balance -= amount;
to.balance += amount;
}
间接规避了死锁的问题 其实相当于两把锁合成一个来锁。
相当于要么同时锁上,要么等待。避免了两个上锁之间的间隔,也就避免了循环死锁问题。增加点耗时就是了,反正不出错
还有一些别的。没啥新鲜的东西,就不说了
- Managing APIs in Enterprise Systems
这个是通过visit来合并不同API的
场景是两个不同的Response,通过一个接口处理
- Optimization-Remarks
Rpass llvm-opt-report opt-viewer 三板斧
opt viewer godbolt上也集成了 https://godbolt.org/z/jG5jq7c9a
作者写了个optview2
如何看懂optview告警
Symptom | Probable cause | Action |
---|---|---|
Inlining Failure | | Add header / forceinline / increase threshold |
Clobbered by store | Aliasing | restrict / force type diff |
Clobbered by load | Escape | Attributes pure / const /noescape (typically before the remark site) |
Failed to move load loop invariant | Escape | All the above + copy to local |
其他场景 | 看不懂 | 最小代码段扔进godbolt再看 |
- The-Surprising-Complexity-of-Formatting-Ranges
介绍 fmt 占位符解析实现的。很长
- Type-Erasure-The-Implementation-Details-Klaus-Iglberger-CppCon-2022
介绍type erasure技术(function,any),这个技术大家都知道,还介绍了一些优化,比如SBO
所谓SBO就是给对象加一个数组buffer,当对象足够小,就用buffer placement new,避免系统new
代码大概这样
static constexpr size_t buffersize = 128UL;
static constexpr size_t alignment = 16UL;
alignas(alignment) std::array<std::byte,buffersize> buffer;
// Converting constructor for the small-buffer-optimised Shape wrapper.
// Wraps the concrete shape in Model<ShapeT> and placement-constructs it at the
// address returned by pimpl() instead of allocating with plain `new`.
// The static_asserts reject types that would not fit the buffer's size or
// alignment. NOTE(review): pimpl() is presumably the start of `buffer` -- it
// is not shown in this snippet.
template< typename ShapeT >
Shape( ShapeT const& x ) {
    using M = Model<ShapeT>;
    static_assert( sizeof(M) <= buffersize, "Given type is too large" );
    static_assert( alignof(M) <= alignment, "Given type is overaligned" );
    // Forward the constructor argument `x`; there is no `shape` in scope here.
    ::new (pimpl()) M( x );
}
还有就是手工绑定 manual virtual dispatch MVD
去掉虚函数,...
C++ 中文周刊 第129期
内容不多
感谢不语赞助
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-08-30 第217期
cppcon即将来临之际,c++之父BS发表重要讲话
CppCon 2023 Delivering Safe C++ -- Bjarne Stroustrup
从去年开始各路人马对c++安全性的批评让BS有点坐不住了,生闷气,小甜甜变牛夫人了属于是
BS指出,当前C++的演化方向要向着安全性发展,对比存量代码相比要更安全更没有资源释放/类型安全/内存破坏等安全性问题
希望大家锐意进取,努力为更安全的C++做出自己的贡献
文章
Did you know that C++26 added Member visit
std::visit嵌入到variant里了
// C++23
std::visit(overload{
[](int i){ std::print("i={}\n", i); },
[](std::string s){ std::print("s={:?}\n", s); }
}, value);
// C++26
value.visit(overload{
[](int i){ std::print("i={}\n", i); },
[](std::string s){ std::print("s={:?}\n", s); }
});
Semi-static Conditions in Low-latency C for High Frequency Trading: Better than Branch Prediction Hints
很有意思的点子,在高频交易场景里,if分支开销太大,冷热分支也不能预测,既然不能预测,就自己动态改
代码在这里
https://github.com/maxlucuta/semi-static-conditions/
点子真的有意思
这种场景 一般来说只能经验的使用likely,或者PGO分析一下,然后加上likely
这种运行时动态改的点子,很有意思。感觉靠谱的话可以铺开
C++ Papercuts
列举了c++的缺点
const 不是默认 很坑
function能拷贝,很坑 lambda应该默认move,转移也应该move std::move_only_function赶紧快点能用
The Little Things: The Missing Performance in std::vector
老生常谈了,用户可能不想要默认构造0,能不能提供接口省掉,类似resize_for_overwrite之类的接口
我印象中folly是实现了类似的玩意
https://github.com/facebook/folly/blob/main/folly/memory/UninitializedMemoryHacks.h
template <
typename T,
typename = typename std::enable_if<
std::is_trivially_destructible<T>::value &&
!std::is_same<T, bool>::value>::type>
void resizeWithoutInitialization(std::vector<T>& v, std::size_t n) {
if (n <= v.size()) {
v.resize(n);
} else {
if (n > v.capacity()) {
v.reserve(n);
}
detail::unsafeVectorSetLargerSize(v, n);
}
}
Building C++ "Work Contracts"
设计了一种无锁的二叉堆,结合调度设计,比简单的MPMC要快
代码在这里 https://github.com/buildingcpp/network
The new static constexpr std::integral_constant idiom
std::array::size
不是static的,但他明明可以是static的,只能猥琐绕过
template <typename Rng>
void algorithm(Rng const& rng) {
constexpr auto a = Rng::size(); // error, std::array has no static size
constexpr auto b = rng.size(); // error, not a constant expression
constexpr auto c = std::tuple_size<Rng>::value; // okay, but ugly
}
标准库也不能把size接口直接改了,有ABI问题(我觉得改了也没啥吧这也要束手束脚不至于吧)
作者讨论通过integral_constant的新能力绕过
template <typename T, T Value>
struct integral_constant {
constexpr T operator()() const {
return Value;
}
};
没错,支持operator了,那么命名一个size字段就解决了,且不用改原来的size函数
template <typename T, std::size_t N>
struct array {
constexpr std::size_t size() const {
return N;
}
static constexpr std::integral_constant<std::size_t, N> size = {};
};
彳亍
Compile-time sizes for range adaptors
承接上文,怎么适配各种各样的size?
template <typename ... Rng>
struct concat_adaptor {
constexpr auto size() const
requires (tc::has_size<Rng> && ...)
{
if constexpr ((tc::has_constexpr_size<Rng> && ...))
return std::integral_constant<std::size_t, (tc::constexpr_size<Rng>() + ...)>{};
else
return std::apply([](auto const& ... base_rng) {
return (tc::size(base_rng) + ...);
}, base_rng_tuple);
}
};
template <auto Fn, typename ... Rng>
constexpr auto compute_range_adaptor_size(Rng&&... rng) {
if constexpr ((tc::has_constexpr_size<Rng> && ...)) {
auto constexpr value = Fn(tc::constexpr_size<Rng>()...);
return std::integral_constant<std::size_t, value>{};
} else {
auto const value = Fn(tc::size(std::forward<Rng>(rng))...);
return value;
}
}
template <typename ... Rng>
struct concat_adaptor {
constexpr auto size() const
requires (tc::has_size<Rng> && ...)
{
return std::apply([](auto const& ... base_rng) {
return tc::compute_range_adaptor_size<[](auto const ... n) {
return (n + ...);
}>(base_rng...);
}, base_rng_tuple);
}
};
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第128期
欢迎投稿,推荐或自荐文章/软件/资源等
126期代码抄错,这里指正一下
consteval auto as_constant(auto value) {
return value;
}
constexpr int Calc(int x) { return 4 * x; }
// consteval int Calc(int x) { return 4 * x; }
int main() {
auto res = Calc(2);
// auto res = as_constant(Calc(2));
++res;
res = Calc(res); //编译不过
return res;
}
之前抄成consteval了,这里感谢 @fanenr 指正
另外感谢 不语
汪总
赞助
资讯
标准委员会动态/ide/编译器信息放在这里
八月提案 https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/#mailing2023-08
编译器信息最新动态推荐关注hellogcc公众号 上周更新 2023-08-16 第215期
本周没更新
cppcon 2023开始卖票
文章
Common patterns of typos in programming
代码review
memset用错,看不出问题的罚深蹲五个
int64_t FileList::VMProcess(int OpCode,
void *vParam,
int64_t iParam)
{
....
PluginInfo *PInfo = (PluginInfo *)vParam;
memset(PInfo, 0, sizeof(PInfo));
....
}
里面还有各种越界/复制粘贴错误。懒得贴了
The unexpected cost of shared pointers
一段简单的代码
static constexpr std::size_t kLineSize = (1 << 23); // 8MB
struct Line {
char _data[kLineSize];
std::mutex _mtx;
std::atomic<std::size_t> _size = 0;
};
auto line = std::make_shared<Line>();
查perf图有莫名其妙的memset
哈哈。之前咱们也提到过,数组,调用make_xx会帮你初始化,所以有memset
除了标准库给的make_unique_for_overwrite这种玩意之外,也可以定制构造函数,构造函数为空,啥也不做就行了
这个文章的标题不对了,这个其实和shared_ptr没啥关系,unique_ptr也有,本质是调用构造函数的问题,默认构造函数的问题
How to Use Monadic Operations for std::optional in C++23
std::optional<UserProfile> fetchFromCache(int userId);
std::optional<UserProfile> fetchFromServer(int userId);
std::optional<int> extractAge(const UserProfile& profile);
int main() {
const int userId = 12345;
const auto ageNext = fetchFromCache(userId)
.or_else([&]() { return fetchFromServer(userId); })
.and_then(extractAge)
.transform([](int age) { return age + 1; });
if (ageNext)
cout << format("Next year, the user will be {} years old", *ageNext);
else
cout << "Failed to determine user's age.\n";
}
就是介绍这几个新api的用法
其实看一下代码就懂了
C/C++ performance pitfall: int8_t, aliasing and the ways out
还是老生常谈的 char*歧义,导致不能充分优化,要么就restrict,要么就换成 别的类型来操作
别的类型,可以是你自己封装一下char,也可以是char8_t,_BitInt(8) 别用vector<int8_t> std::byte, 没用,背后还是char,
以下两篇文章来自CppMore,大家也可以关注cppMore公众号
Compile time dispatching in C++20
用fixed_string做类型tag,tag有必要这么花哨吗
Monads in Modern C++, What, Why, and How
手把手教你熟悉monad,熟悉optional/expected/range,读者反馈讲的特好
其实就是状态链式流转,不知道为啥链式调用看起来很叼
现代C++学习——更好的单例模式
直接看代码吧,其实c++11的东西
// Singleton holder: Singleton<T>::get() always hands back a reference to the
// same function-local static T, value-initialised on first use.
template <class T>
struct Singleton {
    // Access point for the single shared T.
    static T &get() {
        static T instance{};
        return instance;
    }

    Singleton() = default;
    ~Singleton() = default;

    // The holder itself can be neither copied nor moved.
    Singleton(const Singleton &) = delete;
    Singleton(Singleton &&) = delete;
    Singleton &operator=(const Singleton &) = delete;
    Singleton &operator=(Singleton &&) = delete;
};
Compile time string literal concatenation (or how to make your compiler cry)
fixed_string怎么连接????
硬拷呗
template <typename char_type, std::size_t N, std::size_t M>
constexpr auto concat_fixed_string(basic_fixed_string<char_type, N> l,
basic_fixed_string<char_type, M> r) noexcept {
basic_fixed_string<char_type, N + M> result;
auto it{ std::copy(l.begin(), l.end(), result.begin()) };
it = std::copy(r.begin(), r.end(), it);
*it = {};
return result;
}
代码来自 https://github.com/arturbac/small_vectors/blob/master/include/coll/basic_fixed_string.h
或者体验这个 c++20的版本 https://godbolt.org/z/Gdfnsf8Pa 也是硬来
C++ 异常与 longjmp
看个乐
Phantom and indulgent shared pointers
介绍shared_ptr各种转换之后的内存问题,控制块和实际内存块占用关系/判定
我的评价是就当不知道这个事儿吧,看了迷糊
On writing loops in PPL and continuation-passing style, part 1
On writing loops in PPL and continuation-passing style, part 2
On writing loops in PPL and continuation-passing style, part 3
看得我眼睛疼
User-defined class qualifiers in C++23
利用boost::mp11来做tag
#include <boost/mp11/algorithm.hpp>
#include <boost/mp11/list.hpp>
#include <type_traits>
template<typename T,typename... Qualifiers>
struct access: T
{
using qualifier_list=boost::mp11::mp_list<Qualifiers...>;
using T::T;
};
template<typename T, typename... Qualifiers>
concept qualified =
(boost::mp11::mp_contains<
typename std::remove_cvref_t<T>::qualifier_list,
Qualifiers>::value && ...);
// some qualifiers
struct mut;
struct synchronized;
template<typename T>
concept is_mut = qualified<T, mut>;
template<typename T>
concept is_synchronized = qualified<T, synchronized>;
struct X
{
void foo() {}
template<is_mut Self>
void bar(this Self&&) {}
template<is_synchronized Self>
void baz(this Self&&) {}
template<typename Self>
void qux(this Self&&)
requires qualified<Self, mut, synchronized>
{}
};
// Demonstrates which member functions of X are callable depending on the
// qualifier list carried by the access<> wrapper. Lines marked "error" are
// expected to be rejected by the concepts declared earlier in the snippet.
int main()
{
    // Carries only `mut`: bar() is allowed, baz()/qux() need `synchronized`.
    access<X, mut> x;
    x.foo();
    x.bar();
    x.baz(); // error: associated constraints are not satisfied
    x.qux(); // error: associated constraints are not satisfied

    // A plain X has no qualifier list: only the unconstrained foo() is usable.
    X y;
    y.foo(); // was x.foo(), which never exercised `y`
    y.bar(); // error: associated constraints are not satisfied

    // Carries both qualifiers: every constrained member is callable.
    access<X, mut, synchronized> z;
    z.bar();
    z.baz();
    z.qux();
}
我觉得tag挺简单的,怎么看大家都实现的这么复杂
Transcoding Latin 1 strings to UTF-8 strings at 18 GB/s using AVX-512
SIMD时间
常规
unsigned char byte = data[pos];
if ((byte & 0x80) == 0) { // if ASCII
// will generate one UTF-8 byte
*utf8_output++ = (char)(byte);
pos++;
} else {
// will generate two UTF-8 bytes
*utf8_output++ = (char)((byte >> 6) | 0b11000000);
*utf8_output++ = (char)((byte & 0b111111) | 0b10000000);
pos++;
}
SIMD
__mmask32 nonascii = _mm256_movepi8_mask(input);
__mmask64 sixth =
_mm512_cmpge_epu8_mask(input, _mm512_set1_epi8(-64));
const uint64_t alternate_bits = UINT64_C(0x5555555555555555);
uint64_t ascii = ~nonascii;
uint64_t maskA = ~_pdep_u64(ascii, alternate_bits);
uint64_t maskB = ~_pdep_u64(ascii>>32, alternate_bits);
// interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD)
__m512i input_interleaved = _mm512_permutexvar_epi8(_mm512_set_epi32(
0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818,
0x37173616, 0x35153414, 0x33133212, 0x31113010,
0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808,
0x27072606, 0x25052404, 0x23032202, 0x21012000
), input);
// double size of each byte, and insert the leading byte
__m512i outputA = _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8);
outputA = _mm512_mask_add_epi16(outputA, (__mmask32)sixth, outputA, _mm512_set1_epi16(1 - 0x4000));
__m512i leadingB = _mm512_mask_blend_epi16((__mmask32)(sixth>>32), _mm512_set1_epi16(0x00c2), _mm512_set1_epi16(0x40c3));
__m512i outputB = _mm512_ternarylogic_epi32(input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00), (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB
// prune redundant bytes
outputA = _mm512_maskz_compress_epi8(maskA, outputA);
outputB = _mm512_maskz_compress_epi8(maskB, outputB);
眼睛花了
代码在这里 https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2023/08/18
视频
- What is Low Latency C++? (Part 1) - Timur Doumler - CppNow 2023
- What is Low Latency C++? (Part 2) - Timur Doumler - CppNow 2023
太长了没看完。周末总结一下
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
- https://github.com/fmtlib/fmt/releases/tag/10.1.0 fmt 速度更快了。推荐升级
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第127期
内容不多
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-08-16 第215期
boost 1.83发布,最后一个支持c++03的版本
https://www.boost.org/users/history/version_1_83_0.html
重点发布就是boost::concurrent_flat_map
Unordered:
Major update.
Added boost::concurrent_flat_map, a fast, thread-safe hashmap based on open addressing.
Sped up iteration of open-addressing containers.
In open-addressing containers, erase(iterator), which previously returned nothing, now returns a proxy object convertible to an iterator to the next element. This enables the typical it = c.erase(it) idiom without incurring any performance penalty when the returned proxy is not used.
文章
Did you know that C++26 std.format added formatting pointers ability
int main() {
auto i = 42;
std::cout << std::format("{:#018X}", reinterpret_cast<uintptr_t>(&i)); // prints 0X00007FFD9D71776C
}
有点用,但也不是很多
模板元编程的精妙之--2/16进制字面量转换为编译期字符串
asio::const_buffer b5 = 0xaB_buf;
ASIO_CHECK(b5.size() == 1);
ASIO_CHECK(memcmp(b5.data(), "\xab", 1) == 0);
asio::const_buffer b6 = 0xABcd_buf;
ASIO_CHECK(b6.size() == 2);
ASIO_CHECK(memcmp(b6.data(), "\xab\xcd", 2) == 0);
asio::const_buffer b7 = 0x01ab01cd01ef01ba01dc01fe_buf;
ASIO_CHECK(b7.size() == 12);
ASIO_CHECK(memcmp(b7.data(),
"\x01\xab\x01\xcd\x01\xef\x01\xba\x01\xdc\x01\xfe", 12) == 0);
编译期hex转字符串。很妙
代码在这里 https://github.com/chriskohlhoff/asio/blob/master/asio/include/asio/buffer.hpp#L2743
Inside STL: Smart pointers
shared_ptr有额外的计数信息, 大概这样
struct control_block {
virtual void Dispose() = 0;
virtual void Delete() = 0;
std::atomic<unsigned long> shareds;
std::atomic<unsigned long> weaks;
};
template<typename T>
struct shared_ptr {
T* object;
control_block* control;
};
template<typename T>
struct weak_ptr {
T* object;
control_block* control;
};
unique_ptr比较简单,就是指针+deleter,然后空基类优化(compressed_pair)
Inside STL: The shared_ptr constructor vs make_shared
讲智能指针的内存布局
shared_ptr是有额外的信息的,这部分信息需要一个分配
auto p = std::shared_ptr<S>(new S());
auto p = std::make_shared<S>();
这两种构造,第一种,由于是接管,S的内存和shared_ptr内部信息不是连续的,这对局部性缓存是不友好的
Inside STL: The shared_ptr constructor and enable_shared_from_this
enable_shared_from_this怎么实现的?大概思路就是weak_ptr
template<typename T>
struct enable_shared_from_this {
using esft_detector = enable_shared_from_this;
std::weak_ptr<T> weak_this;
std::weak_ptr<T> weak_from_this()
{ return weak_this; }
std::shared_ptr<T> shared_from_this()
{ return weak_this.lock(); }
};
weak_this由谁来赋值?肯定是shared_ptr啦
template<typename T, typename D>
struct shared_ptr {
shared_ptr(T* ptr)
{
... do the usual stuff ...
/* Here comes enable_shared_from_this magic */
if constexpr (supports_esft<T>::value) {
using detector = T::esft_detector;
ptr->detector::weak_this = *this;
}
}
... other constructors and stuff ...
};
只要enable_shared_from_this实现 esft_detector就行了,类似这样
// Detection trait: does T expose a nested `esft_detector` type?
// Primary template -- the answer is "no" unless the specialisation below
// is viable.
template<typename T, typename = void>
struct supports_esft : std::false_type {};

// Partial specialisation selected only when `typename T::esft_detector` is
// well-formed (std::void_t maps it to void, matching the defaulted second
// parameter). A partial specialisation is introduced with plain `struct`;
// the stray `inline bool` that was here made the declaration ill-formed.
template<typename T>
struct supports_esft<T,
    std::void_t<typename T::esft_detector>>
    : std::true_type {};
这样继承的类都有特化的shared_ptr构造
C++23: mdspan
编译器还没加上这个能力,可以用这个体验 https://github.com/kokkos/mdspan,在线 https://godbolt.org/z/Mxa7cej1a
之前也讲过很多次了,直接贴代码吧
std::array numbers {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
stdex::mdspan<int, stdex::extents<int, 2, 5>, stdex::layout_right> mdspanOfNumbers {numbers.data()};
for (size_t rowIndex=0; rowIndex < mdspanOfNumbers.extent(0); ++rowIndex) {
for (size_t columnIndex=0; columnIndex < mdspanOfNumbers.extent(1); ++columnIndex) {
std::cout << mdspanOfNumbers[rowIndex, columnIndex] << ' ';
}
std::cout << '\n';
}
/*
1 2 3 4 5
6 7 8 9 10
*/
std::array numbers {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
stdex::mdspan<int, stdex::extents<int, 2, 5>, stdex::layout_left> mdspanOfNumbers {numbers.data()};
for (size_t columnIndex=0; columnIndex < mdspanOfNumbers.extent(0); ++columnIndex) {
for (size_t rowIndex=0; rowIndex < mdspanOfNumbers.extent(1); ++rowIndex) {
std::cout << mdspanOfNumbers[columnIndex, rowIndex] << ' ';
}
std::cout << '\n';
}
/*
1 3 5 7 9
2 4 6 8 10
*/
Transcoding UTF-8 strings to Latin 1 strings at 18 GB/s using AVX-512
SIMD时间
常规
uint8_t leading_byte = data[pos]; // leading byte
if (leading_byte < 0b10000000) {
*latin_output++ = leading_byte;
pos++;
} else if ((leading_byte & 0b11100000) == 0b11000000) {
*latin_output++ = (leading_byte & 1) << 6 | (data[pos + 1]);
pos += 2;
}
simd
__m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
__mmask64 leading = _mm512_cmpge_epu8_mask(input, _mm512_set1_epi8(-64));
__mmask64 bit6 = _mm512_mask_test_epi8_mask(leading, input, _mm512_set1_epi8(1));
input = _mm512_mask_sub_epi8(input, (bit6<<1) | next_bit6, input, _mm512_set1_epi8(-64));
next_bit6 = bit6 >> 63;
__mmask64 retain = ~leading;
__m512i output = _mm512_maskz_compress_epi8(retain, input);
int64_t written_out = _popcnt64(retain);
__mmask64 store_mask = (1ULL << written_out) - 1;
_mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output);
完整代码 https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2023/08/11
Some C++20 ranges aren’t const-iterable
range的坑,没有完全零开销,需要拷贝
Writing custom C++20 coroutine systems
手把手教你写协程
Parameter Passing in Flux versus Ranges
他也写了个range库 flux,对比了一下和range的差异,优缺点
How to convert an enum to string in C++
推荐使用magic_enum
视频
CppNow基本上一周出几个视频,列个有意思的
The New C++ Library: Strong Library Foundation for Future Projects - Jonathan Müller & Arno Schödl
ppt在这里 https://www.jonathanmueller.dev/talk/think-cell-library/
代码在这里 https://github.com/think-cell/think-cell-library
非常通用的组件介绍
比如std::exchange
template <typename T>
class my_smart_ptr {
T* _ptr;
public:
my_smart_ptr(my_smart_ptr&& other) noexcept
: _ptr(other._ptr) {
other._ptr = nullptr;
}
};
用上exchange
template <typename T>
class my_smart_ptr {
T* _ptr;
public:
my_smart_ptr(my_smart_ptr&& other) noexcept
: _ptr(std::exchange(other._ptr, nullptr))
{}
};
这种语义是upsert,不只指针,其他value也可以这样优化,代码干净
所以实现了tc::change
void tc::optional<T>::reset() {
if (_has_value) {
_has_value = false;
value().~T();
}
}
使用tc::change
void tc::optional<T>::reset() {
if (tc::change(_has_value, false)) {
value().~T();
}
}
为啥为了省一个if这么麻烦?,举个例子,异常+重入
void foo1() {
…
if (dirty) {
clean();
dirty = false;
}
…
}
void foo2() {
…
if (tc::change(dirty, false)) {
try {
clean();
} catch (...) {
dirty = true;
throw;
}
}
…
}
foo2比foo1更健壮点,避免了同时clean的场景
假如多个线程都在clean,而clean时间较长,dirty更新不及时,就更新了多次
foo2就避免了这种情况
还有一些range string的就不列了,感兴趣的可以看看
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
工作招聘
有没有数据库相关的工作推荐我一下,我要失业了快(能远程更好)
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第125期
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-08-02 第213期
文章
写的不错
#include <ranges>
#include <vector>
#include <iostream>
int main() {
std::vector<int> r = {1, 2, 3, 4, 5};
auto reversed = r | std::views::reverse;
for (auto i : reversed)
std::cout << i << " ";
// same as:
//for (auto i : r | std::views::reverse)
//std::cout << i << " ";
std::cout << '\n';
std::ranges::reverse_view rv(r);
for (auto i : rv)
std::cout << i << " ";
}
探讨这玩意是怎么实现的
#include <ranges>
#include <vector>
#include <iostream>
int main() {
std::vector r = {1, 2, 3, 4, 5};
auto reversed = r | std::views::reverse;
for (auto i : reversed)
std::cout << i << " ";
// same as:
//for (auto i : r | std::views::reverse)
//std::cout << i << " ";
std::cout << '\n';
std::ranges::reverse_view rv(r);
for (auto i : rv)
std::cout << i << " ";
}
帮你把结构体组成的数组这种行存转换为数组结构体模式列存
是代码帮你生成tuple。有点繁琐
static_assert(false, std::string_view{"message"});
既然能string_view 也就可以fmt
没啥说的。能省点空间
// Two-member aggregate whose fields are both marked [[no_unique_address]],
// which permits the compiler to give an empty-class member no storage of its
// own, so the pair can be smaller than a plain two-field struct.
template<class T1, class T2>
struct compressed_pair {
    [[no_unique_address]] T1 first;
    [[no_unique_address]] T2 second;
};
介绍string对小字符串做的优化,类似clang::string的实现分析
光速入门
亮点还是扩容不需要迁移,很有意思
看不懂
视频
meeting cpp推荐了十个,我把我感兴趣的列一下
大概意思是利用constexpr算某些场景比Eigen快
Filipp Gelman - What I learned From Sockets
讲了一堆设计。没有代码,听个乐
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
std::embed没有之前能对付用的替代品
这哥们写了个find,和bfs一个原理,并发宽度优先遍历,find默认是dfs。
实际效果比find快百倍。很有意思。可以装一个玩玩
一个fat pointer实现
一个实现了work steal的线程池
关于 work steal,道理都懂,可能还需要看看https://www.youtube.com/watch?v=iLHNF7SgVN4&ab_channel=CppCon 了解下
工作招聘
有没有可以远程的工作可以推荐给我,我也快失业了
API Design
最近群里收集了一些想要讨论的点子,大家比较关注API设计,我这里把一些资料放在这里。可能后面会做个视频
https://www.youtube.com/watch?v=zL-vn_pGGgY&ab_channel=CppCon
https://www.youtube.com/watch?v=2UmDvg5xv1U&ab_channel=CppNow
https://www.acodersjourney.com/top-25-cplusplus-api-design-mistakes-and-how-to-avoid-them/
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第124期
欢迎投稿,推荐或自荐文章/软件/资源等
感谢 振羽
不语
赞助
资讯
标准委员会动态/ide/编译器信息放在这里
七月邮件列表
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/#mailing2023-07
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-07-26 第212期
文章
有点意思
struct foo {
auto bar(int v) { return v; }
};
static_assert(42 == std::bind_front<&foo::bar>(foo{}, 42));
不懂啥意思
讲浮点数压缩的,没看懂。这里标记个TODO后面研究一下
SIMD时间,这回不贴代码了。https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2023/07/27
作者想把
constexpr std::tuple<int, double, int, double, float> { 1, 2.0, 1, 3.0, 2.0 }
变成 constexpr std::tuple<int, double, double, float> { 1, 2.0, 3.0, 2.0 }
简单方案就是boost::mp_list
,或者看这个 https://stackoverflow.com/questions/55941964/how-to-filter-duplicate-types-from-tuple-c
但作者想要的是如果值相等才把类型吃掉,有点点难
直接贴代码吧,我看不懂,作者推导了半天
#include <functional>
#include <tuple>
template<typename T1, typename T2>
consteval bool JMEq(const T1& v1, const T2& v2) {
if constexpr (!std::is_same_v<T1, T2>)
return false;
else
return v1 == v2;
}
template<const auto& F>
constexpr auto Nub() {
constexpr auto tup = F();
constexpr auto indices = std::make_index_sequence<std::tuple_size_v<decltype(tup)>> {};
return [&]<std::size_t... Ix>(std::index_sequence<Ix...>)
{
return std::tuple_cat ([&]
{
constexpr auto index = Ix;
constexpr auto element = std::get<index>(tup);
if constexpr (((JMEq(element, std::get<Ix>(tup)) && Ix < index) || ...))
return std::tuple {};
else
return std::tuple { element };
} ()...);
} (indices);
}
constexpr auto structuralize(auto tuple){
return std::apply([]<typename... Args>(Args... args) { return ST<Args...>(args...); }, tuple);
}
constexpr std::tuple<int, double, int, double, float> input { 1, 2.0, 1, 3.0, 2.0 };
constexpr std::tuple<double, int, double, float> expected { 2.0, 1, 3.0, 2.0 };
constexpr auto actual = Nub<structuralize(input)>();
static_assert(expected == actual);
简单说就是lambda是对象,有时候不捕获的lambda也是对象,和函数指针差不多,太浪费了,于是引入了static lambda,static operator
auto isEven = [](int i) static {return i % 2 == 0;};
如果捕获会报错
// ERROR: 'static' lambda specifier with lambda capture
auto isDivisableBy = [operand](int i) static {return i % operand == 0;};
其实static operator[]原因也差不多。代码就不列举了
Perfect forwarding forwards objects, not braced things that are trying to become objects
forward对于 initializer_list对象行不通,initializer_list真该死啊
template<typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args){
return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
}
这样就不行
struct Point {
int x, y;
};
struct Segment {
Segment(Point p1, Point p2);
};
void test() {
// This works
Segment s({ 1, 1 }, { 2, 2 });
// This doesn't
auto p = std::make_unique<Segment>(
{ 1, 1 }, { 2, 2 });
}
封装一层吧
struct Segment {
Segment(Point p1, Point p2);
template<typename Arg1 = Point,
typename Arg2 = Point>
static std::unique_ptr<Segment> make_unique(
Arg1&& p1, Arg2&& p2) {
return std::make_unique<Segment>(
std::forward<Arg1>(p1),
std::forward<Arg2>(p2));
}
};
这样就行了
- Why does IAsyncAction or IAsyncOperation.GetResults() produce a E_ILLEGAL_METHOD_CALL
- On the various ways of creating Windows Runtime delegates in C++/WinRT and C++/CX
讲winrt的。不说了
视频
本周视频很多 cppnow 2023来了。基本上讲的是今年cppcon的前瞻内容
这个是之前他写的博客,直接做成视频讲了一遍,就是讲用tag dispatch替换switch加速的
周末有空我就传一下
这个华人哥们讲的也有点意思
介绍numa的,有点意思
敏感字符串过滤?hash绕过
感觉之前说过,还是布局之类的。没有细看
这个教程也不错,手把手带你了解协程以及一个task模型
讲高频交易的,很干货。值得一看
讲基于epoch的内存回收的。epoch推进技术其实已经不是新东西了。到处都可见,或多或少要了解一下。了解背景之后值得看看
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
- https://github.com/bloomberg/blazingmq 有点意思
- mold 2.0发布 https://github.com/rui314/mold/releases/tag/v2.0.0 之前商业化license发展不是很顺利,又改成MIT了,寻求赞助 开源真难搞啊
工作招聘
- 求不需要算法题笔试的可以远程的工作,我的邮箱[email protected]
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第123期
欢迎投稿,推荐或自荐文章/软件/资源等
本周内容不多
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-07-19 第211期
文章
Inside boost::concurrent_flat_map
boost 1.83会有个 boost::concurrent_flat_map
, 这篇文章带你了解设计思路。还是开地址法,还并发,还快
测了一下是吊锤tbb的。没测和folly::ConcurrentHashMap的比较
其实这个文章可以展开讲讲
浏览器安卓端崩溃最终怀疑是CPU的问题
总之就是利用CPU流水线先走一个快速路径,再检查条件,也就是所谓的分支预测
你可能想问了,我能不能让CPU别预测,别显得你多聪明了老老实实算就完了
__builtin_unpredictable() https://clang.llvm.org/docs/LanguageExtensions.html#builtin-unpredictable
基于async_simple的。有点意思
了解一下PGO流程
看个热闹
手把手教你写个grpc server
SIMD时间,字符转数字
常规, 一个一个比
if (ch >= '0' && ch <= '9')
d = ch - '0';
else if (ch >= 'A' && ch <= 'V')
d = ch - 'A' + 10;
else if (ch >= 'a' && ch <= 'v')
d = ch - 'a' + 10;
else
return -1;
进化版本,唉我会打表了
// Table-driven base32hex decoder.
//
// Reads characters from `src` in groups of up to eight, packs their 5-bit
// values into one 40-bit group and emits that group as 5 bytes at `dst`.
// Decoding stops at the first byte whose table entry is above 31.
//
// Returns the number of source bytes read. The byte that stopped decoding is
// included in that count because `src` is advanced before the validity check.
//
// Every group is stored with an 8-byte memcpy while `dst` only advances by 5,
// so the output buffer needs room for 3 bytes beyond the last decoded group.
size_t base32hex_simple(uint8_t *dst, const uint8_t *src) {
// Lookup table indexed by byte value: '0'-'9' give 0-9, 'A'-'V' and 'a'-'v'
// give 10-31, every other byte gives the sentinel 32 (invalid).
static const uint8_t table[256] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 32, 32, 32, 32, 32, 32,
32, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
};
bool valid = true;
const uint8_t *srcinit = src;
do {
uint64_t r = 0;
// Gather up to eight 5-bit values (40 bits in total) into r.
for (size_t i = 0; i < 8; i++) {
uint8_t x = table[*src];
src++;
if (x > 31) {
// Invalid byte after i good ones: shift the 5*i bits gathered so far up to
// the position they would have in a full 40-bit group, then finish.
r <<= (5 * (8 - i));
valid = false;
break;
}
r <<= 5;
r |= x;
}
// Reverse the byte order and drop the three unused bytes so that the memcpy
// below emits the most significant byte of the 40-bit group first.
// NOTE(review): this ordering assumes a little-endian host - confirm.
r = (unsigned long int)_bswap64((long long int)r);
uint64_t rs = ((uint64_t)r >> (3 * 8));
// Writes 8 bytes although only 5 carry data; the next group overwrites the
// extra 3.
memcpy(dst, (const char *)&rs, 8);
dst += 5;
} while (valid);
return (size_t)(src - srcinit);
}
超神版本 SIMD直接贴代码
// SSE base32hex decoder: handles 16 input characters (10 output bytes) per
// iteration and stops at the first character outside 0-9 / A-V / a-v.
//
// Returns the number of valid source bytes consumed. Unlike the scalar
// version, the terminating byte is not counted (src advances by `length`).
//
// Every iteration loads 16 bytes from `src` and, unless the very first byte
// is invalid, stores 16 bytes to `dst`, so both buffers need that much slack.
size_t base32hex_simd(uint8_t *dst, const uint8_t *src) {
bool valid = true;
// Per-high-nibble offsets. Adding delta_check[c >> 4] to a character c leaves
// the top bit clear only when c is a valid base32hex character.
const __m128i delta_check =
_mm_setr_epi8(-16, -32, -48, 70, -65, 41, -97, 9, 0, 0, 0, 0, 0, 0, 0, 0);
// Per-high-nibble offsets that turn a valid character into its value:
// '0'-'9' -> 0-9 (-48), 'A'-'V' -> 10-31 (-55), 'a'-'v' -> 10-31 (-87).
const __m128i delta_rebase =
_mm_setr_epi8(0, 0, 0, -48, -55, -55, -87, -87, 0, 0, 0, 0, 0, 0, 0, 0);
const uint8_t *srcinit = src;
do {
__m128i v = _mm_loadu_si128((__m128i *)src);
// High nibble of every byte, used as the index into the two tables above.
__m128i hash_key = _mm_and_si128(_mm_srli_epi32(v, 4), _mm_set1_epi8(0x0F));
__m128i check = _mm_add_epi8(_mm_shuffle_epi8(delta_check, hash_key), v);
v = _mm_add_epi8(v, _mm_shuffle_epi8(delta_rebase, hash_key));
// One bit per input byte; a set bit marks an invalid character.
unsigned int m = (unsigned)_mm_movemask_epi8(check);
if (m) {
// Number of valid characters in front of the first invalid one.
int length = __builtin_ctz(m);
if (length == 0) {
// Nothing decodable in this block: stop without writing anything.
break;
}
src += length;
// zero_masks is defined outside this excerpt; presumably 16 zero bytes
// followed by 16 0xFF bytes, so that this load yields a mask selecting the
// bytes at positions >= length - TODO confirm.
__m128i zero_mask =
_mm_loadu_si128((__m128i *)(zero_masks + 16 - length));
// Clear the values that came from the invalid tail.
v = _mm_andnot_si128(zero_mask, v);
valid = false;
} else { // common case
src += 16;
}
// Merge adjacent 5-bit values: each 16-bit lane becomes first * 32 + second.
v = _mm_maddubs_epi16(v, _mm_set1_epi32(0x01200120));
// Merge adjacent 10-bit values into 32-bit lanes using power-of-two weights.
v = _mm_madd_epi16(
v, _mm_set_epi32(0x00010400, 0x00104000, 0x00010400, 0x00104000));
// ...00000000`0000eeee`efffffgg`ggghhhhh`00000000`aaaaabbb`bbcccccd`dddd0000
v = _mm_or_si128(v, _mm_srli_epi64(v, 48));
// Gather the ten payload bytes into the low bytes of the register in output
// order.
v = _mm_shuffle_epi8(
v, _mm_set_epi8(0, 0, 0, 0, 0, 0, 12, 13, 8, 9, 10, 4, 5, 0, 1, 2));
/* decoded 10 bytes... but write 16 cause why not? */
_mm_storeu_si128((__m128i *)dst, v);
dst += 10;
} while (valid);
return (size_t)(src - srcinit);
}
还有SWAR版本,我直接贴仓库链接,不贴代码了 https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2023/07/20/src/base32.c
- How to clone a Windows Runtime map in the face of possible concurrent modification, part 2
- How to clone a Windows Runtime map in the face of possible concurrent modification, part 1
- How to clone a Windows Runtime vector in the face of possible concurrent modification, part 4
- Cloning a Windows Runtime vector in the face of possible concurrent modification, denial of service?
- How to clone a Windows Runtime vector in the face of possible concurrent modification, part 3
Raymond Chen的Windows时间,看不懂
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
- https://github.com/jgaa/glad 一个基于ASIO的cache server。看个乐呵
- https://github.com/jgaa/nsblast 一个dns server
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论
C++ 中文周刊 第122期
欢迎投稿,推荐或自荐文章/软件/资源等
资讯
标准委员会动态/ide/编译器信息放在这里
#include cleanup in Visual Studio
支持提示删除没用的头文件,我记得clion是不是早就支持了?
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-07-12 第210期
支持grpc-plugin了,那么别的支持也能实现,感觉更强了,可以用大项目试试
文章
// NOTE(review): these three declarations appear to showcase the characters
// `$`, `@` and '`' at the start of a declarator; which language revision or
// proposal they illustrate is not visible in this excerpt - confirm against
// the linked article.
auto $dollar_sign = 42;
auto @commerical_at = 42;
auto `grave_accent = 42;
没懂有啥用
- 字符串截断问题
在某群和群友讨论 std::string能不能存带 '\0'的字符,我之前遇到过坑,就想当然地认为不能,然后被群友教育了一种用法
#include <iostream>
#include <vector>
#include <string>
// Demonstrates which std::string construction paths keep embedded '\0'
// characters and which ones stop at the first '\0'.
int main() {
// s1: built from a const char* without a length, so the contents end at the
// first '\0' of the literal.
std::string s1 = std::string{"\0\0\0\0\0\0\0\0\0\0\0\0\0\0123"};
std::cout<<"---\n";
std::cout<< s1.size()<<"\n";
std::cout<<"---\n";
// s2: append(const char*) also treats its argument as a C string, so
// appending "\0" adds nothing and only the "1" ends up in the string.
std::string s2;
s2.append("\0");
s2.append("1");
std::cout<<"---\n";
std::cout<< s2.size()<<"\n";
std::cout<< s2<<"\n";
std::cout<<"---\n";
// s3: the iterator-range constructor copies every element, including the
// '\0' characters.
std::vector<char> v = {'\0','\0','2','3','\0','5','6'};
std::string s3(v.begin(), v.end());
std::cout<<"---\n";
std::cout<< s3.size()<<"\n";
std::cout<< s3<<"\n";
std::cout<<"---\n";
// s4: the (pointer, length) constructor copies exactly 17 characters, so the
// embedded '\0' characters are preserved.
std::string s4 = std::string{"\0\0\0\0\0\0\0\0\0\0\0\0\0\0a23", 17};
std::cout<<"---\n";
std::cout<< s4.size()<<"\n";
std::cout<< s4<<"\n";
std::cout<<"---\n";
return 0;
}
s1 s2都是截断的,但是s3不是。想要std::string包含\0,可以通过迭代器构造,不能从不带长度的c字符串构造,因为c字符串复制时用\0判断结尾
同理,s4构造时加上了长度,就能包含\0,不会截断了。这是c字符串的缺陷:没有长度信息
我这里先入为主觉得所有构造都这样了。稍为想的远一点
boost::concurrent_flat_map
开放寻址的并发hashmap,速度不输tbb,boost 1.83发布
有点意思
经典的友元函数注入
// Friend-injection "type loophole".
//
// Instantiating loophole_t<T, N> defines the namespace-scope function
// loophole(tag<N>) with a deduced return type of T. Once that has happened,
// decltype(loophole(tag<N>{})) recovers T without naming it.

// Distinct empty type per N; used as the key that selects an overload.
template<int N> struct tag{};

template<typename T, int N>
struct loophole_t {
    // Defined (not merely declared) inside the class: the definition comes
    // into existence when this specialization is instantiated.
    friend auto loophole(tag<N>) { return T{}; }
};

// Declaration only; the return type is unknown until some loophole_t<T, 0>
// has been instantiated.
auto loophole(tag<0>);

// sizeof needs a complete type and therefore instantiates
// loophole_t<std::string, 0>. A bare expression statement is not allowed at
// namespace scope, so the sizeof is wrapped in a static_assert.
static_assert(sizeof(loophole_t<std::string, 0>) > 0, "instantiate loophole_t");

// The injected definition now makes the return type deducible.
static_assert(std::is_same<std::string, decltype(loophole(tag<0>{})) >::value, "same");
这玩意属于缺陷,说不定以后就修了,为啥要讲这个老文章?看下面这个
看代码
// Conceptify it (requires C++20).
// NOTE(review): `te` is a library namespace that is not defined in this
// excerpt; the comments below only describe what the visible code does.

// Interface description: draw() routes through te::call with a lambda whose
// trailing return type requires `self.draw(out)` to be a valid expression.
struct Drawable {
void draw(std::ostream &out) const {
te::call([](auto const &self, auto &out)
-> decltype(self.draw(out)) { self.draw(out); }, *this, out);
}
};
// A concrete type offering a matching draw(); it does not derive from
// Drawable.
struct Square {
void draw(std::ostream &out) const { out << "Square"; }
};
// Accepts any type for which te::conceptify<Drawable> is satisfied.
template<te::conceptify<Drawable> TDrawable>
void draw(TDrawable const &drawable) {
drawable.draw(std::cout);
}
int main() {
auto drawable = Square{};
draw(drawable); // prints Square
}
我咋感觉te::conceptify<Drawable>
看上去和实现concept没区别?
这种实现能定义te::poly<Drawable>
保存在容器里然后遍历?
实现原理是通过一个mapping类注册类型和typelist,typelist绑上lambda, 我说的不精确,可以看原文
call注册
// Abridged sketch: `...` marks parts the article left out, so this is not
// compilable as shown.

// Public entry point; hands the work over to detail::call_impl<I>.
template <...>
constexpr auto call(const TExpr expr, const I &interface, Ts &&... args) {
...
return detail::call_impl<I>(...);
}
// Creates a temporary mappings<I, N>::set<type_list<TExpr, Ts...>>. The value
// is discarded (hence the void cast); the point is the instantiation, which
// associates the expression type and argument types with (I, N).
template <...>
constexpr auto call_impl(...) {
void(typename mappings<I, N>::template set<type_list<TExpr, Ts...> >{});
return ...;
}
mapping是这样的
// Compile-time slot keyed by (type, index) that stores a type via friend
// injection.
template <class, std::size_t>
struct mappings final {
// Declared with a deduced return type and no definition; the type it returns
// is not known until a `set<T>` for this slot is instantiated.
friend auto get(mappings);
// Instantiating set<T> supplies the definition of get(mappings), fixing its
// return type to T.
template <class T>
struct set {
friend auto get(mappings) { return T{}; }
};
};
通过这个友元注入get 来记住T类型,也就是之前保存的typelist,构造出lambda
// Declaration only (used in unevaluated contexts). Well-formed when TExpr has
// a call-operator template that can be instantiated as operator()<T, Ts...>.
template <class T, class TExpr, class... Ts>
constexpr auto requires_impl(type_list<TExpr, Ts...>)
-> decltype(&TExpr::template operator()<T, Ts...>);
// For every index Ns, reads back the type stored in mappings<T, Ns + 1> via
// get(...) and feeds it to the overload above, checking it against I.
template <class I, class T, std::size_t... Ns>
constexpr auto requires_impl(std::index_sequence<Ns...>) -> type_list<
decltype(requires_impl<I>(decltype(get(mappings<T, Ns + 1>{})){}))...>;
} // namespace detail
// Satisfied when detail::requires_impl<I, T> is well-formed for an index
// sequence of length detail::mappings_size<T, I>().
// Note: `concept bool` is the Concepts TS spelling; standard C++20 writes
// `concept` without `bool`.
template <class I, class T>
concept bool conceptify = requires {
detail::requires_impl<I, T>(
std::make_index_sequence<detail::mappings_size<T, I>()>{});
};
挺巧妙,友元函数注入+typelist绑定
问题在于lambda每次都是构造的,可能有小对象问题,但愿编译器能优化掉
find还要判断结果,很烦,类似optional,封装一下
#include <iostream>
#include <vector>
#include <algorithm>
#include <optional>
namespace cwt {
    /// Outcome of a search: either holds the value that was found or is empty.
    /// Offers a small fluent interface so callers can attach a "found" and a
    /// "not found" handler without inspecting the result themselves.
    template<typename T>
    class find_result {
    public:
        /// Creates an empty result ("nothing found").
        find_result() = default;

        /// Creates a result holding a copy of `value`.
        find_result(T value) : m_value{value} {}

        /// Invokes `func` with the stored value if there is one.
        ///
        /// `func` is a forwarding reference, so temporaries (lambdas written
        /// inline) and named callables are both accepted.
        ///
        /// @return `*this`, so that or_else() can be chained. The reference is
        ///         only valid as long as this object is alive.
        template<typename Func>
        const find_result<T>& and_then(Func&& func) const {
            if (m_value.has_value()) {
                func(m_value.value());
            }
            return *this;
        }

        /// Invokes `func` (without arguments) if no value is stored.
        /// Terminates the chain, hence the void return type.
        template<typename Func>
        void or_else(Func&& func) const {
            if (!m_value.has_value()) {
                func();
            }
        }

    private:
        // Empty when the search did not find anything.
        std::optional<T> m_value{std::nullopt};
    };

    /// Searches `v` for the first element equal to `value`.
    ///
    /// @return a find_result holding a copy of the matching element, or an
    ///         empty find_result when there is no match.
    template<typename T>
    find_result<T> find(const std::vector<T>& v, const T value) {
        auto it = std::find(v.begin(), v.end(), value);
        if (it == v.end()) {
            return find_result<T>();
        } else {
            return find_result<T>(*it);
        }
    }
} // namespace cwt
// Exercises cwt::find once with a value that is present and once with a
// value that is absent.
int main() {
// A small vector of ints to search in.
std::vector<int> v = {1,2,3,4};
// find() returns a find_result<int>, so and_then()/or_else() can be chained
// directly on the call. 2 is present: only the and_then handler runs.
cwt::find(v, 2)
.and_then([](int result){ std::cout << "found " << result << '\n'; })
.or_else([](){ std::cout << "found nothing\n"; })
;
// 10 is absent: only the or_else handler runs.
cwt::find(v, 10)
.and_then([](int result){ std::cout << "found " << result << '\n'; })
.or_else([](){ std::cout << "found nothing\n"; })
;
return 0;
}
看个乐
SIMD环节,需求,把 "20141103 012910"转成数字0x20141103012910
#include <x86intrin.h> // Windows: <intrin.h>
#include <string.h>
// From "20141103 012910", we want to get
// 0x20141103012910
// Packs the low nibble of each digit of a 15-character "YYYYMMDD HHMMSS"
// string into one integer, e.g. "20141103 012910" -> 0x20141103012910.
// Reads bytes c[0] .. c[14]. Uses _pext_u64, which requires BMI2.
uint64_t extract_nibbles(const char* c) {
uint64_t part1, part2;
// Two overlapping 8-byte loads: characters 0-7 and characters 7-14.
memcpy(&part1, c, sizeof(uint64_t));
memcpy(&part2 , c + 7, sizeof(uint64_t));
// Byte-swap so that the first character ends up in the most significant
// byte. NOTE(review): assumes a little-endian host - confirm.
part1 = _bswap64(part1);
part2 = _bswap64(part2);
// Keep the low nibble of every byte: eight digits -> eight nibbles.
part1 = _pext_u64(part1, 0x0f0f0f0f0f0f0f0f);
// Same, but the zero byte in the mask drops the separator at c[8].
part2 = _pext_u64(part2, 0x0f000f0f0f0f0f0f);
// part2 holds seven nibbles; its top one (from c[7]) coincides with the low
// nibble of part1 after the shift, so the OR is consistent.
return (part1<<24) | (part2);
}
汇编
movbe rax, QWORD PTR [rdi]
movbe rdx, QWORD PTR [rdi+7]
movabs rcx, 1085102592571150095
pext rax, rax, rcx
movabs rcx, 1080880467920490255
sal rax, 24
pext rdx, rdx, rcx
or rax, rdx
pext在amd zen3架构上开销很大,但本身也非常小巧了
ARM平台
#include <arm_neon.h>
// From "20141103 012910", we want to get
// 0x20141103012910
// NEON version: packs the low nibble of each digit of a "YYYYMMDD HHMMSS"
// string into one integer, e.g. "20141103 012910" -> 0x20141103012910.
//
// Loads 16 bytes starting at `c`, so the caller must make 16 bytes readable
// (the 15 characters plus one more byte).
uint64_t extract_nibbles(const char *c) {
  const uint8_t *ascii = (const uint8_t *)(c);
  uint8x16_t in = vld1q_u8(ascii);
  // Keep only the low nibble of every character (ASCII digit -> 0..9).
  in = vandq_u8(in, vmovq_n_u8(0x0f));
  // Reverse the digit order and drop the separator at index 8; the two
  // out-of-range indices (255) produce zero bytes.
  const uint8x16_t shuf = {14, 13, 12, 11, 10, 9, 7, 6,
    5, 4, 3, 2, 1, 0, 255, 255};
  in = vqtbl1q_u8(in, shuf);
  // In every 16-bit lane, add the lane shifted right by 4 to itself: the low
  // byte then holds both nibbles of the pair.
  uint16x8_t ins =
    vsraq_n_u16(vreinterpretq_u16_u8(in),
    vreinterpretq_u16_u8(in), 4);
  // Narrow 16 -> 8 bits, keeping the low byte of every lane. vmovn_u16
  // yields an unsigned 8x8 vector, so store it as uint8x8_t.
  uint8x8_t packed = vmovn_u16(ins);
  // Reinterpret the eight bytes as one 64-bit lane and move it to a general
  // register.
  return vget_lane_u64(vreinterpret_u64_u8(packed), 0);
}
汇编
adrp x8, .LCPI0_0
ldr q1, [x0]
movi v0.16b, #15
ldr q2, [x8, :lo12:.LCPI0_0]
and v0.16b, v1.16b, v0.16b
tbl v0.16b, { v0.16b }, v2.16b
usra v0.8h, v0.8h, #4
xtn v0.8b, v0.8h
fmov x0, d0
SIMD环节,在一个字符串数组里找子串是否存在
普通实现,bsearch
// Looks `input` up in the table `strings` with the C library's bsearch and
// returns a pointer to the matching entry, or a null pointer when bsearch
// finds none.
//
// `strings` and `compare` are defined outside this excerpt; presumably
// `strings` is a contiguous container of std::string that is ordered
// consistently with `compare` (bsearch requires that) - TODO confirm.
std::string *lookup_symbol(const char *input) {
  // bsearch returns void*; C++ has no implicit void* -> T* conversion, so the
  // result must be cast explicitly.
  return static_cast<std::string *>(
      bsearch(input, strings.data(), strings.size(),
              sizeof(std::string), compare));
}
或者trie
或者SIMD
天书
参考阅读 https://trent.me/is-prefix-of-string-in-table/ 天书
这个讲的是局部性问题
比如
class my_class {
int a;
int b;
...
int z;
};
int sum_all(my_class* m, int n) {
int sum = 0;
for (int i = 0; i < n; i++) {
sum += m[i].a + m[i].z;
...
C++ 中文周刊 第121期
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
本周内容不多
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-07-05 第209期
文章
虽然是老文了,对于清晰概念还是有一定的帮助的。讨论了很多基础概念,以及各种时序场景,我觉得都可以复述一遍这个演讲,增加自己理解
能讲给别人进步最快,各位可能都讲过题,要不要挑战一下
/// Runtime-id to compile-time-type dispatch over a list of event types.
///
/// Walks the event types in `TList<...>` from left to right. For the first
/// type whose static `id` equals the runtime `id`, constructs that event from
/// `data` and returns `expr(event)`. When no type matches, returns a
/// value-initialized result.
///
/// The return type is the one `expr` yields for the first event type in the
/// list.
template <template <class...> class TList, class TEvent, class... TEvents, class T, class TExpr>
constexpr auto dispatch(TList<TEvent, TEvents...>, const int id, const T& data,
                        const TExpr& expr) -> decltype(expr(TEvent{data})) {
  switch (id) {
    case TEvent::id:
      return expr(TEvent{data});
    default:
      break;
  }
  if constexpr (sizeof...(TEvents) > 0) {
    // Not the head of the list: retry with the remaining event types.
    return dispatch(TList<TEvents...>{}, id, data, expr);
  } else {
    // List exhausted without a match.
    return {};
  }
}
不太懂有啥用
overload谁都会写,但是可能存在隐式转换,比如
// Classic "overload" helper: inherits from every callable and pulls in all
// of their call operators, producing a single overload set.
template<class... Ts>
struct overload : Ts... {
using Ts::operator()...;
};
// Deduction guide so that overload{lambda1, lambda2} deduces Ts.
template<class... Ts>
overload(Ts...) -> overload<Ts...>;
int main() {
// The variant currently holds a bool.
const std::variant<int, bool> v{true};
std::visit(
overload{
[](int val) { std::cout << val; },
[](bool val) { std::cout << std::boolalpha << val; }, // 1
},
v);
}
这里的int bool模糊,可能发生转换。如果把1这行注释掉,visit一样能遍历,走int那个分支。发生转换了我超!
如何杀死这种异常?
// Always false, but dependent on its template arguments, so a static_assert
// using it only fires when the enclosing template is instantiated.
template<class...>
constexpr bool always_false_v = false;
// "overload" helper with a catch-all: inherits every call operator from Ts
// and adds a generic one that rejects any argument type at compile time.
template<class... Ts>
struct overload : Ts... {
using Ts::operator()...;
// Takes its argument by value as an exact match, so it is chosen over a
// handler that would need an implicit conversion; instantiating it triggers
// the static_assert.
template<typename T>
constexpr void operator()(T) const {
static_assert(always_false_v<T>, "Unsupported type");
// c++23 static_assert(false, "Unsupported type");
}
};
// Deduction guide so that overload{lambda1, lambda2} deduces Ts.
template<class... Ts>
overload(Ts...) -> overload<Ts...>;
int main() {
const std::variant<int, bool> v{true};
std::visit(overload{
[](int val) { std::cout << val; },
[](bool val) { std::cout << std::boolalpha << val; },
},
v);
}
正常走Ts的operator(),如果类型不匹配(隐式转换的前提是没有别的实现),最佳匹配是static_assert的那个实现,匹配中,编译报错
自己的overload都改一下,这个还是值得一用的
这个问题其实和上面差不多,指针有隐式转换成bool的风险,可能一不小心写出bug
你比如有个类是这样的
// Wrapper exposing its contents through an implicit conversion to
// const char*. (The m_ptr member is not declared in this excerpt.)
class string_literal {
public:
// Implicit conversion: usable wherever a const char* is expected, which
// also enables the pointer's own further conversions and arithmetic.
operator const char*() const noexcept {
return m_ptr;
}
};
问题来了,char*转bool
// Shows two unintended uses that the implicit const char* conversion permits.
int main() {
string_literal str;
if (str) {} // converts to const char*, then the pointer converts to bool
str + 1; // converts to const char*, then pointer arithmetic
}
这种代码,如果真的有bug被引入,都不想琢磨,太恶心了
怎么修?限制转换,强制约束类型,和上面的方案不谋而合
// Same wrapper, but the conversion operator is a template constrained with
// std::same_as, so it only participates when the requested target type is
// exactly const char*. (The m_ptr member is not declared in this excerpt.)
class string_literal{
public:
template <std::same_as<const char*> T>
operator T() const noexcept
{
return m_ptr;
}
};
c++20的concept感觉没有大规模推开,大家为了旧代码没升级,没用上
代码在这里 https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2023/07/01/src/sse_date.c
解析时间,常规写法
/* Baseline parser: decodes a "YYYYMMDDHHMMSS" timestamp with strptime and
 * converts the broken-down time with mktime_from_utc.
 *
 * On success stores the result, truncated to 32 bits, in *time_in_second and
 * returns true. Returns false, leaving *time_in_second untouched, when
 * strptime rejects the input. */
bool parse_time(const char *date_string, uint32_t *time_in_second) {
  struct tm parsed;
  const char *rest = strptime(date_string, "%Y%m%d%H%M%S", &parsed);
  if (rest == NULL) {
    return false;
  }
  *time_in_second = (uint32_t)mktime_from_utc(&parsed);
  return true;
}
SSE代码我看不懂,直接贴了
// SSE parser for a 14-digit "YYYYMMDDHHmmSS" timestamp.
//
// Validates the digits and field ranges, then computes the number of seconds
// counted from the start of 1970. The result is stored in *time_in_second
// (truncated to 32 bits). Returns false when a field is out of range or when
// the 64-bit result does not fit in 32 bits; in the latter case
// *time_in_second has already been written.
//
// Loads 16 bytes from date_string, so 16 bytes must be readable. Relies on
// is_leap_year, leap_days, mdays and mdays_cumulative, which are defined
// outside this excerpt.
bool sse_parse_time(const char *date_string, uint32_t *time_in_second) {
// We load the block of digits. We subtract 0x30 (the code point value of the
// character '0'), and all bytes values should be between 0 and 9,
// inclusively. We know that some character must be smaller than 9, for
// example, we cannot have more than 59 seconds and never 60 seconds, in the
// time stamp string. So one character must be between 0 and 5. Similarly, we
// start the hours at 00 and end at 23, so one character must be between 0
// and 2. We do a saturating subtraction of the maximum: the result of such a
// subtraction should be zero if the value is no larger. We then use a special
// instruction to multiply one byte by 10, and sum it up with the next byte,
// getting a 16-bit value. We then repeat the same approach as before,
// checking that the result is not too large.
//
__m128i v = _mm_loadu_si128((const __m128i *)date_string);
// loaded YYYYMMDDHHmmSS.....
v = _mm_xor_si128(v, _mm_set1_epi8(0x30));
// We can use _mm_sub_epi8 or _mm_xor_si128 for the subtraction above.
// subtracting by 0x30 (or '0'), turns all values into a byte value between 0
// and 9 if the initial input was made of digits.
// Per-digit upper bounds; the last two bytes are not part of the timestamp
// and are given the maximum bound (-1 == 0xFF) so they never fail.
__m128i limit =
_mm_setr_epi8(9, 9, 9, 9, 1, 9, 3, 9, 2, 9, 5, 9, 5, 9, -1, -1);
// credit @aqrit
// overflows are still possible, if hours are in the range 24 to 29
// or if days are in the range 32 to 39
// or if months are in the range 13 to 19.
__m128i abide_by_limits = _mm_subs_epu8(v, limit); // must be all zero
// Swap the two bytes of every 16-bit lane so that each lane reads as
// (tens << 8) | units and two-digit fields can be compared as one number.
__m128i byteflip = _mm_setr_epi64((__m64)0x0607040502030001ULL,
(__m64)0x0e0f0c0d0a0b0809ULL);
__m128i little_endian = _mm_shuffle_epi8(v, byteflip);
// Per-field upper bounds in the same (tens << 8) | units form:
// month <= 12, day <= 31, hour <= 23, minute <= 59, second <= 59.
__m128i limit16 = _mm_setr_epi16(0x0909, 0x0909, 0x0102, 0x0301, 0x0203,
0x0509, 0x0509, -1);
__m128i abide_by_limits16 =
_mm_subs_epu16(little_endian, limit16); // must be all zero
__m128i combined_limits =
_mm_or_si128(abide_by_limits16, abide_by_limits); // must be all zero
// We want to disallow 0s for days and months... and we want to make
// sure that we don't go back in time prior to 1900.
__m128i limit16_low = _mm_setr_epi16(0x0109, 0, 0x0001, 0x0001, 0, 0, 0, 0);
__m128i abide_by_limits16_low =
_mm_subs_epu16(limit16_low, little_endian); // must be all zero
combined_limits = _mm_or_si128(combined_limits, abide_by_limits16_low);
// Any non-zero bit means at least one bound was violated.
if (!_mm_test_all_zeros(combined_limits, combined_limits)) {
return false;
}
// 0x000000SS0mmm0HHH`00DD00MM00YY00YY
//////////////////////////////////////////////////////
// pmaddubsw has a high latency (e.g., 5 cycles) and is
// likely a performance bottleneck.
/////////////////////////////////////////////////////
// Combine each digit pair into one number: tens * 10 + units.
const __m128i weights = _mm_setr_epi8(
// Y Y Y Y m m d d H H M M S S - -
10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 0, 0);
v = _mm_maddubs_epi16(v, weights);
// High half holds hours, minutes and seconds; a single multiply and shift
// folds them into a number of seconds.
uint64_t hi = (uint64_t)_mm_extract_epi64(v, 1);
uint64_t seconds = (hi * 0x0384000F00004000) >> 46;
// Low half holds the two year halves, the month and the day.
uint64_t lo = (uint64_t)_mm_extract_epi64(v, 0);
uint64_t yr = (lo * 0x64000100000000) >> 48;
// We checked above that dy and mo are >= 1
uint64_t mo = ((lo >> 32) & 0xff) - 1;
uint64_t dy = (uint64_t)_mm_extract_epi8(v, 6);
bool is_leap_yr = is_leap_year((int)yr);
// Day beyond the table entry for this month: only the 29th of the month
// with index 1 (February) in a leap year is still accepted.
if (dy > (uint64_t)mdays[mo]) { // unlikely branch
if (mo == 1 && is_leap_yr) {
if (dy != 29) {
return false;
}
} else {
return false;
}
}
// Whole days since the start of 1970: full years, leap days, full months of
// the current year, the current year's leap day once past February, and
// the days of the current month.
uint64_t days = 365 * (yr - 1970) + (uint64_t)leap_days(1970, (int)yr);
days += (uint64_t)mdays_cumulative[mo];
days += is_leap_yr & (mo > 1);
days += dy - 1;
uint64_t time_in_second64 = seconds + days * 60 * 60 * 24;
*time_in_second = (uint32_t)time_in_second64;
// Report failure when the value does not survive the 32-bit truncation.
return time_in_second64 == (uint32_t)time_in_second64;
}
static const int mdays_minus_one[] = {30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30};
// uses more instructions than sse_parse_time but might be slightly faster.
bool sse_parse_time_alt(const char *date_string, uint32_t *time_in_second) {
// We load the block of digits. We subtract 0x30 (the code point value of the
// character '0'), and all bytes values should be between 0 and 9,
// inclusively. We know that some character must be smaller that 9, for
// example, we cannot have more than 59 seconds and never 60 seconds, in the
// time stamp string. So one character must be between 0 and 5. Similarly, we
// start the hours at 00 and end at 23, so one character must be between 0
// and 2. We do a saturating subtraction of the maximum: the result of such a
// subtraction should be zero if the value is no larger. We then use a special
// instruction to multiply one byte by 10, and sum it up with the next byte,
// getting a 16-bit value. We then repeat the same approach as before,
// checking that the result is not too large.
//
// We compute the month the good old ways, as an integer in [0,11], we
// check for overflows later.
uint64_t mo = (uint64_t)((date_string[4]-0x30)*10 + (date_string[5]-0x30) - 1);
__m128i v = _mm_loadu_si128((const __m128i *)date_string);
// loaded YYYYMMDDHHmmSS.....
v = _mm_xor_si128(v, _mm_set1_epi8(0x30));
// W can use _mm_sub_epi8 or _mm_xor_si128 for the subtraction above.
// subtracting by 0x30 (or '0'), turns all values into a byte value between 0
// and 9 if the initial input was made of digits.
__m128i limit =
_mm_setr_epi8(9, 9, 9, 9, 1, 9, 3, 9, 2, 9, 5, 9, 5, 9, -1, -1);
// credit @aqrit
// overflows are still possible, if hours are in the range 24 to 29
// of if days are in the range 32 to 39
// or if months are in the range 12 to 19.
__m128i abide_by_limits = _mm_subs_epu8(v, limit); // must be all zero
__m1...
C++ 中文周刊 第120期
公众号
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
感谢不语赞助
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-06-28 第208期
文章
// Demonstrates the GNU vector_size attribute: arithmetic on such a type is
// applied element by element.
int main() {
// A vector type holding four ints.
using v4si = int [[gnu::vector_size(4 * sizeof(int))]];
v4si a = {1, 2, 3, 4};
v4si b = {4, 3, 2, 1};
v4si c;
// Element-wise addition of the two vectors.
c = a + b;
std::cout << c[0] << c[1] << c[2] << c[3]; // prints 5555
}
这玩意是给simd方便的。看gcc样例
#include <immintrin.h>
/* 16-byte vector types: sixteen unsigned chars / four unsigned ints. */
typedef unsigned char u8x16 __attribute__ ((vector_size (16)));
typedef unsigned int u32x4 __attribute__ ((vector_size (16)));
/* Union giving three views of the same 128 bits: the intrinsic type and the
   two vector-extension types above. */
typedef union {
__m128i mm;
u8x16 u8;
u32x4 u32;
} v128;
v128 x, y = { 0 };
/* `ptr` is not declared in this excerpt. */
memcpy (&x, ptr, sizeof x);
/* Vector-extension arithmetic with a scalar operand on the u8 view. */
y.u8 += 0x80;
/* Intrinsic call on the __m128i view of the same data. */
x.mm = _mm_adds_epu8 (x.mm, y.mm);
/* Vector-extension bitwise AND with a scalar operand on the u32 view. */
x.u32 &= 0xffffff;
/* Instead of a variable, a compound literal may be used to pass the
return value of an intrinsic call to a function expecting the union: */
v128 foo (v128);
x = foo ((v128) {_mm_adds_epu8 (x.mm, y.mm)});
这个debug宏很好用,方便阅读,代码在这里 https://github.com/nosql-cn/AxeDB
不过既然已经编译了,通过clangd 应该也能抓到堆栈。不过静态的堆栈没有这种运行时堆栈有意义,运行时的更直观一些
知乎有人翻译成中文了,挺好。 https://zhuanlan.zhihu.com/p/639886110
原理就是std::variant
+ std::visit
看不懂
了解一波
代码走读,感兴趣的可以看看
还是讲他的relocatable提案
介绍vs上体验AddressSanitizer新特性COE(continue_on_error)
用 -fsanitize=address
同时设置
set ASAN_OPTIONS=continue_on_error=1
set ASAN_OPTIONS=continue_on_error=2
打散(重排)一个64位整数各个bit的场景
// Usage sketch ("some value" is a placeholder, not valid code).
uint64_t w = some value;
// One entry per output bit; listing 63 down to 0 reverses the bit order.
uint8_t indexes[64] = {63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51,
50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38,
37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25,
24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
bit_shuffle(w, indexes); // returns a reversed version
可能的实现
// Portable reference implementation of a 64-bit bit shuffle.
//
// Output bit i is a copy of bit indexes[i] of `w`, for i in 0..63.
// Only the low 6 bits of each index are used. This keeps the shift amount
// below 64 (a larger shift would be undefined behaviour) and matches how the
// AVX-512 byte-permute and bit-shuffle instructions interpret their indices.
//
// @param w        value whose bits are rearranged
// @param indexes  64 source-bit positions, one per output bit
// @return the rearranged value
uint64_t slow_bit_shuffle(uint64_t w, uint8_t indexes[64]) {
  uint64_t out{};
  for (size_t i = 0; i < 64; i++) {
    const unsigned source_bit = indexes[i] & 63u;
    out |= ((w >> source_bit) & uint64_t(1)) << i;
  }
  return out;
}
考虑avx512
// AVX-512 bit shuffle: expands the 64 bits of `w` into 64 bytes, permutes the
// bytes according to `indexes`, and collapses the bytes back into bits.
// Reads 64 bytes from `indexes`.
uint64_t bit_shuffle(uint64_t w, uint8_t indexes[64]) {
__mmask64 as_mask = _cvtu64_mask64(w);
// Byte i becomes 0xFF when bit i of w is set and 0 otherwise.
__m512i as_vec_register =
_mm512_maskz_mov_epi8(as_mask, _mm512_set1_epi8(0xFF));
// Byte-level permutation: destination byte i takes the source byte selected
// by indexes[i].
__m512i as_vec_register_shuf =
_mm512_permutexvar_epi8(_mm512_loadu_si512(indexes), as_vec_register);
// Collect the top bit of every byte back into a 64-bit mask.
return _cvtmask64_u64(_mm512_movepi8_mask(as_vec_register_shuf));
}
快点
// Shorter AVX-512 variant built on the dedicated bit-shuffle instruction.
// Reads 64 bytes from `indexes`.
uint64_t faster_bit_shuffle(uint64_t w, uint8_t indexes[64]) {
// Replicate w into all eight 64-bit lanes so that every index byte selects
// from the same value.
__m512i as_vec_register = _mm512_set1_epi64(w);
// One result bit per index byte: the bit of the corresponding 64-bit lane
// that the index byte selects.
__mmask64 as_mask = _mm512_bitshuffle_epi64_mask(as_vec_register,
_mm512_loadu_si512(indexes));
return _cvtmask64_u64(as_mask);
}
有些芯片性能太好掩盖了一些代码优化的潜力,讲了几个场景 循环展开/pipeline对于老芯片带来的性能提升。观点挺有意思
这个博客 反复介绍过多次 https://johnnysswlab.com/ 推荐收藏没事看看
讲自己的演讲经历,如何演讲,找话题找组织,如何保证感兴趣,等等
话说,想组织一个c++讨论演讲,怎么才能组织起人来看,找到演讲者呢。
之前看祁宇(qicosmos)搞过,太正式了其实。类似meetingcpp这种网络会议直播模式就行
大家给给点子,我想整一个。没话题其实可以以国外视频转述一遍。加深理解。
- How to wait for multiple C++ coroutines to complete before propagating failure, unhelpful lambda
- How to wait for multiple C++ coroutines to complete before propagating failure, false hope
- How to wait for multiple C++ coroutines to complete before propagating failure, peeling away at a tuple
raymond chen真能写。我一个没看
讲一些用法,尽量避免。没啥说的。有些有点搞笑。这里就不列举了
视频
讲constinit的。没啥说的。能用就用
这个还是很值得一看的,这个哥们是性能调优专家,有个博客搜索权重挺高 https://johnnysswlab.com/
这个改天我转b站上。或者谁有空给传一下
这个是技术债了,异常问题异常安全等等,我看不进去。这里标记个TODO改天整理一下观点(或者找chatgpt老师提取一下。。。)
- Low-Latency Trading Systems in C++: Templated Meta-State Machines in HFT - Jason McGuiness - ACCU 23
代码https://sourceforge.net/p/libjmmcg/git-repo/ci/ACCUConf2023/tree/libjmmcg/
挺能high的,没看完 PPT https://github.com/wanghenshui/wanghenshui.github.io/blob/master/assets/ACCUConf2023_JMMcG_e0d2d_PRESENTED.pdf
ppt https://jamespascoe.github.io/accu2023/
看个乐
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- gcc-mcf 懂的都懂
新项目介绍/版本更新
- vulkan.cpp支持module了,看看怎么封装的? https://github.com/KhronosGroup/Vulkan-Hpp/blob/main/vulkan/vulkan.cppm
- https://github.com/bkryza/clang-uml 支持用clang直接画图!不过得编译,用compile database.json来生成。不知道大项目的效果。感觉周末可以试验一下 话说plant uml太难看了,不能直接生成mermaid么
- https://github.com/AMDResearch/omnitrace amd 性能工具。仅限linux可用
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论