C++ 中文周刊 第27期

从 reddit/hackernews/lobsters/meetingcpp 摘抄一些 C++ 动态。

每周更新

周刊项目地址:github、在线地址、知乎专栏

欢迎投稿,推荐或自荐文章/软件/资源等,请提交 issue


资讯

编译器信息最新动态推荐关注hellogcc公众号

本周周报github直达

标准委员会八月邮件汇总

module这个功能是非常重要的,急需推进 Minimal module support for the standard library

文章

就是用using声明,把父类中被子类同名函数隐藏的重载重新引入子类,从而可以继续使用父类的接口实现

// Empty tag type; selects the `on(const trade&)` overload.
struct trade {};
// Empty tag type; selects the `on(const add_order&)` overload.
struct add_order {};
// Empty tag type; selects the `on(const transaction_end&)` overload.
struct transaction_end {};

// Interface with one virtual `on` overload per tag type. Every overload has
// an empty default body, so a derived class may override any subset.
struct i {
    // Virtual destructor, defaulted; `constexpr virtual` requires C++20.
    constexpr virtual ~i() noexcept = default;
    // Default handlers: do nothing.
    constexpr virtual void on(const trade&) {}
    constexpr virtual void on(const add_order&) {}
    constexpr virtual void on(const transaction_end&) {}
};

// Overrides only the `trade` handler. Declaring `on` in this class hides the
// other base-class overloads when `on` is looked up through `impl1` itself;
// they remain reachable through an `i&`.
struct impl1 : i {
  void on(const trade&) override {}
};

// Overrides only the `trade` handler, but re-exposes every base-class `on`
// overload through the using-declaration, so all three stay callable on an
// `impl2` object.
struct impl2 : i {
  using i::on;  // bring the hidden base overloads back into scope
  void on(const trade&) override {}
};

// Compile-time demonstration of name hiding. Each static_assert calls a
// lambda whose requires-expression reports whether `t.on(...)` is a valid
// call for the given static type of `t`.
int main() {
    impl1 i1{};
    impl2 i2{};

    // via interface
    // Through `i&` lookup starts in `i`, so all three overloads are found.
    static_assert([](i& t) { return requires { t.on(trade{}); }; }(i1));
    static_assert([](i& t) { return requires { t.on(add_order{}); }; }(i1));
    static_assert([](i& t) { return requires { t.on(transaction_end{}); }; }(i1));

    // via concrete
    // `auto&` deduces `impl1&`; only the overload declared in impl1 is visible.
    static_assert([](auto& t) { return requires { t.on(trade{}); }; }(i1));
    static_assert(not [](auto& t) { return requires { t.on(add_order{}); }; }(i1));
    static_assert(not [](auto& t) { return requires { t.on(transaction_end{}); }; }(i1));

    // via concrete with using::on
    // `auto&` deduces `impl2&`; `using i::on` makes every overload visible again.
    static_assert([](auto& t) { return requires { t.on(trade{}); }; }(i2));
    static_assert([](auto& t) { return requires { t.on(add_order{}); }; }(i2));
    static_assert([](auto& t) { return requires { t.on(transaction_end{}); }; }(i2));
}

讲了EBO的例子,以及no_unique_address的改善

手把手教你写json解析库

简单定一个接口

#ifndef JSON_H
#define JSON_H

#include <tuple>
#include <vector>
#include <string>

namespace json {
// Tokenizes a JSON string. Returns the tokens together with an error message;
// the message is empty when lexing succeeded.
std::tuple<std::vector<JSONToken>, std::string> lex(std::string);
// Parses one value starting at token `index`. Returns the value, the index of
// the first token after it, and an error message (empty on success).
std::tuple<JSONValue, int, std::string> parse(std::vector<JSONToken>, int index = 0);
// Serializes a value back to JSON text. `whitespace` is the indentation
// prefix of the enclosing level; nested levels add two spaces to it.
std::string deparse(JSONValue, std::string whitespace = "");
} // namespace json

#endif

然后定义JSONToken和JSONValue

#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>
namespace json {

// Kinds of token produced by the lexer.
enum class JSONTokenType { String, Number, Syntax, Boolean, Null };
// A single lexed token.
struct JSONToken {
  // Token text; for string tokens, the characters between the quotes.
  std::string value;
  JSONTokenType type;
  // Index into the source string at which the token starts.
  int location;
  // The complete input the token was lexed from; dereferenced when an error
  // message is formatted for this token.
  std::shared_ptr<std::string> full_source;
};
  

// Discriminator for JSONValue: tells which optional member is populated.
enum class JSONValueType { String, Number, Object, Array, Boolean, Null };
// A parsed JSON value. The parser fills in the single optional that matches
// `type`; a Null value populates none of them.
struct JSONValue {
  std::optional<std::string> string;
  std::optional<double> number;
  std::optional<bool> boolean;
  std::optional<std::vector<JSONValue>> array;
  std::optional<std::map<std::string, JSONValue>> object;
  JSONValueType type;
};
}

开始实现lex,简单说,就是根据分隔符(空格tab之类的),获取一个个token,然后解析。怎么确定是什么token?简单,每个token解析函数都处理一遍,总能解析出来吧

std::tuple<std::vector<JSONToken>, std::string> lex(std::string raw_json) {
  std::vector<JSONToken> tokens;
  // All tokens will embed a pointer to the raw JSON for debugging purposes
  auto original_copy = std::make_shared<std::string>(raw_json);

  auto generic_lexers = {lex_syntax, lex_string, lex_number, lex_null, lex_true, lex_false};
  for (int i = 0; i < raw_json.length(); i++) {
    // Skip past whitespace
    if (auto new_index = lex_whitespace(raw_json, i); i != new_index) {
      i = new_index - 1;
      continue;
    }

    auto found = false;
    for (auto lexer : generic_lexers) {
      if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
        // Error while lexing, return early
        if (error.length()) {
          return {\
              {}, error};
        }

        // Store reference to the original source
        token.full_source = original_copy;
        tokens.push_back(token);
        i = new_index - 1;
        found = true;
        break;
      }
    }

    if (found) {
      continue;
    }

    return {\
      {}, format_error("Unable to lex", raw_json, i)\
    };
  }

  return {tokens, ""};
}

generic_lexers来干活,处理完记录token

format_error比较好理解,就是打印错误

// Builds a three-line diagnostic for position `index` of `source`:
//
//   <base> at line L, column C
//   <the full text of the offending line>
//   <padding>^
//
// Lines are 1-based. The column is the number of characters that precede
// `index` on its line. A tab before `index` counts as one column but is
// rendered as two spaces in both the echoed line and the caret padding.
// If `index` does not address a character of `source`, the whole input is
// treated as preceding it.
std::string format_error(std::string base, std::string source, int index) {
  int line = 1;
  int column = 0;
  std::string lastline;
  std::string whitespace;

  // Pass 1: consume everything before `index`, tracking the current line.
  std::size_t cursor = 0;
  for (; cursor < source.size(); ++cursor) {
    if (index >= 0 && cursor == static_cast<std::size_t>(index)) {
      break;
    }

    const char c = source[cursor];
    switch (c) {
    case '\n':
      ++line;
      column = 0;
      lastline.clear();
      whitespace.clear();
      break;
    case '\t':
      ++column;
      lastline += "  ";
      whitespace += "  ";
      break;
    default:
      ++column;
      lastline += c;
      whitespace += ' ';
      break;
    }
  }

  // Pass 2: append the remainder of the offending line, for context.
  for (; cursor < source.size() && source[cursor] != '\n'; ++cursor) {
    lastline += source[cursor];
  }

  std::ostringstream s;
  s << base << " at line " << line << ", column " << column << '\n'
    << lastline << '\n'
    << whitespace << '^';
  return s.str();
}

具体的lexer实现

lex_whitespace,就是跳过空白,简单

// Returns the index of the first non-whitespace character at or after
// `index`, or raw_json.length() if only whitespace remains. Returns `index`
// unchanged when it does not point at whitespace.
int lex_whitespace(std::string raw_json, int index) {
  const int length = static_cast<int>(raw_json.length());
  // Check the bound before reading. The cast matters: passing a negative char
  // (any non-ASCII byte where char is signed) to std::isspace is undefined.
  while (index >= 0 && index < length &&
         std::isspace(static_cast<unsigned char>(raw_json[index]))) {
    index++;
  }
  return index;
}

lex_syntax

校验token特殊符号,这些符号是语法的一部分,要单独处理

// Lexes a single structural character: one of [ ] { } : ,
//
// Returns {token, next_index, ""}. When the character at `index` is not
// structural, the returned index equals `index` and the token value is empty,
// which the caller treats as "no match". This lexer never reports an error.
std::tuple<JSONToken, int, std::string> lex_syntax(std::string raw_json, int index) {
  // The token records the position it started at, before `index` advances.
  JSONToken token{"", JSONTokenType::Syntax, index};
  switch (raw_json[index]) {
  case '[':
  case ']':
  case '{':
  case '}':
  case ':':
  case ',':
    token.value += raw_json[index];
    index++;
    break;
  default:
    break;
  }

  return {token, index, ""};
}

lex_string要注意两个细节,判定空串,以及判定引号结尾, 这里嵌套引号暂时不考虑

// Lexes a double-quoted string starting at `original_index`.
//
// Returns {token, next_index, error}:
//  - no opening quote at `original_index`: index unchanged, empty error
//    (the caller treats this as "no match");
//  - closing quote found: token.value holds the characters between the
//    quotes and next_index is one past the closing quote;
//  - input ends before a closing quote: next_index is raw_json.length() and
//    `error` carries an "Unexpected EOF while lexing string" message.
// Escape sequences are not interpreted.
std::tuple<JSONToken, int, std::string> lex_string(std::string raw_json,
                                                   int original_index) {
  JSONToken token{"", JSONTokenType::String, original_index};
  const int length = static_cast<int>(raw_json.length());
  if (original_index < 0 || original_index >= length ||
      raw_json[original_index] != '"') {
    return {token, original_index, ""};
  }

  int index = original_index + 1;
  // TODO: handle nested quotes
  while (index < length && raw_json[index] != '"') {
    token.value += raw_json[index];
    index++;
  }
  if (index == length) {
    return {token, index, format_error("Unexpected EOF while lexing string", raw_json, index)};
  }

  index++;  // step over the closing quote
  return {token, index, ""};
}

lex_number 直接数字字符串拼一下完事儿,这里并没有处理什么浮点数之类的场景

// Lexes a run of ASCII digits starting at `original_index`.
//
// Returns {token, next_index, ""}. When no digit is present the index is
// returned unchanged and the token value is empty ("no match"). Signs,
// fractions and exponents are not recognised. This lexer never reports an
// error.
std::tuple<JSONToken, int, std::string> lex_number(std::string raw_json,
                                                   int original_index) {
  JSONToken token = {"", JSONTokenType::Number, original_index};
  const int length = static_cast<int>(raw_json.length());
  int index = original_index;
  // TODO: handle not just integers
  while (index >= 0 && index < length &&
         raw_json[index] >= '0' && raw_json[index] <= '9') {
    token.value += raw_json[index];
    index++;
  }
  return {token, index, ""};
}

lex_keyword 处理true false null

// Lexes the literal `keyword` at `original_index`, producing a token of the
// given `type`.
//
// Returns {token, next_index, ""}. On a full match token.value is `keyword`
// and next_index is one past it. Otherwise the index is returned unchanged
// with an empty token value, so the caller sees "no match". A partial match
// (e.g. "nul") counts as no match: reporting an advanced index for it would
// make the caller record an empty token.
std::tuple<JSONToken, int, std::string> lex_keyword(std::string raw_json,
                                                    std::string keyword,
                                                    JSONTokenType type,
                                                    int original_index) {
  JSONToken token{"", type, original_index};

  // std::string::compare throws if the start position is past the end.
  const bool in_range =
      original_index >= 0 &&
      static_cast<std::string::size_type>(original_index) <= raw_json.length();
  if (!in_range ||
      raw_json.compare(original_index, keyword.length(), keyword) != 0) {
    return {token, original_index, ""};
  }

  token.value = keyword;
  return {token, original_index + static_cast<int>(keyword.length()), ""};
}
std::tuple<JSONToken, int, std::string> lex_null(std::string raw_json,
                                                 int index) {
  return lex_keyword(raw_json, "null", JSONTokenType::Null, index);
}

std::tuple<JSONToken, int, std::string> lex_true(std::string raw_json,
                                                 int index) {
  return lex_keyword(raw_json, "true", JSONTokenType::Boolean, index);
}

std::tuple<JSONToken, int, std::string> lex_false(std::string raw_json,
                                                  int index) {
  return lex_keyword(raw_json, "false", JSONTokenType::Boolean, index);
}

token 处理就结束了,下面是一个使用参考

int main(int argc, char *argv[]) {
  if (argc == 1) {
    std::cerr << "Expected JSON input argument to parse" << std::endl;
    return 1;
  }
  std::string in{argv[1]};
  auto [tokens, error] = json::lex(in);
  if (error.size()) {
    std::cerr << error << std::endl;
    return 1;
  }
  for (auto t : tokens) {
    std::cout << t.value << std::endl;
  }
}

lex结束了,该真正的parse了,将token转换成value

std::tuple<JSONValue, int, std::string> parse(std::vector<JSONToken> tokens,
                                              int index) {
  auto token = tokens[index];
  switch (token.type) {
  case JSONTokenType::Number: {
    auto n = std::stod(token.value);
    return {JSONValue{.number = n, .type = JSONValueType::Number}, index + 1, ""};
  }
  case JSONTokenType::Boolean:
    return {JSONValue{.boolean = token.value == "true", .type = JSONValueType::Boolean}, index + 1, ""};
  case JSONTokenType::Null:
    return {JSONValue{.type = JSONValueType::Null}, index + 1, ""};
  case JSONTokenType::String:
    return {JSONValue{.string = token.value, .type = JSONValueType::String}, index + 1, ""};
  case JSONTokenType::Syntax:
    if (token.value == "[") {
      auto [array, new_index, error] = parse_array(tokens, index + 1);
      return {JSONValue{.array = array, .type = JSONValueType::Array}, new_index, error};
    }
    if (token.value == "{") {
      auto [object, new_index, error] = parse_object(tokens, index + 1);
      return {JSONValue{.object = std::optional(object), .type = JSONValueType::Object}, new_index, error};
    }
  }
  return {\
  {}, index, format_parse_error("Failed to parse", token)\
         };
}

这里的format_parse_error类似上面的错误处理

// Returns the enumerator name of `jtt` as text, for use in error messages.
// A value that is not one of the declared enumerators yields "Unknown".
std::string JSONTokenType_to_string(JSONTokenType jtt) {
  switch (jtt) {
  case JSONTokenType::String:
    return "String";
  case JSONTokenType::Number:
    return "Number";
  case JSONTokenType::Syntax:
    return "Syntax";
  case JSONTokenType::Boolean:
    return "Boolean";
  case JSONTokenType::Null:
    return "Null";
  }
  // Without this, flowing off the end of a non-void function would be
  // undefined behaviour for an out-of-range enum value.
  return "Unknown";
}

// Formats a parser error about `token`.
//
// The message names the token's value, type and source index, followed by
// `base` on a new line. When the token carries a pointer to its source text,
// the message is passed through format_error so that it also shows the
// line/column and the offending source line.
std::string format_parse_error(std::string base, JSONToken token) {
  std::ostringstream s;
  // The index value was previously missing, leaving a dangling ", index ".
  s << "Unexpected token '" << token.value << "', type '"
    << JSONTokenType_to_string(token.type) << "', index " << token.location;
  s << std::endl << base;
  if (!token.full_source) {
    // No source text to quote; return the bare message rather than
    // dereferencing a null pointer.
    return s.str();
  }
  return format_error(s.str(), *token.full_source, token.location);
}

针对jsontokentype为syntax有两种可能,一种是array,一种是object,都会有嵌套场景

parse_array

std::tuple<std::vector<JSONValue>, int, std::string>
parse_array(std::vector<JSONToken> tokens, int index) {
  std::vector<JSONValue> children = {};
  while (index < tokens.size()) {
    auto t = tokens[index];
    if (t.type == JSONTokenType::Syntax) {
      if (t.value == "]") {
        return {children, index + 1, ""};
      }
      if (t.value == ",") {
        index++;
        t = tokens[index];
      } else if (children.size() > 0) {
        return {\
                {},
                index,
                format_parse_error("Expected comma after element in array", t)};
      }
    }
    auto [child, new_index, error] = parse(tokens, index);
    if (error.size()) {
      return {\
      	{}, index, error
      };
    }
    children.push_back(child);
    index = new_index;
  }
  return {
      {},
      index,
      format_parse_error("Unexpected EOF while parsing array", tokens[index])};
}

parse_object

std::tuple<std::map<std::string, JSONValue>, int, std::string>
parse_object(std::vector<JSONToken> tokens, int index) {
  std::map<std::string, JSONValue> values = {};
  while (index < tokens.size()) {
    auto t = tokens[index];
    if (t.type == JSONTokenType::Syntax) {
      if (t.value == "}") {
        return {values, index + 1, ""};
      }
      if (t.value == ",") {
        index++;
        t = tokens[index];
      } else if (values.size() > 0) {
        return {
            {},
            index,
            format_parse_error("Expected comma after element in object", t)};
      } else {
        return {\
                {},
                index,
                format_parse_error(
                    "Expected key-value pair or closing brace in object", t)};
      }
    }

    auto [key, new_index, error] = parse(tokens, index);
    if (error.size()) {
      return {\
          {}, index, error};
    }
    if (key.type != JSONValueType::String) {
      return {\
          {}, index, format_parse_error("Expected string key in object", t)};
    }
    index = new_index;
    t = tokens[index];

    if (!(t.type == JSONTokenType::Syntax && t.value == ":")) {
      return {\
              {},
              index,
              format_parse_error("Expected colon after key in object", t)};
    }
    index++;
    t = tokens[index];
    auto [value, new_index1, error1] = parse(tokens, index);
    if (error1.size()) {
      return {\
          {}, index, error1};
    }
    values[key.string.value()] = value;
    index = new_index1;
  }
  return {values, index + 1, ""};
}

最终,parse就完成了

std::tuple<JSONValue, std::string> parse(std::string source) {
  auto [tokens, error] = json::lex(source);
  if (error.size()) {
    return {\
          {}, error};
  }

  auto [ast, _, error1] = json::parse(tokens);
  return {ast, error1};
}

deparse就反过来

// Serializes `v` to JSON text.
//
// Arrays and objects are written one element per line. `whitespace` is the
// indentation of the enclosing level; children are indented by two more
// spaces and the closing bracket is written at `whitespace`. Strings and keys
// are written between double quotes without escaping, and numbers are
// formatted with std::to_string. A `type` that is not a declared enumerator
// produces an empty string.
//
// Accessing the optional that matches `v.type` throws
// std::bad_optional_access if that member was never set.
std::string deparse(JSONValue v, std::string whitespace) {
  const std::string child_indent = whitespace + "  ";

  switch (v.type) {
  case JSONValueType::String:
    return "\"" + v.string.value() + "\"";
  case JSONValueType::Boolean:
    return (v.boolean.value() ? "true" : "false");
  case JSONValueType::Number:
    return std::to_string(v.number.value());
  case JSONValueType::Null:
    return "null";
  case JSONValueType::Array: {
    std::string s = "[\n";
    // Bind by reference: copying the vector would copy the whole subtree.
    const auto &elements = v.array.value();
    for (std::size_t i = 0; i < elements.size(); i++) {
      s += child_indent + deparse(elements[i], child_indent);
      if (i + 1 < elements.size()) {
        s += ",";
      }
      s += "\n";
    }
    return s + whitespace + "]";
  }
  case JSONValueType::Object: {
    std::string s = "{\n";
    const auto &members = v.object.value();
    std::size_t i = 0;
    for (auto const &[key, value] : members) {
      s += child_indent + "\"" + key +
           "\": " + deparse(value, child_indent);
      if (i + 1 < members.size()) {
        s += ",";
      }
      s += "\n";
      i++;
    }
    return s + whitespace + "}";
  }
  }

  // Without this, flowing off the end of a non-void function would be
  // undefined behaviour for an out-of-range `type`.
  return "";
}

最终使用

#include "json.hpp"
#include <iostream>
int main(int argc, char *argv[]) {
  if (argc == 1) {
    std::cerr << "Expected JSON input argument to parse" << std::endl;
    return 1;
  }
  std::string in{argv[1]};
  auto [ast, error] = json::parse(in);
  if (error.size()) {
    std::cerr << error << std::endl;
    return 1;
  }
  std::cout << json::deparse(ast);
}

思路非常清晰了可以说

介绍magic_enum这个库,针对枚举类型的反射,非常好用,不过要求c++17

介绍<=>

看这两种实现

struct MyType
{
    int member1;
    std::string member2;
    std::vector<double> member3;
    int member4;
    double member5;

    friend bool operator<(MyType const& lhs, MyType const& rhs) = default;
};

// Hand-written ordering for MyType: lexicographic comparison of the five
// members in declaration order, done by comparing tuples of references.
bool operator<(MyType const& lhs, MyType const& rhs)
{
    // std::tie builds a tuple of references, so no member is copied.
    const auto key = [](MyType const& v) {
        return std::tie(v.member1, v.member2, v.member3, v.member4, v.member5);
    };
    return key(lhs) < key(rhs);
}

cpp on sea 2020

没有视频内容了,加点以前的视频

介绍reflection-ts 这个知道有这么个事儿就行,一时半会进不了,通过引入关键字来实现反射。有个clang插件实现,挺有意思的

带你走读代码,找bug,很有意思

auto做index时,类型可能被推导成int;用index循环的话,index的类型最好和size()的返回类型相同,或者直接用range-for

auto默认推导为值类型,即使写了static_cast<const T&>,赋给auto变量时仍会发生拷贝,应使用auto&

复制粘贴错误等等

项目


本文永久链接

看到这里或许你有建议或者疑问或者指出错误,请留言评论! 多谢! 你的评论非常重要!也可以帮忙点赞收藏转发!多谢支持! 觉得写得不错那就给点吧, 在线乞讨 微信转账