layout | title |
---|---|
post |
第27期 |
从reddit/hackernews/lobsters/meetingcpp摘抄一些c++动态。
每周更新
欢迎投稿,推荐或自荐文章/软件/资源等,请提交 issue
module这个功能是非常重要的,急需推进 Minimal module support for the standard library
派生类只重写某一个重载时,会隐藏基类里其他同名的重载;用using把基类的重载重新引入派生类作用域即可
// Empty tag types; each one selects a different `on` overload of the handler
// interface defined right after them.
struct trade {};
struct add_order {};
struct transaction_end {};
// Handler interface: one virtual `on` overload per event type, each with an
// empty default body. The destructor and the handlers are declared
// `constexpr virtual`, which requires C++20.
struct i {
constexpr virtual ~i() noexcept = default;
constexpr virtual void on(const trade&) {}
constexpr virtual void on(const add_order&) {}
constexpr virtual void on(const transaction_end&) {}
};
// Overrides only the `trade` overload. Declaring `on` in this class hides the
// other base-class overloads when `on` is looked up on an `impl1` object; they
// remain reachable through an `i&`.
struct impl1 : i {
void on(const trade&) override {}
};
// Same override as impl1, but `using i::on;` re-introduces every base-class
// `on` overload into this class's scope, so all three stay callable on an
// `impl2` object.
struct impl2 : i {
using i::on;
void on(const trade&) override {}
};
// Compile-time demonstration of overload hiding. Each static_assert invokes a
// lambda whose body is a requires-expression testing whether `t.on(<event>)`
// is a valid call for the static type of `t`.
int main() {
impl1 i1{};
impl2 i2{};
// via interface
// Through `i&` all three overloads are visible, whatever the dynamic type is.
static_assert([](i& t) { return requires { t.on(trade{}); }; }(i1));
static_assert([](i& t) { return requires { t.on(add_order{}); }; }(i1));
static_assert([](i& t) { return requires { t.on(transaction_end{}); }; }(i1));
// via concrete
// `auto&` deduces impl1&: only the overload declared in impl1 is found, the
// other two are hidden (hence the `not`).
static_assert([](auto& t) { return requires { t.on(trade{}); }; }(i1));
static_assert(not [](auto& t) { return requires { t.on(add_order{}); }; }(i1));
static_assert(not [](auto& t) { return requires { t.on(transaction_end{}); }; }(i1));
// via concrete with using::on
// `auto&` deduces impl2&: the using-declaration makes all overloads visible again.
static_assert([](auto& t) { return requires { t.on(trade{}); }; }(i2));
static_assert([](auto& t) { return requires { t.on(add_order{}); }; }(i2));
static_assert([](auto& t) { return requires { t.on(transaction_end{}); }; }(i2));
}
讲了EBO的例子,以及no_unique_address的改善
手把手教你写json解析库
简单定一个接口
// Public interface of the small JSON library: lexing, parsing and serializing.
// NOTE(review): JSONToken and JSONValue must already be declared when this
// header is read; the include that provides them is not shown here.
#ifndef JSON_H
#define JSON_H
#include <tuple>
#include <vector>
#include <string>
namespace json {
// Splits raw JSON text into tokens.
// Returns the tokens plus an error message; the message is empty on success.
std::tuple<std::vector<JSONToken>, std::string> lex(std::string);
// Parses one value starting at tokens[index].
// Returns the value, the index of the first token after it, and an error
// message that is empty on success.
std::tuple<JSONValue, int, std::string> parse(std::vector<JSONToken>, int index = 0);
// Serializes a value back to text. `whitespace` is the indentation prefix of
// the current nesting level.
std::string deparse(JSONValue, std::string whitespace = "");
} // namespace json
#endif
然后定义JSONToken和JSONValue
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>
// Data model shared by the lexer (JSONToken) and the parser (JSONValue).
namespace json {
// Kind of a lexed token. `Syntax` covers the structural characters [ ] { } : ,
enum class JSONTokenType { String, Number, Syntax, Boolean, Null };
// One lexed token.
struct JSONToken {
// Token text; for String tokens this is the content without the quotes.
std::string value;
JSONTokenType type;
// Index into the source text where the token starts.
int location;
// Shared copy of the complete source text, used when formatting errors.
std::shared_ptr<std::string> full_source;
};
// Kind of a parsed value; tells which optional member of JSONValue is set.
enum class JSONValueType { String, Number, Object, Array, Boolean, Null };
// A parsed JSON value. The parser fills the one optional member that matches
// `type`; a Null value sets none of them.
struct JSONValue {
std::optional<std::string> string;
std::optional<double> number;
std::optional<bool> boolean;
std::optional<std::vector<JSONValue>> array;
std::optional<std::map<std::string, JSONValue>> object;
JSONValueType type;
};
}
开始实现lex,简单说,就是根据分隔符(空格tab之类的),获取一个个token,然后解析。怎么确定是什么token?简单,每个token解析函数都处理一遍,总能解析出来吧
std::tuple<std::vector<JSONToken>, std::string> lex(std::string raw_json) {
std::vector<JSONToken> tokens;
// All tokens will embed a pointer to the raw JSON for debugging purposes
auto original_copy = std::make_shared<std::string>(raw_json);
auto generic_lexers = {lex_syntax, lex_string, lex_number, lex_null, lex_true, lex_false};
for (int i = 0; i < raw_json.length(); i++) {
// Skip past whitespace
if (auto new_index = lex_whitespace(raw_json, i); i != new_index) {
i = new_index - 1;
continue;
}
auto found = false;
for (auto lexer : generic_lexers) {
if (auto [token, new_index, error] = lexer(raw_json, i); i != new_index) {
// Error while lexing, return early
if (error.length()) {
return {\
{}, error};
}
// Store reference to the original source
token.full_source = original_copy;
tokens.push_back(token);
i = new_index - 1;
found = true;
break;
}
}
if (found) {
continue;
}
return {\
{}, format_error("Unable to lex", raw_json, i)\
};
}
return {tokens, ""};
}
generic_lexers来干活,处理完记录token
format_error比较好理解,就是把错误格式化成字符串:给出行号、列号,打印出错的那一行,并在下一行用^标出出错位置
// Builds a three-line diagnostic for position `index` of `source`:
//   "<base> at line L, column C"
//   the full text of the line containing the position
//   a caret placed under the offending column
// Tabs are rendered as a single space so the caret stays aligned.
std::string format_error(std::string base, std::string source, int index) {
  int line = 1;
  int column = 0;
  std::string shown_line;    // text of the current line seen so far
  std::string caret_indent;  // one blank per character preceding the caret

  // Walk the characters in front of `index`, tracking line and column.
  auto cursor = source.begin();
  for (int seen = 0; cursor != source.end() && seen != index; ++cursor, ++seen) {
    switch (*cursor) {
      case '\n':
        ++line;
        column = 0;
        shown_line.clear();
        caret_indent.clear();
        break;
      case '\t':
        ++column;
        shown_line += " ";
        caret_indent += " ";
        break;
      default:
        ++column;
        shown_line += *cursor;
        caret_indent += " ";
        break;
    }
  }

  // Append the remainder of the line so the reader sees it in full.
  for (; cursor != source.end() && *cursor != '\n'; ++cursor) {
    shown_line += *cursor;
  }

  std::ostringstream out;
  out << base << " at line " << line << ", column " << column << '\n'
      << shown_line << '\n'
      << caret_indent << "^";
  return out.str();
}
具体的lexer实现
lex_whitespace,就是跳过空白,简单
// Skips whitespace starting at `index`.
//
// Returns the index of the first non-whitespace character at or after
// `index`, or the length of `raw_json` if only whitespace remains. An index
// outside the string is returned unchanged.
int lex_whitespace(std::string raw_json, int index) {
  const int length = static_cast<int>(raw_json.length());
  // The bounds check comes before the read. The cast to unsigned char is
  // needed because std::isspace has undefined behaviour for negative values,
  // which plain char produces for non-ASCII (e.g. UTF-8) bytes.
  while (index >= 0 && index < length &&
         std::isspace(static_cast<unsigned char>(raw_json[index]))) {
    index++;
  }
  return index;
}
lex_syntax
校验token特殊符号,这些符号是语法的一部分,要单独处理
// Lexes a single structural character: [ ] { } : or ,
//
// Returns the token, the index after it, and an (always empty) error string.
// When the character at `index` is not structural, or `index` is outside the
// string, the returned index equals `index`, which callers treat as "no match".
std::tuple<JSONToken, int, std::string> lex_syntax(std::string raw_json, int index) {
  JSONToken token{"", JSONTokenType::Syntax, index};
  if (index < 0 || static_cast<std::size_t>(index) >= raw_json.length()) {
    return {token, index, ""};
  }
  const char c = raw_json[index];
  switch (c) {
    case '[':
    case ']':
    case '{':
    case '}':
    case ':':
    case ',':
      token.value.assign(1, c);
      index++;
      break;
    default:
      break;
  }
  return {token, index, ""};
}
lex_string要注意两个细节,判定空串,以及判定引号结尾, 这里嵌套引号暂时不考虑
// Lexes a double-quoted string starting at `original_index`.
//
// Returns the token (its value is the text between the quotes), the index
// after the closing quote, and an error string.
// - No opening quote at `original_index`: the returned index equals
//   `original_index`, meaning "no match".
// - Closing quote missing: the error string is set, the returned index is the
//   end of the input and the token holds the text collected so far.
std::tuple<JSONToken, int, std::string> lex_string(std::string raw_json,
                                                   int original_index) {
  JSONToken token{"", JSONTokenType::String, original_index};
  const bool in_range = original_index >= 0 &&
                        static_cast<std::size_t>(original_index) < raw_json.length();
  if (!in_range || raw_json[original_index] != '"') {
    return {token, original_index, ""};
  }

  // TODO: handle nested quotes
  const std::size_t body_begin = static_cast<std::size_t>(original_index) + 1;
  const std::size_t closing = raw_json.find('"', body_begin);
  if (closing == std::string::npos) {
    const int eof = static_cast<int>(raw_json.length());
    token.value = raw_json.substr(body_begin);
    return {token, eof, format_error("Unexpected EOF while lexing string", raw_json, eof)};
  }

  token.value = raw_json.substr(body_begin, closing - body_begin);
  return {token, static_cast<int>(closing) + 1, ""};
}
lex_number 直接数字字符串拼一下完事儿,这里并没有处理什么浮点数之类的场景
// Lexes a run of decimal digits starting at `original_index`.
//
// Returns the token, the index after the last digit, and an (always empty)
// error string. When there is no digit at `original_index` the returned index
// equals `original_index`, meaning "no match".
std::tuple<JSONToken, int, std::string> lex_number(std::string raw_json,
                                                   int original_index) {
  JSONToken token = {"", JSONTokenType::Number, original_index};
  int index = original_index;
  const int length = static_cast<int>(raw_json.length());
  // TODO: handle not just integers
  while (index >= 0 && index < length) {
    const char c = raw_json[index];
    if (c < '0' || c > '9') {
      break;
    }
    token.value += c;
    index++;
  }
  return {token, index, ""};
}
lex_keyword 处理true false null
// Lexes the literal `keyword` at `original_index`.
//
// Returns the token, the index after the keyword, and an (always empty) error
// string. The match is all-or-nothing: unless the complete keyword is present
// the returned index equals `original_index`, meaning "no match".
//
// The previous character-by-character loop advanced the index on a partial
// match (e.g. "tru"), so the caller accepted an empty-valued token; it could
// also read past the end of `keyword`.
std::tuple<JSONToken, int, std::string> lex_keyword(std::string raw_json,
                                                    std::string keyword,
                                                    JSONTokenType type,
                                                    int original_index) {
  JSONToken token{"", type, original_index};
  // compare() throws for a start position beyond the string, so validate first.
  const bool in_range = original_index >= 0 &&
                        static_cast<std::size_t>(original_index) <= raw_json.length();
  if (keyword.empty() || !in_range ||
      raw_json.compare(static_cast<std::size_t>(original_index), keyword.length(),
                       keyword) != 0) {
    return {token, original_index, ""};
  }
  token.value = keyword;
  return {token, original_index + static_cast<int>(keyword.length()), ""};
}
// Lexes the literal `null` at `index` by delegating to lex_keyword with token
// type Null.
std::tuple<JSONToken, int, std::string> lex_null(std::string raw_json,
int index) {
return lex_keyword(raw_json, "null", JSONTokenType::Null, index);
}
// Lexes the literal `true` at `index` by delegating to lex_keyword with token
// type Boolean.
std::tuple<JSONToken, int, std::string> lex_true(std::string raw_json,
int index) {
return lex_keyword(raw_json, "true", JSONTokenType::Boolean, index);
}
// Lexes the literal `false` at `index` by delegating to lex_keyword with token
// type Boolean.
std::tuple<JSONToken, int, std::string> lex_false(std::string raw_json,
int index) {
return lex_keyword(raw_json, "false", JSONTokenType::Boolean, index);
}
token 处理就结束了,一个参考
int main(int argc, char *argv[]) {
if (argc == 1) {
std::cerr << "Expected JSON input argument to parse" << std::endl;
return 1;
}
std::string in{argv[1]};
auto [tokens, error] = json::lex(in);
if (error.size()) {
std::cerr << error << std::endl;
return 1;
}
for (auto t : tokens) {
std::cout << t.value << std::endl;
}
}
lex结束了,该真正的parse了,将token转换成value
std::tuple<JSONValue, int, std::string> parse(std::vector<JSONToken> tokens,
int index) {
auto token = tokens[index];
switch (token.type) {
case JSONTokenType::Number: {
auto n = std::stod(token.value);
return {JSONValue{.number = n, .type = JSONValueType::Number}, index + 1, ""};
}
case JSONTokenType::Boolean:
return {JSONValue{.boolean = token.value == "true", .type = JSONValueType::Boolean}, index + 1, ""};
case JSONTokenType::Null:
return {JSONValue{.type = JSONValueType::Null}, index + 1, ""};
case JSONTokenType::String:
return {JSONValue{.string = token.value, .type = JSONValueType::String}, index + 1, ""};
case JSONTokenType::Syntax:
if (token.value == "[") {
auto [array, new_index, error] = parse_array(tokens, index + 1);
return {JSONValue{.array = array, .type = JSONValueType::Array}, new_index, error};
}
if (token.value == "{") {
auto [object, new_index, error] = parse_object(tokens, index + 1);
return {JSONValue{.object = std::optional(object), .type = JSONValueType::Object}, new_index, error};
}
}
return {\
{}, index, format_parse_error("Failed to parse", token)\
};
}
这里的format_parse_error类似上面的错误处理
// Returns the enumerator name of `jtt` for use in diagnostics.
// A value outside the enumerators yields "Unknown"; without that final return
// control would run off the end of a non-void function, which is undefined
// behaviour.
std::string JSONTokenType_to_string(JSONTokenType jtt) {
  switch (jtt) {
    case JSONTokenType::String:
      return "String";
    case JSONTokenType::Number:
      return "Number";
    case JSONTokenType::Syntax:
      return "Syntax";
    case JSONTokenType::Boolean:
      return "Boolean";
    case JSONTokenType::Null:
      return "Null";
  }
  return "Unknown";
}
// Builds a parser diagnostic for `token`: a header naming the token's value
// and type, followed by `base`, then handed to format_error together with the
// token's source text and location.
std::string format_parse_error(std::string base, JSONToken token) {
  std::string header = "Unexpected token '";
  header += token.value;
  header += "', type '";
  header += JSONTokenType_to_string(token.type);
  header += "', index ";
  header += "\n";
  header += base;
  return format_error(header, *token.full_source, token.location);
}
针对jsontokentype为syntax有两种可能,一种是array,一种是object,都会有嵌套场景
parse_array
std::tuple<std::vector<JSONValue>, int, std::string>
parse_array(std::vector<JSONToken> tokens, int index) {
std::vector<JSONValue> children = {};
while (index < tokens.size()) {
auto t = tokens[index];
if (t.type == JSONTokenType::Syntax) {
if (t.value == "]") {
return {children, index + 1, ""};
}
if (t.value == ",") {
index++;
t = tokens[index];
} else if (children.size() > 0) {
return {\
{},
index,
format_parse_error("Expected comma after element in array", t)};
}
}
auto [child, new_index, error] = parse(tokens, index);
if (error.size()) {
return {\
{}, index, error
};
}
children.push_back(child);
index = new_index;
}
return {
{},
index,
format_parse_error("Unexpected EOF while parsing array", tokens[index])};
}
parse_object
std::tuple<std::map<std::string, JSONValue>, int, std::string>
parse_object(std::vector<JSONToken> tokens, int index) {
std::map<std::string, JSONValue> values = {};
while (index < tokens.size()) {
auto t = tokens[index];
if (t.type == JSONTokenType::Syntax) {
if (t.value == "}") {
return {values, index + 1, ""};
}
if (t.value == ",") {
index++;
t = tokens[index];
} else if (values.size() > 0) {
return {
{},
index,
format_parse_error("Expected comma after element in object", t)};
} else {
return {\
{},
index,
format_parse_error(
"Expected key-value pair or closing brace in object", t)};
}
}
auto [key, new_index, error] = parse(tokens, index);
if (error.size()) {
return {\
{}, index, error};
}
if (key.type != JSONValueType::String) {
return {\
{}, index, format_parse_error("Expected string key in object", t)};
}
index = new_index;
t = tokens[index];
if (!(t.type == JSONTokenType::Syntax && t.value == ":")) {
return {\
{},
index,
format_parse_error("Expected colon after key in object", t)};
}
index++;
t = tokens[index];
auto [value, new_index1, error1] = parse(tokens, index);
if (error1.size()) {
return {\
{}, index, error1};
}
values[key.string.value()] = value;
index = new_index1;
}
return {values, index + 1, ""};
}
最终,parse就完成了
std::tuple<JSONValue, std::string> parse(std::string source) {
auto [tokens, error] = json::lex(source);
if (error.size()) {
return {\
{}, error};
}
auto [ast, _, error1] = json::parse(tokens);
return {ast, error1};
}
deparse就反过来
// Serializes `v` back to JSON text.
//
// `whitespace` is the indentation prefix of the current nesting level; nested
// values are emitted one level deeper. Arrays and objects are written with
// one element per line.
//
// Throws std::bad_optional_access when the optional member matching `v.type`
// is not set. Returns an empty string for a `type` outside the enumerators;
// without that final return control would run off the end of a non-void
// function.
//
// Children are accessed by reference: the previous version copied each
// array/object and each child before serializing it.
std::string deparse(JSONValue v, std::string whitespace) {
  const std::string inner = whitespace + " ";
  switch (v.type) {
    case JSONValueType::String:
      return "\"" + v.string.value() + "\"";
    case JSONValueType::Boolean:
      return (v.boolean.value() ? "true" : "false");
    case JSONValueType::Number:
      return std::to_string(v.number.value());
    case JSONValueType::Null:
      return "null";
    case JSONValueType::Array: {
      const auto &items = v.array.value();
      std::string s = "[\n";
      for (std::size_t i = 0; i < items.size(); i++) {
        s += inner + deparse(items[i], inner);
        if (i + 1 < items.size()) {
          s += ",";
        }
        s += "\n";
      }
      return s + whitespace + "]";
    }
    case JSONValueType::Object: {
      const auto &members = v.object.value();
      std::string s = "{\n";
      std::size_t emitted = 0;
      for (auto const &[key, value] : members) {
        s += inner + "\"" + key + "\": " + deparse(value, inner);
        if (++emitted < members.size()) {
          s += ",";
        }
        s += "\n";
      }
      return s + whitespace + "}";
    }
  }
  return "";
}
最终使用
#include "json.hpp"
#include <iostream>
int main(int argc, char *argv[]) {
if (argc == 1) {
std::cerr << "Expected JSON input argument to parse" << std::endl;
return 1;
}
std::string in{argv[1]};
auto [ast, error] = json::parse(in);
if (error.size()) {
std::cerr << error << std::endl;
return 1;
}
std::cout << json::deparse(ast);
}
思路非常清晰了可以说
介绍magic_enum这个库,针对枚举类型的反射,非常好用,不过要求c++17
介绍<=>(三路比较运算符)的用法
看这两种实现
// First variant: let the compiler generate the comparison (C++20 defaulted
// comparison operators).
// NOTE(review): in C++20 a defaulted operator< is defined as deleted unless a
// usable operator<=> exists for the type. None is declared here, so this
// probably needs `friend auto operator<=>(MyType const&, MyType const&) = default;`
// (plus <compare>) for `a < b` to compile. Confirm against the original article.
struct MyType
{
int member1;
std::string member2;
std::vector<double> member3;
int member4;
double member5;
friend bool operator<(MyType const& lhs, MyType const& rhs) = default;
};
// Second variant: hand-written lexicographic comparison over all members, in
// declaration order.
bool operator<(MyType const& lhs, MyType const& rhs)
{
    // View an object as a tuple of references to its members; comparing two
    // such tuples compares member by member.
    const auto as_tuple = [](MyType const& obj) {
        return std::tie(obj.member1, obj.member2, obj.member3, obj.member4, obj.member5);
    };
    return as_tuple(lhs) < as_tuple(rhs);
}
没有视频内容了,加点以前的视频
- Serialization in C++ has never been easier! But wait, there's more...
介绍reflection-ts 这个知道有这么个事儿就行,一时半会进不了,通过引入关键字来实现反射。有个clang插件实现,挺有意思的
- Hypercritical C++ Code Review
带你走读代码,找bug,很有意思
auto做index,可能是int,index循环的话,index最好和size()类型相同,或者直接range-for
auto默认按值推导,即使写成 auto x = static_cast<const T&>(obj); 也会发生拷贝;想要引用请显式写auto&或const auto&
复制粘贴错误等等