# [chibicc](https://github.com/rui314/chibicc)

## va_list, va_start, va_arg

```c
// Build a string from a printf-style format and its arguments.
// The text is produced through an in-memory stream and the resulting
// buffer is handed back to the caller.
char *format(char *fmt, ...) {
  char *result;
  size_t result_len;
  FILE *stream = open_memstream(&result, &result_len);

  va_list args;
  va_start(args, fmt);
  vfprintf(stream, fmt, args);
  va_end(args);

  // Closing the stream finalizes `result` before it is returned.
  fclose(stream);
  return result;
}
```

1. va_* 这几个宏原理就是移动指针，如下

```c
// Simplified model of variadic argument access. It only illustrates the
// pointer arithmetic for a layout in which every argument sits
// contiguously in memory right after the last named parameter; it is not
// a drop-in replacement for <stdarg.h>.
typedef unsigned char *va_list;
// Point `list` at the first byte after the named parameter `param`.
#define va_start(list, param) ((list) = (((va_list)&(param)) + sizeof(param)))
// Yield the value of type `type` that `list` points at, then advance
// `list` past it. (`type` cannot be parenthesized: it is used in a cast.)
#define va_arg(list, type)    (*(type *)(((list) += sizeof(type)) - sizeof(type)))

```

只不过在GCC里不是明确用宏定义的，而是内建函数


![](resources/01.png)

-----------------
-----------------

## tokenize.c

```c
// Tokenize a given string and returns new tokens.
Token *tokenize(File *file) {
  current_file = file;

  char *p = file->contents;
  Token head = {};
  Token *cur = &head;

  at_bol = true;
  has_space = false;

  while (*p) {
    // Skip line comments.
    if (startswith(p, "//")) {
      p += 2;
      while (*p != '\n')
        p++;
      has_space = true;
      continue;
    }

    // Skip block comments.
    if (startswith(p, "/*")) {
      char *q = strstr(p + 2, "*/");
      if (!q)
        error_at(p, "unclosed block comment");
      p = q + 2;
      has_space = true;
      continue;
    }

    // Skip newline.
    if (*p == '\n') {
      p++;
      at_bol = true;
      has_space = false;
      continue;
    }

    // Skip whitespace characters.
    if (isspace(*p)) {
      p++;
      has_space = true;
      continue;
    }

    // Numeric literal
    if (isdigit(*p) || (*p == '.' && isdigit(p[1]))) {
      char *q = p++;
      for (;;) {
        if (p[0] && p[1] && strchr("eEpP", p[0]) && strchr("+-", p[1]))
          p += 2;
        else if (isalnum(*p) || *p == '.')
          p++;
        else
          break;
      }
      cur = cur->next = new_token(TK_PP_NUM, q, p);
      continue;
    }

```

把文件token化，比较简单，主要就几种情况

1.注释 2.换行 3.空白 4.数字常量 5.字符串常量 6.关键字 7.运算符

----------------------------------------------------------------------------
-------------------------------------------------------------

## preprocess.c

[Macro Algo: Dave Prosser Algo](resources/cpp.algo.pdf)

[GCC Macros](https://gcc.gnu.org/onlinedocs/cpp/Macros.html)

![](resources/02.png)

Dave 算法：

1. 每个token都有一个hideset，表示这个token之前是由哪个macro（string）替换来的。初始的时候都是空的（{}）

2. 在macro expand的过程中，如果当前这个token的string在hideset中，说明之前已经发生过一次替换，那么这次就不再替换（这就防止了循环替换）。
   
   
3. 如果hideset中没有出现过，就把当前token替换成对应的macro，同时将原来token的string放入到hideset中（也就是代码中的 $HS \cup \{T\}$）

4. 如果是function-like 的macro（注：macro定义时的参数叫parameter，传入的参数叫actual或argument），先对传入的参数（actuals）作macro expand，然后再用expanded之后的actual去替换macro中的parameter。新token的hideset取macro名字这个token的hideset `HS` 和调用末尾右括号 `)` 的hideset `HS'` 的交集，再并上macro自己的名字，也就是算法中的 $(HS \cap HS') \cup \{T\}$

5. 在代码中的macro expand，针对每一个token，会循环去作expand，直到当前的token无法再expand了，再去处理下一个token。这样就解决了macro嵌套定义的问题，例如

    ```c
    #define A 1
    #define B A
    #define C B
    #define D C+B
    ```
----------------------------------

### static Token *preprocess2(Token *tok)


```c
// Visit all tokens in `tok` while evaluating preprocessing
// macros and directives.
static Token *preprocess2(Token *tok) {
  Token head = {};
  Token *cur = &head;

  while (tok->kind != TK_EOF) {
    // If it is a macro, expand it.
    if (expand_macro(&tok, tok))
      continue;

    // Pass through if it is not a "#".
    if (!is_hash(tok)) {
      tok->line_delta = tok->file->line_delta;
      tok->filename = tok->file->display_name;
      cur = cur->next = tok;
      tok = tok->next;
      continue;
    }

    Token *start = tok;
    tok = tok->next;

```
1. `expand_macro` 展开当前token，如果可以展开，返回true，那么就continue，继续展开当前token，直到无法展开，再往下继续处理

-----------------------------------------

### static bool expand_macro(Token **rest, Token *tok)

```c
// If tok is a macro, expand it and return true.
// Otherwise, do nothing and return false.
//
// On success, *rest is set to the head of the replacement token list,
// which continues with the tokens that followed the macro invocation.
static bool expand_macro(Token **rest, Token *tok) {
  // A token whose own name is already in its hideset came from an
  // expansion of that same macro, so it must not be expanded again.
  if (hideset_contains(tok->hideset, tok->loc, tok->len))
    return false;

  /** xitongsys
   *
   * find_macro looks up the global hashmap to check whether the
   * current token is a macro.
   *
  **/
  Macro *m = find_macro(tok);
  if (!m)
    return false;

  // Built-in dynamic macro application such as __LINE__
  if (m->handler) {
    *rest = m->handler(tok);
    (*rest)->next = tok->next;
    return true;
  }


  /** xitongsys
   *
   * For an object-like macro, the new hideset is the union of the
   * token's previous hideset and the macro's own name.
   * The body is a linked list because a macro may expand to more
   * than one token.
   *
  **/
  // Object-like macro application
  if (m->is_objlike) {
    Hideset *hs = hideset_union(tok->hideset, new_hideset(m->name));
    Token *body = add_hideset(m->body, hs);
    for (Token *t = body; t->kind != TK_EOF; t = t->next)
      t->origin = tok;
    *rest = append(body, tok->next);
    // The first replacement token takes over the layout flags of the
    // macro token it replaces.
    (*rest)->at_bol = tok->at_bol;
    (*rest)->has_space = tok->has_space;
    return true;
  }

  // If a funclike macro token is not followed by an argument list,
  // treat it as a normal identifier.
  if (!equal(tok->next, "("))
    return false;

  // Function-like macro application
  // read_macro_args updates `tok` through its first argument; the
  // updated token is used below as the closing parenthesis.
  Token *macro_token = tok;
  MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name);
  Token *rparen = tok;

  // Tokens that make up a func-like macro invocation may have different
  // hidesets, and if that's the case, it's not clear what the hideset
  // for the new tokens should be. We take the intersection of the
  // macro token and the closing parenthesis and use it as a new hideset
  // as explained in Dave Prosser's algorithm.
  Hideset *hs = hideset_intersection(macro_token->hideset, rparen->hideset);
  hs = hideset_union(hs, new_hideset(m->name));

  Token *body = subst(m->body, args);
  body = add_hideset(body, hs);
  for (Token *t = body; t->kind != TK_EOF; t = t->next)
    t->origin = macro_token;
  *rest = append(body, tok->next);
  (*rest)->at_bol = macro_token->at_bol;
  (*rest)->has_space = macro_token->has_space;
  return true;
}
```
1. see comments in the code by xitongsys

---------------------------------------

```c
// Build a new token list from `tok`: each token is duplicated with
// copy_token and the duplicate's hideset is extended with `hs`.
// Returns the head of the new list.
static Token *add_hideset(Token *tok, Hideset *hs) {
  Token head = {};
  Token *tail = &head;

  while (tok) {
    Token *dup = copy_token(tok);
    dup->hideset = hideset_union(dup->hideset, hs);

    // Link the duplicate at the end of the list being built.
    tail->next = dup;
    tail = dup;

    tok = tok->next;
  }
  return head.next;
}
```

之所以把hs加到整个token list里面是因为macro展开的时候，可以有多个token，因此macro expand的结果是一个token list。具体看Macro struct的内容。其中body就是这个macro要展开的token list

```c
// A preprocessor macro definition.
typedef struct Macro Macro;
struct Macro {
  // Macro name; it is added to the hideset of tokens produced by an
  // expansion of this macro.
  char *name;
  bool is_objlike; // Object-like or function-like
  // Parameter list of a function-like macro (handed to read_macro_args).
  MacroParam *params;
  // Handed to read_macro_args together with params; presumably the name
  // of the variadic parameter -- confirm against read_macro_args.
  char *va_args_name;
  // Token list the macro expands to.
  Token *body;
  // If set, called with the macro token to produce the replacement
  // (built-in dynamic macros such as __LINE__).
  macro_handler_fn *handler;
};
```

----------------------------

```c
// Concatenate two token lists: the tokens of tok1 (up to, but not
// including, its TK_EOF) are copied and tok2 is linked after the copies.
static Token *append(Token *tok1, Token *tok2) {
  // Nothing to copy: the result is simply tok2.
  if (tok1->kind == TK_EOF)
    return tok2;

  Token head = {};
  Token *cur = &head;

  while (tok1->kind != TK_EOF) {
    cur = cur->next = copy_token(tok1);
    tok1 = tok1->next;
  }

  cur->next = tok2;
  return head.next;
}
```

1. 代码中有很多对两个token list的操作，往往都不修改第一个参数，而是把它重新拷贝一份，再把第二个参数追加上去，返回 （新拷贝的1 + 老的2）

2. `cur = cur->next = copy_token(tok1);` 赋值小技巧

------------------------
------------------------

## parse.c

```c
// This file contains a recursive descent parser for C.
//
// Most functions in this file are named after the symbols they are
// supposed to read from an input token list. For example, stmt() is
// responsible for reading a statement from a token list. The function
// then construct an AST node representing a statement.
//
// Each function conceptually returns two values, an AST node and
// remaining part of the input tokens. Since C doesn't support
// multiple return values, the remaining tokens are returned to the
// caller via a pointer argument.
//
// Input tokens are represented by a linked list. Unlike many recursive
// descent parsers, we don't have the notion of the "input token stream".
// Most parsing functions don't change the global state of the parser.
// So it is very easy to lookahead arbitrary number of tokens in this
// parser.
```

整体逻辑比较简单。就是递归下降去parse所有的token构建AST（难点在于把所有语句的文法写清楚）。当然内部有些技巧

---------------------------------------

### 复习下编译原理的一些概念

![](resources/04.png)
![](resources/05.png)
![](resources/06.png)

1. LL 剖析，就是从左向右读入输入，并且总是从最左边的非终结符开始进行替换（最左推导）。上例中，当看到第一个是`(`后，就进行2替换。第二个括号，继续进行2替换。后面都是3替换

2. 从替换过程可以看到，是自顶向下的构建

![](resources/07.png)

### 左递归

![](resources/08.png)
![](resources/09.png)
![](resources/10.png)


## [Paull's Algorithm](resources/removing_left_recursion_from_context_free_grammars.pdf)

![](resources/11.png)
![](resources/12.png)
![](resources/13.png)

1. 对于直接左递归，$A \rightarrow A\alpha_1 \mid \beta_1$，所有左递归的production，最终的展开必然以非左递归的某一个开头，也就是其中的$\beta_1$。而左递归production中的$\alpha_1$，则可以重复零次或多次。因此将其改写为 $A \rightarrow \beta_1 A'$，$A' \rightarrow \alpha_1 A' \mid \epsilon$，得到了和之前一样的语义

2. 对于间接左递归，将所有nonterminals排序，只允许从前往后的展开，不允许从后往前。例如$i>j, A_i \rightarrow A_j\alpha$就需要移除。而$A_j$在前面已经处理过了，它所有以nonterminal开头的展开一定只以$A_k, k>j$开头。因此消除的时候，只要把$A_j$替换成所有其可能的展开即可

3. 替换左递归后往往会导致结合律发生变化，前面wiki中也提到了几种方法。如果手写parser，最简单的就是在构造语法树的时候做特殊处理，重新安排顺序，例如龙书中的这个例子

![](resources/14.png)

------------------

### 代码笔记


```c
// program = (typedef | function-definition | global-variable)*
//
// Top-level entry point of the parser: consumes the whole token list
// and returns the list of global objects.
Obj *parse(Token *tok) {
  declare_builtin_functions();
  globals = NULL;

  // Each top-level construct starts with a declaration specifier; what
  // follows decides which of the three forms it is.
  while (tok->kind != TK_EOF) {
    VarAttr attr = {};
    Type *basety = declspec(&tok, tok, &attr);

    if (attr.is_typedef)
      tok = parse_typedef(tok, basety);     // Typedef
    else if (is_function(tok))
      tok = function(tok, basety, &attr);   // Function
    else
      tok = global_variable(tok, basety, &attr); // Global variable
  }

  // Run mark_live on every object flagged as a root.
  for (Obj *obj = globals; obj; obj = obj->next) {
    if (obj->is_root)
      mark_live(obj);
  }

  // Remove redundant tentative definitions.
  scan_globals();
  return globals;
}
```

1. 最顶层的parse，因为像#include，#define等preprocess的语句，已经在preprocess过程中转换成普通token了。所以在这里只有这三种情况：`typedef, function-definition, global-variable`

-------------------

---------------------
---------------------

## codegen.c

### 1. static void gen_addr(Node *node) 

```c
// Compute the absolute address of a given node.
// It's an error if a given node does not reside in memory.
```


1. 对于局部变量，因为是在stack上，所以直接用rbp + offset即可。`lea` 指令取地址
   ```c
       // Local variable
    if (node->var->is_local) {
      println("  lea %d(%%rbp), %%rax", node->var->offset);
      return;
    }
   ```

2. 对于变长数组（VLA，一定是局部变量），栈上 rbp + offset 处存放的并不是数组本身，而是指向数组的地址，所以直接用`mov`把 `offset(%rbp)` 处的值读出来即可
   ```c
       // Variable-length array, which is always local.
    if (node->var->ty->kind == TY_VLA) {
      println("  mov %d(%%rbp), %%rax", node->var->offset);
      return;
    }
   ```

3. 对于全局变量，在运行时地址位置不固定，采用RIP-relative addressing。如果是来自shared object的func，则采用GOT + RIP的方式
   ```c
       // Here, we generate an absolute address of a function or a global
    // variable. Even though they exist at a certain address at runtime,
    // their addresses are not known at link-time for the following
    // two reasons.
    //
    //  - Address randomization: Executables are loaded to memory as a
    //    whole but it is not known what address they are loaded to.
    //    Therefore, at link-time, relative address in the same
    //    exectuable (i.e. the distance between two functions in the
    //    same executable) is known, but the absolute address is not
    //    known.
    //
    //  - Dynamic linking: Dynamic shared objects (DSOs) or .so files
    //    are loaded to memory alongside an executable at runtime and
    //    linked by the runtime loader in memory. We know nothing
    //    about addresses of global stuff that may be defined by DSOs
    //    until the runtime relocation is complete.
    //
    // In order to deal with the former case, we use RIP-relative
    // addressing, denoted by `(%rip)`. For the latter, we obtain an
    // address of a stuff that may be in a shared object file from the
    // Global Offset Table using `@GOTPCREL(%rip)` notation.

    // Function
    if (node->ty->kind == TY_FUNC) {
      if (node->var->is_definition)
        println("  lea %s(%%rip), %%rax", node->var->name);
      else
        println("  mov %s@GOTPCREL(%%rip), %%rax", node->var->name);
      return;
    }

    // Global variable
    println("  lea %s(%%rip), %%rax", node->var->name);
    return;
   ```

![](resources/15.png)
![](resources/16.png)

1. 编译器生成代码的时候，生成的是这个sym和下一条指令的相对offset


![](resources/17.png)





-------------------------------------------------------------------------

### 2. static void gen_expr(Node *node)

1. 这个函数生成各种表达式的汇编代码，同时将最终的表达式结果放到 %rax 中（如果表达式的结果是个struct/union之类的，则将其地址放在 %rax 中），如果结果是float/double类型，则将结果存在 %xmm0 中
2. 后续的操作通过 load/store 函数，在寄存器和内存间转移

    **store**函数如下，栈顶存放 目标地址。对于struct/union，用多条mov指令，一个字节一个字节mov过去。而其他类型直接mov过去。
```c
// Write the current value to the address found on top of the stack.
// The address is popped into %rdi first. Integer values come from
// %rax, float/double from %xmm0, long double is stored with fstpt, and
// for struct/union %rax holds the source address to copy from.
static void store(Type *ty) {
  pop("%rdi");

  switch (ty->kind) {
  case TY_STRUCT:
  case TY_UNION: {
    // Copy the aggregate one byte at a time through %r8b.
    int off = 0;
    while (off < ty->size) {
      println("  mov %d(%%rax), %%r8b", off);
      println("  mov %%r8b, %d(%%rdi)", off);
      off++;
    }
    return;
  }
  case TY_FLOAT:
    println("  movss %%xmm0, (%%rdi)");
    return;
  case TY_DOUBLE:
    println("  movsd %%xmm0, (%%rdi)");
    return;
  case TY_LDOUBLE:
    println("  fstpt (%%rdi)");
    return;
  default:
    break;
  }

  // Everything else: pick the register width that matches the size.
  switch (ty->size) {
  case 1:
    println("  mov %%al, (%%rdi)");
    break;
  case 2:
    println("  mov %%ax, (%%rdi)");
    break;
  case 4:
    println("  mov %%eax, (%%rdi)");
    break;
  default:
    println("  mov %%rax, (%%rdi)");
    break;
  }
}
```

  **load** 函数如下，作用是将 %rax 指针指向位置的值load到 %rax中

```c
// Replace the address held in %rax with the value stored at that
// address (float/double go to %xmm0, long double is loaded with fldt).
static void load(Type *ty) {
  switch (ty->kind) {
  case TY_ARRAY:
  case TY_STRUCT:
  case TY_UNION:
  case TY_FUNC:
  case TY_VLA:
    // Nothing is loaded for these kinds: in general an entire array
    // cannot be placed in a register, so evaluating an array yields
    // the address of the array rather than the array itself. This is
    // where C's "an array is automatically converted to a pointer to
    // its first element" rule takes effect.
    return;
  case TY_FLOAT:
    println("  movss (%%rax), %%xmm0");
    return;
  case TY_DOUBLE:
    println("  movsd (%%rax), %%xmm0");
    return;
  case TY_LDOUBLE:
    println("  fldt (%%rax)");
    return;
  default:
    break;
  }

  // Zero-extend unsigned values, sign-extend signed ones.
  char *insn;
  if (ty->is_unsigned)
    insn = "movz";
  else
    insn = "movs";

  // A char or short is always widened to the size of int when loaded,
  // so the lower half of the register can be assumed to hold a valid
  // value. The upper half may contain garbage for char, short and int.
  // A long simply fills the whole register.
  switch (ty->size) {
  case 1:
    println("  %sbl (%%rax), %%eax", insn);
    break;
  case 2:
    println("  %swl (%%rax), %%eax", insn);
    break;
  case 4:
    println("  movsxd (%%rax), %%rax");
    break;
  default:
    println("  mov (%%rax), %%rax");
    break;
  }
}
```

----------------------------

### Function Call 原理

函数调用是这里面最麻烦的一部分

**传参约定**

1. 6个以内的整型，8个以内的浮点，都通过寄存器传参。超出的部分通过stack传参
2. 在stack上的每个参数，必须满足8字节对齐地址。而末尾要是16字节对齐
3. 如果是动态个数的参数，则要把float类型的参数个数存到 %rax 中
```c
// Load function call arguments. Arguments are already evaluated and
// stored to the stack as local variables. What we need to do in this
// function is to load them to registers or push them to the stack as
// specified by the x86-64 psABI. Here is what the spec says:
//
// - Up to 6 arguments of integral type are passed using RDI, RSI,
//   RDX, RCX, R8 and R9.
//
// - Up to 8 arguments of floating-point type are passed using XMM0 to
//   XMM7.
//
// - If all registers of an appropriate type are already used, push an
//   argument to the stack in the right-to-left order.
//
// - Each argument passed on the stack takes 8 bytes, and the end of
//   the argument area must be aligned to a 16 byte boundary.
//
// - If a function is variadic, set the number of floating-point type
//   arguments to RAX.
```

4. 
  * 在caller中，传递的参数有两类，第一类是存在寄存器中的。第二类是存在stack中的（注意，这个stack是caller的stack）
  * 在callee中，对于第一类寄存器中的参数，必须复制到自己的stack中，作为自己的local变量。而对于第二类参数，其直接使用即可，因为其已经存在内存中（caller的stack中），callee是直接可以访问的



**返回值约定**

1. 如果返回值size 16byte 以内，则通过寄存器返回(%rax or %xmm0)
2. 如果返回值是超过16字节的struct/union类型，则无法放进寄存器，需要通过内存返回（x86-64 System V ABI 的约定是由caller提供缓冲区，把缓冲区地址当作隐藏的第一个参数传入）。这里的实现方式是 
  * 在caller中，函数的返回值也是其一个局部变量，caller将这个局部变量的*地址*看作被调用函数的第一个参数传过去
  * 在callee中，如果返回值是一个struct/union，那么第一个参数就看作是返回值的地址。在return的时候将结果拷贝到对应的位置




**内存布局和调用过程**

以下面为例

```c
// 24 bytes on x86-64, i.e. larger than 16 bytes, so the value is
// returned through a buffer supplied by the caller.
struct A {
  long v[3];
};

// Declared up front because caller() uses it before its definition.
struct A callee(int a1, int a2, int a3, int a4, int a5, int a6, int a7);

void caller(void) {
  callee(1, 2, 3, 4, 5, 6, 7);
RET_ADDR: // marks the return address pushed by the call instruction
  return;
}

struct A callee(int a1, int a2, int a3, int a4, int a5, int a6, int a7) {
  int b1, b2;
  return (struct A){0};
}

1. 在caller中，因为callee的返回值是个struct，因此需要在自己的stack中保留一段sizeof(A)的局部变量空间作为返回值存储的地方。将其地址记作a。同时，这个地址要作为*第一个参数*传递过去，因此将其保存到 %rdi 中
   
2. 因为其参数个数是7个，所以前5个用寄存器传输，a6,a7则放到自己的stack上（先push a7，再push a6），此时寄存器和内存如下

        ```
        RDI = a 
        RSI = 1, RDX = 2, RCX = 3, R8 = 4, R9 = 5


        ---- <= caller RBP
        ...
        7
        6
            <= caller RSP  
        ```

3. caller 在这个时候调用 call 指令，call指令会将return address(也就是RET_ADDR)处的指令地址push到stack中，然后jump到callee的指令处开始执行。此时的结果如下

        ```
        RDI = a 
        RSI = 1, RDX = 2, RCX = 3, R8 = 4, R9 = 5


        ---- <= caller RBP
        ...
        7
        6
        RET_ADDR
            <= caller RSP  
        ```

4. callee函数在执行前，会先将caller函数的RBP push 到 stack中，同时将现在的RSP作为自己的RBP。同时将caller函数通过寄存器传过来的参数拷贝到自己的stack中，同时还有自己的局部变量。结果如下
        ```
        RDI = a 
        RSI = 1, RDX = 2, RCX = 3, R8 = 4, R9 = 5


        ----    <= caller RBP
        ...
        7
        6
        RET_ADDR
        caller_RBP
        ----    <= callee RBP
        a //返回值地址
        1
        2
        3
        4
        5
        b1
        b2
                <= callee RSP
        ```   

5. callee 函数执行完，return的时候，因为return的是struct，所以将返回结果memcopy到a所指向的地址，然后将自己的stack全部弹出。还原caller RBP。结果如下
        ```
        RDI = a 
        RSI = 1, RDX = 2, RCX = 3, R8 = 4, R9 = 5


        ----    <= caller RBP
        ...
        7
        6
                <= caller RSP
        ```     

--------------------------------------------------  

### Function Call 代码实现

#### static int push_args(Node *node)

```c
// Push the arguments that belong to the current pass, last argument
// first. When `first_pass` is true only arguments flagged
// pass_by_stack are pushed; otherwise only the remaining ones are.
static void push_args2(Node *args, bool first_pass) {
  if (!args)
    return;

  // Recurse before emitting anything so the list is handled back to front.
  push_args2(args->next, first_pass);

  // Skip arguments that belong to the other pass.
  if (first_pass == !args->pass_by_stack)
    return;

  gen_expr(args);

  switch (args->ty->kind) {
  case TY_STRUCT:
  case TY_UNION:
    push_struct(args->ty);
    break;
  case TY_FLOAT:
  case TY_DOUBLE:
    pushf();
    break;
  case TY_LDOUBLE:
    // A long double takes 16 bytes, accounted for as two units of depth.
    println("  sub $16, %%rsp");
    println("  fstpt (%%rsp)");
    depth += 2;
    break;
  default:
    push();
    break;
  }
}


// Load function call arguments. Arguments are already evaluated and
// stored to the stack as local variables. What we need to do in this
// function is to load them to registers or push them to the stack as
// specified by the x86-64 psABI. Here is what the spec says:
//
// - Up to 6 arguments of integral type are passed using RDI, RSI,
//   RDX, RCX, R8 and R9.
//
// - Up to 8 arguments of floating-point type are passed using XMM0 to
//   XMM7.
//
// - If all registers of an appropriate type are already used, push an
//   argument to the stack in the right-to-left order.
//
// - Each argument passed on the stack takes 8 bytes, and the end of
//   the argument area must be aligned to a 16 byte boundary.
//
// - If a function is variadic, set the number of floating-point type
//   arguments to RAX.
//
// Returns the number of 8-byte stack slots used by stack-passed
// arguments, including the alignment padding slot if one was added.
static int push_args(Node *node) {
  // stack: 8-byte slots needed on the stack.
  // gp/fp: general-purpose / floating-point registers claimed so far.
  int stack = 0, gp = 0, fp = 0;

  // If the return type is a large struct/union, the caller passes
  // a pointer to a buffer as if it were the first argument.
  if (node->ret_buffer && node->ty->size > 16)
    gp++;

  // Load as many arguments to the registers as possible.
  // This loop only classifies: it sets pass_by_stack on arguments that
  // do not fit in registers and counts their stack slots.
  for (Node *arg = node->args; arg; arg = arg->next) {
    Type *ty = arg->ty;

    switch (ty->kind) {
    case TY_STRUCT:
    case TY_UNION:
      if (ty->size > 16) {
        arg->pass_by_stack = true;
        stack += align_to(ty->size, 8) / 8;
      } else {
        // fp1/fp2 tell whether the first/second 8-byte half goes to a
        // floating-point register; otherwise that half counts as gp.
        bool fp1 = has_flonum1(ty);
        bool fp2 = has_flonum2(ty);

        if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) {
          fp = fp + fp1 + fp2;
          gp = gp + !fp1 + !fp2;
        } else {
          arg->pass_by_stack = true;
          stack += align_to(ty->size, 8) / 8;
        }
      }
      break;
    case TY_FLOAT:
    case TY_DOUBLE:
      if (fp++ >= FP_MAX) {
        arg->pass_by_stack = true;
        stack++;
      }
      break;
    case TY_LDOUBLE:
      // long double is always passed on the stack and takes two slots.
      arg->pass_by_stack = true;
      stack += 2;
      break;
    default:
      if (gp++ >= GP_MAX) {
        arg->pass_by_stack = true;
        stack++;
      }
    }
  }

  // Insert one padding slot when the slot count would otherwise be odd,
  // keeping the argument area 16-byte aligned. (`depth` presumably
  // counts the 8-byte slots currently pushed -- confirm at its definition.)
  if ((depth + stack) % 2 == 1) {
    println("  sub $8, %%rsp");
    depth++;
    stack++;
  }

  // First push the stack-passed arguments, then the register-passed ones.
  push_args2(node->args, true);
  push_args2(node->args, false);

  // If the return type is a large struct/union, the caller passes
  // a pointer to a buffer as if it were the first argument.
  if (node->ret_buffer && node->ty->size > 16) {
    println("  lea %d(%%rbp), %%rax", node->ret_buffer->offset);
    push();
  }

  return stack;
}
```

1. 这两个函数是生成caller里面执行的代码。作用是将所有的传递参数都push到自己的stack中，同时将`arg->pass_by_stack`进行赋值。
2. 注意，这里将所有的参数都先push到了自己的栈中，但是分了两步（也就是 `push_args2` 的参数 `first_pass` = true/false），第一步，先把pass_by_stack的参数push到stack中。第二步，将用寄存器传递的参数push进stack中（这部分参数会在下一步pop进寄存器）


----------------------------------
   
#### case ND_FUNCALL

这一步生成的代码也是在caller中运行的

```c
  case ND_FUNCALL: {
    if (node->lhs->kind == ND_VAR && !strcmp(node->lhs->var->name, "alloca")) {
      gen_expr(node->args);
      println("  mov %%rax, %%rdi");
      builtin_alloca();
      return;
    }

    int stack_args = push_args(node);

    /* xitongsys

      node 是function call，他的lhs就是func的地址
      这一步就把要调用的function的地址放到了rax里面

    */
    gen_expr(node->lhs);

    int gp = 0, fp = 0;

    // If the return type is a large struct/union, the caller passes
    // a pointer to a buffer as if it were the first argument.
    if (node->ret_buffer && node->ty->size > 16)
      pop(argreg64[gp++]);

    for (Node *arg = node->args; arg; arg = arg->next) {
      Type *ty = arg->ty;

      switch (ty->kind) {
      case TY_STRUCT:
      case TY_UNION:
        if (ty->size > 16)
          continue;

        bool fp1 = has_flonum1(ty);
        bool fp2 = has_flonum2(ty);

        if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) {
          if (fp1)
            popf(fp++);
          else
            pop(argreg64[gp++]);

          if (ty->size > 8) {
            if (fp2)
              popf(fp++);
            else
              pop(argreg64[gp++]);
          }
        }
        break;
      case TY_FLOAT:
      case TY_DOUBLE:
        if (fp < FP_MAX)
          popf(fp++);
        break;
      case TY_LDOUBLE:
        break;
      default:
        if (gp < GP_MAX)
          pop(argreg64[gp++]);
      }
    }

    /*
     xitongsys

     rax 里面存的是要调用的function的地址，将其复制到 r10中
     因为rax需要存放有多少个float类型的变量，也就是fp
     call 调用 r10 中存放的函数
    */

    println("  mov %%rax, %%r10");
    println("  mov $%d, %%rax", fp);
    println("  call *%%r10");
    println("  add $%d, %%rsp", stack_args * 8);

    depth -= stack_args;

    // It looks like the most significant 48 or 56 bits in RAX may
    // contain garbage if a function return type is short or bool/char,
    // respectively. We clear the upper bits here.
    switch (node->ty->kind) {
    case TY_BOOL:
      println("  movzx %%al, %%eax");
      return;
    case TY_CHAR:
      if (node->ty->is_unsigned)
        println("  movzbl %%al, %%eax");
      else
        println("  movsbl %%al, %%eax");
      return;
    case TY_SHORT:
      if (node->ty->is_unsigned)
        println("  movzwl %%ax, %%eax");
      else
        println("  movswl %%ax, %%eax");
      return;
    }

    // If the return type is a small struct, a value is returned
    // using up to two registers.
    if (node->ret_buffer && node->ty->size <= 16) {
      copy_ret_buffer(node->ret_buffer);
      println("  lea %d(%%rbp), %%rax", node->ret_buffer->offset);
    }

    return;
  }
```

1. 在上一步中，已经把所有传入参数push到了caller的stack中，需要用寄存器传递的参数在栈顶。在这一步中，如果这个参数是从寄存器传递，则从stack中pop到寄存器里，直到寄存器都满了，则剩下的留在stack里

2. see comments in codes

---------------------------------

#### emit_text

callee 的代码生成在这个函数里

```c
// Emit the text section: one assembly function body for every live
// function definition in `prog`.
static void emit_text(Obj *prog) {
  for (Obj *fn = prog; fn; fn = fn->next) {
    if (!fn->is_function || !fn->is_definition)
      continue;

    // No code is emitted for "static inline" functions
    // if no one is referencing them.
    if (!fn->is_live)
      continue;

    if (fn->is_static)
      println("  .local %s", fn->name);
    else
      println("  .globl %s", fn->name);

    println("  .text");
    println("  .type %s, @function", fn->name);
    println("%s:", fn->name);
    current_fn = fn;


    /* xitongsys
     The callee saves the caller's rbp and then switches over to its
     own stack frame.
    */

    // Prologue
    println("  push %%rbp");
    println("  mov %%rsp, %%rbp");
    println("  sub $%d, %%rsp", fn->stack_size);
    println("  mov %%rsp, %d(%%rbp)", fn->alloca_bottom->offset);

    // Save arg registers if function is variadic
    if (fn->va_area) {
      // Count the named parameters by register class.
      int gp = 0, fp = 0;
      for (Obj *var = fn->params; var; var = var->next) {
        if (is_flonum(var->ty))
          fp++;
        else
          gp++;
      }

      int off = fn->va_area->offset;

      // va_elem
      println("  movl $%d, %d(%%rbp)", gp * 8, off);          // gp_offset
      println("  movl $%d, %d(%%rbp)", fp * 8 + 48, off + 4); // fp_offset
      println("  movq %%rbp, %d(%%rbp)", off + 8);            // overflow_arg_area
      println("  addq $16, %d(%%rbp)", off + 8);
      println("  movq %%rbp, %d(%%rbp)", off + 16);           // reg_save_area
      println("  addq $%d, %d(%%rbp)", off + 24, off + 16);

      // __reg_save_area__
      // Six general-purpose registers first, then xmm0-xmm7.
      println("  movq %%rdi, %d(%%rbp)", off + 24);
      println("  movq %%rsi, %d(%%rbp)", off + 32);
      println("  movq %%rdx, %d(%%rbp)", off + 40);
      println("  movq %%rcx, %d(%%rbp)", off + 48);
      println("  movq %%r8, %d(%%rbp)", off + 56);
      println("  movq %%r9, %d(%%rbp)", off + 64);
      println("  movsd %%xmm0, %d(%%rbp)", off + 72);
      println("  movsd %%xmm1, %d(%%rbp)", off + 80);
      println("  movsd %%xmm2, %d(%%rbp)", off + 88);
      println("  movsd %%xmm3, %d(%%rbp)", off + 96);
      println("  movsd %%xmm4, %d(%%rbp)", off + 104);
      println("  movsd %%xmm5, %d(%%rbp)", off + 112);
      println("  movsd %%xmm6, %d(%%rbp)", off + 120);
      println("  movsd %%xmm7, %d(%%rbp)", off + 128);
    }

    // Save passed-by-register arguments to the stack
    int gp = 0, fp = 0;
    for (Obj *var = fn->params; var; var = var->next) {
      // A positive offset means the parameter already lives above rbp,
      // i.e. it was passed on the stack; nothing to save.
      if (var->offset > 0)
        continue;

      Type *ty = var->ty;

      switch (ty->kind) {
      case TY_STRUCT:
      case TY_UNION:
        assert(ty->size <= 16);
        // First 8-byte half.
        if (has_flonum(ty, 0, 8, 0))
          store_fp(fp++, var->offset, MIN(8, ty->size));
        else
          store_gp(gp++, var->offset, MIN(8, ty->size));

        // Second 8-byte half, if there is one.
        if (ty->size > 8) {
          if (has_flonum(ty, 8, 16, 0))
            store_fp(fp++, var->offset + 8, ty->size - 8);
          else
            store_gp(gp++, var->offset + 8, ty->size - 8);
        }
        break;
      case TY_FLOAT:
      case TY_DOUBLE:
        store_fp(fp++, var->offset, ty->size);
        break;
      default:
        store_gp(gp++, var->offset, ty->size);
      }
    }

    // Emit code
    gen_stmt(fn->body);
    assert(depth == 0);

    // [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines
    // a special rule for the main function. Reaching the end of the
    // main function is equivalent to returning 0, even though the
    // behavior is undefined for the other functions.
    if (strcmp(fn->name, "main") == 0)
      println("  mov $0, %%rax");



    /* xitongsys
      The return part of the function. .L.return.<fn->name> is emitted
      as a label so that it can be jumped to directly.
      This part restores the caller's rbp and rsp; afterwards the top of
      the stack is the return address.

      The ret instruction pops the return address and continues
      execution at that location.
    */


    // Epilogue
    println(".L.return.%s:", fn->name);
    println("  mov %%rbp, %%rsp");
    println("  pop %%rbp");
    println("  ret");
  }
}
```

1. see comments in codes

--------------------------------------------

#### static void assign_lvar_offsets(Obj *prog)

这个函数就是计算每个函数中的local变量的相对rbp偏移的地址。

1. 其中top的部分是正向偏移，也就是caller的stack中的内容。包括一些传递的参数，return address，old rbp。之所以top从16开始，就是因为最后两个8字节是用来存放return address和caller rbp的
2. bottom部分是通过寄存器传递的参数以及局部变量

```c
// Assign offsets to local variables.
//
// For every function in `prog`, parameters passed on the stack get
// positive RBP-relative offsets (starting at RBP+16), everything else
// in fn->locals gets a negative offset, and fn->stack_size is set to
// the 16-byte-aligned size of the negative area.
static void assign_lvar_offsets(Obj *prog) {
  for (Obj *fn = prog; fn; fn = fn->next) {
    if (!fn->is_function)
      continue;

    // If a function has many parameters, some parameters are
    // inevitably passed by stack rather than by register.
    // The first passed-by-stack parameter resides at RBP+16.
    int top = 16;
    int bottom = 0;

    // Registers claimed so far by general-purpose / floating-point
    // parameters.
    int gp = 0, fp = 0;

    // Assign offsets to pass-by-stack parameters.
    // A `continue` inside the switch means the parameter fits in
    // registers; it is left for the locals loop below.
    for (Obj *var = fn->params; var; var = var->next) {
      Type *ty = var->ty;

      switch (ty->kind) {
      case TY_STRUCT:
      case TY_UNION:
        if (ty->size <= 16) {
          bool fp1 = has_flonum(ty, 0, 8, 0);
          bool fp2 = has_flonum(ty, 8, 16, 8);
          if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) {
            fp = fp + fp1 + fp2;
            gp = gp + !fp1 + !fp2;
            continue;
          }
        }
        break;
      case TY_FLOAT:
      case TY_DOUBLE:
        if (fp++ < FP_MAX)
          continue;
        break;
      case TY_LDOUBLE:
        // long double never goes in a register here.
        break;
      default:
        if (gp++ < GP_MAX)
          continue;
      }

      // Reached only for stack-passed parameters.
      top = align_to(top, 8);
      var->offset = top;
      top += var->ty->size;
    }

    // Assign offsets to pass-by-register parameters and local variables.
    for (Obj *var = fn->locals; var; var = var->next) {
      // Already placed by the loop above.
      if (var->offset)
        continue;

      // AMD64 System V ABI has a special alignment rule for an array of
      // length at least 16 bytes. We need to align such array to at least
      // 16-byte boundaries. See p.14 of
      // https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-draft.pdf.
      int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16)
        ? MAX(16, var->align) : var->align;

      bottom += var->ty->size;
      bottom = align_to(bottom, align);
      var->offset = -bottom;
    }

    fn->stack_size = align_to(bottom, 16);
  }
}
```

---------------------
----------------------

## some functions

```c
/* Open a stream that writes into a malloc'd buffer that is expanded as
   necessary.  *BUFLOC and *SIZELOC are updated with the buffer's location
   and the number of characters written on fflush or fclose.  */
extern FILE *open_memstream (char **__bufloc, size_t *__sizeloc) __THROW
  __attribute_malloc__ __attr_dealloc_fclose __wur;
```

------------------------

![](resources/03.png)

---------------------------