Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Node.js源码-一个node程序是如何运行的 #7

Open
tsy77 opened this issue Jul 22, 2018 · 0 comments
Open

Node.js源码-一个node程序是如何运行的 #7

tsy77 opened this issue Jul 22, 2018 · 0 comments

Comments

@tsy77
Copy link
Owner

tsy77 commented Jul 22, 2018

本文从node入口出发,一步一步的阅读源码,直到运行结束。

node入口

node的入口是node/src/node_main.cc文件,main函数代码如下:

// Process entry point: adjusts SIGPIPE handling (shared-library POSIX builds),
// records the AT_SECURE auxiliary-vector value on Linux, turns off stdio
// buffering, and then hands control to node::Start().
int main(int argc, char *argv[]) {
#if defined(__POSIX__) && defined(NODE_SHARED_MODE)
  // In node::PlatformInit(), we squash all signal handlers for non-shared lib
  // build. In order to run test cases against shared lib build, we also need
  // to do the same thing for shared lib build here, but only for SIGPIPE for
  // now. If node::PlatformInit() is moved to here, then this section could be
  // removed.
  // When the peer end of a socket has been closed, a second write() by this
  // process makes the OS send it SIGPIPE; the default action for that signal
  // is to terminate the process.
  // Installing SIG_IGN as the handler makes the process ignore the signal.
  {
    struct sigaction act;
    memset(&act, 0, sizeof(act));
    act.sa_handler = SIG_IGN;
    sigaction(SIGPIPE, &act, nullptr);
  }
#endif

#if defined(__linux__)
  // Skip over every environment pointer; the loop leaves envp one past the
  // terminating nullptr, and that address is then read as the start of the
  // ELF auxiliary vector.
  char** envp = environ;
  while (*envp++ != nullptr) {}
  Elf_auxv_t* auxv = reinterpret_cast<Elf_auxv_t*>(envp);
  // Scan the auxiliary vector up to AT_NULL and copy the AT_SECURE entry's
  // value into node::linux_at_secure.
  for (; auxv->a_type != AT_NULL; auxv++) {
    if (auxv->a_type == AT_SECURE) {
      node::linux_at_secure = auxv->a_un.a_val;
      break;
    }
  }
#endif
  // Disable stdio buffering, it interacts poorly with printf()
  // calls elsewhere in the program (e.g., any logging from V8.)
  setvbuf(stdout, nullptr, _IONBF, 0);
  setvbuf(stderr, nullptr, _IONBF, 0);
  return node::Start(argc, argv);
}
#endif

这里主要做了三件事:

1.屏蔽SIGPIPE信号(具体可看代码注释)
2.设置node::linux_at_secure:在linux下遍历ELF辅助向量(Elf_auxv_t,动态链接器所需的辅助信息),找到AT_SECURE项并把它的值赋给node::linux_at_secure
3.node::Start(argc, argv)

node::Start执行流程

node::Start代码如下:

int Start(int argc, char** argv) {
  atexit([] () { uv_tty_reset_mode(); });
  PlatformInit();
  performance::performance_node_start = PERFORMANCE_NOW();

  CHECK_GT(argc, 0);

  // Hack around with the argv pointer. Used for process.title = "blah".
  argv = uv_setup_args(argc, argv);

  // This needs to run *before* V8::Initialize().  The const_cast is not
  // optional, in case you're wondering.
  int exec_argc;
  const char** exec_argv;
  Init(&argc, const_cast<const char**>(argv), &exec_argc, &exec_argv);

#if HAVE_OPENSSL
  {
    std::string extra_ca_certs;
    if (SafeGetenv("NODE_EXTRA_CA_CERTS", &extra_ca_certs))
      crypto::UseExtraCaCerts(extra_ca_certs);
  }
#ifdef NODE_FIPS_MODE
  // In the case of FIPS builds we should make sure
  // the random source is properly initialized first.
  OPENSSL_init();
#endif  // NODE_FIPS_MODE
  // V8 on Windows doesn't have a good source of entropy. Seed it from
  // OpenSSL's pool.
  V8::SetEntropySource(crypto::EntropySource);
#endif  // HAVE_OPENSSL

  v8_platform.Initialize(v8_thread_pool_size);
  // Enable tracing when argv has --trace-events-enabled.
  v8_platform.StartTracingAgent();
  V8::Initialize();
  performance::performance_v8_start = PERFORMANCE_NOW();
  v8_initialized = true;
  const int exit_code =
      Start(uv_default_loop(), argc, argv, exec_argc, exec_argv);
  v8_platform.StopTracingAgent();
  v8_initialized = false;
  V8::Dispose();

  // uv_run cannot be called from the time before the beforeExit callback
  // runs until the program exits unless the event loop has any referenced
  // handles after beforeExit terminates. This prevents unrefed timers
  // that happen to terminate during shutdown from being run unsafely.
  // Since uv_run cannot be called, uv_async handles held by the platform
  // will never be fully cleaned up.
  v8_platform.Dispose();

  delete[] exec_argv;
  exec_argv = nullptr;

  return exit_code;
}

1.PlatformInit

// Platform-specific process setup performed at the start of node::Start():
// signal mask and signal dispositions, validation of the standard file
// descriptors, and raising the open-file limit (POSIX); validation of the
// standard file descriptors (Windows).
inline void PlatformInit() {
#ifdef __POSIX__
#if HAVE_INSPECTOR
  // A sigset_t describes a set of signals.
  // NOTE(review): the original annotation says each signal occupies one bit
  // (64 bits in total) — this is platform-dependent, confirm.
  sigset_t sigmask;
  sigemptyset(&sigmask);
  sigaddset(&sigmask, SIGUSR1);
  // SIG_SETMASK replaces the calling thread's blocked-signal mask with
  // sigmask, which contains only SIGUSR1: SIGUSR1 is blocked and every other
  // signal is left unblocked.
  // A typical usage pattern is:
  //   sigdelset(&set, SIGALRM); pthread_sigmask(SIG_SETMASK, &set, NULL);
  const int err = pthread_sigmask(SIG_SETMASK, &sigmask, nullptr);
#endif  // HAVE_INSPECTOR

  // Make sure file descriptors 0-2 are valid before we start logging anything.
  for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd += 1) {
    struct stat ignored;
    if (fstat(fd, &ignored) == 0)
      continue;
    // Anything but EBADF means something is seriously wrong.  We don't
    // have to special-case EINTR, fstat() is not interruptible.
    if (errno != EBADF)
      ABORT();
    // The descriptor is closed: open /dev/null and require that it lands on
    // exactly this descriptor number.
    if (fd != open("/dev/null", O_RDWR))
      ABORT();
  }

#if HAVE_INSPECTOR
  // The pthread_sigmask() result is only checked here, after fds 0-2 have
  // been validated.
  CHECK_EQ(err, 0);
#endif  // HAVE_INSPECTOR

#ifndef NODE_SHARED_MODE
  // Restore signal dispositions, the parent process may have changed them.
  struct sigaction act;
  memset(&act, 0, sizeof(act));

  // The hard-coded upper limit is because NSIG is not very reliable; on Linux,
  // it evaluates to 32, 34 or 64, depending on whether RT signals are enabled.
  // Counting up to SIGRTMIN doesn't work for the same reason.
  // As in main(), SIGPIPE is ignored; every other signal visited by this loop
  // is reset to SIG_DFL.
  // Unlike pthread_sigmask(), which affects only the calling thread's mask,
  // calling signal() or sigaction() from a thread changes the handler for all
  // threads in the process.
  for (unsigned nr = 1; nr < kMaxSignal; nr += 1) {
    if (nr == SIGKILL || nr == SIGSTOP)
      continue;
    act.sa_handler = (nr == SIGPIPE) ? SIG_IGN : SIG_DFL;
    CHECK_EQ(0, sigaction(nr, &act, nullptr));
  }
#endif  // !NODE_SHARED_MODE

  RegisterSignalHandler(SIGINT, SignalExit, true);
  RegisterSignalHandler(SIGTERM, SignalExit, true);

  // Raise the open file descriptor limit.
  // I.e. allow the process to have more files open at the same time.
  struct rlimit lim;
  if (getrlimit(RLIMIT_NOFILE, &lim) == 0 && lim.rlim_cur != lim.rlim_max) {
    // Do a binary search for the limit.
    rlim_t min = lim.rlim_cur;
    rlim_t max = 1 << 20;
    // But if there's a defined upper bound, don't search, just set it.
    if (lim.rlim_max != RLIM_INFINITY) {
      min = lim.rlim_max;
      max = lim.rlim_max;
    }
    do {
      lim.rlim_cur = min + (max - min) / 2;
      if (setrlimit(RLIMIT_NOFILE, &lim)) {
        max = lim.rlim_cur;
      } else {
        min = lim.rlim_cur;
      }
    } while (min + 1 < max);
  }
#endif  // __POSIX__
#ifdef _WIN32
  for (int fd = 0; fd <= 2; ++fd) {
    auto handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
    if (handle == INVALID_HANDLE_VALUE ||
        GetFileType(handle) == FILE_TYPE_UNKNOWN) {
      // Ignore _close result. If it fails or not depends on used Windows
      // version. We will just check _open result.
      _close(fd);
      if (fd != _open("nul", _O_RDWR))
        ABORT();
    }
  }
#endif  // _WIN32
}

主要做了以下几件事:

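1.利用pthread_sigmask(SIG_SETMASK)把线程的信号屏蔽集设置为只包含SIGUSR1,也就是只阻塞SIGUSR1这一个信号,其余信号都不被屏蔽
2.利用STDIN_FILENO、STDERR_FILENO,确定标准输入、输出的文件描述符可用(不可用时打开/dev/null占位),以备后面去打log
3.对非共享库做信号处理:把除SIGKILL、SIGSTOP外的信号处理函数恢复为默认(SIG_DFL),并忽略SIGPIPE信号;忽略SIGPIPE这一点跟上述node_main中对共享库做的操作一样
4.利用sigaction注册信号SIGINT、SIGTERM处理函数,当然处理函数是SignalExit,用来退出进程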
5.提高进程打开文件数量

下面我将挑一些重点的点来讲解。

pthread_sigmask sigaction

pthread_sigmask用来设置线程的信号屏蔽集,注意这里是线程自己的;sigaction用来安装信号的处理函数,这里操作的是进程的,进程中所有线程会共享这个处理函数。也就是说线程可以有自己的信号屏蔽集,但是处理函数是进程中所有线程共享的。

提高进程打开文件描述符数量

依据上述代码,我们发现其使用的是setrlimit方法:当rlim_max不是RLIM_INFINITY(即存在明确上限)时,直接把rlim_cur设置为rlim_max;当rlim_max是RLIM_INFINITY时,在lim.rlim_cur到2的20次方(1 << 20)之间做二分查找,找出setrlimit能够设置成功的最大值。

2.uv_setup_args(argc, argv)

其实就是复制一份argv,返回new_argv,给process.title用。

3.Init

// Initializes Node.js process-wide state before V8 is initialized: registers
// the built-in modules, applies V8 flags, reads configuration from environment
// variables, and processes the command line.
//
// argc/argv are the process arguments and are passed on to ProcessArgv();
// exec_argc/exec_argv are output parameters filled in by ProcessArgv().
void Init(int* argc,
          const char** argv,
          int* exec_argc,
          const char*** exec_argv) {
  // Initialize prog_start_time to get relative uptime.
  prog_start_time = static_cast<double>(uv_now(uv_default_loop()));

  // Register built-in modules
  RegisterBuiltinModules();

  // Make inherited handles noninheritable.
  uv_disable_stdio_inheritance();

#if defined(NODE_V8_OPTIONS)
  // Should come before the call to V8::SetFlagsFromCommandLine()
  // so the user can disable a flag --foo at run-time by passing
  // --no_foo from the command line.
  // Sets the command-line flags the V8 virtual machine starts with.
  V8::SetFlagsFromString(NODE_V8_OPTIONS, sizeof(NODE_V8_OPTIONS) - 1);
#endif

  // Read various settings from environment variables.
  {
    std::string text;
    config_pending_deprecation =
        SafeGetenv("NODE_PENDING_DEPRECATION", &text) && text[0] == '1';
  }

  // Allow for environment set preserving symlinks.
  {
    std::string text;
    config_preserve_symlinks =
        SafeGetenv("NODE_PRESERVE_SYMLINKS", &text) && text[0] == '1';
  }

  if (config_warning_file.empty())
    SafeGetenv("NODE_REDIRECT_WARNINGS", &config_warning_file);

#if HAVE_OPENSSL
  if (openssl_config.empty())
    SafeGetenv("OPENSSL_CONF", &openssl_config);
#endif

#if !defined(NODE_WITHOUT_NODE_OPTIONS)
  // Options from NODE_OPTIONS are split on spaces into a temporary argv and
  // run through ProcessArgv() before the real command line is processed.
  std::string node_options;
  if (SafeGetenv("NODE_OPTIONS", &node_options)) {
    // Smallest tokens are 2-chars (a not space and a space), plus 2 extra
    // pointers, for the prepended executable name, and appended NULL pointer.
    size_t max_len = 2 + (node_options.length() + 1) / 2;
    const char** argv_from_env = new const char*[max_len];
    int argc_from_env = 0;
    // [0] is expected to be the program name, fill it in from the real argv.
    argv_from_env[argc_from_env++] = argv[0];

    // strtok() modifies its input, so tokenize a private copy of the string;
    // the tokens stored in argv_from_env point into that copy.
    char* cstr = strdup(node_options.c_str());
    char* initptr = cstr;
    char* token;
    while ((token = strtok(initptr, " "))) {  // NOLINT(runtime/threadsafe_fn)
      initptr = nullptr;
      argv_from_env[argc_from_env++] = token;
    }
    argv_from_env[argc_from_env] = nullptr;
    int exec_argc_;
    const char** exec_argv_ = nullptr;
    ProcessArgv(&argc_from_env, argv_from_env, &exec_argc_, &exec_argv_, true);
    delete[] exec_argv_;
    delete[] argv_from_env;
    free(cstr);
  }
#endif

  // Extract the node and V8 arguments.
  ProcessArgv(argc, argv, exec_argc, exec_argv);

#if defined(NODE_HAVE_I18N_SUPPORT)
  // If the parameter isn't given, use the env variable.
  if (icu_data_dir.empty())
    SafeGetenv("NODE_ICU_DATA", &icu_data_dir);
  // Initialize ICU.
  // If icu_data_dir is empty here, it will load the 'minimal' data.
  if (!i18n::InitializeICUDirectory(icu_data_dir)) {
    fprintf(stderr,
            "%s: could not initialize ICU "
            "(check NODE_ICU_DATA or --icu-data-dir parameters)\n",
            argv[0]);
    exit(9);
  }
#endif

  // Needed for access to V8 intrinsics.  Disabled again during bootstrapping,
  // see lib/internal/bootstrap/node.js.
  // Lets JavaScript code call V8's built-in (native) functions.
  // Such calls are written with a leading '%'; examples appear later.
  const char allow_natives_syntax[] = "--allow_natives_syntax";
  V8::SetFlagsFromString(allow_natives_syntax,
                         sizeof(allow_natives_syntax) - 1);

  // We should set node_is_initialized here instead of in node::Start,
  // otherwise embedders using node::Init to initialize everything will not be
  // able to set it and native modules will not load for them.
  node_is_initialized = true;
}

Init方法主要做了以下几件事:

1.注册内置模块
2.disable掉继承过来的文件描述符
3.设置v8虚拟机启动的命令行标志
4.利用SafeGetenv(),从环境变量中获取各种参数
5.获取node和v8的运行参数exec_argv
6.设置v8标志--allow_natives_syntax

还是挑几个重点讲解一下

RegisterBuiltinModules

注册内置模块,也就是src里的.cc文件。

void RegisterBuiltinModules() {
#define V(modname) _register_##modname();
  NODE_BUILTIN_MODULES(V)
#undef V
}

RegisterBuiltinModules做了两件事:

1.宏定义V
2.调用NODE_BUILTIN_MODULES

NODE_BUILTIN_MODULES也是一个宏定义,定义如下:

#define NODE_BUILTIN_MODULES(V)                                               \
  NODE_BUILTIN_STANDARD_MODULES(V)                                            \
  NODE_BUILTIN_OPENSSL_MODULES(V)                                             \
  NODE_BUILTIN_ICU_MODULES(V)

NODE_BUILTIN_STANDARD_MODULES定义如下:

#define NODE_BUILTIN_STANDARD_MODULES(V)                                      \
    V(async_wrap)                                                             \
    V(buffer)                                                                 \
    V(cares_wrap)                                                             \
	......

也就是注册每个模块,其实调用了_register_##modname()。

_register_##modname()定义如下:

static node::node_module _module = {                                        \
    NODE_MODULE_VERSION,                                                      \
    flags,                                                                    \
    nullptr,                                                                  \
    __FILE__,                                                                 \
    nullptr,                                                                  \
    (node::addon_context_register_func) (regfunc),                            \
    NODE_STRINGIFY(modname),                                                  \
    priv,                                                                     \
    nullptr                                                                   \
  };                                                                          \
  void _register_ ## modname() {                                              \
    node_module_register(&_module);                                           \
  }

node_module_register定义在src/node.cc中,源码如下:

// Registers a module descriptor by pushing it onto the head of one of the
// module lists, chosen by its flags:
//   - NM_F_BUILTIN  -> modlist_builtin
//   - NM_F_INTERNAL -> modlist_internal
//   - anything else registered before node::Init finished -> modlist_linked
//   - anything else registered afterwards -> stored in modpending
// m is expected to point at a node_module.
extern "C" void node_module_register(void* m) {
  struct node_module* mp = reinterpret_cast<struct node_module*>(m);

  if (mp->nm_flags & NM_F_BUILTIN) {
    mp->nm_link = modlist_builtin;
    modlist_builtin = mp;
  } else if (mp->nm_flags & NM_F_INTERNAL) {
    mp->nm_link = modlist_internal;
    modlist_internal = mp;
  } else if (!node_is_initialized) {
    // "Linked" modules are included as part of the node project.
    // Like builtins they are registered *before* node::Init runs.
    mp->nm_flags = NM_F_LINKED;
    mp->nm_link = modlist_linked;
    modlist_linked = mp;
  } else {
    // Not added to any list; only remembered as the pending module.
    modpending = mp;
  }
}

对内置模块(nm_flags带有NM_F_BUILTIN标志)来说,其实就是把上面定义的module加到了modlist_builtin链表的头部;其他类型的模块则分别进入modlist_internal、modlist_linked或者记录到modpending。

uv_disable_stdio_inheritance

void uv_disable_stdio_inheritance(void) {
  int fd;

  /* Set the CLOEXEC flag on all open descriptors. Unconditionally try the
   * first 16 file descriptors. After that, bail out after the first error.
   */
  for (fd = 0; ; fd++)
    if (uv__cloexec(fd, 1) && fd > 15)
      break;
}

其实就是利用了cloexec(close-on-exec)标志,让文件描述符在子进程调用exec时自动关闭。这里多说几句,为什么要这样呢?原因在于当fork子进程时,会将父进程文件描述符及堆栈信息复制到子进程,但当子进程调用exec执行新程序时,原有执行栈被重置,原有的文件描述符对应变量也就不见了,所以将无法手动关闭对应文件描述符。cloexec就是为了解决这个问题的:在子进程exec时,由系统自动关闭这些文件描述符。

--allow_natives_syntax

V8通过设置--allow_natives_syntax来允许用户的代码调用v8的内置函数,但调用时要以%开头。

4.判断OPENSSL

#if HAVE_OPENSSL
  {
    std::string extra_ca_certs;
    if (SafeGetenv("NODE_EXTRA_CA_CERTS", &extra_ca_certs))
      crypto::UseExtraCaCerts(extra_ca_certs);
  }

主要判断是否启用了openssl(HAVE_OPENSSL);如果启用了,并且设置了环境变量NODE_EXTRA_CA_CERTS,就把它的值交给crypto::UseExtraCaCerts,用来加载额外的CA证书。

5.v8_platform.Initialize

void Initialize(int thread_pool_size) {
    tracing_agent_.reset(new tracing::Agent(trace_file_pattern));
    platform_ = new NodePlatform(thread_pool_size,
        tracing_agent_->GetTracingController());
    V8::InitializePlatform(platform_);
    tracing::TraceEventHelper::SetTracingController(
        tracing_agent_->GetTracingController());
  }

主要创建了tracing agent,并按thread_pool_size(线程池容量)创建NodePlatform,再通过V8::InitializePlatform把它交给V8使用。

6.V8::Initialize();

这里是v8的初始化,定义在deps/v8/src/v8.cc中,

bool V8::Initialize() {
  InitializeOncePerProcess();
  return true;
}

InitializeOncePerProcess做了什么呢?

void V8::InitializeOncePerProcess() {
  base::CallOnce(&init_once, &InitializeOncePerProcessImpl);
}

CallOnce

CallOnce顾名思义就是只调用一次,其通过判断init_once是否为ONCE_STATE_DONE来判断是否曾经调用过。

inline void CallOnce(OnceType* once, NoArgFunction init_func) {
  if (Acquire_Load(once) != ONCE_STATE_DONE) {
    CallOnceImpl(once, init_func);
  }
}

其中Acquire_Load为原子性的获取once的值,CallOnceImpl则在其中修改once值,并且执行init_func。

下面我们看下Acquire_Load的定义:

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
}

__atomic_load_n即为原子性的加载ptr指针所指向的内存所存储的变量。

CallOnceImpl代码如下:

if (state == ONCE_STATE_UNINITIALIZED) {
    // We are the first thread to call this function, so we have to call the
    // function.
    init_func();
    Release_Store(once, ONCE_STATE_DONE);

主要做了两件事:

1.执行init_func
2.原子性的设置once的值,表明在该进程中,已经执行过了。

InitializeOncePerProcessImpl

// One-time, per-process V8 initialization (invoked through base::CallOnce):
// normalizes flags, initializes the OS abstraction layer, and runs the
// per-process setup of the individual V8 subsystems.
void V8::InitializeOncePerProcessImpl() {
  FlagList::EnforceFlagImplications();

  if (FLAG_predictable && FLAG_random_seed == 0) {
    // Avoid random seeds in predictable mode.
    FLAG_random_seed = 12347;
  }

  if (FLAG_stress_compaction) {
    FLAG_force_marking_deque_overflows = true;
    FLAG_gc_global = true;
    FLAG_max_semi_space_size = 1;
  }

  base::OS::Initialize(FLAG_hard_abort, FLAG_gc_fake_mmap);

  if (FLAG_random_seed) SetRandomMmapSeed(FLAG_random_seed);

  // Thread-related initialization:
  // creates the thread-local storage keys, the thread data table, etc.
  Isolate::InitializeOncePerProcess();

#if defined(USE_SIMULATOR)
  Simulator::InitializeOncePerProcess();
#endif
  sampler::Sampler::SetUp();
  CpuFeatures::Probe(false);
  ElementsAccessor::InitializeOncePerProcess();
  ExternalReference::SetUp();
  Bootstrapper::InitializeOncePerProcess();
}

这里主要做了两件事:

1.操作系统相关的初始化
2.初始化线程,创建TLS,thread_table_data等

Isolate::InitializeOncePerProcess

// Per-process Isolate setup: under thread_data_table_mutex_, creates the three
// thread-local storage keys and allocates the thread data table.
void Isolate::InitializeOncePerProcess() {
  // The guard manages the mutex (a binary semaphore); lock_guard is similar
  // to a smart pointer and is destructed when the stack frame is torn down.
  // A lock guard is an object that manages a mutex object by keeping it always locked.
  base::LockGuard<base::Mutex> lock_guard(thread_data_table_mutex_.Pointer());
  CHECK_NULL(thread_data_table_);
  // Thread-local storage key.
  // NOTE(review): per the original annotation this maps to
  // pthread_key_create() / TlsAlloc(); CreateThreadLocalKey's implementation
  // is not shown here — confirm.
  isolate_key_ = base::Thread::CreateThreadLocalKey();
#if DEBUG
  base::Relaxed_Store(&isolate_key_created_, 1);
#endif
  thread_id_key_ = base::Thread::CreateThreadLocalKey();
  per_isolate_thread_data_key_ = base::Thread::CreateThreadLocalKey();
  // NOTE(review): the original annotation describes ThreadDataTable as a
  // linked list; its definition is not shown here.
  thread_data_table_ = new Isolate::ThreadDataTable();
}

主要做了三件事:

1.加互斥锁
2.通过base::Thread::CreateThreadLocalKey(POSIX下对应pthread_key_create,Windows下对应TlsAlloc)申请线程本地存储的key
3.创建thread_data_table_链表

7.Start(uv_default_loop(), argc, argv, exec_argc, exec_argv)

// Creates the V8 isolate, installs its listeners and callbacks, publishes it
// as node_isolate, runs the program through the Start(isolate, ...) overload,
// and finally unpublishes and disposes the isolate.
// Returns the exit code of the inner Start(), or 12 if the isolate could not
// be created.
inline int Start(uv_loop_t* event_loop,
                 int argc, const char* const* argv,
                 int exec_argc, const char* const* exec_argv) {
  Isolate::CreateParams params;
  // Allocator used for ArrayBuffer memory. Per the original annotation, Node's
  // Buffer memory is not taken from V8-allocated memory but is requested
  // directly from the heap.
  ArrayBufferAllocator allocator;
  params.array_buffer_allocator = &allocator;
#ifdef NODE_ENABLE_VTUNE_PROFILING
  params.code_event_handler = vTune::GetVtuneCodeEventHandler();
#endif

  Isolate* const isolate = Isolate::New(params);
  if (isolate == nullptr)
    return 12;  // Signal internal error.

  // Register listeners/handlers on the isolate; this first one listens for
  // messages.
  isolate->AddMessageListener(OnMessage);
  isolate->SetAbortOnUncaughtExceptionCallback(ShouldAbortOnUncaughtException);
  isolate->SetMicrotasksPolicy(v8::MicrotasksPolicy::kExplicit);
  isolate->SetFatalErrorHandler(OnFatalError);
  isolate->SetAllowWasmCodeGenerationCallback(AllowWasmCodeGenerationCallback);

  {
    // Scoped lock (described in the original annotation as an upgraded
    // lock_guard); guards the write to node_isolate.
    Mutex::ScopedLock scoped_lock(node_isolate_mutex);
    CHECK_EQ(node_isolate, nullptr);
    node_isolate = isolate;
  }

  int exit_code;
  {
    // Take the isolate lock, because an isolate is not thread-safe.
    Locker locker(isolate);
    Isolate::Scope isolate_scope(isolate);
    HandleScope handle_scope(isolate);
    IsolateData isolate_data(
        isolate,
        event_loop,
        v8_platform.Platform(),
        allocator.zero_fill_field());
    if (track_heap_objects) {
      isolate->GetHeapProfiler()->StartTrackingHeapObjects(true);
    }
    exit_code = Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv);
  }

  {
    // Unpublish the isolate under the same mutex before disposing it.
    Mutex::ScopedLock scoped_lock(node_isolate_mutex);
    CHECK_EQ(node_isolate, isolate);
    node_isolate = nullptr;
  }

  isolate->Dispose();

  return exit_code;
}

这里主要做了如下几件事:

1.初始化isolate的params,这里需要注意的是array_buffer_allocator,设置这个分配器是为了分配buffer时使用,node中buffer不会占用V8的内存,而是直接从堆中申请,这也是buffer不受v8内存限制的原因
2.创建Isolate
3.给Isolate添加监听回调
4.Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv)

array_buffer_allocator

ArrayBufferAllocator::Allocate其实就是调用了realloc,在原来基础上将pointer所指向的内存大小增加到full_size。

allocated = realloc(pointer, full_size);

Isolate添加监听回调

以AddMessageListener为例,其实最终调用的是Isolate::AddMessageListenerWithErrorLevel,代码如下:

bool Isolate::AddMessageListenerWithErrorLevel(MessageCallback that,
                                               int message_levels,
                                               Local<Value> data) {
  i::Isolate* isolate = reinterpret_cast<i::Isolate*>(this);
  ENTER_V8_NO_SCRIPT_NO_EXCEPTION(isolate);
  i::HandleScope scope(isolate);
  i::Handle<i::TemplateList> list = isolate->factory()->message_listeners();
  i::Handle<i::FixedArray> listener = isolate->factory()->NewFixedArray(3);
  i::Handle<i::Foreign> foreign =
      isolate->factory()->NewForeign(FUNCTION_ADDR(that));
  listener->set(0, *foreign);
  listener->set(1, data.IsEmpty() ? isolate->heap()->undefined_value()
                                  : *Utils::OpenHandle(*data));
  listener->set(2, i::Smi::FromInt(message_levels));
  list = i::TemplateList::Add(isolate, list, listener);
  isolate->heap()->SetMessageListeners(*list);
  return true;
}

其实就是把回调函数地址、data和message_levels打包成一个listener,追加到isolate的message_listeners列表中,再通过heap()->SetMessageListeners把新的列表保存回去。

8.Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv)

// Creates the context and Environment for the given isolate, starts the
// inspector, and bootstraps Node.js via LoadEnvironment().
// Returns 12 if the inspector was requested but could not be started.
// The remainder of the function is elided in this excerpt (see the "......"
// marker below).
inline int Start(Isolate* isolate, IsolateData* isolate_data,
                 int argc, const char* const* argv,
                 int exec_argc, const char* const* exec_argv) {
  HandleScope handle_scope(isolate);
  Local<Context> context = NewContext(isolate);
  Context::Scope context_scope(context);
  Environment env(isolate_data, context, v8_platform.GetTracingAgent());
  // Initializes the uv handles and the process object (per the original
  // annotation; Environment::Start itself is not shown here).
  env.Start(argc, argv, exec_argc, exec_argv, v8_is_profiling);

  // The first argument after the program name, if any, is passed to the
  // inspector as the script path.
  const char* path = argc > 1 ? argv[1] : nullptr;
  StartInspector(&env, path, debug_options);

  if (debug_options.inspector_enabled() && !v8_platform.InspectorStarted(&env))
    return 12;  // Signal internal error.

  env.set_abort_on_uncaught_exception(abort_on_uncaught_exception);

  if (no_force_async_hooks_checks) {
    env.async_hooks()->no_force_checks();
  }

  {
    // Run the bootstrap with async id 1 pushed onto the async-hooks stack.
    Environment::AsyncCallbackScope callback_scope(&env);
    env.async_hooks()->push_async_ids(1, 0);
    LoadEnvironment(&env);
    env.async_hooks()->pop_async_id(1);
  }
  
  ......
}

这里主要做了如下几件事:

1.调用env.Start()来初始化uv handle、process
2.LoadEnvironment()

LoadEnvironment()

// Bootstraps the JavaScript side of Node.js: compiles the two bootstrapper
// scripts, exposes `global` and process._rawDebug, creates the binding loader
// functions, then executes loaders.js followed by node.js.
// Returns early (without running node.js) if the loaders bootstrapper fails.
void LoadEnvironment(Environment* env) {
  HandleScope handle_scope(env->isolate());

  TryCatch try_catch(env->isolate());
  // Disable verbose mode to stop FatalException() handler from trying
  // to handle the exception. Errors this early in the start-up phase
  // are not safe to ignore.
  try_catch.SetVerbose(false);

  // The bootstrapper scripts are lib/internal/bootstrap/loaders.js and
  // lib/internal/bootstrap/node.js, each included as a static C string
  // defined in node_javascript.h, generated in node_javascript.cc by
  // node_js2c.
  Local<String> loaders_name =
      FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/loaders.js");
  // LoadersBootstrapperSource fetches the ASCII source of loaders.js that
  // node_js2c generated.
  Local<Function> loaders_bootstrapper =
      GetBootstrapper(env, LoadersBootstrapperSource(env), loaders_name);
  Local<String> node_name =
      FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/node.js");
  Local<Function> node_bootstrapper =
      GetBootstrapper(env, NodeBootstrapperSource(env), node_name);

  // Add a reference to the global object
  Local<Object> global = env->context()->Global();

#if defined HAVE_DTRACE || defined HAVE_ETW
  InitDTrace(env, global);
#endif

#if defined HAVE_PERFCTR
  InitPerfCounters(env, global);
#endif

  // Enable handling of uncaught exceptions
  // (FatalException(), break on uncaught exception in debugger)
  //
  // This is not strictly necessary since it's almost impossible
  // to attach the debugger fast enough to break on exception
  // thrown during process startup.
  try_catch.SetVerbose(true);

  env->SetMethod(env->process_object(), "_rawDebug", RawDebug);

  // Expose the global object as a property on itself
  // (Allows you to set stuff on `global` from anywhere in JavaScript.)
  global->Set(FIXED_ONE_BYTE_STRING(env->isolate(), "global"), global);

  // Create binding loaders
  v8::Local<v8::Function> get_binding_fn =
      env->NewFunctionTemplate(GetBinding)->GetFunction(env->context())
          .ToLocalChecked();

  v8::Local<v8::Function> get_linked_binding_fn =
      env->NewFunctionTemplate(GetLinkedBinding)->GetFunction(env->context())
          .ToLocalChecked();

  v8::Local<v8::Function> get_internal_binding_fn =
      env->NewFunctionTemplate(GetInternalBinding)->GetFunction(env->context())
          .ToLocalChecked();

  // Arguments handed to the loaders bootstrapper, in this order.
  Local<Value> loaders_bootstrapper_args[] = {
    env->process_object(),
    get_binding_fn,
    get_linked_binding_fn,
    get_internal_binding_fn
  };

  // Bootstrap internal loaders
  Local<Value> bootstrapped_loaders;
  if (!ExecuteBootstrapper(env, loaders_bootstrapper,
                           arraysize(loaders_bootstrapper_args),
                           loaders_bootstrapper_args,
                           &bootstrapped_loaders)) {
    return;
  }

  // Bootstrap Node.js
  Local<Value> bootstrapped_node;
  // bootstrapped_loaders holds the { internalBinding, NativeModule } object
  // returned by running loaders_bootstrapper.
  Local<Value> node_bootstrapper_args[] = {
    env->process_object(),
    bootstrapped_loaders
  };
  if (!ExecuteBootstrapper(env, node_bootstrapper,
                           arraysize(node_bootstrapper_args),
                           node_bootstrapper_args,
                           &bootstrapped_node)) {
    return;
  }
}

这里主要做了以下几件事:

1.从node_javascript.cc中获取node.js、loaders.js的ASCII源码,这里的node_javascript.cc在[上一篇文章](https://github.com/tsy77/blog/issues/6)中有过简单介绍,通过js2c.py将./lib中所有js文件的ASCII码存入node_javascript.cc中。
2.创建v8::Local<v8::Function> get_binding_fn、get_linked_binding_fn、get_internal_binding_fn
3.执行loaders.js和node.js。在node.js中,运行了我们想要执行的js文件。

node_js2c

下面便是node_javascript.cc中的一部分:

static const uint8_t raw_internal_bootstrap_loaders_key[] = { 105,110,116,101,114,110,97,108,47,98,111,111,116,115,116,114,97,112,47,108,
111,97,100,101,114,115 };
static struct : public v8::String::ExternalOneByteStringResource {
  const char* data() const override {
    return reinterpret_cast<const char*>(raw_internal_bootstrap_loaders_key);
  }
  size_t length() const override { return arraysize(raw_internal_bootstrap_loaders_key); }
  void Dispose() override { /* Default calls `delete this`. */ }
  v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {
    return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
  }
} internal_bootstrap_loaders_key;

static const uint8_t raw_internal_bootstrap_loaders_value[] = { 47,47,32,84,104,105,115,32,102,105,108,101,32,99,114,101,97,116,101,115,
32,116,104,101,32,105,110,116,101,114,110,97,108,32,109,111,100,117,108,101,
32,38,32,98,105,110,100,105,110,103,32,108,111,97,100,101,114,115,32,117,
115,101,100,32,98,121,32,98,117,105,108,116,45,105,110,10,47,47,32,109,
111,100,117,108,101,115,46,32,73,110,32,99,111,110,116,114,97,115,116,44,
32,117,115,101,114,32,108,97,110,100,32,109,111,100,117,108,101,115,32,97,
114,101,32,108,111,97,100,101,100, };
static struct : public v8::String::ExternalOneByteStringResource {
  const char* data() const override {
    return reinterpret_cast<const char*>(raw_internal_bootstrap_loaders_value);
  }
  size_t length() const override { return arraysize(raw_internal_bootstrap_loaders_value); }
  void Dispose() override { /* Default calls `delete this`. */ }
  v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {
    return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
  }
} internal_bootstrap_loaders_value;

我们可以看到两个数组和两个struct,其中raw_internal_bootstrap_loaders_key和raw_internal_bootstrap_loaders_value分别记录bootstrap_loaders的key和value(文件内容),两个结构体internal_bootstrap_loaders_key和internal_bootstrap_loaders_value均有方法ToStringChecked,而ToStringChecked其实会去找data()方法,也就是说internal_bootstrap_loaders_value.ToStringChecked()便会返回对应的ascII码。

node_javascript.cc又是如何产生的呢?

{
      'target_name': 'node_js2c',
      'type': 'none',
      'toolsets': ['host'],
      'actions': [
        {
          'action_name': 'node_js2c',
          'process_outputs_as_sources': 1,
          'inputs': [
            '<@(library_files)',
            './config.gypi',
            'tools/check_macros.py'
          ],
          'outputs': [
            '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
          ],
          'conditions': [
            [ 'node_use_dtrace=="false" and node_use_etw=="false"', {
              'inputs': [ 'src/notrace_macros.py' ]
            }],
            [ 'node_use_perfctr=="false"', {
              'inputs': [ 'src/noperfctr_macros.py' ]
            }],
            [ 'node_debug_lib=="false"', {
              'inputs': [ 'tools/nodcheck_macros.py' ]
            }],
            [ 'node_debug_lib=="true"', {
              'inputs': [ 'tools/dcheck_macros.py' ]
            }]
          ],
          'action': [
            'python',
            'tools/js2c.py',
            '<@(_outputs)',
            '<@(_inputs)',
          ],
        },
      ],

我们看到在node.gyp中定义了action,其实就是调用了python tools/js2c.py,这个后面文章再来介绍吧,这里先简单提一下。

GetBinding

GetBinding又是干什么的呢?

static void GetBinding(const FunctionCallbackInfo<Value>& args) {
  Environment* env = Environment::GetCurrent(args);

  CHECK(args[0]->IsString());

  Local<String> module = args[0].As<String>();
  node::Utf8Value module_v(env->isolate(), module);

  node_module* mod = get_builtin_module(*module_v);
  Local<Object> exports;
  if (mod != nullptr) {
    exports = InitModule(env, mod, module);
  } else if (!strcmp(*module_v, "constants")) {
    exports = Object::New(env->isolate());
    CHECK(exports->SetPrototype(env->context(),
                                Null(env->isolate())).FromJust());
    DefineConstants(env->isolate(), exports);
  } else if (!strcmp(*module_v, "natives")) {
    exports = Object::New(env->isolate());
    DefineJavaScript(env, exports);
  } else {
    return ThrowIfNoSuchModule(env, *module_v);
  }

  args.GetReturnValue().Set(exports);
}

我们不难发现,逻辑上有三个分叉:

1. get_builtin_module,获取builtin模块,如果获取到了(是builtin模块),exports = InitModule(env, mod, module);
2.如果模块名是constants(常量),新建一个原型为null的对象,并调用DefineConstants
3.如果模块名是natives,新建一个对象,并调用DefineJavaScript;以上都不满足时,调用ThrowIfNoSuchModule抛出错误

get_builtin_module又是怎么做的呢?

node_module* get_builtin_module(const char* name) {
  return FindModule(modlist_builtin, name, NM_F_BUILTIN);
}

inline struct node_module* FindModule(struct node_module* list,
                                      const char* name,
                                      int flag) {
  struct node_module* mp;

  for (mp = list; mp != nullptr; mp = mp->nm_link) {
    if (strcmp(mp->nm_modname, name) == 0)
      break;
  }

  CHECK(mp == nullptr || (mp->nm_flags & flag) != 0);
  return mp;
}

很简单,就是从modlist_builtin里面遍历,上述的Init函数中调用RegisterBuiltinModules将所有的内置模块加入到链表modlist_builtin中。

InitModule其实就是执行了module::Initialize(),以async_wrap为例:

oid AsyncWrap::Initialize(Local<Object> target,
                           Local<Value> unused,
                           Local<Context> context) {
  Environment* env = Environment::GetCurrent(context);
  Isolate* isolate = env->isolate();
  HandleScope scope(isolate);

  env->BeforeExit(DestroyAsyncIdsCallback, env);

  env->SetMethod(target, "setupHooks", SetupHooks);
  env->SetMethod(target, "pushAsyncIds", PushAsyncIds);
  env->SetMethod(target, "popAsyncIds", PopAsyncIds);
  env->SetMethod(target, "queueDestroyAsyncId", QueueDestroyAsyncId);
  env->SetMethod(target, "enablePromiseHook", EnablePromiseHook);
  env->SetMethod(target, "disablePromiseHook", DisablePromiseHook);
  env->SetMethod(target, "registerDestroyHook", RegisterDestroyHook);

  ......

  env->set_async_hooks_init_function(Local<Function>());
  env->set_async_hooks_before_function(Local<Function>());
  env->set_async_hooks_after_function(Local<Function>());
  env->set_async_hooks_destroy_function(Local<Function>());
  env->set_async_hooks_promise_resolve_function(Local<Function>());
  env->set_async_hooks_binding(target);
}

上述async_wrap中可以看到其实就是在exports上挂载各种方法,然后初始化。

DefineJavaScript干了什么呢?

CHECK(target->Set(env->context(),
                  internal_bootstrap_loaders_key.ToStringChecked(env->isolate()),
                  internal_bootstrap_loaders_value.ToStringChecked(env->isolate())).FromJust());

我们看到其实就是将node_javascript.cc中的模块以key/value的形式挂载到exports,这里可以注意下上面提到的ToStringChecked。

ExecuteBootstrapper

这里就是执行internal/bootstrap/loaders.js和internal/bootstrap/node.js,这里先简单讲下,后面会做详细介绍。其最主要的逻辑如下:

if (process._syntax_check_only != null) {
          const fs = NativeModule.require('fs');
          // read the source
          const filename = CJSModule._resolveFilename(process.argv[1]);
          const source = fs.readFileSync(filename, 'utf-8');
          checkScriptSyntax(source, filename);
          process.exit(0);
        }
        CJSModule.runMain();

如果设置了process._syntax_check_only,则只读取入口文件做语法检查,检查完直接process.exit(0)退出;否则调用CJSModule.runMain()执行入口文件。

9.资源释放

v8_platform.StopTracingAgent();
  v8_initialized = false;
  V8::Dispose();

  // uv_run cannot be called from the time before the beforeExit callback
  // runs until the program exits unless the event loop has any referenced
  // handles after beforeExit terminates. This prevents unrefed timers
  // that happen to terminate during shutdown from being run unsafely.
  // Since uv_run cannot be called, uv_async handles held by the platform
  // will never be fully cleaned up.
  v8_platform.Dispose();

  delete[] exec_argv;
  exec_argv = nullptr;

  return exit_code;

这里把v8_platform、exec_argv等资源释放,此次运行结束。

总结

本次主要沿着node::Start函数的逻辑,将运行一个node程序完整的流程呈现给大家,后面会对其中涉及的一些点以及一些模块进行分别介绍。

@tsy77 tsy77 changed the title node源码-一个node程序是如何运行的 Node.js源码-一个node程序是如何运行的 Jul 22, 2018
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant