diff --git a/src/python_be.cc b/src/python_be.cc
index c152e035..94e3509d 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -2416,6 +2416,31 @@ TRITONBACKEND_ModelInstanceExecute(
   return nullptr;
 }
 
+// Health probe for a model instance. Returns nullptr while the stub process
+// backing the instance is reported active by StubActive(); otherwise returns
+// a TRITONSERVER_ERROR_INTERNAL error that names the instance. A failure to
+// look up the instance state is handed back through RETURN_IF_ERROR.
+TRITONBACKEND_ISPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance)
+{
+  void* vstate;
+  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
+  // The state comes back as an opaque void*; recover the typed pointer.
+  ModelInstanceState* instance_state =
+      reinterpret_cast<ModelInstanceState*>(vstate);
+
+  // Check if the stub process is running
+  if (!instance_state->Stub()->StubActive()) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL,
+        (std::string("Stub process '") + instance_state->Name() +
+         "' is not healthy.")
+            .c_str());
+  }
+
+  return nullptr;
+}
+
 TRITONBACKEND_ISPEC TRITONSERVER_Error*
 TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
 {
diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc
index 32f5d1bd..cbe039a4 100644
--- a/src/stub_launcher.cc
+++ b/src/stub_launcher.cc
@@ -743,7 +743,29 @@ StubLauncher::StubActive()
   GetExitCodeProcess(stub_pid_.hProcess, &ec);
   return (ec == STILL_ACTIVE);
 #else
-  return (stub_pid_ != 0);
+  if (stub_pid_ == 0) {
+    return false;
+  }
+
+  int status;
+  pid_t return_pid = waitpid(stub_pid_, &status, WNOHANG);
+  if (return_pid == -1) {
+    // If waitpid fails, it likely means the process no longer exists (ECHILD)
+    if (errno != ECHILD) {
+      LOG_MESSAGE(
+          TRITONSERVER_LOG_VERBOSE,
+          (std::string("waitpid failed for stub process ") +
+           std::to_string(stub_pid_) + ": " + strerror(errno))
+              .c_str());
+    }
+    return false;
+  } else if (return_pid == stub_pid_) {
+    // Process has exited and has been reaped
+    return false;
+  }
+
+  // return_pid == 0 means the process is still running
+  return true;
 #endif
 }
 
@@ -824,9 +846,11 @@ StubLauncher::KillStubProcess()
   CloseHandle(stub_pid_.hProcess);
   CloseHandle(stub_pid_.hThread);
 #else
-  kill(stub_pid_, SIGKILL);
-  WaitForStubProcess();
-  stub_pid_ = 0;
+  if (stub_pid_ != 0) {
+    kill(stub_pid_, SIGKILL);
+    WaitForStubProcess();
+    stub_pid_ = 0;
+  }
 #endif
 }
 