From 975b51da72427017e1e1d865fdea8e1b9dec6971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Wr=C3=B3blewski?= Date: Thu, 13 Apr 2017 09:38:58 +0200 Subject: [PATCH 1/5] Real time profiling support. --- src/_vmprof.c | 12 +++++++-- src/vmprof.h | 1 + src/vmprof_common.h | 19 +++++++++++---- src/vmprof_main.h | 59 ++++++++++++++++++++++++++++++++++++++++----- vmprof/__init__.py | 4 +-- 5 files changed, 80 insertions(+), 15 deletions(-) diff --git a/src/_vmprof.c b/src/_vmprof.c index 07e9a074..9975d488 100644 --- a/src/_vmprof.c +++ b/src/_vmprof.c @@ -269,10 +269,11 @@ static PyObject *enable_vmprof(PyObject* self, PyObject *args) int memory = 0; int lines = 0; int native = 0; + int real_time = 0; double interval; char *p_error; - if (!PyArg_ParseTuple(args, "id|iii", &fd, &interval, &memory, &lines, &native)) { + if (!PyArg_ParseTuple(args, "id|iiii", &fd, &interval, &memory, &lines, &native, &real_time)) { return NULL; } @@ -291,6 +292,13 @@ static PyObject *enable_vmprof(PyObject* self, PyObject *args) return NULL; } +#ifndef VMPROF_LINUX + if (real_time) { + PyErr_SetString(PyExc_ValueError, "real time profiling is only supported on linux"); + return NULL; + } +#endif + vmp_profile_lines(lines); if (!Original_code_dealloc) { @@ -298,7 +306,7 @@ static PyObject *enable_vmprof(PyObject* self, PyObject *args) PyCode_Type.tp_dealloc = &cpyprof_code_dealloc; } - p_error = vmprof_init(fd, interval, memory, lines, "cpython", native); + p_error = vmprof_init(fd, interval, memory, lines, "cpython", native, real_time); if (p_error) { PyErr_SetString(PyExc_ValueError, p_error); return NULL; diff --git a/src/vmprof.h b/src/vmprof.h index a5747970..f72f5c0e 100644 --- a/src/vmprof.h +++ b/src/vmprof.h @@ -26,6 +26,7 @@ #define PROFILE_LINES '\x02' #define PROFILE_NATIVE '\x04' #define PROFILE_RPYTHON '\x08' +#define PROFILE_REAL_TIME '\x10' #define DYN_JIT_FLAG 0xbeefbeef diff --git a/src/vmprof_common.h b/src/vmprof_common.h index 24aa3c18..88d70ed2 100644 --- a/src/vmprof_common.h +++ b/src/vmprof_common.h @@ -17,9 +17,11 @@ static long prepare_interval_usec = 0; static long profile_interval_usec = 0; -static int opened_profile(const char *interp_name, int memory, int proflines, int native); +static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time); #ifdef VMPROF_UNIX +static int signal_type = SIGPROF; +static int itimer_type = ITIMER_PROF; static struct profbuf_s *volatile current_codes; #endif @@ -64,7 +66,7 @@ typedef struct prof_stacktrace_s { RPY_EXTERN char *vmprof_init(int fd, double interval, int memory, - int proflines, const char *interp_name, int native) + int proflines, const char *interp_name, int native, int real_time) { if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */ return "bad value for 'interval'"; @@ -74,6 +76,13 @@ char *vmprof_init(int fd, double interval, int memory, if (prepare_concurrent_bufs() < 0) return "out of memory"; #if VMPROF_UNIX + if (real_time) { + signal_type = SIGALRM; + itimer_type = ITIMER_REAL; + } else { + signal_type = SIGPROF; + itimer_type = ITIMER_PROF; + } current_codes = NULL; assert(fd >= 0); #else @@ -85,14 +94,14 @@ char *vmprof_init(int fd, double interval, int memory, } #endif vmp_set_profile_fileno(fd); - if (opened_profile(interp_name, memory, proflines, native) < 0) { + if (opened_profile(interp_name, memory, proflines, native, real_time) < 0) { vmp_set_profile_fileno(0); return strerror(errno); } return NULL; } -static int opened_profile(const char *interp_name, int memory, int proflines, int native) +static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time) { int success; int bits; @@ -119,7 +128,7 @@ static int opened_profile(const char *interp_name, int memory, int proflines, in header.interp_name[1] = '\x00'; header.interp_name[2] = VERSION_TIMESTAMP; header.interp_name[3] = memory*PROFILE_MEMORY + proflines*PROFILE_LINES + \ - native*PROFILE_NATIVE; + native*PROFILE_NATIVE + real_time*PROFILE_REAL_TIME; #ifdef RPYTHON_VMPROF header.interp_name[3] += PROFILE_RPYTHON; #endif diff --git a/src/vmprof_main.h b/src/vmprof_main.h index a85ae8d0..d6cf2ccb 100644 --- a/src/vmprof_main.h +++ b/src/vmprof_main.h @@ -48,11 +48,15 @@ #include "rss_darwin.h" #endif +#if VMPROF_LINUX +#include +#include +#endif /************************************************************/ static void *(*mainloop_get_virtual_ip)(char *) = 0; -static int opened_profile(const char *interp_name, int memory, int proflines, int native); +static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time); static void flush_codes(void); /************************************************************/ @@ -94,7 +98,7 @@ int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, i { PY_STACK_FRAME_T * frame; #ifdef RPYTHON_VMPROF - // do nothing here, + // do nothing here, frame = (PY_STACK_FRAME_T*)current; #else if (!current) { @@ -181,11 +185,43 @@ static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { } #endif +#ifdef VMPROF_LINUX +static void broadcast_signal_for_threads(pid_t pid) +{ + pid_t tid; + DIR * dir; + struct dirent dirs; + struct dirent * dirp; + + dir = opendir("/proc/self/task"); + + while(1) { + readdir_r(dir, &dirs, &dirp); + if (!dirp) + break; + if (dirp->d_name[0] == '.') + continue; + tid = atoi(dirp->d_name); + if (tid == pid) + continue; + syscall(SYS_tgkill, pid, tid, signal_type); + } + + closedir(dir); +} +#endif + static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { int commit; PY_THREAD_STATE_T * tstate = NULL; void (*prevhandler)(int); + +#ifdef VMPROF_LINUX + pid_t pid = getpid(); + pid_t tid = syscall(SYS_gettid); +#endif + #ifndef RPYTHON_VMPROF // TERRIBLE HACK AHEAD // on OS X, the thread local storage is sometimes uninitialized @@ -213,6 +249,17 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) __sync_lock_release(&spinlock); #endif +#ifdef VMPROF_LINUX + // SIGNAL ABUSE AHEAD + // On linux, the prof timer will deliver the signal to the thread which triggered the timer, + // because these timers are based on process and system time, and as such, are thread-aware. + // For the real timer, the signal gets delivered to the main thread, seemingly always. + // Consequently if we want to sample all threads, we need to forward the signal with tgkill. + if ((signal_type == SIGALRM) && (pid == tid)) { + broadcast_signal_for_threads(pid); + } +#endif + long val = __sync_fetch_and_add(&signal_handler_value, 2L); if ((val & 1) == 0) { @@ -257,7 +304,7 @@ static int install_sigprof_handler(void) sa.sa_sigaction = sigprof_handler; sa.sa_flags = SA_RESTART | SA_SIGINFO; if (sigemptyset(&sa.sa_mask) == -1 || - sigaction(SIGPROF, &sa, NULL) == -1) + sigaction(signal_type, &sa, NULL) == -1) return -1; return 0; } @@ -269,7 +316,7 @@ static int remove_sigprof_handler(void) ign_sigint.sa_flags = 0; sigemptyset(&ign_sigint.sa_mask); - if (sigaction(SIGPROF, &ign_sigint, NULL) < 0) { + if (sigaction(signal_type, &ign_sigint, NULL) < 0) { fprintf(stderr, "Could not remove the signal handler (for profiling)\n"); return -1; } @@ -282,7 +329,7 @@ static int install_sigprof_timer(void) timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = (int)profile_interval_usec; timer.it_value = timer.it_interval; - if (setitimer(ITIMER_PROF, &timer, NULL) != 0) + if (setitimer(itimer_type, &timer, NULL) != 0) return -1; return 0; } @@ -291,7 +338,7 @@ static int remove_sigprof_timer(void) { static struct itimerval timer; timerclear(&(timer.it_interval)); timerclear(&(timer.it_value)); - if (setitimer(ITIMER_PROF, &timer, NULL) != 0) { + if (setitimer(itimer_type, &timer, NULL) != 0) { fprintf(stderr, "Could not disable the signal handler (for profiling)\n"); return -1; } diff --git a/vmprof/__init__.py b/vmprof/__init__.py index 4ed6eb67..b656aebf 100644 --- a/vmprof/__init__.py +++ b/vmprof/__init__.py @@ -66,12 +66,12 @@ def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None _vmprof.enable(gz_fileno, period) # , memory, lines, native) else: # CPYTHON - def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None): + def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None, real_time=False): if not isinstance(period, float): raise ValueError("You need to pass a float as an argument") gz_fileno = _gzip_start(fileno) native = _is_native_enabled(native) - _vmprof.enable(gz_fileno, period, memory, lines, native) + _vmprof.enable(gz_fileno, period, memory, lines, native, real_time) def sample_stack_now(skip=0): """ Helper utility mostly for tests, this is considered From ad7d9a2dc5a12621a25ad5c3299040a0f1563e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Wr=C3=B3blewski?= Date: Sat, 15 Apr 2017 23:40:50 +0200 Subject: [PATCH 2/5] Real time profiling, simple version. --- src/vmprof_common.h | 8 +++++++ src/vmprof_main.h | 50 +++++++++-------------------------------- vmprof/profiler.py | 10 +++++---- vmprof/test/test_run.py | 35 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 43 deletions(-) diff --git a/src/vmprof_common.h b/src/vmprof_common.h index 88d70ed2..6d97ed78 100644 --- a/src/vmprof_common.h +++ b/src/vmprof_common.h @@ -12,6 +12,10 @@ #include "vmprof_mt.h" #endif +#ifdef VMPROF_LINUX +#include +#endif + #define MAX_FUNC_NAME 1024 static long prepare_interval_usec = 0; @@ -22,6 +26,7 @@ static int opened_profile(const char *interp_name, int memory, int proflines, in #ifdef VMPROF_UNIX static int signal_type = SIGPROF; static int itimer_type = ITIMER_PROF; +static pid_t original_tid = 0; static struct profbuf_s *volatile current_codes; #endif @@ -79,6 +84,9 @@ char *vmprof_init(int fd, double interval, int memory, if (real_time) { signal_type = SIGALRM; itimer_type = ITIMER_REAL; +#if VMPROF_LINUX + original_tid = (pid_t) syscall(SYS_gettid); +#endif } else { signal_type = SIGPROF; itimer_type = ITIMER_PROF; diff --git a/src/vmprof_main.h b/src/vmprof_main.h index d6cf2ccb..ac18b9da 100644 --- a/src/vmprof_main.h +++ b/src/vmprof_main.h @@ -49,7 +49,6 @@ #endif #if VMPROF_LINUX -#include #include #endif @@ -185,32 +184,6 @@ static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { } #endif -#ifdef VMPROF_LINUX -static void broadcast_signal_for_threads(pid_t pid) -{ - pid_t tid; - DIR * dir; - struct dirent dirs; - struct dirent * dirp; - - dir = opendir("/proc/self/task"); - - while(1) { - readdir_r(dir, &dirs, &dirp); - if (!dirp) - break; - if (dirp->d_name[0] == '.') - continue; - tid = atoi(dirp->d_name); - if (tid == pid) - continue; - syscall(SYS_tgkill, pid, tid, signal_type); - } - - closedir(dir); -} -#endif - static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { int commit; @@ -219,7 +192,17 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) #ifdef VMPROF_LINUX pid_t pid = getpid(); - pid_t tid = syscall(SYS_gettid); + pid_t tid = (pid_t) syscall(SYS_gettid); + + // SIGNAL ABUSE AHEAD + // On linux, the prof timer will deliver the signal to the thread which triggered the timer, + // because these timers are based on process and system time, and as such, are thread-aware. + // For the real timer, the signal gets delivered to the main thread, seemingly always. + // Consequently if we want to sample the original thread, we may need to forward the signal. + if ((signal_type == SIGALRM) && (tid != original_tid)) { + syscall(SYS_tgkill, pid, original_tid, SIGALRM); + return; + } #endif #ifndef RPYTHON_VMPROF @@ -249,17 +232,6 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) __sync_lock_release(&spinlock); #endif -#ifdef VMPROF_LINUX - // SIGNAL ABUSE AHEAD - // On linux, the prof timer will deliver the signal to the thread which triggered the timer, - // because these timers are based on process and system time, and as such, are thread-aware. - // For the real timer, the signal gets delivered to the main thread, seemingly always. - // Consequently if we want to sample all threads, we need to forward the signal with tgkill. - if ((signal_type == SIGALRM) && (pid == tid)) { - broadcast_signal_for_threads(pid); - } -#endif - long val = __sync_fetch_and_add(&signal_handler_value, 2L); if ((val & 1) == 0) { diff --git a/vmprof/profiler.py b/vmprof/profiler.py index 2ee2fa84..26a5a99f 100644 --- a/vmprof/profiler.py +++ b/vmprof/profiler.py @@ -11,7 +11,7 @@ class VMProfError(Exception): class ProfilerContext(object): done = False - def __init__(self, name, memory, native, only_needed): + def __init__(self, name, memory, native, only_needed, real_time): if name is None: self.tmpfile = tempfile.NamedTemporaryFile("w+b", delete=False) else: @@ -20,9 +20,11 @@ def __init__(self, name, memory, native, only_needed): self.memory = memory self.native = native self.only_needed = only_needed + self.real_time = real_time def __enter__(self): - vmprof.enable(self.tmpfile.fileno(), 0.001, self.memory, native=self.native) + vmprof.enable(self.tmpfile.fileno(), 0.001, self.memory, native=self.native, + real_time=self.real_time) def __exit__(self, type, value, traceback): vmprof.disable(only_needed=self.only_needed) @@ -54,8 +56,8 @@ class Profiler(object): def __init__(self): self._lib_cache = {} - def measure(self, name=None, memory=False, native=False, only_needed=False): - self.ctx = ProfilerContext(name, memory, native, only_needed) + def measure(self, name=None, memory=False, native=False, only_needed=False, real_time=False): + self.ctx = ProfilerContext(name, memory, native, real_time, only_needed) return self.ctx def get_stats(self): diff --git a/vmprof/test/test_run.py b/vmprof/test/test_run.py index 5a9fdc3f..f05a1e3d 100644 --- a/vmprof/test/test_run.py +++ b/vmprof/test/test_run.py @@ -3,6 +3,7 @@ import py import sys import tempfile +import time import gzip import time import pytz @@ -77,10 +78,16 @@ def function_bar(): return function_foo() +def function_slp(t=0.05): + return time.sleep(t) + + foo_full_name = "py:function_foo:%d:%s" % (function_foo.__code__.co_firstlineno, function_foo.__code__.co_filename) bar_full_name = "py:function_bar:%d:%s" % (function_bar.__code__.co_firstlineno, function_bar.__code__.co_filename) +slp_full_name = "py:function_slp:%d:%s" % (function_slp.__code__.co_firstlineno, + function_slp.__code__.co_filename) GZIP = False @@ -238,6 +245,34 @@ def function_bar(): prof.get_stats() + +@py.test.mark.skipif("sys.platform != 'linux'") +def test_vmprof_real_time(): + prof = vmprof.Profiler() + with prof.measure(real_time=True): + function_slp() + stats = prof.get_stats() + tprof = stats.top_profile() + d = dict(tprof) + assert d[slp_full_name] > 0 + +@py.test.mark.skipif("'__pypy__' in sys.builtin_module_names") +@py.test.mark.skipif("sys.platform != 'linux'") +def test_vmprof_real_time_threaded(): + import threading + prof = vmprof.Profiler() + thread = threading.Thread(target=function_slp, args=[0.5]) + with prof.measure(real_time=True): + thread.start() + time.sleep(0.25) + thread.join() + stats = prof.get_stats() + tprof = stats.top_profile() + d = dict(tprof) + assert slp_full_name not in d + + + if GZIP: def test_gzip_problem(): tmpfile = tempfile.NamedTemporaryFile(delete=False) From a488f4f1b34b13182fcc46f5d2b6bb4a9adf606f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Wr=C3=B3blewski?= Date: Sun, 16 Apr 2017 11:58:10 +0200 Subject: [PATCH 3/5] Real time profiling, thread version. --- src/_vmprof.c | 58 +++++++++++++++++++++++++++++++++++-- src/vmprof_common.h | 63 ++++++++++++++++++++++++++++++++++++++--- src/vmprof_main.h | 61 ++++++++++++++++++++++++++++----------- src/vmprof_main_win32.h | 2 +- vmprof/__init__.py | 19 +++++++++++-- vmprof/profiler.py | 11 +++---- vmprof/test/test_run.py | 40 +++++++++++++++++++------- 7 files changed, 214 insertions(+), 40 deletions(-) diff --git a/src/_vmprof.c b/src/_vmprof.c index 9975d488..474ab2b2 100644 --- a/src/_vmprof.c +++ b/src/_vmprof.c @@ -312,7 +312,7 @@ static PyObject *enable_vmprof(PyObject* self, PyObject *args) return NULL; } - if (vmprof_enable(memory, native) < 0) { + if (vmprof_enable(memory, native, real_time) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -474,18 +474,72 @@ static PyObject * vmp_get_profile_path(PyObject *module, PyObject *noargs) { } #endif + +#ifdef VMPROF_LINUX +static PyObject * +insert_real_time_thread(PyObject *module, PyObject * noargs) { + ssize_t thread_count; + + if (!is_enabled) { + PyErr_SetString(PyExc_ValueError, "vmprof is not enabled"); + return NULL; + } + + if (signal_type != SIGALRM) { + PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode"); + return NULL; + } + + while (__sync_lock_test_and_set(&spinlock, 1)) { + } + thread_count = insert_thread((pid_t) syscall(SYS_gettid), -1); + __sync_lock_release(&spinlock); + + return PyLong_FromSsize_t(thread_count); +} + +static PyObject * +remove_real_time_thread(PyObject *module, PyObject * noargs) { + ssize_t thread_count; + + if (!is_enabled) { + PyErr_SetString(PyExc_ValueError, "vmprof is not enabled"); + return NULL; + } + + if (signal_type != SIGALRM) { + PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode"); + return NULL; + } + + while (__sync_lock_test_and_set(&spinlock, 1)) { + } + thread_count = remove_thread((pid_t) syscall(SYS_gettid), -1); + __sync_lock_release(&spinlock); + + return PyLong_FromSsize_t(thread_count); +} +#endif + + static PyMethodDef VMProfMethods[] = { {"enable", enable_vmprof, METH_VARARGS, "Enable profiling."}, {"disable", disable_vmprof, METH_VARARGS, "Disable profiling."}, {"write_all_code_objects", write_all_code_objects, METH_VARARGS, "Write eagerly all the IDs of code objects"}, {"sample_stack_now", sample_stack_now, METH_VARARGS, "Sample the stack now"}, + {"is_enabled", vmp_is_enabled, METH_NOARGS, "Indicates if vmprof is currently sampling."}, #ifdef VMP_SUPPORTS_NATIVE_PROFILING {"resolve_addr", resolve_addr, METH_VARARGS, "Return the name of the addr"}, #endif - {"is_enabled", vmp_is_enabled, METH_NOARGS, "Indicates if vmprof is currently sampling."}, #ifdef VMPROF_UNIX {"get_profile_path", vmp_get_profile_path, METH_NOARGS, "Profile path the profiler logs to."}, +#endif +#ifdef VMPROF_LINUX + {"insert_real_time_thread", insert_real_time_thread, METH_NOARGS, + "Insert a thread into the real time profiling list."}, + {"remove_real_time_thread", remove_real_time_thread, METH_NOARGS, + "Remove a thread from the real time profiling list."}, #endif {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/src/vmprof_common.h b/src/vmprof_common.h index 6d97ed78..349da554 100644 --- a/src/vmprof_common.h +++ b/src/vmprof_common.h @@ -26,10 +26,68 @@ static int opened_profile(const char *interp_name, int memory, int proflines, in #ifdef VMPROF_UNIX static int signal_type = SIGPROF; static int itimer_type = ITIMER_PROF; -static pid_t original_tid = 0; +static pid_t *threads = NULL; +static size_t threads_size = 0; +static size_t thread_count = 0; +static size_t threads_size_step = 8; static struct profbuf_s *volatile current_codes; #endif +#ifdef VMPROF_LINUX + +static inline ssize_t search_thread(pid_t tid, ssize_t i) { + if (i < 0) + i = 0; + while (i < thread_count) { + if (threads[i] == tid) + return i; + i++; + } + return -1; +} + +ssize_t insert_thread(pid_t tid, ssize_t i) { + assert(signal_type == SIGALRM); + i = search_thread(tid, i); + if (i > 0) + return -1; + if (thread_count == threads_size) { + threads_size += threads_size_step; + threads = realloc(threads, sizeof(pid_t) * threads_size); + assert(threads != NULL); + memset(threads + thread_count, 0, sizeof(pid_t) * threads_size_step); + } + threads[thread_count++] = tid; + return thread_count; +} + +ssize_t remove_thread(pid_t tid, ssize_t i) { + assert(signal_type == SIGALRM); + if (thread_count == 0) + return -1; + if (threads == NULL) + return -1; + i = search_thread(tid, i); + if (i < 0) + return -1; + threads[i] = threads[--thread_count]; + threads[thread_count] = 0; + return thread_count; +} + +ssize_t remove_threads(void) { + assert(signal_type == SIGALRM); + if (threads != NULL) { + free(threads); + threads = NULL; + } + thread_count = 0; + threads_size = 0; + return 0; +} + +#endif + #define MAX_STACK_DEPTH \ ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *)) @@ -84,9 +142,6 @@ char *vmprof_init(int fd, double interval, int memory, if (real_time) { signal_type = SIGALRM; itimer_type = ITIMER_REAL; -#if VMPROF_LINUX - original_tid = (pid_t) syscall(SYS_gettid); -#endif } else { signal_type = SIGPROF; itimer_type = ITIMER_PROF; diff --git a/src/vmprof_main.h b/src/vmprof_main.h index ac18b9da..eaeb5cde 100644 --- a/src/vmprof_main.h +++ b/src/vmprof_main.h @@ -184,27 +184,31 @@ static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { } #endif +#ifdef VMPROF_LINUX +static int broadcast_signal_for_threads(pid_t pid) +{ + int done = 1; + size_t i = 0; + pid_t tid; + while (i < thread_count) { + tid = threads[i]; + if (tid == pid) + done = 0; + else + if (syscall(SYS_tgkill, pid, tid, SIGALRM)) + remove_thread(tid, i); + i++; + } + return done; +} +#endif + static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { int commit; PY_THREAD_STATE_T * tstate = NULL; void (*prevhandler)(int); -#ifdef VMPROF_LINUX - pid_t pid = getpid(); - pid_t tid = (pid_t) syscall(SYS_gettid); - - // SIGNAL ABUSE AHEAD - // On linux, the prof timer will deliver the signal to the thread which triggered the timer, - // because these timers are based on process and system time, and as such, are thread-aware. - // For the real timer, the signal gets delivered to the main thread, seemingly always. - // Consequently if we want to sample the original thread, we may need to forward the signal. - if ((signal_type == SIGALRM) && (tid != original_tid)) { - syscall(SYS_tgkill, pid, original_tid, SIGALRM); - return; - } -#endif - #ifndef RPYTHON_VMPROF // TERRIBLE HACK AHEAD // on OS X, the thread local storage is sometimes uninitialized @@ -218,6 +222,23 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) // get_current_thread_state returns a sane result while (__sync_lock_test_and_set(&spinlock, 1)) { } + +#ifdef VMPROF_LINUX + // SIGNAL ABUSE AHEAD + // On linux, the prof timer will deliver the signal to the thread which triggered the timer, + // because these timers are based on process and system time, and as such, are thread-aware. + // For the real timer, the signal gets delivered to the main thread, seemingly always. + // Consequently if we want to sample multiple threads, we need to forward this signal. + if (signal_type == SIGALRM) { + pid_t pid = getpid(); + pid_t tid = (pid_t) syscall(SYS_gettid); + if ((pid == tid) && broadcast_signal_for_threads(pid)) { + __sync_lock_release(&spinlock); + return; + } + } +#endif + prevhandler = signal(SIGSEGV, &segfault_handler); int fault_code = setjmp(restore_point); if (fault_code == 0) { @@ -380,7 +401,7 @@ static void disable_cpyprof(void) #endif RPY_EXTERN -int vmprof_enable(int memory, int native) +int vmprof_enable(int memory, int native, int real_time) { #ifdef VMP_SUPPORTS_NATIVE_PROFILING init_cpyprof(native); @@ -390,6 +411,10 @@ int vmprof_enable(int memory, int native) profile_interval_usec = prepare_interval_usec; if (memory && setup_rss() == -1) goto error; +#if VMPROF_LINUX + if (real_time && insert_thread((pid_t)syscall(SYS_gettid), -1) == -1) + goto error; +#endif if (install_pthread_atfork_hooks() == -1) goto error; if (install_sigprof_handler() == -1) @@ -435,6 +460,10 @@ int vmprof_disable(void) return -1; if (remove_sigprof_handler() == -1) return -1; +#ifdef VMPROF_LINUX + if ((signal_type == SIGALRM) && remove_threads() == -1) + return -1; +#endif flush_codes(); if (shutdown_concurrent_bufs(vmp_profile_fileno()) < 0) return -1; diff --git a/src/vmprof_main_win32.h b/src/vmprof_main_win32.h index ce4d7151..8e769ca7 100644 --- a/src/vmprof_main_win32.h +++ b/src/vmprof_main_win32.h @@ -160,7 +160,7 @@ long __stdcall vmprof_mainloop(void *arg) } RPY_EXTERN -int vmprof_enable(int memory, int native) +int vmprof_enable(int memory, int native, int real_time) { if (!thread_started) { if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { diff --git a/vmprof/__init__.py b/vmprof/__init__.py index b656aebf..e3296406 100644 --- a/vmprof/__init__.py +++ b/vmprof/__init__.py @@ -47,7 +47,7 @@ def _is_native_enabled(native): return native if IS_PYPY: - def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None, warn=True): + def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None, real_time=False, warn=True): pypy_version_info = sys.pypy_version_info[:3] if not isinstance(period, float): raise ValueError("You need to pass a float as an argument") @@ -61,7 +61,7 @@ def enable(fileno, period=DEFAULT_PERIOD, memory=False, lines=False, native=None native = _is_native_enabled(native) gz_fileno = _gzip_start(fileno) if pypy_version_info >= (5, 8, 0): - _vmprof.enable(gz_fileno, period, memory, lines, native) + _vmprof.enable(gz_fileno, period, memory, lines, native, real_time) else: _vmprof.enable(gz_fileno, period) # , memory, lines, native) else: @@ -91,6 +91,21 @@ def resolve_addr(addr): return _vmprof.resolve_addr(addr) +def insert_real_time_thread(): + """ Inserts a thread into the list of threads to be sampled in real time mode. + When enabling real time mode, the caller thread is inserted automatically. + Returns the number of registered threads, or -1 if we can't insert thread. + """ + return _vmprof.insert_real_time_thread() + +def remove_real_time_thread(): + """ Removes a thread from the list of threads to be sampled in real time mode. + When disabling in real time mode, *all* threads are removed automatically. + Returns the number of registered threads, or -1 if we can't remove thread. + """ + return _vmprof.remove_real_time_thread() + + def is_enabled(): """ Indicates if vmprof has already been enabled for this process. Returns True or False. None is returned if the state is unknown. diff --git a/vmprof/profiler.py b/vmprof/profiler.py index 26a5a99f..b281d80e 100644 --- a/vmprof/profiler.py +++ b/vmprof/profiler.py @@ -11,20 +11,21 @@ class VMProfError(Exception): class ProfilerContext(object): done = False - def __init__(self, name, memory, native, only_needed, real_time): + def __init__(self, name, period, memory, native, only_needed, real_time): if name is None: self.tmpfile = tempfile.NamedTemporaryFile("w+b", delete=False) else: self.tmpfile = open(name, "w+b") self.filename = self.tmpfile.name + self.period = period self.memory = memory self.native = native self.only_needed = only_needed self.real_time = real_time def __enter__(self): - vmprof.enable(self.tmpfile.fileno(), 0.001, self.memory, native=self.native, - real_time=self.real_time) + vmprof.enable(self.tmpfile.fileno(), self.period, self.memory, + native=self.native, real_time=self.real_time) def __exit__(self, type, value, traceback): vmprof.disable(only_needed=self.only_needed) @@ -56,8 +57,8 @@ class Profiler(object): def __init__(self): self._lib_cache = {} - def measure(self, name=None, memory=False, native=False, only_needed=False, real_time=False): - self.ctx = ProfilerContext(name, memory, native, real_time, only_needed) + def measure(self, name=None, period=0.001, memory=False, native=False, only_needed=False, real_time=False): + self.ctx = ProfilerContext(name, period, memory, native, only_needed, real_time) return self.ctx def get_stats(self): diff --git a/vmprof/test/test_run.py b/vmprof/test/test_run.py index f05a1e3d..aded30fe 100644 --- a/vmprof/test/test_run.py +++ b/vmprof/test/test_run.py @@ -78,7 +78,15 @@ def function_bar(): return function_foo() -def function_slp(t=0.05): +def functime_foo(t=0.05, insert=False): + if (insert): + vmprof.insert_real_time_thread() + return time.sleep(t) + + +def functime_bar(t=0.05, remove=False): + if (remove): + vmprof.remove_real_time_thread() return time.sleep(t) @@ -86,8 +94,11 @@ def function_slp(t=0.05): function_foo.__code__.co_filename) bar_full_name = "py:function_bar:%d:%s" % (function_bar.__code__.co_firstlineno, function_bar.__code__.co_filename) -slp_full_name = "py:function_slp:%d:%s" % (function_slp.__code__.co_firstlineno, - function_slp.__code__.co_filename) + +foo_time_name = "py:functime_foo:%d:%s" % (functime_foo.__code__.co_firstlineno, + functime_foo.__code__.co_filename) +bar_time_name = "py:functime_bar:%d:%s" % (functime_bar.__code__.co_firstlineno, + functime_bar.__code__.co_filename) GZIP = False @@ -250,26 +261,35 @@ def function_bar(): def test_vmprof_real_time(): prof = vmprof.Profiler() with prof.measure(real_time=True): - function_slp() + functime_foo() stats = prof.get_stats() tprof = stats.top_profile() d = dict(tprof) - assert d[slp_full_name] > 0 + assert d[foo_time_name] > 0 + @py.test.mark.skipif("'__pypy__' in sys.builtin_module_names") @py.test.mark.skipif("sys.platform != 'linux'") -def test_vmprof_real_time_threaded(): +@py.test.mark.parametrize("insert_foo,remove_bar", [ + (False, False), + (False, True), + ( True, False), + ( True, True), +]) +def test_vmprof_real_time_threaded(insert_foo, remove_bar): import threading prof = vmprof.Profiler() - thread = threading.Thread(target=function_slp, args=[0.5]) - with prof.measure(real_time=True): + wait = 0.5 + thread = threading.Thread(target=functime_foo, args=[wait, insert_foo]) + with prof.measure(period=0.25, real_time=True): thread.start() - time.sleep(0.25) + functime_bar(wait, remove_bar) thread.join() stats = prof.get_stats() tprof = stats.top_profile() d = dict(tprof) - assert slp_full_name not in d + assert insert_foo == (foo_time_name in d) + assert remove_bar != (bar_time_name in d) From 5001a6d3e96226070069adec8539ede541fc9eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Wr=C3=B3blewski?= Date: Fri, 28 Apr 2017 18:27:15 +0200 Subject: [PATCH 4/5] Real time profiling, somewhat portable. --- src/_vmprof.c | 14 +++++++------- src/vmprof_common.h | 12 ++++++------ src/vmprof_main.h | 39 +++++++++++++++++++++++++++------------ vmprof/test/test_run.py | 4 ++-- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/_vmprof.c b/src/_vmprof.c index 474ab2b2..36fe706a 100644 --- a/src/_vmprof.c +++ b/src/_vmprof.c @@ -292,9 +292,9 @@ static PyObject *enable_vmprof(PyObject* self, PyObject *args) return NULL; } -#ifndef VMPROF_LINUX +#ifndef VMPROF_UNIX if (real_time) { - PyErr_SetString(PyExc_ValueError, "real time profiling is only supported on linux"); + PyErr_SetString(PyExc_ValueError, "real time profiling is only supported on Linux and MacOS"); return NULL; } #endif @@ -475,7 +475,7 @@ static PyObject * vmp_get_profile_path(PyObject *module, PyObject *noargs) { #endif -#ifdef VMPROF_LINUX +#ifdef VMPROF_UNIX static PyObject * insert_real_time_thread(PyObject *module, PyObject * noargs) { ssize_t thread_count; @@ -492,7 +492,8 @@ insert_real_time_thread(PyObject *module, PyObject * noargs) { while (__sync_lock_test_and_set(&spinlock, 1)) { } - thread_count = insert_thread((pid_t) syscall(SYS_gettid), -1); + + thread_count = insert_thread(pthread_self(), -1); __sync_lock_release(&spinlock); return PyLong_FromSsize_t(thread_count); @@ -514,7 +515,8 @@ remove_real_time_thread(PyObject *module, PyObject * noargs) { while (__sync_lock_test_and_set(&spinlock, 1)) { } - thread_count = remove_thread((pid_t) syscall(SYS_gettid), -1); + + thread_count = remove_thread(pthread_self(), -1); __sync_lock_release(&spinlock); return PyLong_FromSsize_t(thread_count); @@ -534,8 +536,6 @@ static PyMethodDef VMProfMethods[] = { #endif #ifdef VMPROF_UNIX {"get_profile_path", vmp_get_profile_path, METH_NOARGS, "Profile path the profiler logs to."}, -#endif -#ifdef VMPROF_LINUX {"insert_real_time_thread", insert_real_time_thread, METH_NOARGS, "Insert a thread into the real time profiling list."}, {"remove_real_time_thread", remove_real_time_thread, METH_NOARGS, diff --git a/src/vmprof_common.h b/src/vmprof_common.h index 349da554..66da392e 100644 --- a/src/vmprof_common.h +++ b/src/vmprof_common.h @@ -26,27 +26,27 @@ static int opened_profile(const char *interp_name, int memory, int proflines, in #ifdef VMPROF_UNIX static int signal_type = SIGPROF; static int itimer_type = ITIMER_PROF; -static pid_t *threads = NULL; +static pthread_t *threads = NULL; static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; static struct profbuf_s *volatile current_codes; #endif -#ifdef VMPROF_LINUX +#ifdef VMPROF_UNIX -static inline ssize_t search_thread(pid_t tid, ssize_t i) { +static inline ssize_t search_thread(pthread_t tid, ssize_t i) { if (i < 0) i = 0; while (i < thread_count) { - if (threads[i] == tid) + if (pthread_equal(threads[i], tid)) return i; i++; } return -1; } -ssize_t insert_thread(pid_t tid, ssize_t i) { +ssize_t insert_thread(pthread_t tid, ssize_t i) { assert(signal_type == SIGALRM); i = search_thread(tid, i); if (i > 0) @@ -61,7 +61,7 @@ ssize_t insert_thread(pid_t tid, ssize_t i) { return thread_count; } -ssize_t remove_thread(pid_t tid, ssize_t i) { +ssize_t remove_thread(pthread_t tid, ssize_t i) { assert(signal_type == SIGALRM); if (thread_count == 0) return -1; diff --git a/src/vmprof_main.h b/src/vmprof_main.h index eaeb5cde..03676629 100644 --- a/src/vmprof_main.h +++ b/src/vmprof_main.h @@ -184,18 +184,19 @@ static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { } #endif -#ifdef VMPROF_LINUX -static int broadcast_signal_for_threads(pid_t pid) +#ifdef VMPROF_UNIX +static int broadcast_signal_for_threads(void) { int done = 1; size_t i = 0; - pid_t tid; + pthread_t self = pthread_self(); + pthread_t tid; while (i < thread_count) { tid = threads[i]; - if (tid == pid) + if (pthread_equal(tid, self)) done = 0; else - if (syscall(SYS_tgkill, pid, tid, SIGALRM)) + if (pthread_kill(tid, SIGALRM)) remove_thread(tid, i); i++; } @@ -203,6 +204,22 @@ static int broadcast_signal_for_threads(pid_t pid) } #endif +#ifdef VMPROF_LINUX +static inline int is_main_thread(void) +{ + pid_t pid = getpid(); + pid_t tid = (pid_t) syscall(SYS_gettid); + return (pid == tid); +} +#endif + +#ifdef VMPROF_APPLE +static inline int is_main_thread(void) +{ + return pthread_main_np(); +} +#endif + static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { int commit; @@ -223,16 +240,14 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) while (__sync_lock_test_and_set(&spinlock, 1)) { } -#ifdef VMPROF_LINUX +#ifdef VMPROF_UNIX // SIGNAL ABUSE AHEAD // On linux, the prof timer will deliver the signal to the thread which triggered the timer, // because these timers are based on process and system time, and as such, are thread-aware. // For the real timer, the signal gets delivered to the main thread, seemingly always. // Consequently if we want to sample multiple threads, we need to forward this signal. if (signal_type == SIGALRM) { - pid_t pid = getpid(); - pid_t tid = (pid_t) syscall(SYS_gettid); - if ((pid == tid) && broadcast_signal_for_threads(pid)) { + if (is_main_thread() && broadcast_signal_for_threads()) { __sync_lock_release(&spinlock); return; } @@ -411,8 +426,8 @@ int vmprof_enable(int memory, int native, int real_time) profile_interval_usec = prepare_interval_usec; if (memory && setup_rss() == -1) goto error; -#if VMPROF_LINUX - if (real_time && insert_thread((pid_t)syscall(SYS_gettid), -1) == -1) +#if VMPROF_UNIX + if (real_time && insert_thread(pthread_self(), -1) == -1) goto error; #endif if (install_pthread_atfork_hooks() == -1) @@ -460,7 +475,7 @@ int vmprof_disable(void) return -1; if (remove_sigprof_handler() == -1) return -1; -#ifdef VMPROF_LINUX +#ifdef VMPROF_UNIX if ((signal_type == SIGALRM) && remove_threads() == -1) return -1; #endif diff --git a/vmprof/test/test_run.py b/vmprof/test/test_run.py index aded30fe..cd12f695 100644 --- a/vmprof/test/test_run.py +++ b/vmprof/test/test_run.py @@ -257,7 +257,7 @@ def function_bar(): prof.get_stats() -@py.test.mark.skipif("sys.platform != 'linux'") +@py.test.mark.skipif("sys.platform == 'win32'") def test_vmprof_real_time(): prof = vmprof.Profiler() with prof.measure(real_time=True): @@ -269,7 +269,7 @@ def test_vmprof_real_time(): @py.test.mark.skipif("'__pypy__' in sys.builtin_module_names") -@py.test.mark.skipif("sys.platform != 'linux'") +@py.test.mark.skipif("sys.platform == 'win32'") @py.test.mark.parametrize("insert_foo,remove_bar", [ (False, False), (False, True), From a53d732f4b609b6f8277d863d80cd73df48d778a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Wr=C3=B3blewski?= Date: Sat, 29 Apr 2017 15:20:01 +0200 Subject: [PATCH 5/5] Minor clarity enchancement. --- src/vmprof_main.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vmprof_main.h b/src/vmprof_main.h index 03676629..9f0e0c74 100644 --- a/src/vmprof_main.h +++ b/src/vmprof_main.h @@ -193,11 +193,11 @@ static int broadcast_signal_for_threads(void) pthread_t tid; while (i < thread_count) { tid = threads[i]; - if (pthread_equal(tid, self)) + if (pthread_equal(tid, self)) { done = 0; - else - if (pthread_kill(tid, SIGALRM)) + } else if (pthread_kill(tid, SIGALRM)) { remove_thread(tid, i); + } i++; } return done;