Skip to content
This repository has been archived by the owner on Dec 3, 2019. It is now read-only.

Commit

Permalink
implement trace mode, fixes #26
Browse files Browse the repository at this point in the history
  • Loading branch information
eklitzke committed Oct 7, 2016
1 parent 16958de commit c83412c
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 36 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,25 @@ You can also change the sample time and sampling frequency:
pyflame -s 60 -r 0.10 PID
```

### Trace Mode

Sometimes you want to trace a process from start to finish. An example would be
tracing the run of a test suite. Pyflame supports this use case. To use it, you
invoke Pyflame like this:

pyflame [regular options] -t command arg1 arg2...

Frequently the value of `command` will actually be `python`, but it could be
something else like `uwsgi` or `py.test`. For instance, here's how Pyflame can
be used to trace its own test suite (a.k.a. "pyflameception"):

pyflame -t py.test tests/

Beware that when using the trace mode the stdout/stderr of the pyflame process
and the traced process will be mixed. This means if the traced process sends
data to stdout you may need to filter it somehow before sending the output to
`flamegraph.pl`.

## FAQ

### What Is "(idle)" Time?
Expand Down
133 changes: 107 additions & 26 deletions src/pyflame.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@
// limitations under the License.

#include <getopt.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <chrono>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_map>
Expand All @@ -35,12 +39,14 @@ using namespace pyflame;
namespace {
const char usage_str[] =
("Usage: pyflame [options] <pid>\n"
" pyflame [-t|--trace] command arg1 arg2...\n"
"\n"
"General Options:\n"
" -h, --help Show help\n"
" -s, --seconds=SECS How many seconds to run for (default 1)\n"
" -r, --rate=RATE Sample rate, as a fractional value of seconds "
"(default 0.001)\n"
" -t, --trace Trace a child process\n"
" -v, --version Show the version\n"
" -x, --exclude-idle Exclude idle time from statistics\n");

Expand All @@ -60,9 +66,14 @@ void PrintBuckets(const buckets_t &buckets) {
std::cout << *last << " " << kv.second << "\n";
}
}

inline bool IsPyflame(const std::string &str) {
return str.find("pyflame") != std::string::npos;
}
} // namespace

int main(int argc, char **argv) {
bool trace = false;
bool include_idle = true;
double seconds = 1;
double sample_rate = 0.001;
Expand All @@ -71,14 +82,16 @@ int main(int argc, char **argv) {
{"help", no_argument, 0, 'h'},
{"rate", required_argument, 0, 'r'},
{"seconds", required_argument, 0, 's'},
{"trace", no_argument, 0, 't'},
{"version", no_argument, 0, 'v'},
{"exclude-idle", no_argument, 0, 'x'},
{0, 0, 0, 0}};
int option_index = 0;
int c = getopt_long(argc, argv, "hr:s:vx", long_options, &option_index);
int c = getopt_long(argc, argv, "hr:s:tvx", long_options, &option_index);
if (c == -1) {
break;
}
bool break_from_loop = false;
switch (c) {
case 0:
if (long_options[option_index].flag != 0) {
Expand All @@ -96,6 +109,11 @@ int main(int argc, char **argv) {
case 's':
seconds = std::stod(optarg);
break;
case 't':
trace = true;
seconds = -1;
break_from_loop = true; // double break
break;
case 'v':
std::cout << PACKAGE_STRING << "\n\n";
std::cout << kBuildNote << "\n";
Expand All @@ -110,26 +128,97 @@ int main(int argc, char **argv) {
default:
abort();
}
if (break_from_loop) {
break;
}
}
if (optind != argc - 1) {
std::cerr << usage_str;
return 1;
}
long pid = std::strtol(argv[argc - 1], nullptr, 10);
if (pid > std::numeric_limits<pid_t>::max() ||
pid < std::numeric_limits<pid_t>::min()) {
std::cerr << "PID " << pid << " is out of valid PID range.\n";
return 1;
const std::chrono::microseconds interval{
static_cast<long>(sample_rate * 1000000)};
pid_t pid;
if (trace) {
if (optind == argc) {
std::cerr << usage_str;
return 1;
}
if (IsPyflame(argv[optind])) {
std::cerr << "You tried to pyflame a pyflame, naughty!\n";
return 1;
}
// In trace mode, all of the remaining arguments are a command to run. We
// fork and have the child run the command; the parent traces.
pid = fork();
if (pid == -1) {
perror("fork()");
return 1;
} else if (pid == 0) {
// child; run the command
if (execvp(argv[optind], argv + optind)) {
perror("execlp()");
return 1;
}
} else {
// parent; wait for the child to not be pyflame
std::ostringstream os;
os << "/proc/" << pid << "/comm";
for (;;) {
int status;
pid_t pid_status = waitpid(pid, &status, WNOHANG);
if (pid_status == -1) {
perror("waitpid()");
return 1;
} else if (pid_status > 0) {
std::cerr << "Child process exited with status "
<< WEXITSTATUS(pid_status) << "\n";
return 1;
}

std::ifstream ifs(os.str());
std::string line;
std::getline(ifs, line);

// If the child is not named pyflame we break, otherwise we sleep and
// retry. Hopefully this is not an infinite loop, since we already
// checked that the child should not have been pyflame. All bets are off
// if the child tries to be extra pathological (e.g. immediately exec a
// pyflame).
if (!IsPyflame(line)) {
break;
}
std::this_thread::sleep_for(interval);
}
}
} else {
// there should be one remaining argument: the pid to trace
if (optind != argc - 1) {
std::cerr << usage_str;
return 1;
}
pid = static_cast<pid_t>(std::strtol(argv[argc - 1], nullptr, 10));
if (pid > std::numeric_limits<pid_t>::max() ||
pid < std::numeric_limits<pid_t>::min()) {
std::cerr << "PID " << pid << " is out of valid PID range.\n";
return 1;
}
}
buckets_t buckets;
try {
PtraceAttach(pid);
Namespace ns(pid);
const unsigned long tstate_addr = ThreadStateAddr(pid, &ns);
if (seconds) {
const std::chrono::microseconds interval{
static_cast<long>(sample_rate * 1000000)};
if (seconds == 0) {
// if seconds == 0 then we do a "one shot" mode
const unsigned long frame_addr = FirstFrameAddr(pid, tstate_addr);
if (frame_addr) {
std::vector<Frame> stack = GetStack(pid, frame_addr);
for (auto it = stack.rbegin(); it != stack.rend(); it++) {
std::cout << *it << "\n";
}
} else {
std::cout << "(idle)\n";
}
} else {
size_t idle = 0;
bool check_end = seconds > 0;
auto end =
std::chrono::system_clock::now() +
std::chrono::microseconds(static_cast<long>(seconds * 1000000));
Expand All @@ -148,9 +237,11 @@ int main(int argc, char **argv) {
it->second++;
}
}
auto now = std::chrono::system_clock::now();
if (now + interval >= end) {
break;
if (check_end) {
auto now = std::chrono::system_clock::now();
if (now + interval >= end) {
break;
}
}
PtraceDetach(pid);
std::this_thread::sleep_for(interval);
Expand All @@ -160,16 +251,6 @@ int main(int argc, char **argv) {
std::cout << "(idle) " << idle << "\n";
}
PrintBuckets(buckets);
} else {
const unsigned long frame_addr = FirstFrameAddr(pid, tstate_addr);
if (frame_addr) {
std::vector<Frame> stack = GetStack(pid, frame_addr);
for (auto it = stack.rbegin(); it != stack.rend(); it++) {
std::cout << *it << "\n";
}
} else {
std::cout << "(idle)\n";
}
}
} catch (const PtraceException &exc) {
// If the process terminates early then we just print the buckets up until
Expand Down
11 changes: 8 additions & 3 deletions tests/exit_early.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import sys
import time


def main():
sys.stdout.write('%d\n' % (os.getpid(),))
sys.stdout.flush()
max_time = time.time() + 5
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--silent', action='store_true')
args = parser.parse_args()
if not args.silent:
sys.stdout.write('%d\n' % (os.getpid(),))
sys.stdout.flush()
max_time = time.time() + 2
while True:
time.sleep(0.1)
target = time.time() + 0.1
Expand Down
17 changes: 17 additions & 0 deletions tests/sleep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
#
# Copyright 2016 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

sleep 1

0 comments on commit c83412c

Please sign in to comment.