Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Makes TimedPhase auto-closeable. #143

Merged
merged 4 commits into from
Mar 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,16 @@ Current
* The previous message of `reject <url>` wasn't helpful, useful, nor very nice to users, and the message logged was
not very useful either. The message has been made nicer (`Service is unhealthy. At least 1 healthcheck is
failing`), and the log has been made better as well.
- [`RequestLog` timings support the try-with-resources block](https://github.com/yahoo/fili/pull/143)
* A block of code can now be timed by wrapping the timed block in a try-with-resources block that
starts the timer. Note: This won't work when performing timings across threads, or across
contexts. Those need to be started and stopped manually.

### Deprecated:

- [`RequestLog::stopMostRecentTimer` has been deprecated](https://github.com/yahoo/fili/pull/143)
- This method is a part of the infrastructure to support the recently
deprecated `RequestLog::switchTiming`.


### Fixed:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public void onNext(String jobView) {
* @param asyncResponse The channel over which to send the response
*/
private void send(Response response, AsyncResponse asyncResponse) {
if (RequestLog.isStarted(RESPONSE_WORKFLOW_TIMER)) {
if (RequestLog.isRunning(RESPONSE_WORKFLOW_TIMER)) {
RequestLog.stopTiming(RESPONSE_WORKFLOW_TIMER);
}
asyncResponse.resume(response);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public void onNext(PreResponse preResponse) {
* @param response The Response to send back to the user
*/
private void publishResponse(Response response) {
if (RequestLog.isStarted(RESPONSE_WORKFLOW_TIMER)) {
if (RequestLog.isRunning(RESPONSE_WORKFLOW_TIMER)) {
RequestLog.stopTiming(RESPONSE_WORKFLOW_TIMER);
}
asyncResponse.resume(response);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.yahoo.bard.webservice.data.dimension.KeyValueStore;
import com.yahoo.bard.webservice.data.dimension.SearchProvider;
import com.yahoo.bard.webservice.logging.RequestLog;
import com.yahoo.bard.webservice.logging.TimedPhase;
import com.yahoo.bard.webservice.util.DimensionStoreKeyUtils;
import com.yahoo.bard.webservice.util.Pagination;
import com.yahoo.bard.webservice.util.SinglePagePagination;
Expand Down Expand Up @@ -547,9 +548,8 @@ private Pagination<DimensionRow> getResultsPage(Query query, PaginationParameter
initializeIndexSearcher();
lock.readLock().lock();
try {
RequestLog.startTiming("QueryingLucene");
ScoreDoc[] hits;
try {
try (TimedPhase timer = RequestLog.startTiming("QueryingLucene")) {
hits = getPageOfData(
luceneIndexSearcher,
null,
Expand All @@ -571,13 +571,10 @@ private Pagination<DimensionRow> getResultsPage(Query query, PaginationParameter
throw new PageNotFoundException(requestedPageNumber, perPage, 0);
}
}
} finally {
RequestLog.stopTiming("QueryingLucene");
}

// convert hits to dimension rows
RequestLog.startTiming("LuceneHydratingDimensionRows");
try {
try (TimedPhase timer = RequestLog.startTiming("LuceneHydratingDimensionRows")) {
String idKey = DimensionStoreKeyUtils.getColumnKey(dimension.getKey().getName());
filteredDimRows = Arrays.stream(hits)
.map(
Expand All @@ -593,8 +590,6 @@ private Pagination<DimensionRow> getResultsPage(Query query, PaginationParameter
.map(document -> document.get(idKey))
.map(dimension::findDimensionRowByKeyValue)
.collect(Collectors.toCollection(TreeSet::new));
} finally {
RequestLog.stopTiming("LuceneHydratingDimensionRows");
}

documentCount = luceneIndexSearcher.count(query); //throws the caught IOException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public class RequestLog {

private String logId;
private LogBlock info;
@Deprecated
private TimedPhase mostRecentTimer;
private final Map<String, TimedPhase> times;
private final Set<String> threadIds;
Expand Down Expand Up @@ -85,54 +86,6 @@ private RequestLog(RequestLog rl) {
MDC.put(ID_KEY, logId);
}

/**
* Represents a phase that is timed.
* TimedPhase is used to associate a Timer located in the registry with the exact duration of such a phase for a
* specific request.
*/
private static class TimedPhase {
private final String name;
private long start;
private long duration;

/**
* Constructor.
*
* @param name Name of the phase
*/
private TimedPhase(String name) {
this.name = name;
}

/**
* Start the phase.
*/
private void start() {
if (start != 0) {
LOG.warn("Tried to start timer that is already running: {}", name);
return;
}
start = System.nanoTime();
}

/**
* Stop the phase.
*/
private void stop() {
if (start == 0) {
LOG.warn("Tried to stop timer that has not been started: {}", name);
return;
}
duration += System.nanoTime() - start;
REGISTRY.timer(name).update(duration, TimeUnit.NANOSECONDS);
start = 0;
}

private boolean isStarted() {
return start != 0;
}
}

/**
* Resets the contents of a request log at the calling thread.
*/
Expand Down Expand Up @@ -180,19 +133,7 @@ private void init() {
* @return the map containing all the recorded times per phase in milliseconds
*/
private Map<String, Long> getDurations() {
return times.values()
.stream()
.peek(
phase -> {
if (phase.start != 0) {
LOG.warn(
"Exporting duration while timer is running. Measurement might be wrong: {}",
phase.name
);
}
}
)
.collect(Collectors.toMap(phase -> phase.name, phase -> phase.duration));
return times.values().stream().collect(Collectors.toMap(TimedPhase::getName, TimedPhase::getDuration));
}

/**
Expand Down Expand Up @@ -221,15 +162,15 @@ private Map<String, Float> aggregateDurations() {
}

/**
* Check if a stopwatch is started.
* Check if a stopwatch is currently running.
*
* @param caller the caller to name this stopwatch with its class's simple name
*
* @return whether this stopwatch is started
*/

public static boolean isStarted(Object caller) {
return isStarted(caller.getClass().getSimpleName());
public static boolean isRunning(Object caller) {
return isRunning(caller.getClass().getSimpleName());
}

/**
Expand All @@ -239,29 +180,33 @@ public static boolean isStarted(Object caller) {
*
* @return whether this stopwatch is started
*/
public static boolean isStarted(String timePhaseName) {
public static boolean isRunning(String timePhaseName) {
RequestLog current = RLOG.get();
TimedPhase timePhase = current.times.get(timePhaseName);
return timePhase != null && timePhase.isStarted();
return timePhase != null && timePhase.isRunning();
}

/**
* Start a stopwatch.
* Time is accumulated if the stopwatch is already registered
*
* @param caller the caller to name this stopwatch with its class's simple name
*
* @return The stopwatch
*/
public static void startTiming(Object caller) {
startTiming(caller.getClass().getSimpleName());
public static TimedPhase startTiming(Object caller) {
return startTiming(caller.getClass().getSimpleName());
}

/**
* Start a stopwatch.
* Time is accumulated if the stopwatch is already registered
*
* @param timePhaseName the name of this stopwatch
*
* @return The stopwatch
*/
public static void startTiming(String timePhaseName) {
public static TimedPhase startTiming(String timePhaseName) {
RequestLog current = RLOG.get();
TimedPhase timePhase = current.times.get(timePhaseName);
if (timePhase == null) {
Expand All @@ -274,7 +219,7 @@ public static void startTiming(String timePhaseName) {
current.times.put(timePhaseName, timePhase);
}
current.mostRecentTimer = timePhase;
timePhase.start();
return timePhase.start();
}

/**
Expand Down Expand Up @@ -302,6 +247,15 @@ public static void stopTiming(Object caller) {
stopTiming(caller.getClass().getSimpleName());
}

/**
* Registers the final duration of a stopped timer with the RequestLog.
*
* @param stoppedPhase The phase that has been stopped, and whose duration needs to be stored
*/
public static void registerTime(TimedPhase stoppedPhase) {
RequestLog.REGISTRY.timer(stoppedPhase.getName()).update(stoppedPhase.getDuration(), stoppedPhase.getUnit());
}

/**
* Pause a stopwatch.
*
Expand All @@ -313,15 +267,19 @@ public static void stopTiming(String timePhaseName) {
LOG.warn("Tried to stop non-existent phase: {}", timePhaseName);
return;
}
timePhase.stop();
timePhase.close();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we make "stop" convert to doing the registering, calling "close" is likely not needed, but "stop" will be.

Copy link
Contributor Author

@archolewa archolewa Jan 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

close has to do the registering anyway, because that's the method invoked when the try-with-resources block ends. So even if we change how stop works, and had close delegate to stop, we still wouldn't need to change this invocation.

Copy link
Contributor Author

@archolewa archolewa Jan 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plus we shouldn't change this from close to stop. In my opinion, RequestLog::stopTiming should have the same semantics as what the try-with-resources block does (with the exception of the existence check that the try-with-resources doesn't have to worry about).

}

/**
* Stop the most recent stopwatch.
*
* @deprecated Stopping a timer based on context is brittle, and prone to unexpected changes. Timers should be
* stopped explicitly, or started in a try-with-resources block
*/
@Deprecated
public static void stopMostRecentTimer() {
try {
stopTiming(RLOG.get().mostRecentTimer.name);
stopTiming(RLOG.get().mostRecentTimer.getName());
} catch (NullPointerException ignored) {
LOG.warn("Stopping timing failed because mostRecentTimer wasn't registered.");
}
Expand Down Expand Up @@ -450,8 +408,8 @@ public static void accumulate(RequestLog ctx) {
.stream()
.filter(
e -> e.getKey().contains(DRUID_QUERY_TIMER) ||
(e.getKey().equals(REQUEST_WORKFLOW_TIMER) && !e.getValue().isStarted()) ||
(e.getKey().equals(RESPONSE_WORKFLOW_TIMER) && e.getValue().isStarted())
(e.getKey().equals(REQUEST_WORKFLOW_TIMER) && !e.getValue().isRunning()) ||
(e.getKey().equals(RESPONSE_WORKFLOW_TIMER) && e.getValue().isRunning())
)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.logging;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

/**
* Represents a phase that is timed.
* TimedPhase is used to associate a Timer located in the registry with the exact duration of such a phase for a
* specific request. Times are in nanoseconds.
* <p>
* Note: This class is NOT thread-safe. Timers are intended to be started once by one thread, and stopped once by
* one thread (though those threads are not necessarily the same).
*/
public class TimedPhase implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(TimedPhase.class);

private final String name;
private long start;
private long duration;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think start and duration need to be made thread-safe (likely with AtomicLongs)... It wasn't an issue before, because TimedPhase was encapsulated within the RequestLog, which was a ThreadLocal and was never able to exist on more than 1 thread at a time, but TimedPhase is now it's own thing, and there's a possibility of sharing them across threads.

I tried playing around with using AtomicLong for these variables a little bit, and it's not hard to retain the existing semantics, though it did make me think about if we wanted to maintain the existing semantics around multiple consecutive starts / stops, especially in the new world of being across threads. No good solutions to this came to mind immediately, so it may make sense to add a cautionary note to the class-level doc for TimedPhase indicating the possible race condition of starting and stopping the same phase on multiple threads.

Anyway, here's the relevant results of my exploration, in case they help:

private static final long NOT_RUNNING = 0;

private final String name;
private final AtomicLong start = new AtomicLong();
private final AtomicLong duration = new AtomicLong();

/**
 * Start the phase.
 *
 * @return This phase after being started
 */
public TimedPhase start() {
    if (!start.compareAndSet(NOT_RUNNING, System.nanoTime())) {
        LOG.warn("Tried to start timer that is already running: {}", name);
    }
    return this;
}

/**
 * Stop the phase.
 */
public void stop() {
    long started = start.getAndUpdate(ignoredCurrent -> NOT_RUNNING);
    if (started == 0) {
        LOG.warn("Tried to stop timer that has not been started: {}", name);
        return;
    }
    duration.addAndGet(System.nanoTime() - started);
}

public long getDuration() {
    return duration.get();
}

public boolean isStarted() {
    return start.get() != 0;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ok with making it thread safe, especially since many of these timers are stopped on a different thread than they're started (the request vs response thread). Rethinking the semantics is outside the scope of this PR though, I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, after playing with it a little bit, I think I'm going to punt on this, largely because validating whether a timer is running outside of this class or not becomes a lot trickier in a multi-threaded scenario.


/**
* Constructor.
*
* @param name Name of the phase
*/
public TimedPhase(String name) {
this.name = name;
}

/**
* Start the phase.
*
* @return This phase after being started
*/
public TimedPhase start() {
if (isRunning()) {
LOG.warn("Tried to start timer that is already running: {}", name);
} else {
start = System.nanoTime();
}
return this;
}

/**
* Stop the phase.
* <p>
* This method just stops the timer. It does not register the time with the {@link RequestLog}. To register
* the timer, invoke {@link TimedPhase#registerTime()}. To do both with a single method call, see
* {@link TimedPhase#close()}
*
* @see TimedPhase#registerTime()
* @see TimedPhase#close()
*/
public void stop() {
if (!isRunning()) {
LOG.warn("Tried to stop timer that has not been started: {}", name);
return;
}
duration += System.nanoTime() - start;
start = 0;
}

/**
* Registers the duration of this timer with the RequestLog.
* <p>
* It is highly recommended that you {@link TimedPhase#stop()}} the timer first. Otherwise, the timings may
* be inaccurate. To both stop and register the timer at once see {@link TimedPhase#close}.
*
* @see TimedPhase#stop()
* @see TimedPhase#close()
*/
public void registerTime() {
RequestLog.registerTime(this);
}

/**
* Return the duration of the timer in nanoseconds.
*
* @return The duration of the timer in nanoseconds
*/
public long getDuration() {
if (isRunning()) {
LOG.warn("Timer '{}' is still running. Timings may be incorrect.", getName());
}
return duration;
}

public String getName() {
return name;
}

public TimeUnit getUnit() {
return TimeUnit.NANOSECONDS;
}

public boolean isRunning() {
return start != 0;
}

/**
* Stops the timer, and registers the timer with the RequestLog.
* <p>
* This is primarily meant to be used by the try-with-resources block, which both stops the timer and registers it
* with the RequestLog, though it can of course be called manually as well. If you want to stop the timer, but
* don't want to register the timer just yet, then see {@link TimedPhase#stop}.
*
* @see TimedPhase#stop()
* @see TimedPhase#registerTime()
*/
@Override
public void close() {
stop();
registerTime();
}
}