# Analyzing Rate Limit Exceeded events

Use this notebook to dig into Rate Limit Exceeded events on Monorail.

In [None]:
import gcp
import gcp.bigquery as bq

# Default context; used here only to report the current project id.
context = gcp.Context.default()
# The parenthesized single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call, and prints the same text.
print('The current project is %s' % context.project_id)

# Set the date to analyze here (YYYYMMDD; it is substituted for $date as the
# suffix of the request-log table name in the queries):
date = 20160514

In [None]:
%%sql --module by_ip
SELECT
  # One row per client IP with the count of request ids whose log
  # messages start with "Rate Limit Exceeded"; top 100 by that count.
  protoPayload.ip AS ip,
  COUNT(protoPayload.requestId) AS num
FROM [logs.appengine_googleapis_com_request_log_$date]
WHERE
  # Null moduleId == "default" module; otherwise backend queries are included too.
  protoPayload.moduleId IS NULL
  AND protoPayload.line.logMessage LIKE "Rate Limit Exceeded%"
GROUP BY ip
ORDER BY num DESC
LIMIT 100;

In [None]:
%%sql --module by_ip_class
SELECT
  # a, b, c, d: the 1st through 4th dot-separated fields of the client IP.
  # NOTE(review): grouping on all four fields gives one row per distinct
  # dotted-quad address; remove trailing fields from both SELECT and
  # GROUP BY to aggregate by network prefix instead - confirm intent.
  REGEXP_EXTRACT(protoPayload.ip, r'^(?:[^\.]*\.){0}([^\.]*)\.?') AS a,
  REGEXP_EXTRACT(protoPayload.ip, r'^(?:[^\.]*\.){1}([^\.]*)\.?') AS b,
  REGEXP_EXTRACT(protoPayload.ip, r'^(?:[^\.]*\.){2}([^\.]*)\.?') AS c,
  REGEXP_EXTRACT(protoPayload.ip, r'^(?:[^\.]*\.){3}([^\.]*)\.?') AS d,
  COUNT(protoPayload.requestId) AS num
FROM [logs.appengine_googleapis_com_request_log_$date]
WHERE
  # Null moduleId == "default" module; otherwise backend queries are included too.
  protoPayload.moduleId IS NULL
  AND protoPayload.line.logMessage LIKE "Rate Limit Exceeded%"
GROUP BY a, b, c, d
ORDER BY num DESC
LIMIT 100;

In [None]:
%%sql --module by_country
SELECT
  # Groups on the full log message text, restricted to rate-limit
  # messages that mention X-AppEngine-Country; top 100 by request count.
  # NOTE(review): legacy SQL COUNT(DISTINCT ...) can be approximate for
  # large cardinalities - confirm that is acceptable for ip_count.
  protoPayload.line.logMessage AS line,
  COUNT(DISTINCT protoPayload.ip) AS ip_count,
  COUNT(protoPayload.requestId) AS req_count
FROM
  # Flatten the repeated line records so each log line is its own row.
  FLATTEN([logs.appengine_googleapis_com_request_log_$date], protoPayload.line)
WHERE
  # Null moduleId == "default" module; otherwise backend queries are included too.
  protoPayload.moduleId IS NULL
  AND protoPayload.line.logMessage LIKE "Rate Limit Exceeded%"
  AND REGEXP_MATCH(protoPayload.line.logMessage, 'X-AppEngine-Country')
GROUP BY line
ORDER BY req_count DESC
LIMIT 100;

In [None]:
%%sql --module by_resource
SELECT
  # One row per requested resource with the count of request ids whose
  # log messages start with "Rate Limit Exceeded"; top 100 by that count.
  protoPayload.resource AS resource,
  COUNT(protoPayload.requestId) AS req_count
FROM [logs.appengine_googleapis_com_request_log_$date]
WHERE
  # Null moduleId == "default" module; otherwise backend queries are included too.
  protoPayload.moduleId IS NULL
  AND protoPayload.line.logMessage LIKE "Rate Limit Exceeded%"
GROUP BY resource
ORDER BY req_count DESC
LIMIT 100;

## Requests by IP

In [None]:
# Run the by_ip query with the configured date substituted in and load the
# result into a DataFrame.
df = (
    bq.Query(by_ip, date=date)
    .to_dataframe()
)
# Preview the first 20 rows.
df.head(20)

In [None]:
# Only draw the chart when the query returned at least one row.
if len(df) > 0:
  df.plot()

## Requests by IP Class

In [None]:
# Run the by_ip_class query with the configured date substituted in and load
# the result into a DataFrame.
df = (
    bq.Query(by_ip_class, date=date)
    .to_dataframe()
)
# Preview the first 20 rows.
df.head(20)

In [None]:
# Only draw the chart when the query returned at least one row.
if len(df) > 0:
  df.plot()

## Requests by Country Code

In [None]:
# Run the by_country query with the configured date substituted in and load
# the result into a DataFrame.
df = (
    bq.Query(by_country, date=date)
    .to_dataframe()
)
# Preview the first 20 rows.
df.head(20)

In [None]:
# Only draw the chart when the query returned at least one row.
if len(df) > 0:
  df.plot()

## Requests by Requested Resource

In [None]:
# Run the by_resource query with the configured date substituted in and load
# the result into a DataFrame.
df = (
    bq.Query(by_resource, date=date)
    .to_dataframe()
)
# Preview the first 20 rows.
df.head(20)

In [None]:
# Only draw the chart when the query returned at least one row.
if len(df) > 0:
  df.plot()