# Analyze Lambda Logs in CloudWatch

Get Data from CloudWatch

In [None]:
import boto3
import pandas as pd
import pandas as pd
import numpy as np

class display(object):
    """Display HTML representation of multiple objects.

    Each positional argument is a string containing a Python expression,
    normally just a variable name such as ``'df1'``. The expressions are
    evaluated lazily, when a representation is requested, against the
    globals of the module that defines this class.

    NOTE: the strings are passed to ``eval``; only pass trusted text.
    """
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        self.args = args

    @staticmethod
    def _resolve(expr):
        """Evaluate *expr* against module globals only.

        Passing ``globals()`` explicitly keeps method and comprehension
        locals out of the lookup. Without it, a variable that happens to
        share its name with an internal loop variable would resolve to the
        expression string itself rather than to the caller's object.
        """
        return eval(expr, globals())

    def _repr_html_(self):
        """Return one floated ``<div>`` per expression, joined by newlines."""
        return '\n'.join(
            self.template.format(expr, self._resolve(expr)._repr_html_())
            for expr in self.args
        )

    def __repr__(self):
        """Return ``expression`` + newline + ``repr(value)`` per argument."""
        return '\n\n'.join(
            expr + '\n' + repr(self._resolve(expr))
            for expr in self.args
        )


# Every client created from the default session uses the "dev" profile.
boto3.setup_default_session(profile_name='dev')
client = boto3.client('logs', region_name='us-east-1')


def _fetch_all_events(log_group_name, pattern):
    """Return every event in *log_group_name* matching *pattern*.

    A single filter_log_events call returns only one page of results, so
    the paginator is used to follow the continuation tokens to the end.
    """
    events = []
    pages = client.get_paginator('filter_log_events').paginate(
        logGroupName=log_group_name, filterPattern=pattern)
    for page in pages:
        events.extend(page['events'])
    return events


# describe_log_groups is paginated as well; collect the groups from all pages.
groups = []
for page in client.get_paginator('describe_log_groups').paginate():
    groups.extend(page['logGroups'])

start_events = []
report_events = []
kmeans_events = []
# The loop variable is deliberately still named `i`: a later scratch cell
# reads i['logGroupName'] after this loop has finished.
for i in groups:
    if i['logGroupName'].startswith("/aws/lambda"):
        start_events.extend(_fetch_all_events(i['logGroupName'], "START"))
        report_events.extend(_fetch_all_events(i['logGroupName'], "REPORT"))
        kmeans_events.extend(_fetch_all_events(i['logGroupName'], "LambdaKMeans"))

# Output of KMeans Lambda Function
# Every matching message is treated as one comma-separated record; all spaces
# are removed before splitting, so the fields carry no padding.
kmeans_columns = [
    "Measurement", "Request ID", "Log Group", "Log Stream", "Cores",
    "Number Points", "Number Dimensions", "Number Events", "Number Clusters",
    "Download Time", "Processing Time", "Upload Time", "Total Time", "Latency",
]
kmeans_data = [
    event['message'].replace(" ", "").strip().split(",")
    for event in kmeans_events
]

runtime = pd.DataFrame(kmeans_data, columns=kmeans_columns)

print("Found %d records." % len(runtime))

# Lambda Memory Report
def _parse_report(timestamp, message):
    """Turn one REPORT log message into a ``{column name: value}`` row.

    The message is split on tabs into ``name: value`` fields. The leading
    "REPORT" marker is removed from the first field name and a trailing
    "MB" / "ms" unit is removed from the value. Fields without a colon
    (for example the bare newline after the last tab) are skipped.
    Values are kept as strings.
    """
    row = {"End Timestamp": timestamp}
    for column in message.split("\t"):
        name, separator, value = column.partition(":")
        if not separator:
            continue
        name = name.strip()
        # Remove the exact "REPORT" prefix; str.lstrip("REPORT") would strip
        # any leading R/E/P/O/T characters from ordinary field names too.
        if name.startswith("REPORT"):
            name = name[len("REPORT"):].strip()
        value = value.strip()
        for unit in ("MB", "ms"):
            if value.endswith(unit):
                value = value[:-len(unit)].strip()
        row[name] = value
    return row


# Building the frame from dicts lets records carry different sets of fields:
# a field missing from a record becomes NaN instead of shifting the columns
# or making the DataFrame constructor fail.
report_rows = [_parse_report(event['timestamp'], event['message'])
               for event in report_events]
accounting = pd.DataFrame(report_rows)

# Start Time for Request
# The third space-separated token of each START message is used as the
# request id; the event timestamp becomes the start time.
start_data = [
    (event["timestamp"], event['message'].split(" ")[2])
    for event in start_events
]
start = pd.DataFrame(start_data, columns=["Start Timestamp", "RequestId"])

# Both joins are inner joins keyed on the request id, with the same suffixes
# for any column names that collide.
merge_options = dict(left_on="Request ID", right_on="RequestId",
                     how="inner", suffixes=["_l", "_r"])
all_df = runtime.merge(accounting, **merge_options)
all_df = all_df.merge(start, **merge_options)
all_df.head(3).T

In [None]:
# Number of rows in the merged frame.
len(all_df)

In [None]:
# Persist the merged frame as CSV; the relative path resolves against the
# current working directory.
all_df.to_csv("lambda_2_2.csv")

In [None]:
# Ad-hoc look at the raw REPORT events of a single log group.
# NOTE(review): `i` is not assigned in this cell; it relies on a loop variable
# left behind by whichever cell ran last - confirm which log group is meant.
client.filter_log_events(logGroupName=i['logGroupName'], filterPattern="REPORT")

# Clean S3 and CloudWatch

In [None]:
# WARNING: destructive. This deletes every log group returned for `client`,
# not only the /aws/lambda ones.
# describe_log_groups is paginated, so all pages are collected up front;
# deleting only the first page would leave groups behind.
log_groups = []
for response in client.get_paginator('describe_log_groups').paginate():
    log_groups.extend(response['logGroups'])
for i in log_groups:
    print("Delete %s" % str(i["logGroupName"]))
    client.delete_log_group(logGroupName=i["logGroupName"])

In [None]:
import os
# IPython shell capture: runs the aws CLI with the "dev" profile and stores the
# output lines in `output`. awk keeps the third space-separated field of each
# line (NOTE(review): presumably the bucket name - confirm against the
# `aws s3 ls` output format).
output=!aws s3 ls --profile dev | awk -F" " '{print $3}'
for i in output:
    print(i)
    # WARNING: destructive. First remove everything in the bucket recursively,
    # then remove the bucket itself. The exit status of os.system is ignored.
    os.system("aws s3 rm s3://%s --recursive --profile dev"%i)
    os.system("aws s3 rb s3://%s --profile dev"%i)

# Scratch

In [None]:
import sklearn.cluster, pickle
import numpy as np
# Mini-batch k-means estimator configured for 10 clusters; it is not fitted here.
kmeans_model = sklearn.cluster.MiniBatchKMeans(n_clusters=10)
# Random test data: a 30 x 3 array of standard-normal samples.
data = np.random.randn(30, 3)


In [None]:
# cluster_centers_ is an attribute that only exists once the model has been
# fitted. The estimator is not a container, so the `in` operator raises
# TypeError; hasattr is the correct check.
if hasattr(kmeans_model, "cluster_centers_"):
    print("yes")

In [None]:
# Fit the estimator on `data`, then show the resulting cluster centers.
kmeans_model.fit(data)
kmeans_model.cluster_centers_

In [None]:
# Bucket used for the model round-trip test below.
bucket_name = "model-test-123"
# S3 client for us-east-1; the bucket is created with a private ACL.
s3_client = boto3.client('s3', region_name='us-east-1')
s3_client.create_bucket(ACL='private', Bucket=bucket_name)

In [None]:
# Serialize the model with pickle and store the bytes under the key
# "model-lambda" in the bucket.
s3_client.put_object(Body=pickle.dumps(kmeans_model), 
                          Bucket=bucket_name, 
                          Key="model-lambda")

In [None]:
# Fetch the stored object and read its whole body into memory.
model_pickle=s3_client.get_object(Bucket=bucket_name, 
              Key="model-lambda")['Body'].read()

In [None]:
# Rebuild the model from the downloaded bytes and show its cluster centers.
# NOTE: pickle.loads can execute arbitrary code embedded in the payload; only
# unpickle objects whose origin you trust.
pickle.loads(model_pickle).cluster_centers_