# In [None]:
import json
import boto3
import pandas as pd

import psycopg2
import psycopg2.extras as extras
from sqlalchemy import create_engine
import os 

# Postgres connection settings, read from the process environment at import
# time. Every variable is required: a missing one raises KeyError.
ipaddress, dbname, username, password, port = (
    os.environ[setting]
    for setting in ('ipaddress', 'dbname', 'username', 'password', 'port')
)

def lambda_handler(event, context):
    """Load the tasty CSV from S3, count its tags and store the counts in Postgres.

    Reads ``tasty_data.csv`` from the ``tasty-datalake-bucket`` bucket, builds
    the tag-count table with ``table_tags`` and writes it to the
    ``tags_counts_lam`` table, replacing any existing table of that name.

    Args:
        event: Invocation payload; not used.
        context: Invocation context; not used.

    Returns:
        None.
    """
    # Imported locally so this function carries its own dependency.
    from urllib.parse import quote_plus

    # Fetch the CSV object from S3
    s3 = boto3.client('s3')
    obj = s3.get_object(
        Bucket='tasty-datalake-bucket',
        Key='tasty_data.csv',
    )

    # Read data from the S3 object
    df = pd.read_csv(obj['Body'])

    # Create table for tags counts
    df_tags_counts = table_tags(df)

    # Postgres login information as a database URL. The credentials are
    # percent-encoded because characters such as '@', ':' or '/' in a raw
    # username/password would otherwise be parsed as URL delimiters.
    postgres_str = (
        f'postgresql://{quote_plus(username)}:{quote_plus(password)}'
        f'@{ipaddress}:{port}/{dbname}'
    )

    # Create the engine that manages the database connections
    cnx = create_engine(postgres_str)
    try:
        # Load table to DB
        df_tags_counts.to_sql(
            'tags_counts_lam', con=cnx, index=False, if_exists='replace'
        )
    finally:
        # Release the engine's pooled connections even if the write fails.
        cnx.dispose()
    
# create table tags_counts
def table_tags(df):
    """Count how many rows of ``df`` carry each tag.

    Each ``tags_clean`` value is treated as a list-like string such as
    ``"['vegan', 'easy']"``: single quotes and the surrounding brackets are
    removed and the remainder is split on ``', '``. A tag is counted at most
    once per row. Missing values and empty lists contribute no tags.

    Args:
        df: DataFrame with a ``tags_clean`` column. It is not modified.

    Returns:
        DataFrame with the columns ``tag`` and ``count``, ordered by
        ``count`` descending.
    """
    # Build a derived Series instead of assigning back into `df`, so the
    # caller's frame is left untouched. The vectorised string methods pass
    # missing values through instead of raising on them.
    tag_strings = (
        df['tags_clean']
        .str.replace("'", "", regex=False)
        .str.strip('][')
        .str.replace(', ', '|', regex=False)
    )

    # create dummy data from tags: one 0/1 indicator column per distinct tag
    df_tags_dummies = tag_strings.str.get_dummies()
    # An empty tag list must never surface as a tag with a blank name.
    df_tags_dummies = df_tags_dummies.drop(columns=[''], errors='ignore')

    # create new df with counts
    # Only the indicator columns are summed; an `id` column must not be part
    # of this frame, otherwise the sum of all ids is reported as a tag "id".
    tags_counts = df_tags_dummies.sum().sort_values(ascending=False)
    df_tags_counts = tags_counts.to_frame(name='count')
    df_tags_counts = df_tags_counts.reset_index()
    df_tags_counts = df_tags_counts.rename(columns={"index": "tag"})

    return df_tags_counts