# In [None]:
import json
import boto3
import pandas as pd

import psycopg2
import psycopg2.extras as extras
from sqlalchemy import create_engine
import os 

# Postgres connection settings, read once at import time from the process
# environment. A missing variable raises KeyError during import.
ipaddress, dbname, username, password, port = [
    os.environ[key]
    for key in ('ipaddress', 'dbname', 'username', 'password', 'port')
]

def lambda_handler(event, context):
    """Rebuild the ``tags_counts_lambda`` Postgres table from the S3 CSV.

    Downloads ``tasty_data.csv`` from the ``tasty-datalake-bucket`` bucket,
    computes ingredient counts with ``ingredients_tags`` and writes the
    result to the ``tags_counts_lambda`` table, replacing the table if it
    already exists.

    Args:
        event: Lambda invocation payload (not used).
        context: Lambda runtime context (not used).

    Returns:
        None.
    """
    # Local import so this block does not depend on the file-level imports.
    from urllib.parse import quote_plus

    # Fetch the CSV object from S3
    s3 = boto3.client('s3')
    obj = s3.get_object(
        Bucket='tasty-datalake-bucket',
        Key='tasty_data.csv',
    )

    # Read data from the S3 object
    df = pd.read_csv(obj['Body'])

    # Create the table of ingredient counts
    df_ingredients_counts = ingredients_tags(df)

    # Build the Postgres URL. The credentials are percent-encoded because
    # characters such as '@', ':' or '/' in a password would otherwise be
    # parsed as URL delimiters.
    postgres_str = (
        f'postgresql://{quote_plus(username)}:{quote_plus(password)}'
        f'@{ipaddress}:{port}/{dbname}'
    )

    # Create the engine
    cnx = create_engine(postgres_str)
    try:
        # Load the table into the DB
        df_ingredients_counts.to_sql(
            'tags_counts_lambda',
            con=cnx,
            index=False,
            if_exists='replace',
        )
    finally:
        # A new engine is created on every invocation, so release its pooled
        # connections instead of leaving them open in a warm container.
        cnx.dispose()
    
# create table ingredients_counts
def ingredients_tags(df, threshold=5):
    """Count in how many rows each ingredient appears.

    The ``ingredients`` column is expected to hold stringified lists such as
    ``"['salt', 'sugar']"``. Every single quote is removed (so apostrophes
    inside a name are dropped too), the surrounding brackets are stripped
    and the remainder is split on ``', '``. An ingredient is counted at most
    once per row. Rows with a missing ``ingredients`` value are skipped, and
    the caller's DataFrame is not modified.

    Args:
        df: DataFrame with an ``ingredients`` column.
        threshold: Only ingredients whose count is strictly greater than
            this value are returned. Defaults to 5.

    Returns:
        DataFrame with columns ``ingredient`` and ``count``, sorted by
        ``count`` in descending order.
    """
    # Work on a derived Series so the caller's DataFrame is left untouched;
    # drop missing values, which have no string methods.
    cleaned = (
        df['ingredients']
        .dropna()
        .astype(str)
        .str.replace("'", "", regex=False)
        .str.strip('][')
        # Same result as splitting on ', ' and re-joining with '|'.
        .str.replace(', ', '|', regex=False)
    )

    # One indicator column per distinct ingredient
    dummies = cleaned.str.get_dummies(sep='|')

    # Column sums give the per-ingredient row counts. The stable sort keeps
    # tied ingredients in the column order produced by get_dummies.
    counts = dummies.sum().sort_values(ascending=False, kind='stable')
    result = counts.rename_axis('ingredient').reset_index(name='count')

    # Only ingredients which are used more than `threshold` times
    return result.loc[result['count'] > threshold]