## Outline
* Sources:
    * https://www.kaggle.com/kingburrito666/cannabis-strains/downloads/cannabis-strains.zip/9
    * https://www.kaggle.com/tictactouka/cannabis
    
*Steps*
* Extract data into SQLite file, read tables into DataFrames with extracted columns
    * Intermediary step: transform those DFs by selecting columns, renaming, and possibly joining based on the foreign key relationships
* Extract data CSV file, read table into DataFrame with extracted columns
* Create 3 new DFs with columns from the extracted DFs
* Load the resulting DataFrames into postgres DB

In [2]:
import pandas as pd
from sqlalchemy import create_engine

# Extract data from SQLite File
* For reference: ~/classwork/10-Advanced-Data-Storage-and-Retrieval/1/Activities/03-Ins_ReadSQL/Solved/SQLIntoPandas.ipynb

### Import SQLite file

In [3]:
# Create the connection engine
engine = create_engine("sqlite:///Resources/strains.sqlite")
conn = engine.connect()

### Using SQL query, combine the strain name, medical effect, and average medical rating

In [83]:
medical_effects = pd.read_sql('SELECT Strains."name" as strain_name, \
	 MedicalEffects."medical_effect" as "medical_effects", ROUND(AVG(MedicalEffects.rating),2) as medical_rating \
FROM Strains \
INNER JOIN MedicalEffects \
ON Strains.id=MedicalEffects.strain_id \
GROUP BY name',conn)

In [84]:
medical_effects

Unnamed: 0,strain_name,medical_effects,medical_rating
0,00 Kush,Autoimmune Diseases and Inflammation,5.00
1,1024,Pain,4.00
2,2046,Epilepsy,4.50
3,4 Best 4 You,Psychiatric Symptoms,3.00
4,5G's Red x Sour Diesel,Nausea and Vomiting,5.00
5,8 Miles High,Anorexia and Cachexia,3.00
6,A.D.D.,Psychiatric Symptoms,5.00
7,AK Widow 47,Dependency and Withdrawal,2.50
8,AK-48,Anorexia and Cachexia,4.00
9,AK47,Nausea and Vomiting,1.84


# Extract data from cannabis.csv 

### Convert csv to DataFrame

In [68]:
file = 'Resources/cannabis.csv'
cannabis_df = pd.read_csv(file)

### Create processed DataFrame
* Extract columns: Strain, Rating, Effects, Flavor, Description
* Rename columns: strain_name, community_rating, effects, flavor, description

In [69]:
cannabis_cols = ["Strain", "Type", "Rating", "Effects", "Flavor", "Description"]
cannabis_transformed = cannabis_df[cannabis_cols].copy()

In [70]:
cannabis_transformed = cannabis_transformed.rename(columns={"Strain": "strain_name",
                                                            "Type":"type",
                                                            "Rating":"community_rating",
                                                            "Effects": "community_effects",
                                                            "Flavor":"flavor",
                                                            "Description":"description"
                                                           })

In [71]:
cannabis_transformed.drop_duplicates("strain_name", inplace=True)

In [72]:
cannabis_transformed = cannabis_transformed.reset_index()

In [73]:
cannabis_transformed=cannabis_transformed.rename(columns={'index':'id'})

In [74]:
cannabis_transformed['strain_name']=cannabis_transformed['strain_name'].str.replace('-',' ')

In [75]:
csv_strains = cannabis_transformed[['strain_name']]

# Transform resulting DataFrames into 3 new ones:
* strain_effects: strain_name, community_effects, medical_effects
* strain_ratings: strain_name, community_rating, medical_rating
* strain_description: strain_name, type, flavor, description

In [77]:
strains_combined = pd.merge(sql_strains,csv_strains, on='strain_name',how='inner').drop_duplicates()

In [78]:
strains_combined.set_index('strain_name').reset_index()

Unnamed: 0,strain_name
0,Grape Ape
1,Amnesia Haze
2,Blackberry Haze
3,Amnesia
4,Apple Jack
5,Afghan Skunk
6,Big White
7,Blueberry
8,Hawaiian Haze
9,Master Kush


In [86]:
#filter medical_effects by common strains 
sql_filtered = pd.merge(medical_effects,strains_combined,on="strain_name",how="inner")
sql_filtered.head()

Unnamed: 0,strain_name,medical_effects,medical_rating
0,1024,Pain,4.0
1,Acapulco Gold,Pain,5.0
2,Afghani,Pain,5.0
3,Alaskan Ice,Nausea and Vomiting,2.83
4,Amnesia,Movement Disorders,5.0


In [104]:
csv_effects = cannabis_transformed[['strain_name','community_effects']]
csv_ratings = cannabis_transformed[['strain_name','community_rating']]
sql_effects = medical_effects[['strain_name','medical_effects']]
sql_ratings = medical_effects[['strain_name','medical_rating']]
strain_effects = pd.merge(csv_effects, sql_effects, on="strain_name",how="inner")
strain_ratings = pd.merge(csv_ratings,sql_ratings,on="strain_name",how="inner")
strain_description = cannabis_transformed[['strain_name','type','flavor','description']]

In [105]:
strain_effects

Unnamed: 0,strain_name,community_effects,medical_effects
0,1024,"Uplifted,Happy,Relaxed,Energetic,Creative",Pain
1,Acapulco Gold,"Happy,Uplifted,Euphoric,Energetic,Relaxed",Pain
2,Afghani,"Relaxed,Sleepy,Happy,Euphoric,Hungry",Pain
3,Alaskan Ice,"Euphoric,Uplifted,Happy,Relaxed,Energetic",Nausea and Vomiting
4,Amnesia,"Happy,Euphoric,Energetic,Creative,Uplifted",Movement Disorders
5,Amnesia Haze,"Happy,Euphoric,Uplifted,Energetic,Creative",Nausea and Vomiting
6,Anesthesia,"Hungry,Sleepy,Relaxed,Happy,Euphoric",Pain
7,Aurora Indica,"Relaxed,Sleepy,Happy,Euphoric,Uplifted",Nausea and Vomiting
8,Bangi Haze,"Aroused,Uplifted,Euphoric,Hungry",Anorexia and Cachexia
9,Big Bang,"Relaxed,Sleepy,Happy,Euphoric,Aroused",Nausea and Vomiting


In [106]:
strain_ratings

Unnamed: 0,strain_name,community_rating,medical_rating
0,1024,4.4,4.00
1,Acapulco Gold,4.5,5.00
2,Afghani,4.3,5.00
3,Alaskan Ice,4.4,2.83
4,Amnesia,4.3,5.00
5,Amnesia Haze,4.3,4.00
6,Anesthesia,4.1,4.00
7,Aurora Indica,4.4,4.00
8,Bangi Haze,0.0,4.50
9,Big Bang,4.1,2.18


In [107]:
strain_description

Unnamed: 0,strain_name,type,flavor,description
0,100 Og,hybrid,"Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98 White Widow,hybrid,"Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,"Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13 Dawgs,hybrid,"Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K Gold,hybrid,"Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
5,3 Bears Og,indica,,3 Bears OG by Mephisto Genetics is an autoflow...
6,3 Kings,hybrid,"Earthy,Sweet,Pungent","The 3 Kings marijuana strain, a holy trinity o..."
7,303 Og,indica,"Citrus,Pungent,Earthy",The indica-dominant 303 OG is a Colorado strai...
8,3D Cbd,sativa,"Earthy,Woody,Flowery",3D CBD from Snoop Dogg’s branded line of canna...
9,3X Crazy,indica,"Earthy,Grape,Sweet","Also known as Optimus Prime, the indica-domina..."


# LOAD: Create database and populate

### Create database connection

In [101]:
connection_string = "postgres:postgres@127.0.0.1/cannabis_db"
engine = create_engine(f'postgresql://{connection_string}')
conn=engine.connect()

In [102]:
# Confirm tables
engine.table_names()

[]

### Load DataFrames into DB

In [103]:
strain_description.to_sql(name='description', con=engine, if_exists='append', index=True)

In [109]:
strain_effects.to_sql(name='effects', con=engine, if_exists='append', index=True)

In [110]:
strain_ratings.to_sql(name='ratings', con=engine, if_exists='append', index=True)