In [None]:
import datetime as dt
import os
import re
import urllib
import urllib.parse
from pathlib import Path
from time import sleep

import mysql.connector
import numpy as np
import pandas as pd
import pymssql
import pyodbc
import requests
from sqlalchemy import create_engine

In [None]:
# https://search.cannabis.ca.gov/

In [None]:
# Show up to 100 columns when a wide DataFrame is displayed
pd.set_option("display.max_columns", 100)

In [None]:
# Date of the CCP export being processed. Its zero-padded day / month / year
# strings are used to build the input and output file names further down.

current_date = dt.date(2021, 10, 6)

current_day, current_month, current_year = (
    current_date.strftime(code) for code in ("%d", "%m", "%Y")
)

### Create SQL Connection

In [None]:
# # This creates the local connection to SQL

# driver = "ODBC Driver 17 for SQL Server"
# engine = create_engine(f'mssql://LAPTOP-E6QKON1L/ca_cannabis?driver={driver}')
# engine_con = engine.connect()

In [None]:
# This creates the connection to the California Cannabis files in SQL.
#
# The password is read from the environment instead of being stored in the
# notebook:
#   CA_CANNABIS_DB_PASSWORD  (required)
#   CA_CANNABIS_DB_SERVER, CA_CANNABIS_DB_NAME, CA_CANNABIS_DB_USER
#       (optional; the defaults are the values this cell used before)
# NOTE(review): the password that used to be written in this cell should be
# treated as leaked and rotated.

db_password = os.environ.get("CA_CANNABIS_DB_PASSWORD")
if not db_password:
    raise RuntimeError("Set the CA_CANNABIS_DB_PASSWORD environment variable before running this cell")

db_server = os.environ.get("CA_CANNABIS_DB_SERVER",
                           "bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com")
db_name = os.environ.get("CA_CANNABIS_DB_NAME", "ca_cannabis")
db_user = os.environ.get("CA_CANNABIS_DB_USER", "admin")

# The ODBC connection string is URL-quoted so it can be passed through the
# SQLAlchemy URL as the odbc_connect query parameter.
params = urllib.parse.quote_plus("DRIVER={ODBC Driver 17 for SQL Server};"
                                 f"SERVER={db_server};"
                                 f"DATABASE={db_name};"
                                 f"UID={db_user};"
                                 f"PWD={db_password}")
engine_con = create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))

In [None]:
# Load the current contents of the three SQL tables this notebook updates

ca_main_old = pd.read_sql(sql="SELECT * FROM ca_main", con=engine_con)
ca_roll_old = pd.read_sql(sql="SELECT * FROM ca_roll", con=engine_con)
ca_contact_old = pd.read_sql(sql="SELECT * FROM ca_contact", con=engine_con)

### Read in the California Cannabis Portal Data

In [None]:
# Start by reading in and combining the two file types from the CCP site
# https://search.cannabis.ca.gov/
# Do not forget to choose All Data instead of only Visible Data

annual_df = pd.read_csv(f"license_repository/ccp_annual_{current_year}_{current_month}_{current_day}.csv")
provisional_df = pd.read_csv(f"license_repository/ccp_provisional_{current_year}_{current_month}_{current_day}.csv")

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way (each frame keeps its own index labels, exactly as append did).
ccp_df = pd.concat([annual_df, provisional_df])

### Read in the API

In [None]:
# Request every BCC license from the DCA API, documented at
# https://iservices.dca.ca.gov/docs/bcclicensereadservices#/
# Equivalent curl command:
# curl -X GET --header "Accept: */*" --header "app_id: $DCA_APP_ID" --header "app_key: $DCA_APP_KEY" "https://iservices.dca.ca.gov/api/bcclicenseread/getAllBccLicenses"
#
# The app_id / app_key pair is read from the DCA_APP_ID and DCA_APP_KEY
# environment variables rather than being written in the notebook; a missing
# variable raises KeyError here.
# NOTE(review): the key pair that used to be written in this cell should be
# treated as leaked and replaced.

headers = {
    'Accept': '*/*',
    'app_id': os.environ['DCA_APP_ID'],
    'app_key': os.environ['DCA_APP_KEY'],
}

# (connect timeout, read timeout) in seconds, so a dead connection cannot hang
# the notebook forever.
response = requests.get('https://iservices.dca.ca.gov/api/bcclicenseread/getAllBccLicenses',
                        headers=headers,
                        timeout=(10, 300))

# Stop here on an HTTP error instead of trying to parse an error page as data
response.raise_for_status()

In [None]:
# This translates it to a Pandas dataframe

api_df = pd.DataFrame(response.json())

In [None]:
api_df.head()

### Combine API Data and CCP Data

In [None]:
# Inner-join the CCP rows with the API rows on the license number.
# The API only covers licenses from the BCC licensing authority, so the rest of
# the CCP rows are handled separately and recombined later.

bcc_combined = pd.merge(
    ccp_df,
    api_df,
    how='inner',
    on='licenseNumber',
    suffixes=('_ccp', '_api'),
)

In [None]:
bcc_combined.head()

In [None]:
# Then we separate out the information we need, renaming the merged CCP/API
# columns to the names used by the SQL tables.
# (license_category is not taken from the portal any more; the old
# 'licensingAuthority' assignment stays disabled.)

bcc_df = bcc_combined['licenseNumber'].to_frame(name='license_number')

# target column -> source column in bcc_combined, in output order
bcc_column_sources = {
    'license_description': 'licenseType_ccp',
    'license_term': 'licenseTerm',
    'license_status': 'licenseStatus_ccp',
    'name_legal': 'businessLegalName',
    'name_dba': 'businessDbaName',
    'date_issue': 'issueDate',
    'date_expiration': 'expirationDate',
    'business_description': 'activity',
    'business_structure': 'businessStructure_api',
    'adult_medicinal': 'licenseDesignation',
    'contact_email': 'email',
    'contact_phone': 'phone',
    'contact_website': 'website',
    'business_owner': 'businessOwner',
    'contact_street': 'premiseStreetAddress',
    'contact_city': 'premiseCity_api',
    'contact_county': 'premiseCounty_api',
}
for target_column, source_column in bcc_column_sources.items():
    bcc_df[target_column] = bcc_combined[source_column]

# The state is a constant; the zip code comes last to keep the column order
bcc_df['contact_state'] = 'CA'
bcc_df['contact_zip'] = bcc_combined['premiseZip']

### Clean Non BCC Data

In [None]:
## Start by removing the BCC rows from the CCP df
#
# The portal export no longer has a 'licensingAuthorityId' column
# (previously: ccp_df[ccp_df['licensingAuthorityId'] != 'BCC']),
# so BCC rows are identified by their license type instead.

bcc_types_list = [
    'Commercial -  Distributor',
    'Commercial -  Distributor - Transport Only',
    'Commercial -  Event Organizer',
    'Commercial -  Retailer',
    'Commercial -  Retailer - Non-Storefront',
    'Commercial -  Microbusiness',
]

is_bcc_type = ccp_df['licenseType'].isin(bcc_types_list)
bcc_removed = ccp_df[~is_bcc_type]

In [None]:
bcc_removed.licenseDesignation.value_counts()

In [None]:
# Seed the non-BCC frame with the license number, keeping bcc_removed's index
non_bcc_df = bcc_removed['licenseNumber'].to_frame(name='license_number')

In [None]:
non_bcc_df

In [None]:
bcc_removed.columns

In [None]:
# Copy the CCP portal columns into non_bcc_df under the SQL column names.
# (license_category is not taken from the portal any more; the old
# 'licensingAuthority' assignment stays disabled.)

# target column -> source column in bcc_removed, in output order
non_bcc_sources_before_website = {
    'license_description': 'licenseType',
    'license_term': 'licenseTerm',
    'license_status': 'licenseStatus',
    'name_legal': 'businessLegalName',
    'name_dba': 'businessDbaName',
    'date_issue': 'issueDate',
    'date_expiration': 'expirationDate',
    'business_description': 'activity',
    'business_structure': 'businessStructure',
    'adult_medicinal': 'licenseDesignation',
    'contact_email': 'businessEmail',
    'contact_phone': 'businessPhone',
}
non_bcc_sources_after_website = {
    'business_owner': 'businessOwnerName',
    'contact_street': 'premiseStreetAddress',
    'contact_city': 'premiseCity',
    'contact_county': 'premiseCounty',
}

for target_column, source_column in non_bcc_sources_before_website.items():
    non_bcc_df[target_column] = bcc_removed[source_column]

# No website is copied for these rows; the column is filled with blanks
non_bcc_df['contact_website'] = ''

for target_column, source_column in non_bcc_sources_after_website.items():
    non_bcc_df[target_column] = bcc_removed[source_column]

# Constant state, and a blank zip code
non_bcc_df['contact_state'] = 'CA'
non_bcc_df['contact_zip'] = ''

In [None]:
non_bcc_df

### Combine BCC and Non BCC

In [None]:
# This gives us the "df_all" CSV used in other sheets, so we can mostly copy over

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the BCC rows on
# top of the non-BCC rows the same way (index labels are kept as they are).
df_all = pd.concat([bcc_df, non_bcc_df])

In [None]:
df_all

In [None]:
df_all.adult_medicinal.value_counts()

#### Change Data

In [None]:
# # This changes the License Category section from initials to names

# df_all.license_category.replace('Bureau of Cannabis Control (BCC)', 'Bureau of Cannabis Control', inplace=True)
# df_all.license_category.replace('CalCannabis Cultivation Licensing (CCL)', 'Cannabis Cultivation License', inplace=True)
# df_all.license_category.replace('Manufactured Cannabis Safety Branch (MCSB)', 'Manufactured Cannabis License', inplace=True)

In [None]:
# This creates columns for Annual and Provisional type licenses by extracting from license_term.
# The flags are the strings '1' / '0'; any other license_term value is left unchanged.

annual_flags = {'Annual': '1', 'Provisional': '0'}
provisional_flags = {'Provisional': '1', 'Annual': '0'}

df_all['license_annual'] = df_all.license_term.replace(annual_flags)
df_all['license_provisional'] = df_all.license_term.replace(provisional_flags)

In [None]:
# This creates columns for Medical and Adult Use type licenses by extracting from adult_medicinal.
# na=False makes a missing designation count as "no match" (0); without it
# str.contains returns NaN for those rows and the int conversion raises.

df_all['license_adult'] = df_all.adult_medicinal.str.contains('Adult', na=False).astype(int)
df_all['license_medical'] = df_all.adult_medicinal.str.contains('Med', na=False).astype(int)

In [None]:
# This removes the unnecessary information from the license_description column.
# str.replace returns a new Series, so the result is assigned back each time.

removable_text = ("Commercial -  ", "Cultivation -  ", "Manufacturer - ")
for text in removable_text:
    df_all.license_description = df_all.license_description.str.replace(text, "", regex=False)

# This is just a minor formatting thing
df_all.license_description = df_all.license_description.str.replace(" - ", "-", regex=False)

In [None]:
# This creates one 0/1 column per license status by comparing license_status
# with the status label and converting the boolean result to int

status_columns = {
    'status_active': 'Active',
    'status_canceled': 'Canceled',
    'status_expired': 'Expired',
    'status_inactive': 'Inactive',
    'status_revoked': 'Revoked',
    'status_surrendered': 'Surrendered',
    'status_suspended': 'Suspended',
}

for flag_column, status_label in status_columns.items():
    df_all[flag_column] = df_all.license_status.eq(status_label).astype(int)

In [None]:
# Parse the issue and expiration dates into pandas datetimes.
# The values look different when displayed in pandas, but come out in the
# expected format once written to CSV.

for date_column in ("date_issue", "date_expiration"):
    df_all[date_column] = pd.to_datetime(df_all[date_column])

In [None]:
# This may not be necessary, but makes the code a bit cleaner

df_all['business_description'] = df_all['business_description'].replace(to_replace='Data Not Available', value='')

In [None]:
# This strips everything except digits from contact_phone (the result is a
# string of digits, or '' when there are none).

def _phone_digits(value):
    """Return only the digits of a phone value as a string ('' if missing)."""
    if pd.isna(value):
        return ''
    # A number that pandas parsed as a float (e.g. 5551234567.0) would otherwise
    # pick up a stray trailing "0" from its ".0" suffix.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return ''.join(re.findall(r'\d+', str(value)))

# One whole-column assignment instead of writing through
# df_all['contact_phone'].iloc[n], which is chained assignment and is not
# guaranteed to modify df_all.
df_all['contact_phone'] = df_all['contact_phone'].map(_phone_digits)

In [None]:
df_all.business_owner.value_counts()

In [None]:
df_all.business_owner.iloc[-5].split(', ')

In [None]:
# Split each business_owner string on ', ' so every row becomes a list of owner names

list_of_business_owner = [owners.split(', ') for owners in df_all.business_owner]

In [None]:
# This creates 2 separate lists: the first owner of every row, and the second
# owner when there is more than one (otherwise the string 'None')

owner_1 = [owners[0] for owners in list_of_business_owner]
owner_2 = [
    'None' if len(owners) == 1 else owners[1]
    for owners in list_of_business_owner
]

In [None]:
# This converts the lists we just created into columns for the dataframe

df_all['contact_owner_1'] = owner_1
df_all['contact_owner_2'] = owner_2

#### Add Columns

In [None]:
# This creates the name_legal_clean column from name_legal:
#   1. lower-case the name
#   2. drop commas, periods and apostrophes
#   3. drop the company-type words inc / llc / corp / corporation / co
#   4. strip leading and trailing whitespace
#
# Punctuation is removed first so that "inc.", ", llc" and "l.l.c." all reduce
# to a plain word before step 3. The word pattern is anchored with \b on both
# sides and contains no wildcard, so only those exact words are removed (the
# earlier patterns used an unescaped "." and therefore also removed words such
# as "cow", "inch" or "corps"). regex=True is passed explicitly because the
# pandas default for str.replace is regex=False from version 2.0 on.

company_type_words = r"\b(?:inc|llc|corp|corporation|co)\b"

df_all['name_legal_clean'] = (
    df_all['name_legal']
    .str.lower()
    .str.replace(r"[,.']", "", regex=True)
    .str.replace(company_type_words, "", regex=True)
    .str.strip()
)

df_all['is_current'] = 1

In [None]:
# Let's just give this a glance

df_all[['name_legal', 'name_legal_clean']].head()

In [None]:
# It seems that a few of the websites given are actually street addresses.
# na=False treats a missing website as "no match"; without it the mask
# contains NaN and cannot be used to filter the frame.

df_all[df_all['contact_website'].str.contains('Street', na=False)]

## Add Data From SQL

In [None]:
# This reads in the 3 old SQL tables that we're working with.
# The same three queries were already run right after the connection was
# created, so this cell simply refreshes ca_main_old / ca_roll_old /
# ca_contact_old with the current table contents.

ca_main_old = pd.read_sql("SELECT * FROM ca_main", engine_con)
ca_roll_old = pd.read_sql("SELECT * FROM ca_roll", engine_con)
ca_contact_old = pd.read_sql("SELECT * FROM ca_contact", engine_con)

In [None]:
# Will probably change this to just one of the 3, but I need to see which of them matter

ca_main_old['in_main'] = 1
ca_roll_old['in_roll'] = 1
ca_contact_old['in_contact'] = 1

In [None]:
old_main = list(ca_main_old['license_number'].unique())

In [None]:
new_main = list(df_all['license_number'].unique())

In [None]:
len(old_main)

In [None]:
len(new_main)

In [None]:
# Print every license that is in the old ca_main table but not in the new data.
# Membership is tested against a set so each lookup is constant time instead of
# a scan through the whole new_main list.
new_main_lookup = set(new_main)

for lic in old_main:
    if lic not in new_main_lookup:
        print(lic)

In [None]:
# Since the California database no longer has this info, the license category
# is rebuilt from the old ca_main table: every license_description that
# appeared under a category there is assigned to that category here.
bcc_license_types = list(ca_main_old[ca_main_old['license_category'] == 'Bureau of Cannabis Control']['license_description'].unique())
ccl_license_types = list(ca_main_old[ca_main_old['license_category'] == 'Cannabis Cultivation License']['license_description'].unique())
mcl_license_types = list(ca_main_old[ca_main_old['license_category'] == 'Manufactured Cannabis License']['license_description'].unique())

df_all['license_category'] = ''

# .assign returns a new frame, so the category is never written into a filtered
# slice of df_all (which triggers SettingWithCopyWarning).
bcc_all = df_all[df_all['license_description'].isin(bcc_license_types)].assign(license_category='Bureau of Cannabis Control')
ccl_all = df_all[df_all['license_description'].isin(ccl_license_types)].assign(license_category='Cannabis Cultivation License')
mcl_all = df_all[df_all['license_description'].isin(mcl_license_types)].assign(license_category='Manufactured Cannabis License')

# Rows whose description matches none of the three lists are not part of
# df_with_category, and a description present in two lists is included twice.
# DataFrame.append was removed in pandas 2.0, hence pd.concat.
df_with_category = pd.concat([bcc_all, ccl_all, mcl_all])


# for n in range(len(df_all)):
#     if df_all['license_description'][n] in bcc_license_types:
#         df_all['license_category'][n] = 'Bureau of Cannabis Control'
#     elif df_all['license_description'][n] in ccl_license_types:
#         df_all['license_category'][n] = 'Cannabis Cultivation License'
#     elif df_all['license_description'][n] in mcl_license_types:
#         df_all['license_category'][n] = 'Manufactured Cannabis License'
#     else:
#         df_all['license_category'][n] = ''

In [None]:
bcc_license_types

In [None]:
# Only adopt the categorized frame when it has exactly as many rows as df_all.
# The previous `print(error)` stopped the notebook through a NameError on the
# undefined name `error`; raise a descriptive exception instead.
if len(df_all) != len(df_with_category):
    raise ValueError(
        f"df_with_category has {len(df_with_category)} rows but df_all has {len(df_all)}; "
        "check which license_description values matched no category or more than one"
    )

df_all = df_with_category

In [None]:
df_all.columns

In [None]:
ca_main_old.columns

### From CA Roll

In [None]:
# This adds the roll_up_id, company_roll_up, and in_roll columns

df_with_roll = df_all.merge(ca_roll_old, on='license_number', suffixes=('', '_sql'), how='left')

In [None]:
df_with_roll.in_roll.value_counts()

In [None]:
# This fills the in_roll columns

df_with_roll['in_roll'] = df_with_roll.in_roll.fillna(0).astype(int)

In [None]:
df_with_roll.in_roll.value_counts()

In [None]:
df_with_roll[df_with_roll.in_roll == 0]

### CA Contact

In [None]:
# Don't know if this matters, but the numbers were coming up weird:
# keep only the first 10 characters of each phone number, as a string.
# Done as one whole-column operation instead of assigning through
# ca_contact_old.contact_phone[n], which is chained assignment.

ca_contact_old['contact_phone'] = ca_contact_old['contact_phone'].astype(str).str[:10]

ca_contact_old.contact_phone.tail()

In [None]:
# This adds the needed info from the ca_contact database in SQL

df_with_contact = df_with_roll.merge(ca_contact_old[['license_number', 'contact_website', 'contact_owner_1', 'contact_owner_2', 'in_contact']], suffixes=('', '_sql'), on='license_number', how='left')

In [None]:
df_with_contact.in_contact.value_counts()

In [None]:
# This fills the in_contact columns

df_with_contact['in_contact'] = df_with_contact.in_contact.fillna(0).astype(int)

In [None]:
df_with_contact.in_contact.value_counts()

In [None]:
df_with_contact.in_roll.value_counts()

In [None]:
# df_with_contact.columns

In [None]:
df_with_contact

### CA Main

In [None]:
# This adds the needed columns from ca_main 

df_with_main = df_with_contact.merge(ca_main_old[['license_number', 'date_uploaded', 'in_main', 'is_current']], suffixes=('', '_sql'), on='license_number', how='left')

In [None]:
df_with_main.head()

In [None]:
# This will handle the is_current flag: 1 when the license number is part of
# the current download (df_all), otherwise 0.
# NOTE(review): df_with_main appears to be built by left-merging onto df_all,
# so presumably every row ends up with 1 - confirm this is the intent.

current_licenses = df_all['license_number'].unique()

# Vectorised membership test; replaces a row loop that wrote through
# df_with_main['is_current'][n] (chained assignment).
df_with_main['is_current'] = df_with_main['license_number'].isin(current_licenses).astype(int)

In [None]:
df_with_main.in_main.value_counts()

In [None]:
# This fills the missing in_main values with 0 and converts the column to int

df_with_main['in_main'] = df_with_main.in_main.fillna(0).astype(int)

In [None]:
df_with_main.in_main.value_counts()

In [None]:
df_with_main.in_contact.value_counts()

In [None]:
df_with_main.in_roll.value_counts()

In [None]:
df_with_main[df_with_main.license_number == 'CCL20-0000338']

In [None]:
df_with_main.head()

## Converting File to df_to_edit Format

In [None]:
df_with_sql = df_with_main

In [None]:
# Not fully sure what this is for, but I'm following the steps in the orignial file

df_with_sql['changed_contact_info'] = 0
df_with_sql['in_db'] = 0

In [None]:
# Just want to check that in_roll and in_contact are the same on every row.
# `differences` is (number of matching rows) - (number of rows), i.e. 0 when
# they all agree and negative otherwise.

differences = (df_with_sql['in_roll'] == df_with_sql['in_contact']).astype(int).sum() - len(df_with_sql)

# The previous `print(error)` stopped the notebook through a NameError on the
# undefined name `error`; raise a descriptive exception instead.
if differences != 0:
    raise ValueError(f"in_roll and in_contact disagree on {-differences} row(s)")

print(differences)

In [None]:
# This adds the in_sql column by picking the max of in_roll and in_contact.
# They are usually the same, but this is just in case: a license found in
# either table is flagged 1.

df_with_sql['in_sql'] = df_with_sql[['in_roll', 'in_contact']].max(axis=1)

df_with_sql.in_sql.value_counts()

### Fixing the Website Columns

In [None]:
# There are many different spellings of "no value" in the data; this collapses
# them into a single placeholder

def clean_nones(column, none):
    """Replace every "no value" spelling in `column` with `none`, in place.

    The spellings handled are NaN, the empty string, 'None', 'none', 'N/A',
    'n/a', '0' and 'Data Not Found'.

    column: pandas Series to modify in place.
    none: the replacement value.
    Returns None.
    """
    empty_markers = (np.nan, '', 'None', 'none', 'N/A', 'n/a', '0', 'Data Not Found')
    for marker in empty_markers:
        column.replace(marker, none, inplace=True)

In [None]:
# And then we run this for the two website columns so we can actually work with them.
# clean_nones modifies the Series it is given in place. Calling it directly on
# df_with_sql[col] relies on that Series being a view of the frame, which is
# not guaranteed (and never true with pandas Copy-on-Write), so each column is
# cleaned on an explicit copy and assigned back.

for website_column in ('contact_website', 'contact_website_sql'):
    cleaned_column = df_with_sql[website_column].copy()
    clean_nones(cleaned_column, 'None')
    df_with_sql[website_column] = cleaned_column

In [None]:
# Where the SQL table has no website ('None'), fall back to the freshly
# downloaded website, but only when that value looks like a web address
# (contains '.co', '.net' or '.org').

sql_website_missing = df_with_sql['contact_website_sql'] == 'None'
looks_like_url = (
    df_with_sql['contact_website']
    .astype(str)
    .str.contains(r'\.co|\.net|\.org', regex=True)
)
rows_to_fill = sql_website_missing & looks_like_url

# One .loc assignment instead of writing through
# df_with_sql['contact_website_sql'].iloc[n], which is chained assignment and
# is not guaranteed to modify df_with_sql.
df_with_sql.loc[rows_to_fill, 'contact_website_sql'] = df_with_sql.loc[rows_to_fill, 'contact_website']

In [None]:
df_with_sql[df_with_sql['contact_website'] != df_with_sql['contact_website_sql']][['contact_website', 'contact_website_sql']]

In [None]:
df_with_sql['contact_website'].value_counts()

In [None]:
df_with_sql['contact_website_sql'].value_counts()

In [None]:
df_with_sql['license_term'].value_counts()

### Creating the to_edit DataFrame

In [None]:
# First let's create the starter DataFrame

df_to_edit = pd.DataFrame()

In [None]:
# Copy only the needed columns from df_with_sql into df_to_edit, in the order
# they should appear in the CSV. This is easier than dropping all the unneeded
# columns manually.

# Output columns whose source column in df_with_sql has a different name
renamed_sources = {
    'license_medicinal': 'license_medical',
    'business_company_type': 'business_structure',
    'contact_website': 'contact_website_sql',
    'in_db': 'in_main',
}

output_columns = [
    'license_number', 'license_category', 'license_description',
    'license_adult', 'license_medicinal', 'license_annual', 'license_provisional',
    'name_legal', 'name_dba', 'name_legal_clean',
    'roll_up_id', 'company_roll_up',
    'date_issue', 'date_expiration',
    'status_active', 'status_canceled', 'status_expired', 'status_inactive',
    'status_revoked', 'status_surrendered', 'status_suspended',
    'business_description', 'business_company_type',
    'contact_email', 'contact_phone', 'contact_website',
    'contact_owner_1', 'contact_owner_2',
    'contact_street', 'contact_city', 'contact_county', 'contact_state', 'contact_zip',
    'changed_contact_info',
    'date_uploaded',
    'in_db', 'in_sql', 'is_current',
]

for output_column in output_columns:
    source_column = renamed_sources.get(output_column, output_column)
    df_to_edit[output_column] = df_with_sql[source_column]

In [None]:
df_with_sql.in_sql.value_counts()

In [None]:
df_to_edit.in_db.value_counts()

In [None]:
df_to_edit.sort_values(by='date_issue', ascending=False).head()

In [None]:
# Sorting by name_legal_clean

df_to_edit.sort_values(by='name_legal_clean', inplace=True)

In [None]:
df_to_edit.reset_index(inplace=True)

In [None]:
df_to_edit.drop(axis='columns', labels='index', inplace=True)

In [None]:
df_to_edit.head()

In [None]:
df_to_edit.isnull().sum()

In [None]:
df_to_edit.head()

In [None]:
df_to_edit.shape

In [None]:
df_to_edit.drop_duplicates().shape

In [None]:
# Apparently there are a lot of duplicates

df_to_edit = df_to_edit.drop_duplicates()

## Convert to CSV and Edit

In [None]:
# # Date should change to Date entered at the begining of the notebook

df_to_edit.to_csv(f'edited_files/df_with_api_final_{current_year}_{current_month}_{current_day}_to_edit.csv', index=False)

# TASK LEFT BEFORE CONTINUING

In [None]:
# Make sure that the correct files are being read in, i.e. the SQL tables instead of the new read-ins, or vice versa

##### Make Changes to the company_roll_up File in the CSV Before Reuploading

In [None]:
# This checks that the manual edits were made and prevents the read-in if they
# were not: every row of the edited CSV must have a company_roll_up.
edited_csv_path = f'edited_files/df_with_api_final_{current_year}_{current_month}_{current_day}_to_edit.csv'

# Read the file once and reuse it for both the check and the result
df_edited_candidate = pd.read_csv(edited_csv_path, index_col=False)

missing_roll_ups = int(df_edited_candidate['company_roll_up'].isnull().sum())

# The previous `print(error)` stopped the notebook through a NameError on the
# undefined name `error`; raise a descriptive exception instead.
if missing_roll_ups > 0:
    raise ValueError(
        f"{missing_roll_ups} row(s) in {edited_csv_path} have no company_roll_up; "
        "fill them in before continuing"
    )

df_edited = df_edited_candidate

In [None]:
df_edited.head()

In [None]:
# # This removes the Unnamed: 0 Column

# df_edited = df_edited.drop(axis='columns', labels='Unnamed: 0')

In [None]:
# Split the edited dataframe in two: rows that already have a roll_up_id and
# rows that still need one

roll_up_id_missing = df_edited['roll_up_id'].isnull()

df_edited_fill = df_edited[~roll_up_id_missing]
df_edited_null = df_edited[roll_up_id_missing]

In [None]:
# This makes a list of the company_roll_ups of the licenses being added

names_missing = df_edited_null['company_roll_up'].unique()

In [None]:
names_missing

In [None]:
df_edited_null.head()

### Make Changes to df_edited_null

In [None]:
# Making this list lets up group up new additions with the same company_roll_up name so they can get the id number

new_roll_ups = df_edited_null.company_roll_up.unique()
new_roll_ups

In [None]:
# This adds the new roll_up_ids

# Roll up ids look like "CA-" followed by a number, so the first three
# characters are dropped to get the numeric part.
num_roll_list = [int(roll[3:]) for roll in df_edited_fill.roll_up_id]

# Next free number; default=0 keeps this working when no id exists yet
max_roll = max(num_roll_list, default=0) + 1

# One new id per new company_roll_up name, so licenses that share a name share
# an id. The number is zero-padded to at least 5 digits and prefixed with
# "CA-". Formatting with :05d also works for numbers with 5 or more digits,
# where the manual padding loop left an int and `'CA-' + roll` raised TypeError.
new_roll_up_ids = {}
for new in new_roll_ups:
    new_roll_up_ids[new] = f'CA-{max_roll:05d}'
    max_roll += 1

# .assign builds a new frame instead of writing through
# df_edited_null['roll_up_id'][mask], which is chained assignment on a slice of
# df_edited.
df_edited_null = df_edited_null.assign(
    roll_up_id=df_edited_null['company_roll_up'].map(new_roll_up_ids)
)

In [None]:
df_edited_null.sort_values(by='roll_up_id')['roll_up_id']

In [None]:
# This reconnects the two halves (rows that just received a roll_up_id and rows
# that already had one) and restores the original row order.
# DataFrame.append was removed in pandas 2.0, hence pd.concat.
df_edited_all = pd.concat([df_edited_null, df_edited_fill])
df_edited_all = df_edited_all.sort_index()

In [None]:
df_edited_all['license_annual'].sum()

### New SQL Files

In [None]:
# Slice df_edited_all into one frame per SQL table (ca_roll, ca_contact, ca_main).
# The roll and contact frames get a fresh 0..n-1 index; ca_main_new keeps the
# index of df_edited_all.

roll_columns = ['license_number', 'roll_up_id', 'company_roll_up']

contact_columns = ['license_number', 'contact_email', 'contact_phone', 'contact_website',
                   'contact_owner_1', 'contact_owner_2', 'contact_street', 'contact_city',
                   'contact_county', 'contact_state', 'contact_zip']

main_columns = ['license_number', 'license_category', 'license_description', 'license_adult', 'license_medicinal',
                'license_annual', 'license_provisional', 'name_legal', 'name_dba', 'date_issue', 'date_expiration',
                'status_active', 'status_canceled', 'status_expired', 'status_inactive', 'status_revoked', 'status_surrendered', 'status_suspended',
                'business_description', 'business_company_type', 'date_uploaded', 'in_db', 'is_current']

ca_roll_new = df_edited_all[roll_columns].reset_index(drop=True)

ca_contact_new = df_edited_all[contact_columns].reset_index(drop=True)

ca_main_new = df_edited_all[main_columns]

In [None]:
df_edited_null.columns

#### Subtract the Old Data

In [None]:
# Let's read back in the SQL databse for ca_roll so we can add to it

sql_ca_roll = pd.read_sql("SELECT * FROM ca_cannabis.dbo.ca_roll", engine_con)

In [None]:
# This makes the lists of unique licenses in SQL and in the new dataframe so we
# can find the licenses that are not in SQL yet

sql_lic_list = list(sql_ca_roll.license_number.unique())
df_lic_list = list(df_edited_all.license_number.unique())

# Membership is tested against a set so each lookup is constant time instead of
# a scan through sql_lic_list; the order of df_lic_list is preserved.
sql_lic_lookup = set(sql_lic_list)
new_lic_list = [x for x in df_lic_list if x not in sql_lic_lookup]

In [None]:
# This creates a single column dataframe of all of the new licenses so we can join the other dataframes to it.
# `columns` must be an ordered list-like; the previous {'license_number'} was a
# set, which recent pandas versions reject.
new_lic_df = pd.DataFrame(new_lic_list, columns=['license_number'])

In [None]:
# And this creates the new dataframes for sending to sql

ca_contact_new = new_lic_df.merge(ca_contact_new, on='license_number', how='left')
ca_main_new = new_lic_df.merge(ca_main_new, on='license_number', how='left')
ca_roll_new = new_lic_df.merge(ca_roll_new, on='license_number', how='left')

In [None]:
# This adds the date to the date_uploaded columns
ca_main_new['date_uploaded'] = current_date

## Upload to SQL

### CA Roll

In [None]:
ca_roll_new.to_sql('ca_roll', con=engine_con, if_exists='append', index=False)

In [None]:
ca_roll_new

### CA Contact

In [None]:
ca_contact_new.head()

In [None]:
# # This changes the format of the phone number so that contact_phone can be changed to a float, and then changes it

# for n in range(len(ca_contact_new)):
#     ca_contact_new['contact_phone'].iloc[n] = ''.join(re.findall(r'\d+', str(ca_contact_new['contact_phone'].iloc[n])))
    
# ca_contact_new['contact_phone'] = ca_contact_new['contact_phone'].astype(float)

In [None]:
# This reduces contact_zip to its digits and stores it as an integer
# (0 when the value is missing or has no digits).

def _zip_to_int(value):
    """Return the digits of a zip code value as an int, or 0 if there are none."""
    if pd.isna(value):
        return 0
    # A zip that pandas parsed as a float (e.g. 90210.0) would otherwise pick up
    # a stray trailing "0" from its ".0" suffix and become 902100.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    digits = ''.join(re.findall(r'\d+', str(value)))
    return int(digits) if digits else 0

# One whole-column assignment instead of writing through
# ca_contact_new['contact_zip'].iloc[n], which is chained assignment and is not
# guaranteed to modify ca_contact_new.
ca_contact_new['contact_zip'] = ca_contact_new['contact_zip'].map(_zip_to_int).astype(int)

In [None]:
# This is just so I can test if the command will change the old info
# ca_contact_new['new_contact'] = 1

In [None]:
# This sends the file to the correct location, which is currently the test file
ca_contact_new.to_sql('ca_contact', con=engine_con, if_exists='append', index=False)

In [None]:
ca_contact_new

### CA Main

In [None]:
# Make sure the three date columns are real datetimes before uploading
for date_column in ('date_expiration', 'date_issue', 'date_uploaded'):
    ca_main_new[date_column] = pd.to_datetime(ca_main_new[date_column])

In [None]:
ca_main_new.dtypes

In [None]:
ca_main_new.to_sql('ca_main', con=engine_con, if_exists='append', index=False)

In [None]:
ca_main_new

### Create main_alter

In [None]:
sql_main = pd.read_sql("SELECT * FROM ca_main", engine_con)

In [None]:
sql_main.columns

In [None]:
# Columns of the edited file that correspond to the ca_main table.
# .copy() makes df_main an independent frame: the next cell assigns converted
# columns into it, which on a plain slice of df_edited triggers
# SettingWithCopyWarning.
df_main = df_edited[['license_number', 'license_category', 'license_description',
       'license_adult', 'license_medicinal', 'license_annual',
       'license_provisional', 'name_legal', 'name_dba', 'date_issue',
       'date_expiration', 'status_active', 'status_canceled', 'status_expired',
       'status_revoked', 'status_surrendered', 'status_suspended', 'status_inactive',
       'business_description', 'business_company_type', 'date_uploaded',
       'in_db', 'is_current']].copy()

In [None]:
sql_main.dtypes

In [None]:
df_main.dtypes

In [None]:
# Convert the df_main columns so their types can be compared with the SQL table.
# The text columns (license_number, license_category, license_description,
# name_legal, name_dba, business_description, business_company_type) are left
# as they are; status_inactive and is_current are not converted either.

float_columns = [
    'license_adult', 'license_medicinal', 'license_annual', 'license_provisional',
    'status_active', 'status_canceled', 'status_expired',
    'status_revoked', 'status_surrendered', 'status_suspended',
    'in_db',
]
datetime_columns = ['date_issue', 'date_expiration', 'date_uploaded']

for float_column in float_columns:
    df_main[float_column] = df_main[float_column].astype(float)

for datetime_column in datetime_columns:
    df_main[datetime_column] = pd.to_datetime(df_main[datetime_column])

In [None]:
ca_main_old = pd.read_sql("SELECT * FROM ca_main", engine_con)

In [None]:
# Build the list of license_numbers whose df_main row differs from the row stored in
# ca_main (ca_main_old); these are the rows that belong in the ca_main_alter table.

# (column, printed label) pairs, compared in this order. Only the first differing
# column is reported for a license.
alter_checks = [
    ('license_description', 'description'),
    ('license_category', 'category'),
    ('license_adult', 'adult'),
    ('license_medicinal', 'medical'),
    ('license_annual', 'annual'),
    ('license_provisional', 'provisional'),
    ('status_active', 'active'),
    ('status_canceled', 'canceled'),
    ('status_expired', 'expired'),
    ('status_revoked', 'revoked'),
    ('status_surrendered', 'surrendrered'),
    ('status_suspended', 'suspended'),
    ('status_inactive', 'inactive'),
    ('date_expiration', 'date_expiration'),
    ('is_current', 'is_current'),
]
alter_columns = [column for column, _ in alter_checks]

# Index both frames by license_number once, keeping the first row per license (the row
# the comparison has always used), instead of re-filtering each frame for every column.
df_main_by_license = (
    df_main.drop_duplicates(subset='license_number')
    .set_index('license_number')[alter_columns]
)
ca_main_old_by_license = (
    ca_main_old.drop_duplicates(subset='license_number')
    .set_index('license_number')[alter_columns]
)

to_alter = []

for lic in df_main['license_number']:
    if lic not in ca_main_old_by_license.index:
        # Previously this surfaced as a bare "IndexError: list index out of range".
        raise KeyError(f"license_number {lic!r} is in df_main but not in ca_main_old")

    # NOTE(review): NaN/NaT never compare equal, so a license with a missing value in
    # the same column of both frames is always flagged -- confirm that is intended.
    for column, label in alter_checks:
        if df_main_by_license.at[lic, column] != ca_main_old_by_license.at[lic, column]:
            to_alter.append(lic)
            print(lic, label)
            break

In [None]:
# Keep only the df_main rows flagged for alteration and show the resulting shape.
alter_mask = df_main['license_number'].isin(to_alter)
df_for_alter = df_main[alter_mask]
df_for_alter.shape

In [None]:
# NOTE(review): `stop` is not defined anywhere in the visible notebook; presumably this
# cell fails on purpose to halt "Run All" before the database writes below -- confirm.
print(stop)

In [None]:
# Replace the ca_main_alter table with the rows that need to be altered.
df_for_alter.to_sql(name='ca_main_alter', con=engine_con, index=False, if_exists='replace')

In [None]:
# Run the query normally run in SQL to update ca_main from ca_main_alter.
# Rows are matched on license_number; only the columns listed in SET are overwritten.

ca_main_update_sql = '''
Update t1
SET t1.[license_category] = t2.[license_category],
    t1.[license_description] = t2.[license_description],
    
    t1.[license_adult] = t2.[license_adult],
    t1.[license_medicinal] = t2.[license_medicinal],
    t1.[license_annual] = t2.[license_annual],
    t1.[license_provisional] = t2.[license_provisional],
    
    t1.[date_expiration] = t2.[date_expiration],
    
    t1.[status_active] = t2.[status_active],
    t1.[status_canceled] = t2.[status_canceled],
    t1.[status_expired] = t2.[status_expired],
    t1.[status_revoked] = t2.[status_revoked],
    t1.[status_surrendered] = t2.[status_surrendered],
    t1.[status_suspended] = t2.[status_suspended],
    t1.[status_inactive] = t2.[status_inactive],
    
    t1.[is_current] = t2.[is_current]
    
FROM [ca_cannabis].[dbo].[ca_main] as t1
INNER JOIN [ca_cannabis].[dbo].[ca_main_alter] as t2
ON t1.[license_number] = t2.[license_number];
                  '''

# SECURITY NOTE(review): the database credentials are hardcoded in the notebook. They
# should be read from the environment or a secrets manager, and this password rotated.
ca_conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};'
                      'SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;'
                      'DATABASE=ca_cannabis;'
                      'UID=admin;'
                      'PWD=N19lrqxnurTUJLJT6GFe;')

try:
    ca_cursor = ca_conn.cursor()
    # The earlier "SELECT * FROM ca_main" was dropped: its result was never read.
    ca_cursor.execute(ca_main_update_sql)
    ca_conn.commit()
finally:
    # Always release the connection; an uncommitted update is rolled back on close.
    ca_conn.close()

### Add to all_main

In [None]:
# Create the SQLAlchemy engine for the all_cannabis database, which holds all_main.
# SECURITY NOTE(review): credentials are hardcoded here; prefer environment variables.

all_cannabis_odbc = (
    "DRIVER={ODBC Driver 17 for SQL Server};"
    "SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;"
    "DATABASE=all_cannabis;"
    "UID=admin;"
    "PWD=N19lrqxnurTUJLJT6GFe"
)
all_cannabis_params = urllib.parse.quote_plus(all_cannabis_odbc)
all_cannabis_con = create_engine(f"mssql+pyodbc:///?odbc_connect={all_cannabis_params}")

In [None]:
# Read all_main and narrow it down to the California rows, renumbered from zero.
all_main_sql = pd.read_sql('all_main', con=all_cannabis_con)
is_ca_row = all_main_sql['us_state'] == 'CA'
ca_main_sql = all_main_sql[is_ca_row].reset_index().drop(columns='index')

In [None]:
# Re-read the ca_cannabis tables from SQL and join ca_main to ca_roll on license_number.
updated_ca_main = pd.read_sql('ca_main', con=engine_con)
updated_ca_roll = pd.read_sql('ca_roll', con=engine_con)
updated_ca_cannabis = pd.merge(updated_ca_main, updated_ca_roll, on='license_number')

In [None]:
# Collapse the status_* flag columns into a single license_status label.
# Flags are applied in this order, so when several flags are set on one row the last
# matching label wins; rows with no flag set keep the empty string.
status_labels = [
    ('status_active', 'Active'),
    ('status_canceled', 'Canceled'),
    ('status_expired', 'Expired'),
    ('status_revoked', 'Revoked'),
    ('status_surrendered', 'Surrendered'),
    ('status_suspended', 'Suspended'),
    ('status_inactive', 'Inactive'),
]

updated_ca_cannabis['license_status'] = ''
for flag_column, status_label in status_labels:
    # .loc writes into the frame itself; the previous df[col][n] = ... chained
    # assignment is not guaranteed to (and does nothing under Copy-on-Write).
    updated_ca_cannabis.loc[updated_ca_cannabis[flag_column] == 1, 'license_status'] = status_label

In [None]:
# Preview the first rows of the California subset of all_main.
ca_main_sql.head()

In [None]:
# Reformat the California data into the layout used by all_main.
all_main_source_columns = ['license_number', 'roll_up_id', 'name_legal', 'license_status',
                           'is_current', 'license_description', 'date_expiration']

# .copy() makes new_all_main an independent frame, so adding columns below neither
# triggers SettingWithCopyWarning nor depends on view/copy semantics.
new_all_main = updated_ca_cannabis[all_main_source_columns].copy()
new_all_main['state'] = 'CA'

# license_description values grouped by the sub-category they map to.
descriptions_by_category = {
    'Cultivator': ['Specialty Cottage Indoor', 'Specialty Cottage Outdoor',
                   'Medium Outdoor', 'Medium Indoor', 'Small Outdoor', 'Small Indoor',
                   'Specialty Outdoor', 'Small Mixed-Light Tier 1',
                   'Specialty Cottage Mixed-Light Tier 1', 'Medium Mixed-Light Tier 1',
                   'Small Mixed-Light Tier 2', 'Specialty Cottage Mixed-Light Tier 2',
                   'Medium Mixed-Light Tier 2', 'Specialty Mixed-Light Tier 1',
                   'Specialty Mixed-Light Tier 2', 'Specialty Indoor', 'Nursery'],
    'Retailer': ['Retailer', 'Retailer Nonstorefront', 'Retailer-Non-Storefront'],
    'Manufacturer': ['Type 6', 'Type 7', 'Type S', 'Type N', 'Type P', 'Processor'],
    'Distributor': ['Distributor', 'Medical Transporter', 'Retail Transporter',
                    'Medical Delivery', 'Retail Delivery', 'Distributor-Transport Only'],
    'Laboratory': ['Testing Laboratory'],
    'Other': ['Medical Research Development Cultivation', 'Medical Operator',
              'Retail Operator', 'Medical Hospitality', 'Event Organizer'],
    'Multiple': ['Microbusiness'],
}
category_by_description = {
    description: category
    for category, descriptions in descriptions_by_category.items()
    for description in descriptions
}

# Descriptions that are not listed above (including missing ones) get an empty string.
new_all_main['license_sub_category'] = (
    new_all_main['license_description'].map(category_by_description).fillna('')
)

In [None]:
# Drop every license that is already present in the California rows of all_main.
existing_ca_licenses = list(ca_main_sql['license_number'].unique())
new_main_for_sql = new_all_main[~new_all_main['license_number'].isin(existing_ca_licenses)]

In [None]:
# Preview new_main_for_sql under the all_main column names. The result is only
# displayed, not assigned, so new_main_for_sql itself is unchanged by this cell.
new_main_for_sql.rename(columns={
    'roll_up_id': 'rollup_id',
    'name_legal': 'legal_name',
    'state': 'us_state',
    'license_sub_category': 'license_category',
    'date_expiration': 'expiration_date',
})

In [None]:
# Rename the columns to the names used by all_main and keep the renamed frame.
all_main_column_names = {
    'roll_up_id': 'rollup_id',
    'name_legal': 'legal_name',
    'date_expiration': 'expiration_date',
    'state': 'us_state',
    'license_sub_category': 'license_category',
}
new_main_for_sql = new_main_for_sql.rename(columns=all_main_column_names)

In [None]:
# Display the new licenses that are about to be appended to all_main.
new_main_for_sql

In [None]:
# Append the new licenses to the all_main table in SQL.
new_main_for_sql.to_sql(name='all_main', con=all_cannabis_con, if_exists='append', index=False)

### Alter the CA data in all_main

In [None]:
# Re-read the California tables and join them on license_number, to check whether
# anything in all_main needs to be altered.
ca_main_sql_for_alter = pd.read_sql('ca_main', con=engine_con)
ca_roll_sql_for_alter = pd.read_sql('ca_roll', con=engine_con)
ca_sql_data = pd.merge(ca_main_sql_for_alter, ca_roll_sql_for_alter, on='license_number')

In [None]:
# Read all_main and narrow it down to the California licenses, renumbered from zero.
# Note that this rebinds ca_main_sql_for_alter, which the previous cell used for ca_main.
all_main_sql_for_alter = pd.read_sql('all_main', con=all_cannabis_con)
is_ca_license = all_main_sql_for_alter['us_state'] == 'CA'
ca_main_sql_for_alter = all_main_sql_for_alter[is_ca_license].reset_index().drop(columns='index')

In [None]:
# Display the merged ca_main / ca_roll data.
ca_sql_data

In [None]:
# Display the merged ca_main / ca_roll data (same output as the previous cell).
ca_sql_data

In [None]:
# Collapse the status_* flag columns of ca_sql_data into a single license_status label.
# Flags are applied in this order, so when several flags are set on one row the last
# matching label wins; rows with no flag set keep the empty string.
status_labels = [
    ('status_active', 'Active'),
    ('status_canceled', 'Canceled'),
    ('status_expired', 'Expired'),
    ('status_revoked', 'Revoked'),
    ('status_surrendered', 'Surrendered'),
    ('status_suspended', 'Suspended'),
    ('status_inactive', 'Inactive'),
]

ca_sql_data['license_status'] = ''
for flag_column, status_label in status_labels:
    # Boolean masks cover every row of ca_sql_data. The previous loop was bounded by
    # len(updated_ca_cannabis), a different frame, and used chained assignment.
    ca_sql_data.loc[ca_sql_data[flag_column] == 1, 'license_status'] = status_label

In [None]:
# Derive license_sub_category from license_description.

# license_description values grouped by the sub-category they map to.
descriptions_by_category = {
    'Cultivator': ['Specialty Cottage Indoor', 'Specialty Cottage Outdoor',
                   'Medium Outdoor', 'Medium Indoor', 'Small Outdoor', 'Small Indoor',
                   'Specialty Outdoor', 'Small Mixed-Light Tier 1',
                   'Specialty Cottage Mixed-Light Tier 1', 'Medium Mixed-Light Tier 1',
                   'Small Mixed-Light Tier 2', 'Specialty Cottage Mixed-Light Tier 2',
                   'Medium Mixed-Light Tier 2', 'Specialty Mixed-Light Tier 1',
                   'Specialty Mixed-Light Tier 2', 'Specialty Indoor', 'Nursery'],
    'Retailer': ['Retailer', 'Retailer Nonstorefront', 'Retailer-Non-Storefront'],
    'Manufacturer': ['Type 6', 'Type 7', 'Type S', 'Type N', 'Type P', 'Processor'],
    'Distributor': ['Distributor', 'Medical Transporter', 'Retail Transporter',
                    'Medical Delivery', 'Retail Delivery', 'Distributor-Transport Only'],
    'Laboratory': ['Testing Laboratory'],
    'Other': ['Medical Research Development Cultivation', 'Medical Operator',
              'Retail Operator', 'Medical Hospitality', 'Event Organizer'],
    'Multiple': ['Microbusiness'],
}
category_by_description = {
    description: category
    for category, descriptions in descriptions_by_category.items()
    for description in descriptions
}

# One vectorized lookup replaces the row-by-row chained assignment. Descriptions that
# are not listed above (including missing ones) get an empty string.
ca_sql_data['license_sub_category'] = (
    ca_sql_data['license_description'].map(category_by_description).fillna('')
)


In [None]:
# Drop the existing license_category column; a later cell renames license_sub_category
# to license_category in its place.
ca_sql_data = ca_sql_data.drop('license_category', axis=1)

In [None]:
# Display the derived license_sub_category column.
ca_sql_data['license_sub_category']

In [None]:
# Rename the columns of both frames so that their column names match.
all_main_renames = {
    'roll_up_id': 'rollup_id',
    'license_sub_category': 'license_category',
    'state': 'us_state',
    'name_legal': 'legal_name',
    'date_expiration': 'expiration_date',
}

ca_sql_for_alter = ca_main_sql_for_alter.rename(columns=all_main_renames)
ca_sql_data = ca_sql_data.rename(columns=all_main_renames)

In [None]:
# Display the license_category column after the rename.
ca_sql_data['license_category']

In [None]:
# Build the list of California license_numbers whose all_main row (ca_sql_for_alter)
# differs from the current California data (ca_sql_data).

# (column, printed label) pairs, compared in this order. Only the first differing
# column is reported for a license. rollup_id is checked once; the second rollup_id
# branch in the old chain could never be reached.
all_main_checks = [
    ('rollup_id', 'rollup'),
    ('license_category', 'category'),
    ('is_current', 'current'),
    ('legal_name', 'name'),
    ('expiration_date', 'expiration'),
    ('license_status', 'status'),
]
all_main_check_columns = [column for column, _ in all_main_checks]

# Index both frames by license_number once, keeping the first row per license (the row
# the comparison has always used), instead of re-filtering each frame for every column.
all_main_by_license = (
    ca_sql_for_alter.drop_duplicates(subset='license_number')
    .set_index('license_number')[all_main_check_columns]
)
ca_data_by_license = (
    ca_sql_data.drop_duplicates(subset='license_number')
    .set_index('license_number')[all_main_check_columns]
)

ca_all_main_alter_list = []

for lic in ca_sql_for_alter['license_number']:
    if lic not in ca_data_by_license.index:
        # Previously this surfaced as a bare "IndexError: list index out of range".
        raise KeyError(f"license_number {lic!r} is in all_main but not in ca_sql_data")

    # NOTE(review): NaN/NaT never compare equal, so a license with a missing value in
    # the same column of both frames is always flagged -- confirm that is intended.
    for column, label in all_main_checks:
        if all_main_by_license.at[lic, column] != ca_data_by_license.at[lic, column]:
            print(lic, label)
            ca_all_main_alter_list.append(lic)
            break

In [None]:
# Show how many licenses were flagged for alteration in all_main.
len(ca_all_main_alter_list)

In [None]:
# Keep the California rows whose license_number was flagged for alteration.
needs_alter = ca_sql_data['license_number'].isin(ca_all_main_alter_list)
ca_all_main_alter_df = ca_sql_data[needs_alter]

In [None]:
# Make sure the columns carry the all_main names. rename() ignores keys that are not
# present, so this is harmless when the columns have already been renamed.
alter_df_renames = {
    'roll_up_id': 'rollup_id',
    'license_sub_category': 'license_category',
    'state': 'us_state',
    'name_legal': 'legal_name',
    'date_expiration': 'expiration_date',
}
ca_all_main_alter_df = ca_all_main_alter_df.rename(columns=alter_df_renames)

In [None]:
# Tag every row to alter as a California license.
ca_all_main_alter_df = ca_all_main_alter_df.assign(us_state='CA')

In [None]:
# Keep only the columns that are selected for the upload, in this order.
all_main_alter_columns = [
    'license_number', 'rollup_id', 'legal_name', 'license_status', 'is_current',
    'license_description', 'expiration_date', 'us_state', 'license_category',
]
ca_all_main_alter_df = ca_all_main_alter_df[all_main_alter_columns]

In [None]:
# Write the rows to alter into the state-specific ca_to_alter table, replacing its contents.
ca_all_main_alter_df.to_sql(name='ca_to_alter', con=all_cannabis_con, if_exists='replace', index=False)

In [None]:
# Run the query that updates all_main from the ca_to_alter table.
# Rows are matched on license_number; only the columns listed in SET are overwritten.

all_main_update_sql = '''
Update t1
SET t1.[license_status] = t2.[license_status],
    t1.[license_description] = t2.[license_description],
    t1.[license_category] = t2.[license_category],
    t1.[is_current] = t2.[is_current],
    t1.[rollup_id] = t2.[rollup_id],
    t1.[expiration_date] = t2.[expiration_date]
FROM [all_cannabis].[dbo].[all_main] as t1
INNER JOIN [all_cannabis].[dbo].[ca_to_alter] as t2
ON t1.[license_number] = t2.[license_number];
'''

# SECURITY NOTE(review): the database credentials are hardcoded in the notebook. They
# should be read from the environment or a secrets manager, and this password rotated.
all_conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};'
                      'SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;'
                      'DATABASE=all_cannabis;'
                      'UID=admin;'
                      'PWD=N19lrqxnurTUJLJT6GFe;')

try:
    all_cursor = all_conn.cursor()
    # The earlier "SELECT * FROM all_main" was dropped: its result was never read.
    all_cursor.execute(all_main_update_sql)
    all_conn.commit()
finally:
    # Always release the connection; an uncommitted update is rolled back on close.
    all_conn.close()

### Bespoke / Sweat Equity API

In [None]:
# Stack the newly added licenses on top of the altered ones; together they are the rows
# sent to the API below. DataFrame.append was removed in pandas 2.0; pd.concat with its
# defaults keeps the original index labels of both frames, as append did.
new_main = pd.concat([new_main_for_sql, ca_all_main_alter_df])

In [None]:
# Store expiration_date as strings (the API cell below sends this column in a JSON
# payload), then display the frame.
new_main = new_main.assign(expiration_date=new_main['expiration_date'].astype(str))
new_main

In [None]:
# Check whether the first row's is_current equals 1 (the same test the API cell below
# uses to derive its boolean).
new_main['is_current'].iloc[0] == 1

In [None]:
# Do not truncate long cell values when displaying DataFrames.
pd.options.display.max_colwidth = None


In [None]:
# Inspect the new_main row(s) for one specific license.
new_main[new_main['license_number'] == 'C10-0000439-LIC']

In [None]:
# requests.post(
#             'https://bespoke-api-server-production.herokuapp.com/api/v1/licenses/bulk_update_licenses',
#             json= {
#           'company_licenses': [
              
#               {
#             'license_number': 'C10-0000439-LIC',
#             'Rollup ID': 'CA-01466',
#             'Legal Name': 'GREEN CROSS SOCIETY OF SOUTHERN CALIFORNIA CORPORATION',
#             'license_status': 'Active',
#             'Is Current': True,
#             'License Description': 'Retailer',
#             'Us State': 'CA',
#             'Expiration Date': '2022-07-15',
#             'License Category': 'Retailer'
#             },
#           ],
#         },
#             headers={'x-api-key': 'ASYNC-SERVER-API-KEY-6Lfy84qn9ew93ZwMXSYhdwqj'}
#         )

In [None]:
# Renumber the rows from zero; the API cell below looks rows up by these labels.
new_main = new_main.reset_index()
new_main = new_main.drop(columns='index')

In [None]:
# Display the column dtypes of new_main.
new_main.dtypes

In [None]:
# Display the number of rows and columns in new_main.
new_main.shape

In [None]:
### This is just code for adding missing datapoints if I check and find any

# params = urllib.parse.quote_plus("DRIVER={ODBC Driver 17 for SQL Server};"
#                                  "SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;"
#                                  "DATABASE=all_cannabis;"
#                                  "UID=admin;"
#                                  "PWD=N19lrqxnurTUJLJT6GFe")
# all_con = create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))

# all_main = pd.read_sql('all_main', all_con)

# not_in_meta = pd.read_csv('../file_of_combined_data/not_in_meta.csv')

# new_main = all_main[all_main['license_number'].isin(list(not_in_meta['0']))]

# new_main = new_main.reset_index().drop(columns='index')

# new_main['expiration_date'] = new_main['expiration_date'].astype(str)

In [None]:
# Send every row of new_main to the bulk_update_licenses endpoint: first in requests of
# 10 licenses with a pause after each request, then any leftover rows one per request.

license_api_url = 'https://bespoke-api-server-production.herokuapp.com/api/v1/licenses/bulk_update_licenses'
# Staging endpoint, kept for reference:
# 'https://bespoke-api-server-staging.herokuapp.com/api/v1/licenses/bulk_update_licenses'

# SECURITY NOTE(review): the API key is hardcoded in the notebook. It should be read from
# the environment or a secrets manager, and this key rotated.
license_api_headers = {'x-api-key': 'ASYNC-SERVER-API-KEY-6Lfy84qn9ew93ZwMXSYhdwqj'}
# Alternative header kept from the original cell: {'x-api-key': '80030179832729980933'}

licenses_per_request = 10
seconds_between_batches = 5


def _license_payload(position):
    """Return the API payload dict for the new_main row at integer position ``position``.

    ``is_current`` is sent as a real boolean (True only when the stored value equals 1)
    and is computed for this specific row. Every other field is passed through
    unchanged from the matching new_main column.
    """
    def value(column):
        return new_main[column].iloc[position]

    return {
        'license_number': value('license_number'),
        'rollup_id': value('rollup_id'),
        'legal_name': value('legal_name'),
        'license_status': value('license_status'),
        'is_current': bool(value('is_current') == 1),
        'license_description': value('license_description'),
        'us_state': value('us_state'),
        'expiration_date': value('expiration_date'),
        'license_category': value('license_category'),
    }


def _post_licenses(payloads):
    """POST a list of license payloads in one request and report an error response."""
    response = requests.post(
        license_api_url,
        json={'company_licenses': payloads},
        headers=license_api_headers,
    )
    if not response.ok:
        # Report instead of raising, so the remaining rows are still attempted as before.
        print('bulk_update_licenses returned', response.status_code, 'for',
              [payload['license_number'] for payload in payloads])
    return response


num_of_lic_sent = 0

# Full batches of 10.
while len(new_main) - num_of_lic_sent >= licenses_per_request:
    _post_licenses([
        _license_payload(num_of_lic_sent + offset)
        for offset in range(licenses_per_request)
    ])

    num_of_lic_sent += licenses_per_request

    print(num_of_lic_sent)

    sleep(seconds_between_batches)

# Leftover rows (fewer than 10), one request each. Previously every leftover row was
# sent with the is_current value of the last row, and the cell failed with a NameError
# when new_main had fewer than 10 rows.
for n in range(num_of_lic_sent, len(new_main)):
    _post_licenses([_license_payload(n)])
    print(n)

In [None]:
# Display the rows that were sent to the API.
new_main

In [None]:
# Show the row positions that did not fit into a batch of 10 (the rows posted one at a time).
range(num_of_lic_sent, len(new_main))

In [None]:
# NOTE(review): `stop` is not defined anywhere in the visible notebook; presumably this
# cell fails on purpose to halt "Run All" before the Metabase check below -- confirm.
print(stop)

### Check The Metabase

In [None]:
# Load the Metabase query export. The file name is fixed to one dated export.
metabase_export_path = 'query_result_2021-09-30T16_29_55.806Z.json'
metabase_ca = pd.read_json(metabase_export_path)

In [None]:
# Read the full all_main table from SQL.
sql_ca = pd.read_sql(sql='all_main', con=all_cannabis_con)

In [None]:
# Keep only the California rows.
is_california = sql_ca['us_state'] == 'CA'
sql_ca = sql_ca[is_california]

In [None]:
# Store expiration_date as strings before comparing it with the Metabase export.
sql_ca = sql_ca.assign(expiration_date=sql_ca['expiration_date'].astype(str))

In [None]:
# Build the list of licenses whose Metabase row differs from the all_main row in SQL.

# (Metabase column, SQL column, printed label) triples, compared in this order. Only
# the first differing column is reported for a license.
metabase_checks = [
    ('License Status', 'license_status', 'status'),
    ('Expiration Date', 'expiration_date', 'expiration'),
    ('Rollup ID', 'rollup_id', 'roll'),
    ('License Description', 'license_description', 'description'),
    ('License Category', 'license_category', 'category'),
    ('Is Current', 'is_current', 'current'),
]

# Index both frames by license number once, keeping the first row per license (the row
# the comparison has always used), instead of re-filtering each frame for every column.
metabase_by_license = (
    metabase_ca.drop_duplicates(subset='License Number')
    .set_index('License Number')[[metabase_column for metabase_column, _, _ in metabase_checks]]
)
sql_by_license = (
    sql_ca.drop_duplicates(subset='license_number')
    .set_index('license_number')[[sql_column for _, sql_column, _ in metabase_checks]]
)

other_changes = []

for lic in metabase_ca['License Number'].unique():
    if lic not in sql_by_license.index:
        # Previously this surfaced as a bare "IndexError: list index out of range".
        raise KeyError(f"license number {lic!r} is in the Metabase export but not in sql_ca")

    # NOTE(review): values are compared with plain !=, so NaN never equals NaN and the
    # two sources must use the same representation per column -- confirm.
    for metabase_column, sql_column, label in metabase_checks:
        if metabase_by_license.at[lic, metabase_column] != sql_by_license.at[lic, sql_column]:
            print(lic, label)
            other_changes.append(lic)
            break

In [None]:
# Show how many licenses differ between Metabase and SQL.
len(other_changes)

In [None]:
# Display the license numbers that differ between Metabase and SQL.
other_changes

In [None]:
# Inspect the Metabase row(s) for one specific license.
metabase_ca[metabase_ca['License Number'] == 'C9-0000281-LIC']

In [None]:
# Take the SQL rows for the licenses that differ from Metabase.
differs_from_metabase = sql_ca['license_number'].isin(other_changes)
other_changes_df = sql_ca[differs_from_metabase]

In [None]:
# Inspect the other_changes_df row(s) for one specific license.
other_changes_df[other_changes_df['license_number'] == 'C11-0000602-LIC']

In [None]:
# Renumber the rows from zero.
other_changes_df = other_changes_df.reset_index()
other_changes_df = other_changes_df.drop(columns='index')

In [None]:
# Rebind new_main to the rows that differ from Metabase.
# NOTE(review): presumably so the API upload cell above can be re-run on just these
# rows -- confirm.
new_main = other_changes_df

In [None]:
# Inspect the new_main row(s) for one specific license.
new_main[new_main['license_number'] == 'C11-0000602-LIC']

In [None]:
# Display the rows now held in new_main.
new_main

In [None]:
# requests.post(
# #             'https://bespoke-api-server-staging.herokuapp.com/api/v1/licenses/bulk_update_licenses',
#             'https://bespoke-api-server-production.herokuapp.com/api/v1/licenses/bulk_update_licenses',
#             json= {
#           'company_licenses': [
              
#               {
#             'license_number': 'CDPH-10004633',
#             'rollup_id': 'CA-01424'
#             },
#           ],
#         },
#             headers={'x-api-key': 'ASYNC-SERVER-API-KEY-6Lfy84qn9ew93ZwMXSYhdwqj'}
# #             {'x-api-key': '80030179832729980933'}
#         )
              