In [None]:
import datetime as dt
import io
import os
import time
import urllib
import urllib.parse

import numpy as np
import pandas as pd
import pyodbc
import requests
from PyPDF2 import PdfFileWriter, PdfFileReader
from sqlalchemy import create_engine

In [None]:
# https://opendata.mass-cannabis-control.com/Licensing-and-Applications/Public-View-based-on-License-and-Applications-Appr/hmwt-yiqy
# Click 'View Data' over the table view at the bottom
# Download CSV for Excel

In [None]:
# Change this date to match the date in the file being downloaded
file_date = dt.date(2021, 10, 6)
# Year as an int; month and day as zero-padded strings used in the file names below.
file_year = file_date.year
file_month = f'{file_date:%m}'
file_day = f'{file_date:%d}'

In [None]:
# Widen the notebook display limits so large frames can be inspected in full.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.min_rows', 1000)
pd.set_option('display.max_columns', 100)

In [None]:
# Bespoke Engine: SQLAlchemy connection to the ma_cannabis database.
# The password is read from the BESPOKE_DB_PASSWORD environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): the password that was previously committed in this cell should be rotated.
db_password = os.environ["BESPOKE_DB_PASSWORD"]
params = urllib.parse.quote_plus("DRIVER={ODBC Driver 17 for SQL Server};"
                                 "SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;"
                                 "DATABASE=ma_cannabis;"
                                 "UID=admin;"
                                 f"PWD={db_password}")
engine = create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))
engine_con = engine.connect()

In [None]:
# Load the approved-licenses CSV that was saved locally for file_date.
license_csv_path = f"license_repositories/ma_approved_licenses_{file_year}_{file_month}_{file_day}.csv"
ma_cannabis = pd.read_csv(license_csv_path)

In [None]:
# Preview the first rows of the freshly loaded file.
ma_cannabis.head()

## Rename and Clean Data

In [None]:
# Map the state's column names onto the names used for the other states in SQL.
column_renames = {
    'business_name': 'name_legal',
    'dba_name': 'name_dba',
    'license_type': 'license_description',
    'app_create_date': 'application_date',
    'activity_date': 'date_uploaded',
    'business_state': 'contact_state',
    'establishment_county': 'contact_county',  # County currently blank for everyone
    'business_city': 'contact_city',
    'business_zipcode': 'contact_zip',
}
ma_cannabis = ma_cannabis.rename(columns=column_renames)

In [None]:
# Join the two business address lines into a single contact_address column.
# A missing second line becomes '' so it does not blank out the result.
ma_cannabis['business_address_2'] = ma_cannabis['business_address_2'].fillna('')
# Vectorized concatenation replaces the per-row loop; a missing first line now
# yields a missing contact_address instead of raising TypeError.
# Note: rows with an empty second line keep a trailing space, as before.
ma_cannabis['contact_address'] = (
    ma_cannabis['business_address_1'] + ' ' + ma_cannabis['business_address_2']
)

In [None]:
# Join the two mailing address lines into a single mailing_address column.
# A missing second line becomes '' so it does not blank out the result.
ma_cannabis['mailing_address_2'] = ma_cannabis['mailing_address_2'].fillna('')
# Vectorized concatenation replaces the per-row loop; a missing first line now
# yields a missing mailing_address instead of raising TypeError.
# Note: rows with an empty second line keep a trailing space, as before.
ma_cannabis['mailing_address'] = (
    ma_cannabis['mailing_address_1'] + ' ' + ma_cannabis['mailing_address_2']
)

In [None]:
# Join the two establishment address lines into a single establishment_address column.
ma_cannabis['establishment_address_2'] = ma_cannabis['establishment_address_2'].fillna('')
# The first line is cast to str (so a missing value becomes 'nan') before joining.
first_line = ma_cannabis['establishment_address_1'].astype(str)
ma_cannabis['establishment_address'] = first_line + ' ' + ma_cannabis['establishment_address_2']

In [None]:
# Display the combined mailing_address column to eyeball the result.
ma_cannabis['mailing_address']

In [None]:
# Parse both date columns into pandas datetimes.
for date_col in ('application_date', 'date_uploaded'):
    ma_cannabis[date_col] = pd.to_datetime(ma_cannabis[date_col])

In [None]:
# Display the establishment zip codes before they are cleaned.
ma_cannabis['establishment_zip_code']

In [None]:
# Change the zipcodes to strings so leading 0s can be added
for zip_col in ('contact_zip', 'mailing_zipcode'):
    ma_cannabis[zip_col] = ma_cannabis[zip_col].astype(str)
# Missing establishment zips are filled with 0 before the int -> str conversion.
establishment_zip = ma_cannabis['establishment_zip_code'].fillna(0)
ma_cannabis['establishment_zip_code'] = establishment_zip.astype(int).astype(str)

In [None]:
# This fixes all three zipcodes, even though we probably aren't using them all.
# Each value is left-padded with '0' to at least five characters; `.str.rjust`
# replaces the hand-written per-value padding loops and gives the same result.
for zip_col in ('contact_zip', 'mailing_zipcode'):
    ma_cannabis[zip_col] = ma_cannabis[zip_col].astype(str).str.rjust(5, '0')

# The establishment zip goes through int first, exactly as before, then is padded.
ma_cannabis['establishment_zip_code'] = (
    ma_cannabis['establishment_zip_code'].astype(int).astype(str).str.rjust(5, '0')
)

In [None]:
# Display the establishment zip codes after padding.
ma_cannabis['establishment_zip_code']

In [None]:
# Flag every license present in this download as current.
ma_cannabis['is_current'] = 1

### Add Rows/Merge SQL

In [None]:
# Read the existing SQL tables so they can be merged and compared with the new file.
old_sql_main, old_sql_roll, old_sql_contact = (
    pd.read_sql(table_name, engine_con)
    for table_name in ('ma_main', 'ma_roll', 'ma_contact')
)

In [None]:
# Attach the roll-up information already stored in SQL; licenses without a match keep NaN.
roll_up_lookup = old_sql_roll[['license_number', 'company_roll_up', 'roll_up_id']]
ma_cannabis = ma_cannabis.merge(roll_up_lookup, how='left', on='license_number')

### Reorder Columns

In [None]:
# List the available columns before choosing the output order.
ma_cannabis.columns

In [None]:
# Columns kept for the hand-edit file, in the order they should appear.
cleaned_columns = [
    'license_number', 'license_description', 'name_legal', 'name_dba',
    'roll_up_id', 'company_roll_up',
    'contact_address', 'contact_city', 'contact_county', 'contact_state', 'contact_zip',
    'mailing_address', 'mailing_city', 'mailing_state', 'mailing_zipcode',
    'establishment_address', 'establishment_city', 'establishment_state', 'establishment_zip_code',
    'date_uploaded',
    'square_footage_establishment', 'abutters_count',
    'is_abutters_notified', 'massachusetts_business',
    'dba_registration_city', 'cooperative_total_canopy',
    'cooperative_cultivation', 'cultivation_tier',
    'establishment_activities', 'establishment_cultivation',
    'application_date', 'version',
    'disability_owned_business',
    'lesbian_gay_bisexual_and_transgender_owned_business',
    'minority_owned_business', 'veteran_owned_business',
    'woman_owned_business', 'not_a_dbe',
]
ma_cannabis_cleaned = ma_cannabis[cleaned_columns]

In [None]:
# Order the rows by legal name and preview the result.
ma_cannabis_cleaned = ma_cannabis_cleaned.sort_values('name_legal')
ma_cannabis_cleaned.head()

In [None]:
# is_current is not among the selected columns above, so it is added back here.
ma_cannabis_cleaned['is_current'] = 1

### To CSV for Edit

In [None]:
# Write the cleaned frame to the dated CSV that is edited by hand.
edit_csv_path = f'edited_files/df_to_edit_{file_year}_{file_month}_{file_day}.csv'
ma_cannabis_cleaned.to_csv(edit_csv_path, index=False)

### Read Back In

In [None]:
# Re-read the hand-edited CSV once. Every row must have a company_roll_up before
# the pipeline continues, so the cell stops with an explicit error otherwise
# (previously it halted through a NameError on an undefined name).
edited_csv_path = f'edited_files/df_to_edit_{file_year}_{file_month}_{file_day}.csv'
edited_licenses = pd.read_csv(edited_csv_path)
missing_roll_up_count = int(edited_licenses['company_roll_up'].isnull().sum())
if missing_roll_up_count:
    raise ValueError(
        f'{missing_roll_up_count} row(s) in {edited_csv_path} have no company_roll_up; '
        'fill them in and re-run this cell.'
    )
ma_cannabis_with_roll = edited_licenses

In [None]:
# Missing roll_up_id values become '' so the next cells can split rows on == '' / != ''.
ma_cannabis_with_roll['roll_up_id'] = ma_cannabis_with_roll['roll_up_id'].fillna('')

### Add Roll Up Ids

In [None]:
# Split the licenses into those that already have a roll_up_id and those that need one.
has_roll_id = ma_cannabis_with_roll['roll_up_id'] != ''
ma_with_roll = ma_cannabis_with_roll[has_roll_id]
ma_without_roll = ma_cannabis_with_roll[~has_roll_id]

In [None]:
# Renumber the rows from 0 and discard the old index.
ma_without_roll = ma_without_roll.reset_index(drop=True)

In [None]:
# This whole section creates the new max roll up id and adds the new rolls to new companies

In [None]:
# Existing ids look like 'MA-00123'; strip the 3-character prefix to get the number.
all_rolls_with_ma = list(ma_with_roll['roll_up_id'].unique())
all_rolls = [int(str(roll)[3:]) for roll in all_rolls_with_ma]

# Highest number in use in this file; 0 when no license has an id yet
# (previously max() raised ValueError on an empty list).
# NOTE(review): only ids present in this file are considered, not ids stored in
# ma_roll for licenses that have dropped out of the file - confirm that is intended.
max_roll = max(all_rolls, default=0)

# Give each new company the next free number, in order of first appearance,
# formatted as 'MA-' plus the number zero-padded to at least five digits.
new_comp_rolls = list(ma_without_roll['company_roll_up'].unique())
new_roll_ids = {
    comp: f'MA-{max_roll + offset:05d}'
    for offset, comp in enumerate(new_comp_rolls, start=1)
}
max_roll += len(new_comp_rolls)

# One vectorized lookup replaces the nested loops and the chained-indexing
# assignment, which triggers SettingWithCopyWarning and is not applied under
# pandas copy-on-write.
ma_without_roll['roll_up_id'] = ma_without_roll['company_roll_up'].map(new_roll_ids)

In [None]:
# Stack the licenses that already had ids with the newly numbered ones, sort by
# legal name and renumber the rows. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in 2.0.
ma_with_roll_combined = (
    pd.concat([ma_with_roll, ma_without_roll])
    .sort_values(by='name_legal')
    .reset_index(drop=True)
)

In [None]:
# Preview the combined frame.
ma_with_roll_combined.head()

### Compare to SQL

In [None]:
# License numbers that are already stored in ma_main.
in_old_sql = old_sql_main['license_number'].unique().tolist()

In [None]:
# Keep only the licenses that are not in SQL yet.
already_in_sql = ma_with_roll_combined['license_number'].isin(in_old_sql)
ma_for_sql = ma_with_roll_combined[~already_in_sql]

In [None]:
# Display the licenses that are new relative to ma_main.
ma_for_sql

### Edit is_current

In [None]:
# Display the ma_main table as it was read from SQL.
old_sql_main

In [None]:
# Print every license stored in ma_main that is absent from the edited file.
# The membership set is built once instead of rebuilding a list on every iteration.
licenses_in_edited_file = set(ma_cannabis_with_roll['license_number'].unique())
for lic in old_sql_main['license_number'].unique():
    if lic not in licenses_in_edited_file:
        print(lic)

In [None]:
# List the columns of ma_cannabis for reference while splitting the data below.
ma_cannabis.columns

## Separate Data for SQL

In [None]:
# Columns that make up the ma_main table.
main_columns = [
    'license_number', 'license_description', 'name_legal', 'name_dba',
    'application_date', 'date_uploaded', 'is_current',
]
ma_main = ma_for_sql[main_columns]
ma_main.head()

In [None]:
# Columns that make up the ma_contact table: establishment, contact and mailing addresses.
contact_columns = [
    'license_number',
    'establishment_address', 'establishment_city', 'establishment_state', 'establishment_zip_code',
    'contact_address', 'contact_city', 'contact_state', 'contact_zip',
    'mailing_address', 'mailing_city', 'mailing_state', 'mailing_zipcode',
]
ma_contact = ma_for_sql[contact_columns]
ma_contact.head()

In [None]:
# Columns that make up the ma_roll table.
roll_columns = ['license_number', 'roll_up_id', 'company_roll_up']
ma_roll = ma_for_sql[roll_columns]
ma_roll.head()

In [None]:
# Show the distinct contact_state values as a quick sanity check.
ma_contact['contact_state'].unique()

In [None]:
# Show the distinct establishment_state values as a quick sanity check.
ma_contact['establishment_state'].unique()

In [None]:
# Show the distinct mailing_state values as a quick sanity check.
ma_contact['mailing_state'].unique()

### Send to SQL

In [None]:
# Append the new licenses to the ma_main table.
ma_main.to_sql('ma_main', engine_con, if_exists='append', index=False)

In [None]:
# Append the new roll-up rows to the ma_roll table.
ma_roll.to_sql('ma_roll', engine_con, if_exists='append', index=False)

In [None]:
# Append the new address rows to the ma_contact table.
ma_contact.to_sql('ma_contact', engine_con, if_exists='append', index=False)

### Create main_alter and run query

In [None]:
# Print every license stored in ma_main that is absent from the current download.
# The membership set is built once instead of rebuilding a list on every iteration.
licenses_in_download = set(ma_cannabis['license_number'].unique())
for lic in old_sql_main['license_number'].unique():
    if lic not in licenses_in_download:
        print(lic)

In [None]:
# Collect the licenses already in ma_main whose description, legal name or
# is_current flag differs from the current download.
compared_columns = ['license_description', 'name_legal', 'is_current']

# Index both frames by license number once (first row wins, matching the old
# `[0]` lookups) instead of filtering the whole frame several times per license.
new_by_license = ma_cannabis.drop_duplicates(subset='license_number').set_index('license_number')
old_by_license = old_sql_main.drop_duplicates(subset='license_number').set_index('license_number')

ma_lic_for_alter = []
for lic in old_sql_main['license_number'].unique():
    # A license that is no longer in the download has nothing to compare against;
    # previously this case raised IndexError and aborted the cell.
    if lic not in new_by_license.index:
        continue
    if any(new_by_license.at[lic, col] != old_by_license.at[lic, col] for col in compared_columns):
        ma_lic_for_alter.append(lic)

ma_for_alter = ma_cannabis[ma_cannabis['license_number'].isin(ma_lic_for_alter)][['license_number', 'license_description',  'name_legal', 'name_dba', 'application_date', 'date_uploaded', 'is_current']]

In [None]:
# Display the rows whose ma_main record will be updated.
ma_for_alter

In [None]:
# Replace the ma_main_alter staging table with the rows to update.
ma_for_alter.to_sql(name='ma_main_alter', con=engine_con, if_exists='replace', index=False)

In [None]:
# This code runs the query normally run in SQL to update ma_main using ma_main_alter.
# The password comes from the BESPOKE_DB_PASSWORD environment variable so it is
# not stored in the notebook; a missing variable raises KeyError.
ma_conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};'
                      'SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;'
                      'DATABASE=ma_cannabis;'
                      'UID=admin;'
                      f"PWD={os.environ['BESPOKE_DB_PASSWORD']};")

# The connection is always closed, even if the UPDATE fails. The earlier
# `SELECT *` whose result was never fetched has been dropped.
try:
    ma_cursor = ma_conn.cursor()

    ma_cursor.execute('''
Update t1
SET t1.[license_description] = t2.[license_description],
    t1.[name_legal] = t2.[name_legal],
    t1.[is_current] = t2.[is_current]
FROM [ma_cannabis].[dbo].[ma_main] as t1
INNER JOIN [ma_cannabis].[dbo].[ma_main_alter] as t2
ON t1.[license_number] = t2.[license_number];
                  ''')

    ma_conn.commit()
finally:
    ma_conn.close()

### SQL all_main

In [None]:
# This creates the connection to the all_cannabis database, which holds all_main.
# The password is read from the BESPOKE_DB_PASSWORD environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError here.
all_cannabis_password = os.environ["BESPOKE_DB_PASSWORD"]
all_cannabis_params = urllib.parse.quote_plus("DRIVER={ODBC Driver 17 for SQL Server};"
                                 "SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;"
                                 "DATABASE=all_cannabis;"
                                 "UID=admin;"
                                 f"PWD={all_cannabis_password}")
all_cannabis_con = create_engine("mssql+pyodbc:///?odbc_connect={}".format(all_cannabis_params))

In [None]:
# Read all_main and keep only the Massachusetts rows, renumbered from 0.
all_main_sql = pd.read_sql('all_main', con=all_cannabis_con)
is_ma_row = all_main_sql['us_state'] == 'MA'
ma_main_sql = all_main_sql[is_ma_row].reset_index(drop=True)

In [None]:
# Re-read ma_main and ma_roll from SQL (now including the uploads above) and join them on license_number.
updated_ma_main, updated_ma_roll = (
    pd.read_sql(table_name, con=engine_con) for table_name in ('ma_main', 'ma_roll')
)
updated_ma_cannabis = pd.merge(updated_ma_main, updated_ma_roll, on='license_number')

In [None]:
# Add license_status and expiration_date as empty-string columns.
updated_ma_cannabis = updated_ma_cannabis.assign(license_status='', expiration_date='')

In [None]:
# Build the all_main view of the MA licenses. `.copy()` makes this an independent
# frame so the column assignments below do not write into a slice of updated_ma_cannabis.
new_all_main = updated_ma_cannabis[['license_number', 'roll_up_id', 'name_legal', 'license_status', 'is_current', 'license_description', 'expiration_date']].copy()
new_all_main['us_state'] = 'MA'

# License description -> license category. Descriptions not listed here get ''.
license_category_by_description = {
    'Marijuana Cultivator': 'Cultivator',
    'Craft Marijuana Cooperative': 'Cultivator',
    'Marijuana Retailer': 'Retailer',
    'Marijuana Product Manufacturer': 'Manufacturer',
    'Marijuana Transporter with Other Existing ME License': 'Distributor',
    'Third Party Marijuana Transporter': 'Distributor',
    'Independent Testing Laboratory': 'Laboratory',
    'Medical Research Development Cultivation': 'Other',
    'Medical Operator': 'Other',
    'Retail Operator': 'Other',
    'Medical Hospitality': 'Other',
    'Marijuana Microbusiness': 'Multiple',
}

# A single vectorized lookup replaces the per-row chained-indexing assignment,
# which triggers SettingWithCopyWarning and is not applied under pandas copy-on-write.
new_all_main['license_category'] = (
    new_all_main['license_description'].map(license_category_by_description).fillna('')
)

In [None]:
# Drop every license that all_main already has for MA.
already_in_all_main = new_all_main['license_number'].isin(ma_main_sql['license_number'].unique())
new_main_for_sql = new_all_main[~already_in_all_main]

In [None]:
# Rename the two columns to the names used in the UPDATE against all_main further down.
all_main_renames = {'roll_up_id': 'rollup_id', 'name_legal': 'legal_name'}
new_main_for_sql = new_main_for_sql.rename(columns=all_main_renames)
new_main_for_sql

In [None]:
# Overwrite expiration_date with missing values and convert the column with to_datetime.
empty_dates = pd.Series(np.nan, index=new_main_for_sql.index)
new_main_for_sql['expiration_date'] = pd.to_datetime(empty_dates)

In [None]:
# Append the new licenses to all_main.
new_main_for_sql.to_sql(name='all_main', con=all_cannabis_con, if_exists='append', index=False)

In [None]:
# Re-read all_main after the upload and keep only the Massachusetts rows, renumbered from 0.
all_main_sql_for_alter = pd.read_sql('all_main', con=all_cannabis_con)
is_ma_row_for_alter = all_main_sql_for_alter['us_state'] == 'MA'
ma_main_sql_for_alter = all_main_sql_for_alter[is_ma_row_for_alter].reset_index(drop=True)

In [None]:
# Number of MA rows now in all_main.
len(ma_main_sql_for_alter)

In [None]:
# Number of rows built from ma_main/ma_roll, for comparison with the count above.
len(new_all_main)

In [None]:
# List the columns of new_all_main.
new_all_main.columns

In [None]:
# Print every license in new_all_main that is still absent from the MA rows of all_main.
# The membership set is built once instead of recomputing unique() on every iteration.
licenses_in_all_main = set(ma_main_sql_for_alter['license_number'].unique())
for lic in new_all_main['license_number'].unique():
    if lic not in licenses_in_all_main:
        print(lic)

In [None]:
# This creates a df of all the license numbers that will need to be altered.
# Each tuple is (column in new_all_main, column in all_main, label printed for the reviewer).
compared_fields = [
    ('is_current', 'is_current', 'current'),
    ('roll_up_id', 'rollup_id', 'roll'),
    ('name_legal', 'legal_name', 'name'),
    ('license_status', 'license_status', 'status'),
    ('license_description', 'license_description', 'description'),
    ('license_category', 'license_category', 'category'),
]

# Index both frames by license number once (first row wins, matching the old
# `[0]` lookups) instead of filtering each frame twelve times per license.
new_rows = new_all_main.drop_duplicates(subset='license_number').set_index('license_number')
sql_rows = ma_main_sql_for_alter.drop_duplicates(subset='license_number').set_index('license_number')

ma_lic_to_alter = []

for lic in new_all_main['license_number'].unique():
    # A license missing from all_main previously raised IndexError; report it and move on.
    if lic not in sql_rows.index:
        print(lic, 'missing from all_main')
        continue
    changed = False
    # Every differing field is printed. The old mixed if/elif chain stopped at the
    # first of name/status/description/category; the set of licenses selected is unchanged.
    for new_col, sql_col, label in compared_fields:
        if new_rows.at[lic, new_col] != sql_rows.at[lic, sql_col]:
            print(lic, label)
            changed = True
    if changed:
        ma_lic_to_alter.append(lic)

ma_all_main_for_alter = new_all_main[new_all_main['license_number'].isin(ma_lic_to_alter)]

In [None]:
# Display the rows whose all_main record will be updated.
ma_all_main_for_alter

In [None]:
# Manual checkpoint: always raises so that "Run All" stops here, before the cells
# below modify all_main and call the API. Previously this relied on a NameError
# from an undefined name; the explicit exception states the intent.
raise RuntimeError(
    'Manual checkpoint: review ma_all_main_for_alter above, then run the remaining cells by hand.'
)

In [None]:
# Replace the ma_to_alter staging table with the rows to update, so the UPDATE below can join on it.
ma_all_main_for_alter.to_sql(name='ma_to_alter', con=all_cannabis_con, if_exists='replace', index=False)

In [None]:
# This code runs the query normally run in SQL to update all_main using ma_to_alter.
# The password comes from the BESPOKE_DB_PASSWORD environment variable so it is
# not stored in the notebook; a missing variable raises KeyError.
all_conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};'
                      'SERVER=bespoke-database-1.cmevrozrcs7c.us-west-2.rds.amazonaws.com;'
                      'DATABASE=all_cannabis;'
                      'UID=admin;'
                      f"PWD={os.environ['BESPOKE_DB_PASSWORD']};")

# The connection is always closed, even if the UPDATE fails. The earlier
# `SELECT *` whose result was never fetched has been dropped.
try:
    all_cursor = all_conn.cursor()

    all_cursor.execute('''
Update t1
SET t1.[legal_name] = t2.[name_legal],
    t1.[is_current] = t2.[is_current],
    t1.[rollup_id] = t2.[roll_up_id],
    t1.[license_description] = t2.[license_description],
    t1.[license_category] = t2.[license_category]
FROM [all_cannabis].[dbo].[all_main] as t1
INNER JOIN [all_cannabis].[dbo].[ma_to_alter] as t2
ON t1.[license_number] = t2.[license_number];
                  ''')

    all_conn.commit()
finally:
    all_conn.close()

In [None]:
# Display the license descriptions of the altered rows.
ma_all_main_for_alter['license_description']

### Bespoke / Sweat Equity API

In [None]:
# Combine the newly added licenses with the altered ones (renamed to the all_main
# column names). pd.concat replaces DataFrame.append, which was deprecated in
# pandas 1.4 and removed in 2.0.
new_main = pd.concat([
    new_main_for_sql,
    ma_all_main_for_alter.rename(columns={'roll_up_id':'rollup_id', 'name_legal':'legal_name'}),
])

In [None]:
# Display the altered rows again for reference.
ma_all_main_for_alter

In [None]:
# Renumber the combined rows from 0 and display them.
new_main = new_main.reset_index(drop=True)
new_main

In [None]:
# Push every new or altered license to the bulk_update_licenses endpoint, one
# license per request. The API key is read from the BESPOKE_API_KEY environment
# variable so it is not stored in the notebook; a missing variable raises KeyError.
api_url = 'https://bespoke-api-server-production.herokuapp.com/api/v1/licenses/bulk_update_licenses'
api_headers = {'x-api-key': os.environ['BESPOKE_API_KEY']}


def _json_safe(value):
    """Return `value` in a form the JSON encoder accepts.

    Missing values (NaN/NaT/None) become None, dates and timestamps become ISO
    strings, numpy scalars become plain Python values; anything else is unchanged.
    """
    if pd.isna(value):
        return None
    if isinstance(value, (dt.datetime, dt.date)):
        return value.isoformat()
    if isinstance(value, np.generic):
        return value.item()
    return value


for n in range(0, len(new_main)):
    row = new_main.iloc[n]

    license_payload = {
        'license_number': _json_safe(row['license_number']),
        'rollup_id': _json_safe(row['rollup_id']),
        'legal_name': _json_safe(row['legal_name']),
        'license_status': _json_safe(row['license_status']),
        # The API receives a real boolean rather than the 0/1 stored in SQL.
        'is_current': bool(row['is_current'] == 1),
        'license_description': _json_safe(row['license_description']),
        'us_state': _json_safe(row['us_state']),
        # New licenses carry NaT here, which cannot be JSON-encoded; it is sent as null.
        # NOTE(review): confirm the API accepts null for expiration_date.
        'expiration_date': _json_safe(row['expiration_date']),
        'license_category': _json_safe(row['license_category']),
    }

    response = requests.post(
        api_url,
        json={'company_licenses': [license_payload]},
        headers=api_headers,
        timeout=60,  # do not hang the notebook forever on an unresponsive server
    )
    # Failures are reported but do not stop the remaining uploads.
    if not response.ok:
        print(license_payload['license_number'], 'failed with status', response.status_code)

    # Pause between requests, as before.
    time.sleep(5)