In [49]:
# import dependencies
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

In [73]:
# Read every source CSV into pandas.
# Database upload order (foreign key references require the tables to be
# uploaded in this order):
#   1: admin_projects
#   2: issues_issue_types
#   3: issues_issue_subtypes
#   4: issues_issues
# Note: the numeric suffix on each variable name reflects that order.

file1, file2, file3, file4, file5 = (
    "data/csv/admin_projects.csv",
    "data/csv/issues_issue_types.csv",
    "data/csv/issues_issue_subtypes.csv",
    "data/csv/issues_issues.csv",
    "static/zip.csv",
)

# The four project/issue files are read with pandas' default type inference.
csv_data1, csv_data2, csv_data3, csv_data4 = (
    pd.read_csv(path) for path in (file1, file2, file3, file4)
)
# The zip column is run through str() so it is kept as text, not parsed as a number.
csv_data5 = pd.read_csv(file5, converters={'zip': str})

admin_projects_df_1 = pd.DataFrame(csv_data1)
issues_issue_types_df_2 = pd.DataFrame(csv_data2)
issues_issue_subtypes_df_3 = pd.DataFrame(csv_data3)
issues_issues_df_4 = pd.DataFrame(csv_data4)
zipcodes_as_lat_lon = pd.DataFrame(csv_data5)

# Confirm the data is loaded (swap in one of the commented lines to preview another frame).
zipcodes_as_lat_lon.head(5)
#admin_projects_df_1.head(5)
#issues_issue_types_df_2.head(5)
#issues_issue_subtypes_df_3.head(5)
#issues_issues_df_4.head(5)





Unnamed: 0,zip,latitude,longitude,city,state,county,other_info
0,501,40.9223,-72.6371,Holtsville,NY,Suffolk,UNIQUE
1,544,40.9223,-72.6371,Holtsville,NY,Suffolk,UNIQUE
2,601,18.1653,-66.7226,Adjuntas,PR,Adjuntas,STANDARD
3,602,18.3931,-67.181,Aguada,PR,Aguada,STANDARD
4,603,18.4559,-67.1457,Aguadilla,PR,Aguadilla,STANDARD


In [None]:
# Add Data Munging here

In [75]:
# Projects must have a `value` strictly greater than this to be kept.
# The comparison is made on the raw `value` column; no currency conversion is applied.
MIN_PROJECT_VALUE = 5000000

# Columns retained from each source table.
ADMIN_PROJECT_COLUMNS = [
    'id', 'bim360_account_id', 'name', 'start_date', 'type', 'value',
    'currency', 'status', 'job_number', 'city', 'state_or_province',
    'postal_code', 'country', 'timezone', 'construction_type',
    'contract_type', 'business_unit_id', 'last_sign_in', 'created_at',
]
ISSUE_TYPE_COLUMNS = [
    'issue_type_id', 'bim360_account_id', 'bim360_project_id',
    'issue_type', 'is_active',
]
ISSUE_SUBTYPE_COLUMNS = [
    'issue_subtype_id', 'bim360_account_id', 'bim360_project_id',
    'issue_type_id', 'issue_subtype',
]
ISSUE_COLUMNS = [
    'issue_id', 'bim360_project_id', 'type_id', 'subtype_id', 'status',
    'due_date', 'assignee_id', 'root_cause_id',
]

# Keep only active projects that have a postal code and exceed the value threshold.
# A single combined mask selects the same rows as filtering three times in sequence.
keep_project = (
    admin_projects_df_1["postal_code"].notna()
    & (admin_projects_df_1["value"] > MIN_PROJECT_VALUE)
    & (admin_projects_df_1["status"] == "active")
)

# Drop unneeded columns:
clean_df_1 = admin_projects_df_1.loc[keep_project, ADMIN_PROJECT_COLUMNS]
issues_issue_types_df_2 = issues_issue_types_df_2[ISSUE_TYPE_COLUMNS]
issues_issue_subtypes_df_3 = issues_issue_subtypes_df_3[ISSUE_SUBTYPE_COLUMNS]
issues_issues_df_4 = issues_issues_df_4[ISSUE_COLUMNS]

# TODO: merge tables
#master_df = pd.merge(clean_df_1, issues_issue_types_df_2, left_on=['id', 'bim360_account_id'], right_on=['bim360_project_id', 'bim360_account_id'], how="inner")
#master_df.head(5)

# Only the last expression of a cell is displayed, so the shape check goes at
# the end, where it reports the filtered, column-trimmed frame.
clean_df_1.shape


In [76]:
# Host, port and database portion of the PostgreSQL connection URL.
rds_connection_string = "@localhost:5432/project2"

# Important: supply the password through the PGPASSWORD environment variable
# instead of typing it into the notebook, so it is never saved or committed
# with the file. Falls back to an empty password when the variable is unset.
password = os.environ.get("PGPASSWORD", "")

# Percent-encode the password so that characters such as "@", ":" or "/"
# cannot be mistaken for URL delimiters.
engine = create_engine(
    f'postgresql+psycopg2://postgres:{quote(password, safe="")}{rds_connection_string}'
)

In [77]:
# Confirm the database connection and that the tables are present
# (if not, run the schema SQL file in the database first).
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

['issues_issues',
 'issues_issue_types',
 'admin_projects',
 'issues_issue_subtypes']

In [80]:
#upload data in order specified in beginning


#important note: I removed the foreign key constraints from the table schema for the time being when I
#                ran it in Postgres (they are still in the schema file, at the bottom). I had some issues with
#                FOREIGN KEY constraint violations when loading the data (missing keys in some tables), so it
#                may be best to run the schema without the constraints.


#clean_df_1.to_sql(name='admin_projects', con=engine, if_exists='append', index=False)

#issues_issue_types_df_2.to_sql(name='issues_issue_types', con=engine, if_exists='append', index=False)

#issues_issue_subtypes_df_3.to_sql(name='issues_issue_subtypes', con=engine, if_exists='append', index=False)

#issues_issues_df_4.to_sql(name='issues_issues', con=engine, if_exists='append', index=False)

#zipcodes_as_lat_lon.to_sql(name='zips', con=engine, if_exists='append', index=False)

In [None]:
from sqlalchemy import Table
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import Executable, ClauseElement

class CreateView(Executable, ClauseElement):
    """Executable SQL element describing a view to be created.

    Holds the view's name and the selectable that defines it. The class
    itself contains no SQL rendering logic.
    """

    def __init__(self, name, select):
        """Store the view name and its defining SELECT.

        name   -- name of the view to create.
        select -- selectable whose compiled SQL becomes the view body.
        """
        self.select = select
        self.name = name

@compiles(CreateView)
def visit_create_view(element, compiler, **kw):
    """Compile a CreateView element into a ``CREATE VIEW ... AS ...`` string.

    element  -- the CreateView instance being compiled.
    compiler -- the SQL compiler, used to render the wrapped SELECT.
    **kw     -- extra compiler keyword arguments (accepted but not forwarded).

    Returns the SQL text. The view name is interpolated as given, with no
    quoting applied here.
    """
    # literal_binds=True asks the compiler to render parameter values inline
    # in the SELECT text instead of as bind placeholders.
    select_sql = compiler.process(element.select, literal_binds=True)
    return "CREATE VIEW %s AS %s" % (element.name, select_sql)

# test data
from sqlalchemy import MetaData, Column, Integer


# Metadata bound to the engine, so t.create() and the autoload further down
# need no explicit connection argument.
metadata = MetaData(engine)

# Throwaway two-column table used to exercise CreateView.
t = Table('t',
          metadata,
          Column('id', Integer, primary_key=True),
          Column('number', Integer))
t.create()
engine.execute(t.insert().values(id=1, number=3))
engine.execute(t.insert().values(id=9, number=-3))

# create view: of the two rows inserted above, only id=9 satisfies id > 5
createview = CreateView('viewname', t.select().where(t.c.id>5))
engine.execute(createview)

# reflect view and print result
v = Table('viewname', metadata, autoload=True)
for r in engine.execute(v.select()):
    # print is a function in Python 3; the statement form `print r` is a SyntaxError
    print(r)

In [81]:
# Confirm the data is loaded: read the zips table back from the database
# and preview its first rows.
zips_query = 'select * from zips'
zips_from_db = pd.read_sql_query(zips_query, con=engine)
zips_from_db.head()

Unnamed: 0,zip,latitude,longitude,city,state,county,other_info
0,501,40.9223,-72.6371,Holtsville,NY,Suffolk,UNIQUE
1,544,40.9223,-72.6371,Holtsville,NY,Suffolk,UNIQUE
2,601,18.1653,-66.7226,Adjuntas,PR,Adjuntas,STANDARD
3,602,18.3931,-67.181,Aguada,PR,Aguada,STANDARD
4,603,18.4559,-67.1457,Aguadilla,PR,Aguadilla,STANDARD


In [None]:
#export to csv file

import csv
# NOTE(review): `session`, `MyModel` and `MyTable` are not defined in any cell
# visible in this notebook; this cell cannot run until they are. `MyTable`
# presumably should be the same mapped class as `MyModel` (confirm).

# csv.writer needs a text-mode file opened with newline='' in Python 3
# (binary mode 'wb' raises TypeError on the first row). The `with` block
# closes the file even if the query or the write fails.
with open('projects_zip.csv', 'w', newline='') as outfile:
    outcsv = csv.writer(outfile)
    records = session.query(MyModel).all()
    # One CSV row per record, with one cell per mapped column, in mapper order.
    outcsv.writerows(
        [getattr(curr, column.name) for column in MyTable.__mapper__.columns]
        for curr in records
    )

In [33]:
# Sanity check: (rows, columns) of the filtered projects frame.
# NOTE(review): the recorded output below shows 22 columns and this cell's
# execution count is lower than the cleaning cell's, so the output appears to
# predate the 19-column selection. Re-run to refresh it.
clean_df_1.shape

(197, 22)