<a href="https://colab.research.google.com/github/uninstallit/ati580_final_project/blob/edvin-1/ati580_vis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Visualization**

In [None]:
pip install dnspython

In [22]:
import plotly.graph_objects as go

class Histogram(object):
  """Stacked plotly histogram whose traces alternate between two colors.

  Traces are added one at a time with `add_trace`; the first trace is drawn
  in orange, the second in blue, the third in orange again, and so on.
  """

  def __init__(self, title_text, xaxis_text):
    """Create an empty figure.

    Args:
      title_text: Figure title (rendered in bold, centered).
      xaxis_text: Label of the x axis.
    """
    self._title_text = title_text
    self._xaxis_text = xaxis_text

    self._fig = go.Figure()
    self._trace_count = 0
    self._orange = '#FF8C00'
    self._blue   = '#0000FF'

  def _get_color(self):
    """Return the color for the current trace: orange for odd counts, blue for even."""
    return self._orange if self._trace_count % 2 == 1 else self._blue

  def add_trace(self, data=None, label=""):
    """Add one histogram trace to the figure and refresh the layout.

    Args:
      data: Sized collection of values to bin. `None` or an empty collection
        is ignored, so no trace is added and the color sequence does not advance.
      label: Legend name of the trace.
    """
    # `None` replaces the former mutable `[]` default; both mean "nothing to plot".
    if data is None or len(data) == 0:
      return

    # The counter is bumped before the color lookup, so the first trace is orange.
    self._trace_count += 1
    self._fig.add_trace(go.Histogram(
        x=data,
        name=label,
        marker_color=self._get_color(),
        opacity=1
        ))
    self.update_layout()

  def update_layout(self):
    """Apply the stacked bar mode, the bold centered title and the axis labels."""
    self._fig.update_layout(
        barmode='stack',
        title={
            'text': '<b>' + self._title_text + '</b>',
            'y': 0.85,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {
                'color': "black",
                'size': 14}},
        xaxis_title_text=self._xaxis_text,
        yaxis_title_text="Count",
        )

  def show_figure(self):
    """Render the figure."""
    self._fig.show()
      

## **Visualization and Hypothesis Testing**

In [21]:
import getpass
import os

import numpy as np
import pandas as pd
import pymongo

# connect to database
# The connection URI embeds the database username and password, so it must not
# be stored in the notebook. It is taken from the MONGODB_URI environment
# variable, or prompted for (without echo) when that variable is unset or empty.
# Expected form:
#   mongodb+srv://<user>:<password>@<cluster-host>/POLICE_DATABASE?retryWrites=true&w=majority
mdb_uri = os.environ.get("MONGODB_URI") or getpass.getpass("MongoDB connection URI: ")
mdb_client = pymongo.MongoClient(mdb_uri)
mdb_database   = mdb_client['POLICE_DATABASE']
mdb_collection = mdb_database['POLICE_INTERVIEWS']

# convert queries to numpy array
def query_to_numpy(filter, projection):
  """Run a `find` on `mdb_collection` and return the matching values as a numpy array.

  Args:
    filter: Query document passed to `mdb_collection.find`.
    projection: Projection document passed to `mdb_collection.find`.

  Returns:
    The result table with size-1 axes squeezed out and then transposed. A
    projection of a single field therefore yields a 1-D array of that field's
    values, including when exactly one document matches.
  """
  cursor    = mdb_collection.find(filter, projection)
  dataframe = pd.DataFrame(list(cursor))
  squeezed  = np.squeeze(dataframe.to_numpy())
  # A single value squeezes down to a 0-d array, on which len() raises
  # TypeError; promote it back to one dimension so callers can always size it.
  return np.transpose(np.atleast_1d(squeezed))

# create research queries
def _ages_for_make(vehicle_make):
  """Return the SubjectAge values of all documents with the given VehicleMake.

  Documents whose SubjectAge is missing, "", None or 0 are filtered out.
  """
  return query_to_numpy({
      "$and":[{"SubjectAge":{"$exists": True}},
              {"SubjectAge":{"$ne": ""}},
              {"SubjectAge":{"$ne": None}},
              {"SubjectAge":{"$ne": 0}},
              {"VehicleMake": vehicle_make} ]},
              {"_id":0, "SubjectAge" : 1})

age_vs_chevy = _ages_for_make("CHEVROLET")
age_vs_ford  = _ages_for_make("FORD")

# plot numerical
hist = Histogram("age vs make", "Age")
hist.add_trace(data=age_vs_chevy, label="CHEVY")
hist.add_trace(data=age_vs_ford, label="FORD")
hist.show_figure()

# Fetch every VehicleColor value that is present and is not "", None or 0.
vehicle_color = query_to_numpy(
    {"$and": [
        {"VehicleColor": {"$exists": True}},
        {"VehicleColor": {"$ne": ""}},
        {"VehicleColor": {"$ne": None}},
        {"VehicleColor": {"$ne": 0}},
    ]},
    {"_id": 0, "VehicleColor": 1},
)

# plot categorical: one bar per distinct color value
hist2 = Histogram("vehicle color", "Color")
hist2.add_trace(vehicle_color, "Color")
hist2.show_figure()
