# Classification Exercises

### 1. In a Jupyter notebook, classification_exercises.ipynb, use a Python module (pydata or seaborn datasets) containing datasets as a source for the iris data. Create a pandas dataframe, df_iris, from this data.

- print the first 3 rows
- print the number of rows and columns (shape)
- print the column names
- print the data type of each column
- print the summary statistics for each of the numeric variables. 
    
   Would you recommend rescaling the data based on these statistics?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
df_iris = sns.load_dataset('iris')

In [None]:
type(df_iris)

In [None]:
#print the first 3 rows
df_iris.head(3) 

In [None]:
#print the number of rows and columns (shape)
df_iris.shape

In [None]:
#print the column names
df_iris.columns

In [None]:
#print the data type of each column
df_iris.dtypes

In [None]:
#print the summary statistics for each of the numeric variables.
df_iris.describe()

### Would you recommend rescaling the data based on these statistics?

### -Yes

### 2. Read the Table1_CustDetails table from the Excel_Exercises.xlsx file into a dataframe named df_excel.

- assign the first 100 rows to a new dataframe, df_excel_sample
- print the number of rows of your original dataframe
- print the first 5 column names
- print the column names that have a data type of object
- compute the range for each of the numeric variables.

In [None]:
 df_excel = pd.read_excel('Table1_CustDetails.xlsx', sheet_name='Table1_CustDetails')
    

In [None]:
 df_excel.head()

In [None]:
df_excel.shape

In [None]:
df_excel_sample = df_excel.head(100)

In [None]:
df_excel_sample.shape

In [None]:
df_excel_sample.columns[0:5]

In [None]:
df_excel_sample.dtypes

In [None]:
numerics = df_excel_sample.select_dtypes(include=['float64', 'int64'])

In [None]:
numerics.max() - numerics.min()

In [None]:
df_cols_object_type_bool = df_excel_sample.dtypes == 'object'
df_cols_object_type_bool

In [None]:
object_type_cols = df_excel_sample.dtypes[df_cols_object_type_bool]
object_type_cols

In [None]:
range_monthly_charges = df_excel.monthly_charges.max() - df_excel.monthly_charges.min()
range_monthly_charges

In [None]:
range_total_charges = df_excel.total_charges.max() - df_excel.total_charges.min()
range_total_charges

In [None]:
range_avg_monthly_charges = df_excel.avg_monthly_charge.max() - df_excel.avg_monthly_charge.min()
range_avg_monthly_charges

In [None]:
range_tenure = df_excel.tenure.max() - df_excel.tenure.min()
range_tenure

In [None]:
url = "https://gist.githubusercontent.com/ryanorsinger/3fce5a65b5fb8ab728af5192c7de857e/raw/a0422b7b73749842611742a1064e99088a47917d/clean_telco.csv"
df_excel = pd.read_csv(url, index_col="id")

### 3. Read the data from this Google Sheet into a dataframe, df_google.

- print the first 3 rows
- print the number of rows and columns
- print the column names
- print the data type of each column
- print the summary statistics for each of the numeric variables
- print the unique values for each of your categorical variables

In [None]:
# Sharing ("edit") link of the Google Sheet that holds the data.
sheet_url = "https://docs.google.com/spreadsheets/d/1Uhtml8KY19LILuZsrDtlsHHDC9wuDGUSe8LTEwvdI5g/edit?usp=sharing"

# Swap the '/edit?usp=sharing' suffix for '/export?format=csv' so the link
# can be handed straight to pd.read_csv.
csv_export_url = sheet_url.replace('/edit?usp=sharing', '/export?format=csv')

# Load the sheet into a DataFrame and show its first 3 rows.
df_google = pd.read_csv(csv_export_url)
df_google.head(3)

In [None]:
df_google.shape

In [None]:
df_google.columns.tolist()

In [None]:
df_google.dtypes

In [None]:
df_google.describe().T

In [None]:
#print the unique values for each of your categorical variables

# Columns stored with the 'object' dtype are treated as the categorical ones.
categorical_columns = df_google.select_dtypes(include='object').columns

for column in categorical_columns:
    # value_counts() lists each distinct value together with its frequency.
    counts = df_google[column].value_counts()
    print(f"Values in the {column} column:")
    print(counts)
    print('----')
    print()

In [None]:
df_google.Survived.unique()

In [None]:
df_google.Pclass.unique()

In [None]:
df_google.Sex.unique()

In [None]:
df_google.SibSp.unique()

In [None]:
df_google.Parch.unique()

In [None]:
df_google.Cabin.unique()

In [None]:
df_google.Embarked.unique()

In [None]:
def get_db_url(host, user, password, database):
    """Build the ``mysql+pymysql`` connection URL for a database.

    Args:
        host: Server address.
        user: Login name.
        password: Login password (inserted as-is, without any escaping).
        database: Name of the database to connect to.

    Returns:
        A string of the form ``mysql+pymysql://user:password@host/database``.
    """
    credentials = f'{user}:{password}'
    location = f'{host}/{database}'
    return f'mysql+pymysql://{credentials}@{location}'

def get_titanic_data():
    """Load every row of the ``passengers`` table from ``titanic_db``.

    The connection credentials (``host``, ``user``, ``password``) are
    imported from the local ``env`` module each time the function is called.

    Returns:
        The result of ``pd.read_sql`` for ``SELECT * FROM passengers;``.
    """
    from env import host, user, password

    database = 'titanic_db'
    query = "SELECT * FROM passengers;"

    # Pass the ``database`` variable rather than repeating the literal, so the
    # database name is defined in exactly one place.
    return pd.read_sql(query, get_db_url(host, user, password, database))

In [None]:
df = get_titanic_data()

In [None]:
df

In [None]:
def get_iris_data():
    """Load every row of the ``species`` table from ``iris_db``.

    The connection credentials (``host``, ``user``, ``password``) are
    imported from the local ``env`` module each time the function is called.

    Returns:
        The result of ``pd.read_sql`` for ``SELECT * FROM species;``.
    """
    # NOTE(review): only the ``species`` table is selected here, while the
    # preparation cells later in this notebook drop ``species_id`` and
    # ``measurement_id`` -- a join with a measurements table may have been
    # intended. Confirm against the database schema.
    from env import host, user, password

    database = 'iris_db'
    query = "SELECT * FROM species;"

    # Pass the ``database`` variable rather than repeating the literal, so the
    # database name is defined in exactly one place.
    return pd.read_sql(query, get_db_url(host, user, password, database))

In [None]:
df_iris = get_iris_data()
df_iris

# Data Preparation Exercises

### 1. Use the function defined in acquire.py to load the iris data.

In [None]:
import pandas as pd
import acquire 

df_iris = acquire.get_iris_data()
df_iris

### 2. Drop the species_id and measurement_id columns.

In [None]:
df_iris = df_iris.drop(columns = ['species_id', 'measurement_id', 'Unnamed: 0'])

In [None]:
df_iris

### 3. Rename the species_name column to just species.

In [None]:
df_iris = df_iris.rename(columns={"species_name": "species"})

In [None]:
df_iris

### 4. Create dummy variables of the species name.

In [None]:
df_dummy = pd.get_dummies(df_iris[['species']])
df_iris = pd.concat([df_iris, df_dummy], axis = 1)
df_iris.head()


### 5. Create a function named prep_iris that accepts the untransformed iris data, and returns the data with the transformations above applied.

In [None]:
def prep_iris(df=None):
    """Return the iris data with the preparation steps applied.

    Steps, in order:
      1. drop ``species_id`` and ``measurement_id`` (and ``Unnamed: 0`` when
         that column is present),
      2. rename ``species_name`` to ``species``,
      3. append the dummy columns built from ``species``.

    Args:
        df: Untransformed iris DataFrame. When omitted (or ``None``) the data
            is loaded with ``acquire.get_iris_data()``.

    Returns:
        A new DataFrame holding the prepared data; ``df`` itself is left
        unchanged.

    Raises:
        KeyError: if ``species_id`` or ``measurement_id`` is missing.
    """
    import pandas as pd

    if df is None:
        # Imported only on this path, so callers that pass their own frame do
        # not need the acquire module (or the database behind it).
        import acquire

        df = acquire.get_iris_data()

    prepared = df.drop(columns=['species_id', 'measurement_id'])
    # 'Unnamed: 0' is presumably an index column left over from a CSV
    # round-trip (TODO confirm); it is not always there, so a missing column
    # is tolerated instead of raising.
    prepared = prepared.drop(columns=['Unnamed: 0'], errors='ignore')
    prepared = prepared.rename(columns={"species_name": "species"})

    dummies = pd.get_dummies(prepared[['species']])

    return pd.concat([prepared, dummies], axis=1)
    

In [1]:
import prepare

In [2]:
prepare.prep_iris()

Unnamed: 0,species,sepal_length,sepal_width,petal_length,petal_width,species_setosa,species_versicolor,species_virginica
0,setosa,5.1,3.5,1.4,0.2,1,0,0
1,setosa,4.9,3.0,1.4,0.2,1,0,0
2,setosa,4.7,3.2,1.3,0.2,1,0,0
3,setosa,4.6,3.1,1.5,0.2,1,0,0
4,setosa,5.0,3.6,1.4,0.2,1,0,0
...,...,...,...,...,...,...,...,...
145,virginica,6.7,3.0,5.2,2.3,0,0,1
146,virginica,6.3,2.5,5.0,1.9,0,0,1
147,virginica,6.5,3.0,5.2,2.0,0,0,1
148,virginica,6.2,3.4,5.4,2.3,0,0,1
