## 1. Download the Iris data set from the UCI Machine Learning Repository.

In [1]:
# List the working directory to see which files are available to the notebook.
!ls

bezdekIris.data		 Index	    iris.names
data-slicing-iris.ipynb  iris.data  iris.zip


In [2]:
# Print the iris.names file that accompanies the data.
!cat iris.names

1. Title: Iris Plants Database
	Updated Sept 21 by C.Blake - Added discrepency information

2. Sources:
     (a) Creator: R.A. Fisher
     (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
     (c) Date: July, 1988

3. Past Usage:
   - Publications: too many to mention!!!  Here are a few.
   1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
      Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
      to Mathematical Statistics" (John Wiley, NY, 1950).
   2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
      (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
   3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
      Structure and Classification Rule for Recognition in Partially Exposed
      Environments".  IEEE Transactions on Pattern Analysis and Machine
      Intelligence, Vol. PAMI-2, No. 1, 67-71.
      -- Results:
         -- very low misclassifica

## 2. Load the data using Pandas and then write a function that outputs the descriptive stats for each numeric feature while the categorical variable is held fixed.

## 3. Run this function for each of the four numeric variables in the Iris data set.

In [3]:
import pandas as pd

# Column names are supplied by hand (taken from the dataset documentation);
# the file is read without a header row, so every line is treated as data.
columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]

df = pd.read_csv('iris.data', header=None, names=columns)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Distinct class labels, in order of first appearance.
pd.unique(df['class'])

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
# Mean of every numeric feature, restricted to the Iris-versicolor rows.
is_versicolor = df['class'] == 'Iris-versicolor'
df.loc[is_versicolor].drop(columns=['class']).mean()

sepal_length    5.936
sepal_width     2.770
petal_length    4.260
petal_width     1.326
dtype: float64

In [6]:
# Per-class summary: class label -> {'mean': Series, 'stddev': Series},
# where each Series holds one value per remaining (non-'class') column.
descriptive_stats_iris = {}

for label in df['class'].unique():
    # Keep only this class's rows, then drop the label column before aggregating.
    features = df.loc[df['class'] == label].drop(columns=['class'])
    descriptive_stats_iris[label] = {
        'mean': features.mean(),
        'stddev': features.std(),
    }

print(descriptive_stats_iris)

{'Iris-setosa': {'mean': sepal_length    5.006
sepal_width     3.418
petal_length    1.464
petal_width     0.244
dtype: float64, 'stddev': sepal_length    0.352490
sepal_width     0.381024
petal_length    0.173511
petal_width     0.107210
dtype: float64}, 'Iris-versicolor': {'mean': sepal_length    5.936
sepal_width     2.770
petal_length    4.260
petal_width     1.326
dtype: float64, 'stddev': sepal_length    0.516171
sepal_width     0.313798
petal_length    0.469911
petal_width     0.197753
dtype: float64}, 'Iris-virginica': {'mean': sepal_length    6.588
sepal_width     2.974
petal_length    5.552
petal_width     2.026
dtype: float64, 'stddev': sepal_length    0.635880
sepal_width     0.322497
petal_length    0.551895
petal_width     0.274650
dtype: float64}}


In [7]:
def slice_iris(df, feature):
    """Print the mean and standard deviation of one feature for each class.

    Args:
        df: DataFrame with a "class" column plus the column named by ``feature``.
        feature: Name of the column to summarize.

    Returns:
        None. For every distinct value of ``df["class"]`` (in order of first
        appearance) three lines are printed: the class, the mean, and the
        standard deviation (``Series.std`` with its default arguments), both
        formatted to four decimal places. One blank line is printed after the
        last class.
    """
    labels = df["class"]
    for species in labels.unique():
        # Select the feature values belonging to this class in one step.
        values = df.loc[labels == species, feature]
        print(
            f"Class: {species}\n"
            f"{feature} mean: {values.mean():.4f}\n"
            f"{feature} stddev: {values.std():.4f}"
        )
    # Blank separator so consecutive calls are visually distinct.
    print()


# Summarize each of the four numeric features, one call per feature.
for feature in ("sepal_length", "sepal_width", "petal_length", "petal_width"):
    slice_iris(df, feature)

Class: Iris-setosa
sepal_length mean: 5.0060
sepal_length stddev: 0.3525
Class: Iris-versicolor
sepal_length mean: 5.9360
sepal_length stddev: 0.5162
Class: Iris-virginica
sepal_length mean: 6.5880
sepal_length stddev: 0.6359

Class: Iris-setosa
sepal_width mean: 3.4180
sepal_width stddev: 0.3810
Class: Iris-versicolor
sepal_width mean: 2.7700
sepal_width stddev: 0.3138
Class: Iris-virginica
sepal_width mean: 2.9740
sepal_width stddev: 0.3225

Class: Iris-setosa
petal_length mean: 1.4640
petal_length stddev: 0.1735
Class: Iris-versicolor
petal_length mean: 4.2600
petal_length stddev: 0.4699
Class: Iris-virginica
petal_length mean: 5.5520
petal_length stddev: 0.5519

Class: Iris-setosa
petal_width mean: 0.2440
petal_width stddev: 0.1072
Class: Iris-versicolor
petal_width mean: 1.3260
petal_width stddev: 0.1978
Class: Iris-virginica
petal_width mean: 2.0260
petal_width stddev: 0.2747

