<p><b>Perform the following operations using Python on the Iris data sets</b></p>
<ol>
    <li>Create data subsets for different species</li>
    <li>Merge two subsets</li>
    <li>Sort Data by Petal Length</li>
    <li>Transposing Data</li>
    <li>Melting Data to long format</li>
    <li>Casting data to wide format</li>
</ol>

In [35]:
import pandas as pd

In [36]:
# Column labels to apply to the Iris CSV when it is loaded:
# four measurement columns followed by the species label.
headers = [
    "sepal length(cm)",
    "sepal width(cm)",
    "petal length(cm)",
    "petal width(cm)",
    "class",
]

In [33]:
# Read the Iris CSV and label its columns with `headers`.
# header=None means no row of the file is used as column labels.
# NOTE(review): a later cell reports dtype('O') for petal length, so the
# numeric columns are not being parsed as floats here — confirm the file format.
df = pd.read_csv(
    "datasets/Iris/iris.csv",
    header=None,
    names=headers,
)

In [34]:
# Show the loaded frame as the cell output.
df

Unnamed: 0,sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [9]:
# Per-column summary statistics of the loaded frame.
df.describe()

Unnamed: 0,sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
count,150.0,150.0,150.0,150.0,150
unique,36.0,24.0,44.0,23.0,3
top,5.0,3.0,1.5,0.2,Iris-setosa
freq,10.0,26.0,14.0,27.0,50


In [10]:
# Count the missing values in each column.
df.isna().sum()

sepal length(cm)    0
sepal width(cm)     0
petal length(cm)    0
petal width(cm)     0
class               0
dtype: int64

<h3>1. Create data subsets for different species</h3>

In [11]:
# List the distinct labels present in the class column.
df['class'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [12]:
# Count the rows of each species by summing a boolean mask (True counts as 1).
no_of_iris_setosa = (df['class'] == "Iris-setosa").sum()
no_of_iris_versicolor = (df['class'] == "Iris-versicolor").sum()
no_of_iris_virginica = (df['class'] == "Iris-virginica").sum()

print(f"No of Iris-setosa: {no_of_iris_setosa}")
print(f"No of Iris-versicolor: {no_of_iris_versicolor}")
# Label fixed: it used to read "Iris-verginica", which does not match the class name.
print(f"No of Iris-virginica: {no_of_iris_virginica}")

No of Iris-setosa: 50
No of Iris-versicolor: 50
No of Iris-verginica: 50


In [13]:
# Build one frame per species by filtering rows on the class column.
iris_setosa_df = df.loc[df['class'].eq("Iris-setosa")]
iris_versicolor_df = df.loc[df['class'].eq("Iris-versicolor")]
iris_virginica_df = df.loc[df['class'].eq("Iris-virginica")]

<h3>2. Merge two subsets (Iris-setosa and Iris-versicolor)</h3>

In [14]:
# Stack the setosa rows on top of the versicolor rows (row-wise concatenation);
# the original row labels of both subsets are kept.
merged_df = pd.concat(
    (iris_setosa_df, iris_versicolor_df),
    axis=0,
)

In [15]:
# Show the merged setosa + versicolor frame.
merged_df

Unnamed: 0,sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
94,5.7,3.0,4.2,1.2,Iris-versicolor
95,5.7,2.9,4.2,1.3,Iris-versicolor
96,6.2,2.9,4.3,1.3,Iris-versicolor
97,5.1,2.5,3.0,1.1,Iris-versicolor


In [16]:
# (rows, columns) of the merged frame.
merged_df.shape

(100, 5)

<h3>3. Sort Data by Petal Length</h3>

<h4>Converting the data type of petal length from string to float</h4>

In [17]:
# Inspect the dtype of the petal length column before sorting on it.
df['petal length(cm)'].dtype

dtype('O')

In [18]:
# Replace the petal length column with its numeric conversion so that sorting
# compares numbers. errors="raise" is the pandas default: an unparseable value
# raises instead of being silently coerced.
# NOTE(review): this overwrites a column of `df` in place; the species subsets
# created earlier are separate frames and are not converted by this line.
df['petal length(cm)'] = pd.to_numeric(
    df['petal length(cm)'],
    errors="raise",
)

In [19]:
# New frame ordered by petal length, smallest first; `df` itself is not reordered.
sorted_df = df.sort_values("petal length(cm)", ascending=True)

In [20]:
# Show the frame sorted by petal length.
sorted_df

Unnamed: 0,sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
21,4.6,3.6,1.0,0.2,Iris-setosa
12,4.3,3.0,1.1,0.1,Iris-setosa
13,5.8,4.0,1.2,0.2,Iris-setosa
34,5.0,3.2,1.2,0.2,Iris-setosa
35,5.5,3.5,1.3,0.2,Iris-setosa
...,...,...,...,...,...
130,7.9,3.8,6.4,2.0,Iris-virginica
104,7.6,3.0,6.6,2.1,Iris-virginica
121,7.7,2.8,6.7,2.0,Iris-virginica
116,7.7,3.8,6.7,2.2,Iris-virginica


<h3>4. Transposing Data</h3>

In [21]:
# Swap rows and columns: each original column label becomes a row label.
transposed_df = df.T

In [22]:
# Show the transposed frame.
transposed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
sepal length(cm),4.9,4.7,4.6,5.0,5.4,4.6,5.0,4.4,4.9,5.4,...,6.9,5.8,6.8,6.7,6.7,6.3,6.5,6.2,5.9,5.1
sepal width(cm),3.0,3.2,3.1,3.6,3.9,3.4,3.4,2.9,3.1,3.7,...,3.1,2.7,3.2,3.3,3.0,2.5,3.0,3.4,3.0,3.5
petal length(cm),1.4,1.3,1.5,1.4,1.7,1.4,1.5,1.4,1.5,1.5,...,5.1,5.1,5.9,5.7,5.2,5.0,5.2,5.4,5.1,1.4
petal width(cm),0.2,0.2,0.2,0.2,0.4,0.3,0.2,0.2,0.1,0.2,...,2.3,1.9,2.3,2.5,2.3,1.9,2.0,2.3,1.8,0.2
class,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa,...,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-virginica,Iris-setosa


<h3>5. Melting data into long format</h3>

In [23]:
# Reshape to long format: 'class' stays as the identifier column and every
# other column is unpivoted into (variable name, value) rows.
# NOTE(review): the variable column is labelled "Petal Properties" but it also
# holds the sepal columns — consider a more general label.
melted_df = df.melt(
    id_vars=['class'],
    var_name="Petal Properties",
    value_name="value",
)

In [25]:
# Show the long-format (melted) frame.
melted_df

Unnamed: 0,class,Petal Properties,value
0,Iris-setosa,sepal length(cm),4.9
1,Iris-setosa,sepal length(cm),4.7
2,Iris-setosa,sepal length(cm),4.6
3,Iris-setosa,sepal length(cm),5.0
4,Iris-setosa,sepal length(cm),5.4
...,...,...,...
595,Iris-virginica,petal width(cm),1.9
596,Iris-virginica,petal width(cm),2.0
597,Iris-virginica,petal width(cm),2.3
598,Iris-virginica,petal width(cm),1.8


<h3>6. Casting data into wide format</h3>

In [26]:
# Take three rows by position (1, 51 and 101), keeping all columns, to get a
# small sample for the pivot below.
# NOTE(review): presumably chosen to give one row per species — confirm.
temp_df = df.iloc[[1, 51, 101]]
temp_df

Unnamed: 0,sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
1,4.7,3.2,1.3,0.2,Iris-setosa
51,6.9,3.1,4.9,1.5,Iris-versicolor
101,7.1,3.0,5.9,2.1,Iris-virginica


In [30]:
# Cast to wide format: one row per class, one column per
# (remaining column, sepal width value) pair; combinations with no data are NaN.
pivoted_df = pd.pivot(
    temp_df,
    index="class",
    columns="sepal width(cm)",
)
pivoted_df

Unnamed: 0_level_0,sepal length(cm),sepal length(cm),sepal length(cm),petal length(cm),petal length(cm),petal length(cm),petal width(cm),petal width(cm),petal width(cm)
sepal width(cm),3.0,3.1,3.2,3.0,3.1,3.2,3.0,3.1,3.2
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Iris-setosa,,,4.7,,,1.3,,,0.2
Iris-versicolor,,6.9,,,4.9,,,1.5,
Iris-virginica,7.1,,,5.9,,,2.1,,
