# Wine

### Introduction:

This exercise is a adaptation from the UCI Wine dataset.
The only pupose is to practice deleting data with pandas.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). 

### Step 3. Assign it to a variable called wine

In [3]:
#!git clone https://github.com/PiotrJZielinski/ml-throwdown-datasets
#!ls  'ml-throwdown-datasets/'  
wine = pd.read_csv('../03_Grouping/ml-throwdown-datasets/wine.csv')

wine.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline,Class
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,one
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,one
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,one
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,one
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,one


### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns

In [4]:
wine = wine.drop(wine.columns[[0,3,6,8,11,12,13]], axis = 1)

wine.head()

Unnamed: 0,Malic acid,Ash,Magnesium,Total phenols,Nonflavanoid phenols,Color intensity,Hue
0,1.71,2.43,127,2.8,0.28,5.64,1.04
1,1.78,2.14,100,2.65,0.26,4.38,1.05
2,2.36,2.67,101,2.8,0.3,5.68,1.03
3,1.95,2.5,113,3.85,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04


### Step 5. Assign the columns as below:

The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it):  
1) alcohol  
2) malic_acid  
3) alcalinity_of_ash  
4) magnesium  
5) flavanoids  
6) proanthocyanins  
7) hue 

In [5]:
wine.columns = ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'magnesium', 'flavanoids', 'proanthocyanins', 'hue']
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,1.71,2.43,127,2.8,0.28,5.64,1.04
1,1.78,2.14,100,2.65,0.26,4.38,1.05
2,2.36,2.67,101,2.8,0.3,5.68,1.03
3,1.95,2.5,113,3.85,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04


### Step 6. Set the values of the first 3 rows from alcohol as NaN

In [6]:
wine.iloc[0:3, 0] = np.nan
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,,2.43,127,2.8,0.28,5.64,1.04
1,,2.14,100,2.65,0.26,4.38,1.05
2,,2.67,101,2.8,0.3,5.68,1.03
3,1.95,2.5,113,3.85,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04


### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN

In [7]:
wine.iloc[2:4, 3] = np.nan
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,,2.43,127,2.8,0.28,5.64,1.04
1,,2.14,100,2.65,0.26,4.38,1.05
2,,2.67,101,,0.3,5.68,1.03
3,1.95,2.5,113,,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04


### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium

In [8]:
wine.alcohol.fillna(10, inplace = True)

wine.magnesium.fillna(100, inplace = True)

wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,10.0,2.43,127,2.8,0.28,5.64,1.04
1,10.0,2.14,100,2.65,0.26,4.38,1.05
2,10.0,2.67,101,100.0,0.3,5.68,1.03
3,1.95,2.5,113,100.0,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04


### Step 9. Count the number of missing values

In [9]:
wine.isnull().sum()

alcohol              0
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64

### Step 10.  Create an array of 10 random numbers up until 10

In [10]:
random = np.random.randint(10, size = 10)
random

array([3, 1, 7, 7, 1, 7, 7, 7, 8, 5])

### Step 11.  Use random numbers you generated as an index and assign NaN value to each of cell.

In [11]:
wine.alcohol[random] = np.nan
wine.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,10.0,2.43,127,2.8,0.28,5.64,1.04
1,,2.14,100,2.65,0.26,4.38,1.05
2,10.0,2.67,101,100.0,0.3,5.68,1.03
3,,2.5,113,100.0,0.24,7.8,0.86
4,2.59,2.87,118,2.8,0.39,4.32,1.04
5,,2.45,112,3.27,0.34,6.75,1.05
6,1.87,2.45,96,2.5,0.3,5.25,1.02
7,,2.61,121,2.6,0.31,5.05,1.06
8,,2.17,97,2.8,0.29,5.2,1.08
9,1.35,2.27,98,2.98,0.22,7.22,1.01


### Step 12.  How many missing values do we have?

In [12]:
wine.isnull().sum()

alcohol              5
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64

### Step 13. Delete the rows that contain missing values

In [13]:
wine = wine.dropna(axis = 0, how = "any")
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,10.0,2.43,127,2.8,0.28,5.64,1.04
2,10.0,2.67,101,100.0,0.3,5.68,1.03
4,2.59,2.87,118,2.8,0.39,4.32,1.04
6,1.87,2.45,96,2.5,0.3,5.25,1.02
9,1.35,2.27,98,2.98,0.22,7.22,1.01


### Step 14. Print only the non-null values in alcohol

In [14]:
mask = wine.alcohol.notnull()
mask

0      True
2      True
4      True
6      True
9      True
10     True
11     True
12     True
13     True
14     True
15     True
16     True
17     True
18     True
19     True
20     True
21     True
22     True
23     True
24     True
25     True
26     True
27     True
28     True
29     True
30     True
31     True
32     True
33     True
34     True
       ... 
148    True
149    True
150    True
151    True
152    True
153    True
154    True
155    True
156    True
157    True
158    True
159    True
160    True
161    True
162    True
163    True
164    True
165    True
166    True
167    True
168    True
169    True
170    True
171    True
172    True
173    True
174    True
175    True
176    True
177    True
Name: alcohol, Length: 173, dtype: bool

In [15]:
wine.alcohol[mask]

0      10.00
2      10.00
4       2.59
6       1.87
9       1.35
10      2.16
11      1.48
12      1.73
13      1.73
14      1.87
15      1.81
16      1.92
17      1.57
18      1.59
19      3.10
20      1.63
21      3.80
22      1.86
23      1.60
24      1.81
25      2.05
26      1.77
27      1.72
28      1.90
29      1.68
30      1.50
31      1.66
32      1.83
33      1.53
34      1.80
       ...  
148     3.24
149     3.90
150     3.12
151     2.67
152     1.90
153     3.30
154     1.29
155     5.19
156     4.12
157     3.03
158     1.68
159     1.67
160     3.83
161     3.26
162     3.27
163     3.45
164     2.76
165     4.36
166     3.70
167     3.37
168     2.58
169     4.60
170     3.03
171     2.39
172     2.51
173     5.65
174     3.91
175     4.28
176     2.59
177     4.10
Name: alcohol, Length: 173, dtype: float64

### Step 15.  Reset the index, so it starts with 0 again

In [16]:
wine = wine.reset_index(drop = True)
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,10.0,2.43,127,2.8,0.28,5.64,1.04
1,10.0,2.67,101,100.0,0.3,5.68,1.03
2,2.59,2.87,118,2.8,0.39,4.32,1.04
3,1.87,2.45,96,2.5,0.3,5.25,1.02
4,1.35,2.27,98,2.98,0.22,7.22,1.01


### BONUS: Create your own question and answer it.