# Pandas Tutorial
![](https://pythonawesome.com/content/images/2018/05/pandas-logo.png)

In [2]:
import pandas as pd

## Creating DataFrames

In [2]:
data = pd.DataFrame(
    {
        "Car": ["BMW", "Mercedes", "Buggati", None],
        "Cargo Company": ["TCS", "DHL", "Fedex", "UPS"],
    },
    index=[1, 10, 100, 1000],
)
data

Unnamed: 0,Car,Cargo Company
1,BMW,TCS
10,Mercedes,DHL
100,Buggati,Fedex
1000,,UPS


In [3]:
data = pd.DataFrame(
    [["BMW", "TCS"], ["Mercedes", "DHL"], ["Buggati", "Fedex"], [None, "UPS"]],
    columns=["Car", "Cargo Company"],
    index=[1, 10, 100, 1000],
)
data

Unnamed: 0,Car,Cargo Company
1,BMW,TCS
10,Mercedes,DHL
100,Buggati,Fedex
1000,,UPS


## Creating Variables

In [4]:
variable = pd.Series(["Germany", "USA", "Dubai", "Austria"], name="Country")
variable

0    Germany
1        USA
2      Dubai
3    Austria
Name: Country, dtype: object

In [5]:
# Adding new variable or column to dataframe...
data["country"] = variable
data

Unnamed: 0,Car,Cargo Company,country
1,BMW,TCS,USA
10,Mercedes,DHL,
100,Buggati,Fedex,
1000,,UPS,


In [6]:
variable = pd.Series(
    ["Germany", "USA", "Dubai", "Austria"], name="Country", index=[1, 10, 100, 1000]
)
variable

1       Germany
10          USA
100       Dubai
1000    Austria
Name: Country, dtype: object

In [7]:
data["country"] = variable
data

Unnamed: 0,Car,Cargo Company,country
1,BMW,TCS,Germany
10,Mercedes,DHL,USA
100,Buggati,Fedex,Dubai
1000,,UPS,Austria


### loc vs iloc

In [8]:
data.Car.loc[0:10]

1          BMW
10    Mercedes
Name: Car, dtype: object

In [9]:
data.Car.iloc[0:10]

1            BMW
10      Mercedes
100      Buggati
1000        None
Name: Car, dtype: object

## Reading csv file

In [10]:
# writing a dataframe to csv file..
data.to_csv("car_data.csv")

In [11]:
data = pd.read_csv('winemag-data_first150k.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


In [7]:
# Now it wouldn't allocate index because we already have
data = pd.read_csv("winemag-data_first150k.csv", index_col=0)
data.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


## Selecting & Indexing

In [13]:
# select a column
country = data["country"]
country[:10]

0        US
1     Spain
2        US
3        US
4    France
5     Spain
6     Spain
7     Spain
8        US
9        US
Name: country, dtype: object

In [14]:
# select a specific row
country = data.loc[10]
country

country                                                    Italy
description    Elegance, complexity and structure come togeth...
designation                                   Ronco della Chiesa
points                                                        95
price                                                         80
province                                      Northeastern Italy
region_1                                                  Collio
region_2                                                     NaN
variety                                                 Friulano
winery                                          Borgo del Tiglio
Name: 10, dtype: object

In [15]:
# select a specific rows and columns
data.loc[10:15, ["country", "points"]]

Unnamed: 0,country,points
10,Italy,95
11,US,95
12,US,95
13,France,95
14,US,95
15,US,95


In [16]:
data.iloc[10:15, [0, 5]]

Unnamed: 0,country,province
10,Italy,Northeastern Italy
11,US,Oregon
12,US,Oregon
13,France,Southwest France
14,US,Oregon


In [17]:
# select specific rows of specific columns
# loc selects row index values assigned to index of dataframe
reviews = data.description.loc[20:25]
reviews

20    Heitz has made this stellar rosé from the rare...
21    Alluring, complex and powerful aromas of grill...
22    Tarry blackberry and cheesy oak aromas are app...
23    The apogee of this ambitious winery's white wi...
24    San Jose-based producer Adam Comartin heads 1,...
25    Yields were down in 2015, but intensity is up,...
Name: description, dtype: object

In [18]:
# it just selects according to the rows without considering index number
reviews = data.description.iloc[20:25]
reviews

20    Heitz has made this stellar rosé from the rare...
21    Alluring, complex and powerful aromas of grill...
22    Tarry blackberry and cheesy oak aromas are app...
23    The apogee of this ambitious winery's white wi...
24    San Jose-based producer Adam Comartin heads 1,...
Name: description, dtype: object

In [19]:
data.loc[data.price >= 1500, ["country", "points"]]

Unnamed: 0,country,points
13318,US,91
34920,France,99
34922,France,98


In [20]:
data.description[data.price >= 1500]

13318    The nose on this single-vineyard wine from a s...
34920    A big, powerful wine that sums up the richness...
34922    A massive wine for Margaux, packed with tannin...
Name: description, dtype: object

In [21]:
data.description.loc[data.price >= 1500]

13318    The nose on this single-vineyard wine from a s...
34920    A big, powerful wine that sums up the richness...
34922    A massive wine for Margaux, packed with tannin...
Name: description, dtype: object

In [22]:
data.price.loc[data.price == 4.0].count()

18

In [23]:
data.description.loc[((data.price == 4.0) & (data.country.isin(["Spain", "USA"])))]

34415     This opens with standard cherry and berry arom...
34682     Nice on the nose, this has a leafy note and a ...
102035    This opens with standard cherry and berry arom...
102332    Nice on the nose, this has a leafy note and a ...
Name: description, dtype: object

## Data Information

In [24]:
data.describe()

Unnamed: 0,points,price
count,150930.0,137235.0
mean,87.888418,33.131482
std,3.222392,36.322536
min,80.0,4.0
25%,86.0,16.0
50%,88.0,24.0
75%,90.0,40.0
max,100.0,2300.0


In [25]:
data.points.median()

88.0

In [26]:
data.points.max()

100

In [27]:
# unique values for winery column...
data.winery.unique()

array(['Heitz', 'Bodega Carmen Rodríguez', 'Macauley', ..., 'Screwed',
       'Red Bucket', 'White Knot'], dtype=object)

In [28]:
# number of unique values for winery column...
data.winery.nunique()

14810

In [29]:
len(data.winery.unique())

14810

In [30]:
data.winery.value_counts()

Williams Selyem                374
Testarossa                     274
DFJ Vinhos                     258
Chateau Ste. Michelle          225
Columbia Crest                 217
Kendall-Jackson                216
Concha y Toro                  216
Trapiche                       205
Bouchard Père & Fils           203
Kenwood                        191
De Loach                       189
Joseph Drouhin                 189
Georges Duboeuf                188
Cameron Hughes                 172
Wines & Winemakers             169
Albert Bichot                  167
Robert Mondavi                 166
Louis Latour                   154
Dry Creek Vineyard             153
Morgan                         153
D'Arenberg                     153
Concannon                      151
Martin Ray                     149
Errazuriz                      148
L'Ecole No. 41                 144
Gary Farrell                   144
Olivier Leflaive               143
Montes                         142
Waterbrook          

## Advanced Data Operations and Selection 

In [31]:
# Centering a variable around mean or making that variable normal(used in Normal Distribution)
centered_price = data["price"] - data.price.mean()
centered_price.head(5)

0    201.868518
1     76.868518
2     56.868518
3     31.868518
4     32.868518
Name: price, dtype: float64

In [32]:
# select wine with the highest points to price ratio
highest_points_price = (data.points / data.price).idxmax()
data.loc[highest_points_price]

country                                                       US
description    There's a lot going on in this Merlot, which i...
designation                                                  NaN
points                                                        86
price                                                          4
province                                              California
region_1                                              California
region_2                                        California Other
variety                                                   Merlot
winery                                                    Bandit
Name: 25645, dtype: object

In [33]:
# check reviews whose description contains some particular words like 'tropical'
selection_type = data.description.map(lambda desc: "tropical" in desc)
selection_type

0         False
1         False
2         False
3         False
4         False
5         False
6         False
7         False
8         False
9         False
10        False
11        False
12        False
13        False
14        False
15        False
16        False
17        False
18        False
19        False
20        False
21        False
22        False
23        False
24        False
25        False
26        False
27        False
28        False
29        False
          ...  
150900    False
150901    False
150902    False
150903    False
150904    False
150905    False
150906    False
150907    False
150908    False
150909    False
150910    False
150911    False
150912    False
150913    False
150914    False
150915    False
150916    False
150917    False
150918    False
150919    False
150920    False
150921    False
150922    False
150923    False
150924    False
150925    False
150926    False
150927    False
150928    False
150929    False
Name: description, Lengt

In [34]:
selection_type_index = data.description.map(lambda desc: "tropical" in desc).index
selection_type_index

Int64Index([     0,      1,      2,      3,      4,      5,      6,      7,
                 8,      9,
            ...
            150920, 150921, 150922, 150923, 150924, 150925, 150926, 150927,
            150928, 150929],
           dtype='int64', length=150930)

In [35]:
selection_type_count = data.description.map(lambda desc: "tropical" in desc).sum()
selection_type_count

4135

In [37]:
data.iloc[selection_type_index]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude
5,Spain,"Deep, dense and pure from the opening bell, th...",Numanthia,95,73.0,Northern Spain,Toro,,Tinta de Toro,Numanthia
6,Spain,Slightly gritty black-fruit aromas include a s...,San Román,95,65.0,Northern Spain,Toro,,Tinta de Toro,Maurodos
7,Spain,Lush cedary black-fruit aromas are luxe and of...,Carodorum Único Crianza,95,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
8,US,This re-named vineyard was formerly bottled as...,Silice,95,65.0,Oregon,Chehalem Mountains,Willamette Valley,Pinot Noir,Bergström
9,US,The producer sources from two blocks of the vi...,Gap's Crown Vineyard,95,60.0,California,Sonoma Coast,Sonoma,Pinot Noir,Blue Farm


In [38]:
data.iloc[selection_type_index]["description"]

0         This tremendous 100% varietal wine hails from ...
1         Ripe aromas of fig, blackberry and cassis are ...
2         Mac Watson honors the memory of a wine once ma...
3         This spent 20 months in 30% new French oak, an...
4         This is the top wine from La Bégude, named aft...
5         Deep, dense and pure from the opening bell, th...
6         Slightly gritty black-fruit aromas include a s...
7         Lush cedary black-fruit aromas are luxe and of...
8         This re-named vineyard was formerly bottled as...
9         The producer sources from two blocks of the vi...
10        Elegance, complexity and structure come togeth...
11        From 18-year-old vines, this supple well-balan...
12        A standout even in this terrific lineup of 201...
13        This wine is in peak condition. The tannins an...
14        With its sophisticated mix of mineral, acid an...
15        First made in 2006, this succulent luscious Ch...
16        This blockbuster, powerhouse o

In [39]:
# Create a series that has count of two types of wine 'Heitz' and Ponzy
# count 'Heitz'
heitz_count = data.winery.map(lambda wine: "Heitz" in wine).sum()
ponzi_count = data.winery.map(lambda wine: "Ponzi" in wine).sum()
wine_count = pd.Series([heitz_count, ponzi_count], index=["Heitz", "Ponzi"])
wine_count
# Here apply can also be used inplace of map...

Heitz     3
Ponzi    78
dtype: int64

In [40]:
heitz_count = data.winery[data.winery == "Heitz"].count()
ponzi_count = data.winery[data.winery == "Ponzi"].count()
wine_count = pd.Series([heitz_count, ponzi_count], index=["Heitz", "Ponzi"])
wine_count

Heitz     3
Ponzi    74
dtype: int64

In [41]:
temp_data = pd.DataFrame(
    {"Bank": ["Habib City Limited", "City", "United City Limited", "MEEZAN City"]}
)
temp_data

Unnamed: 0,Bank
0,Habib City Limited
1,City
2,United City Limited
3,MEEZAN City


In [42]:
temp_data.Bank.map({"City": "Bank"})

0     NaN
1    Bank
2     NaN
3     NaN
Name: Bank, dtype: object

In [43]:
def replaceFunc(row):
    return row["Bank"].replace("City", "Bank")


temp_data.apply(replaceFunc, axis=1)

0     Habib Bank Limited
1                   Bank
2    United Bank Limited
3            MEEZAN Bank
dtype: object

In [44]:
temp_data.apply(lambda row: row["Bank"].replace("City", "Bank"), axis=1)

0     Habib Bank Limited
1                   Bank
2    United Bank Limited
3            MEEZAN Bank
dtype: object

In [46]:
temp_data.Bank.apply(lambda row: row.replace("City", "Bank"))

0     Habib Bank Limited
1                   Bank
2    United Bank Limited
3            MEEZAN Bank
Name: Bank, dtype: object

## Grouping and Sorting 

In [47]:
# groupby clause create groups of rows of data based on certain column name
# in that output each group is ordered alphabateically according to the values of that column...
# .get_group('group_name') to get the group...
# .first() to get the first group

In [48]:
# It will give the size of each group...
data.groupby("country").size()

country
Albania                       2
Argentina                  5631
Australia                  4957
Austria                    3057
Bosnia and Herzegovina        4
Brazil                       25
Bulgaria                     77
Canada                      196
Chile                      5816
China                         3
Croatia                      89
Cyprus                       31
Czech Republic                6
Egypt                         3
England                       9
France                    21098
Georgia                      43
Germany                    2452
Greece                      884
Hungary                     231
India                         8
Israel                      630
Italy                     23478
Japan                         2
Lebanon                      37
Lithuania                     8
Luxembourg                    9
Macedonia                    16
Mexico                       63
Moldova                      71
Montenegro                    2


In [54]:
# Find most expensive winery of each country...(print in descending order...)
data.groupby("country").price.max().sort_index(ascending=False)

country
Uruguay                     60.0
Ukraine                     13.0
US-France                   50.0
US                        2013.0
Turkey                     120.0
Tunisia                      NaN
Switzerland                 38.0
Spain                      770.0
South Korea                 16.0
South Africa               145.0
Slovenia                    90.0
Slovakia                    16.0
Serbia                      42.0
Romania                    320.0
Portugal                   980.0
New Zealand                125.0
Morocco                     35.0
Montenegro                  10.0
Moldova                     42.0
Mexico                     108.0
Macedonia                   25.0
Luxembourg                  50.0
Lithuania                   10.0
Lebanon                     51.0
Japan                       24.0
Italy                      900.0
Israel                     150.0
India                       20.0
Hungary                    764.0
Greece                     120.0
Ge

In [55]:
# Find maximum and minimum price for a variety of wine...
data.groupby("variety").price.agg([max, min])

Unnamed: 0_level_0,max,min
variety,Unnamed: 1_level_1,Unnamed: 2_level_1
Agiorgitiko,65.0,8.0
Aglianico,130.0,6.0
Aidani,27.0,27.0
Airen,10.0,8.0
Albana,66.0,8.0
Albariño,110.0,10.0
Albarossa,40.0,40.0
Albarín,15.0,15.0
Aleatico,50.0,30.0
Alfrocheiro,40.0,11.0


In [58]:
data.sort_values(by=["price", "variety"])

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
34415,Spain,This opens with standard cherry and berry arom...,,84,4.0,Levante,Yecla,,Cabernet Sauvignon,Terrenal
102035,Spain,This opens with standard cherry and berry arom...,,84,4.0,Levante,Yecla,,Cabernet Sauvignon,Terrenal
118347,US,"Light and earthy, this wine-in-a-box is clean ...",,84,4.0,California,California,California Other,Cabernet Sauvignon,Bandit
1858,US,"Sweet and fruity, this canned wine feels soft ...",Unoaked,83,4.0,California,California,California Other,Chardonnay,Pam's Cuties
119285,US,"Packaged in a cute yellow recyclable Tetrapak,...",,84,4.0,California,California,California Other,Chardonnay,Bandit
90546,Argentina,Clean as anyone should reasonably expect given...,,85,4.0,Mendoza Province,Mendoza,,Malbec,Toca Diamonte
36716,Argentina,"Crimson in color but also translucent, with a ...",Red,84,4.0,Mendoza Province,Mendoza,,Malbec-Syrah,Broke Ass
91766,Argentina,"Crimson in color but also translucent, with a ...",Red,84,4.0,Mendoza Province,Mendoza,,Malbec-Syrah,Broke Ass
25645,US,"There's a lot going on in this Merlot, which i...",,86,4.0,California,California,California Other,Merlot,Bandit
48655,US,"There's a lot going on in this Merlot, which i...",,86,4.0,California,California,California Other,Merlot,Bandit


In [62]:
# Show average review points given to a winery...
data.groupby("winery").points.mean().sort_values(ascending=False)

winery
Sloan                           100.000000
Mascarello Giuseppe e Figlio     99.000000
Domaine des Lambrays             98.000000
Château Climens                  97.111111
Au Sommet                        97.000000
Ovid                             97.000000
Cardinale                        97.000000
Gandona                          97.000000
Château d'Yquem                  96.875000
Harlan Estate                    96.625000
Château Pétrus                   96.615385
Semper                           96.000000
Araujo                           96.000000
Screaming Eagle                  96.000000
Bryant Family                    96.000000
Tenuta dell'Ornellaia            95.878788
Clos de la Coulée de Serrant     95.750000
The Maiden                       95.750000
Salon                            95.714286
Château Léoville Barton          95.631579
Ornellaia                        95.500000
Masseto                          95.500000
Krug                             95.363636
Vieu

In [60]:
data.columns

Index(['country', 'description', 'designation', 'points', 'price', 'province',
       'region_1', 'region_2', 'variety', 'winery'],
      dtype='object')

In [67]:
# Find the no of wines most common in a country of each variety in that country...
data.groupby(["country", "variety"]).winery.count().sort_values(ascending=False)

country       variety                       
US            Pinot Noir                        10340
              Cabernet Sauvignon                 9178
              Chardonnay                         8127
France        Bordeaux-style Red Blend           4960
US            Syrah                              4274
              Zinfandel                          3789
Italy         Red Blend                          3717
US            Merlot                             3630
              Red Blend                          3352
France        Chardonnay                         2892
Italy         Sangiovese                         2722
US            Sauvignon Blanc                    2586
Portugal      Portuguese Red                     2216
Italy         Nebbiolo                           2190
Spain         Tempranillo                        2145
Argentina     Malbec                             2145
Germany       Riesling                           2094
France        Pinot Noir             

## Data Types & Missing Data 

In [80]:
data.price.dtype

dtype('float64')

In [82]:
temp = data.price.astype("float32")
temp.dtype

dtype('float32')

In [83]:
temp = temp.astype("float64")
temp.dtype

dtype('float64')

In [108]:
data[data.isna().any(axis=1)]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude
5,Spain,"Deep, dense and pure from the opening bell, th...",Numanthia,95,73.0,Northern Spain,Toro,,Tinta de Toro,Numanthia
6,Spain,Slightly gritty black-fruit aromas include a s...,San Román,95,65.0,Northern Spain,Toro,,Tinta de Toro,Maurodos
7,Spain,Lush cedary black-fruit aromas are luxe and of...,Carodorum Único Crianza,95,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
10,Italy,"Elegance, complexity and structure come togeth...",Ronco della Chiesa,95,80.0,Northeastern Italy,Collio,,Friulano,Borgo del Tiglio
13,France,This wine is in peak condition. The tannins an...,Château Montus Prestige,95,90.0,Southwest France,Madiran,,Tannat,Vignobles Brumont
17,Spain,"Nicely oaked blackberry, licorice, vanilla and...",6 Años Reserva Premium,95,80.0,Northern Spain,Ribera del Duero,,Tempranillo,Valduero
18,France,Coming from a seven-acre vineyard named after ...,Le Pigeonnier,95,290.0,Southwest France,Cahors,,Malbec,Château Lagrézette
21,Spain,"Alluring, complex and powerful aromas of grill...",Prado Enea Gran Reserva,95,79.0,Northern Spain,Rioja,,Tempranillo Blend,Muga


In [109]:
data.price.fillna(data.price.mean())

0         235.000000
1         110.000000
2          90.000000
3          65.000000
4          66.000000
5          73.000000
6          65.000000
7         110.000000
8          65.000000
9          60.000000
10         80.000000
11         48.000000
12         48.000000
13         90.000000
14        185.000000
15         90.000000
16        325.000000
17         80.000000
18        290.000000
19         75.000000
20         24.000000
21         79.000000
22        220.000000
23         60.000000
24         45.000000
25         57.000000
26         62.000000
27        105.000000
28         60.000000
29         60.000000
             ...    
150900     13.000000
150901     12.000000
150902     10.000000
150903      7.000000
150904     10.000000
150905     13.000000
150906     65.000000
150907     52.000000
150908     65.000000
150909     52.000000
150910     38.000000
150911     37.000000
150912     65.000000
150913     30.000000
150914     25.000000
150915     30.000000
150916     65

In [113]:
data.region_2.fillna("Not a null")

0                            Napa
1                      Not a null
2                          Sonoma
3               Willamette Valley
4                      Not a null
5                      Not a null
6                      Not a null
7                      Not a null
8               Willamette Valley
9                          Sonoma
10                     Not a null
11              Willamette Valley
12              Willamette Valley
13                     Not a null
14              Willamette Valley
15              Willamette Valley
16                           Napa
17                     Not a null
18                     Not a null
19                         Sonoma
20                           Napa
21                     Not a null
22                     Not a null
23                  Central Coast
24                  Central Coast
25                     Not a null
26                     Not a null
27              Willamette Valley
28                  Central Coast
29            

In [114]:
data.price.replace(4.0, 4.005)

0         235.0
1         110.0
2          90.0
3          65.0
4          66.0
5          73.0
6          65.0
7         110.0
8          65.0
9          60.0
10         80.0
11         48.0
12         48.0
13         90.0
14        185.0
15         90.0
16        325.0
17         80.0
18        290.0
19         75.0
20         24.0
21         79.0
22        220.0
23         60.0
24         45.0
25         57.0
26         62.0
27        105.0
28         60.0
29         60.0
          ...  
150900     13.0
150901     12.0
150902     10.0
150903      7.0
150904     10.0
150905     13.0
150906     65.0
150907     52.0
150908     65.0
150909     52.0
150910     38.0
150911     37.0
150912     65.0
150913     30.0
150914     25.0
150915     30.0
150916     65.0
150917     30.0
150918     38.0
150919     37.0
150920     19.0
150921     38.0
150922      NaN
150923     30.0
150924     70.0
150925     20.0
150926     27.0
150927     20.0
150928     52.0
150929     15.0
Name: price, Length: 150

In [3]:
data = pd.DataFrame(
    [["BMW", "TCS"], ["Mercedes", "DHL"], ["Buggati", "Fedex"], [None, "UPS"]],
    columns=["Car", "Cargo Company"],
    index=[1, 10, 100, 1000],
)
data

Unnamed: 0,Car,Cargo Company
1,BMW,TCS
10,Mercedes,DHL
100,Buggati,Fedex
1000,,UPS


In [4]:
data.drop(["Car"], axis=1)

Unnamed: 0,Cargo Company
1,TCS
10,DHL
100,Fedex
1000,UPS


In [5]:
data.drop_duplicates()

Unnamed: 0,Car,Cargo Company
1,BMW,TCS
10,Mercedes,DHL
100,Buggati,Fedex
1000,,UPS


In [15]:
data.columns

Index(['country', 'description', 'designation', 'points', 'price', 'province',
       'region_1', 'region_2', 'variety', 'winery'],
      dtype='object')

In [18]:
data1 = pd.DataFrame([data["points"], data["price"]]).transpose()

In [19]:
data1.head()

Unnamed: 0,points,price
0,96.0,235.0
1,96.0,110.0
2,96.0,90.0
3,96.0,65.0
4,95.0,66.0
