URL: https://www.youtube.com/watch?v=1uVWjdAbgBg

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Use the fully qualified option name. The bare 'max_columns' pattern is
# ambiguous on pandas >= 1.4 (it also matches 'styler.render.max_columns')
# and raises OptionError there; 'display.max_columns' works on every version.
pd.set_option('display.max_columns', 50)
%matplotlib inline

In [3]:
# A Series built from values of mixed types (int, str, float); as the output
# shows, pandas stores them with the generic `object` dtype and a default
# 0..n-1 integer index.
s = pd.Series(
    data=[7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'],
)
s

0                7
1       Heisenberg
2             3.14
3      -1789710578
4    Happy Eating!
dtype: object

In [5]:
# Same values as the previous Series, but with explicit string labels
# supplied as the index instead of the default integer positions.
s = pd.Series(
    data=[7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'],
    index=['A', 'Z', 'C', 'Y', 'E'],
)
s

A                7
Z       Heisenberg
C             3.14
Y      -1789710578
E    Happy Eating!
dtype: object

In [7]:
# A dict maps directly onto a Series: keys become the index labels and the
# values become the data. The None entry shows up as NaN in the output.
d = {
    'Chicago': 1000,
    'New York': 1300,
    'Portland': 900,
    'San Francisco': 1100,
    'Austin': 450,
    'Boston': None,
}
cities = pd.Series(data=d)
cities

Austin            450.0
Boston              NaN
Chicago          1000.0
New York         1300.0
Portland          900.0
San Francisco    1100.0
dtype: float64

In [8]:
# Look up a single value by its index label.
cities['Chicago']

1000.0

In [9]:
# Indexing with a list of labels returns a Series holding just those entries.
cities[['Chicago', 'Portland', 'San Francisco']]

Chicago          1000.0
Portland          900.0
San Francisco    1100.0
dtype: float64

In [10]:
# Boolean indexing: keep only the entries whose value is below 1000.
# The NaN entry (Boston) compares False, so it is excluded as well.
cities[cities < 1000]

Austin      450.0
Portland    900.0
dtype: float64

In [11]:
# The comparison on its own yields a boolean Series on the same index;
# this is the mask that boolean indexing consumes.
cities < 1000

Austin            True
Boston           False
Chicago          False
New York         False
Portland          True
San Francisco    False
dtype: bool

In [13]:
# Name the boolean mask, then show both the mask and the entries it selects.
# The '\n' item between them reproduces the blank separator lines.
less_than_1000 = cities.lt(1000)
print(less_than_1000, '\n', cities[less_than_1000], sep='\n')

Austin            True
Boston           False
Chicago          False
New York         False
Portland          True
San Francisco    False
dtype: bool


Austin      450.0
Portland    900.0
dtype: float64


In [14]:
# Read the current value, overwrite it by label, then read it back.
# NOTE: this mutates `cities` in place, so re-running the cell will report
# 1400.0 as the "old" value.
print('Old value:', cities.loc['Chicago'])
cities.loc['Chicago'] = 1400
print('New value:', cities.loc['Chicago'])

Old value: 1000.0
New value: 1400.0


In [16]:
# Show the entries below 1000, set every one of them to 750 through a boolean
# mask, then show the entries that are still below 1000 afterwards.
print(cities.loc[cities < 1000])
print('\n')
cities.loc[cities < 1000] = 750
print(cities.loc[cities < 1000])

Austin      450.0
Portland    900.0
dtype: float64


Austin      750.0
Portland    750.0
dtype: float64


In [17]:
# `in` on a Series tests membership among the index labels (not the values).
print('Seattle' in cities, 'San Francisco' in cities, sep='\n')

False
True


In [20]:
# Arithmetic with a scalar is applied element-wise; missing values stay NaN.
print(cities, cities.div(3), sep='\n')

Austin            750.0
Boston              NaN
Chicago          1400.0
New York         1300.0
Portland          750.0
San Francisco    1100.0
dtype: float64
Austin           250.000000
Boston                  NaN
Chicago          466.666667
New York         433.333333
Portland         250.000000
San Francisco    366.666667
dtype: float64


In [21]:
# NumPy ufuncs apply element-wise to a Series and keep its index;
# the missing value stays NaN.
np.square(cities)

Austin            562500.0
Boston                 NaN
Chicago          1960000.0
New York         1690000.0
Portland          562500.0
San Francisco    1210000.0
dtype: float64

In [22]:
# Adding two Series aligns them on index labels first: only 'New York' is in
# both selections, so every other label in the result is NaN.
print(cities.loc[['Chicago', 'New York', 'Portland']])
print('\n')
print(cities.loc[['Austin', 'New York']])
print('\n')
print(
    cities.loc[['Chicago', 'New York', 'Portland']].add(
        cities.loc[['Austin', 'New York']]
    )
)

Chicago     1400.0
New York    1300.0
Portland     750.0
dtype: float64


Austin       750.0
New York    1300.0
dtype: float64


Austin         NaN
Chicago        NaN
New York    2600.0
Portland       NaN
dtype: float64


In [23]:
# True where a value is present, False where it is missing (NaN).
cities.notnull()

Austin            True
Boston           False
Chicago           True
New York          True
Portland          True
San Francisco     True
dtype: bool

In [24]:
# Print the missing-value mask, then use that mask to pull out the entries
# that are actually missing. The '\n' item reproduces the blank separator.
print(cities.isnull(), '\n', cities[cities.isnull()], sep='\n')

Austin           False
Boston            True
Chicago          False
New York         False
Portland         False
San Francisco    False
dtype: bool


Boston   NaN
dtype: float64


In [25]:
# Build a DataFrame from a dict of equal-length lists (one list per column).
# `columns=` fixes the column order explicitly.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12],
}
football = pd.DataFrame(data=data, columns=['year', 'team', 'wins', 'losses'])
football

Unnamed: 0,year,team,wins,losses
0,2010,Bears,11,5
1,2011,Bears,8,8
2,2012,Bears,10,6
3,2011,Packers,15,1
4,2012,Packers,11,5
5,2010,Lions,6,10
6,2011,Lions,10,6
7,2012,Lions,4,12


In [26]:
# Shell escape: peek at the first five raw lines of the CSV before parsing it.
# NOTE(review): relies on a Unix `head` binary being available on the PATH.
!head -n 5 data/mariano-rivera.csv

Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards
1995,25,NYY,AL,5,3,0.625,5.51,19,10,2,0,0,0,67,71,43,41,11,30,0,51,2,1,0,301,84,5.15,1.507,9.5,1.5,4,6.9,1.7,
1996,26,NYY,AL,8,3,0.727,2.09,61,0,14,0,0,5,107.2,73,25,25,1,34,3,130,2,0,1,425,240,1.88,0.994,6.1,0.1,2.8,10.9,3.82,CYA-3MVP-12
1997,27,NYY,AL,6,4,0.6,1.88,66,0,56,0,0,43,71.2,65,17,15,5,20,6,68,0,0,2,301,239,2.96,1.186,8.2,0.6,2.5,8.5,3.4,ASMVP-25
1998,28,NYY,AL,3,0,1,1.91,54,0,49,0,0,36,61.1,48,13,13,3,17,1,36,1,0,0,246,233,3.48,1.06,7,0.4,2.5,5.3,2.12,


In [27]:
# Parse the CSV into a DataFrame (the first line supplies the column names)
# and preview its first five rows.
from_csv = pd.read_csv(filepath_or_buffer='data/mariano-rivera.csv')
from_csv.head(n=5)

Unnamed: 0,Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards
0,1995,25,NYY,AL,5,3,0.625,5.51,19,10,2,0,0,0,67.0,71,43,41,11,30,0,51,2,1,0,301,84,5.15,1.507,9.5,1.5,4.0,6.9,1.7,
1,1996,26,NYY,AL,8,3,0.727,2.09,61,0,14,0,0,5,107.2,73,25,25,1,34,3,130,2,0,1,425,240,1.88,0.994,6.1,0.1,2.8,10.9,3.82,CYA-3MVP-12
2,1997,27,NYY,AL,6,4,0.6,1.88,66,0,56,0,0,43,71.2,65,17,15,5,20,6,68,0,0,2,301,239,2.96,1.186,8.2,0.6,2.5,8.5,3.4,ASMVP-25
3,1998,28,NYY,AL,3,0,1.0,1.91,54,0,49,0,0,36,61.1,48,13,13,3,17,1,36,1,0,0,246,233,3.48,1.06,7.0,0.4,2.5,5.3,2.12,
4,1999,29,NYY,AL,4,3,0.571,1.83,66,0,63,0,0,45,69.0,43,15,14,2,18,3,52,3,1,2,268,257,2.92,0.884,5.6,0.3,2.3,6.8,2.89,ASCYA-3MVP-14


In [31]:
# Build a DataFrame from whatever tabular text is currently on the system
# clipboard. NOTE(review): the result depends on clipboard state outside the
# notebook, so this cell is not reproducible under Restart & Run All.
hank = pd.read_clipboard()
hank

Unnamed: 0,Year,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos,Awards
0,1954,20,MLN,NL,122,509,468,58,131,27,6,13,69,2,2,28,39,0.28,0.322,0.447,0.769,104,209,13,3,6,4,,*79,RoY-4
1,1955 ★,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141,325,20,3,7,4,5.0,*974,"AS,MVP-9"
2,1956 ★,22,MLN,NL,153,660,609,106,200,34,14,26,92,2,4,37,54,0.328,0.365,0.558,0.923,151,340,21,2,5,7,6.0,*9,"AS,MVP-3"
3,1957 ★,23,MLN,NL,151,675,615,118,198,27,6,44,132,1,1,57,58,0.322,0.378,0.6,0.978,166,369,13,0,0,3,15.0,*98,"AS,MVP-1"
4,1958 ★,24,MLN,NL,153,664,601,109,196,34,4,30,95,4,1,59,49,0.326,0.386,0.546,0.931,152,328,21,1,0,3,16.0,*98,"AS,MVP-3,GG"
5,1959 ★,25,MLN,NL,154,693,629,116,223,46,7,39,123,8,0,51,54,0.355,0.401,0.636,1.037,182,400,19,4,0,9,17.0,*98/5,"AS,MVP-3,GG"
6,1960 ★,26,MLN,NL,153,664,590,102,172,20,11,40,126,16,7,60,63,0.292,0.352,0.566,0.919,156,334,8,2,0,12,13.0,*9/84,"AS,MVP-11,GG"
7,1961 ★,27,MLN,NL,155,671,603,115,197,39,10,34,120,21,9,56,64,0.327,0.381,0.594,0.974,163,358,16,2,1,9,20.0,*89/5,"AS,MVP-8"
8,1962 ★,28,MLN,NL,156,667,592,127,191,28,6,45,128,15,7,66,73,0.323,0.39,0.618,1.008,170,366,14,3,0,6,14.0,*89/3,"AS,MVP-6"
9,1963 ★,29,MLN,NL,161,714,631,121,201,29,4,44,130,31,5,78,94,0.319,0.391,0.586,0.977,179,370,11,0,0,5,18.0,*9,"AS,MVP-3"


In [32]:
url = 'https://raw.github.com/gjreda/best-sandwiches/master/data/best-sandwiches-geocode.tsv'

# pandas readers accept a URL directly: download the tab-separated file,
# parse it into a DataFrame, and preview the first three rows.
from_url = pd.read_csv(url, sep='\t')
from_url.head(n=3)

Unnamed: 0,rank,sandwich,restaurant,description,price,address,city,phone,website,full_address,formatted_address,lat,lng
0,1,BLT,Old Oak Tap,The B is applewood smoked&mdash;nice and snapp...,$10,2109 W. Chicago Ave.,Chicago,773-772-0406,theoldoaktap.com,"2109 W. Chicago Ave., Chicago","2109 West Chicago Avenue, Chicago, IL 60622, USA",41.895734,-87.67996
1,2,Fried Bologna,Au Cheval,Thought your bologna-eating days had retired w...,$9,800 W. Randolph St.,Chicago,312-929-4580,aucheval.tumblr.com,"800 W. Randolph St., Chicago","800 West Randolph Street, Chicago, IL 60607, USA",41.884672,-87.647754
2,3,Woodland Mushroom,Xoco,Leave it to Rick Bayless and crew to come up w...,$9.50.,445 N. Clark St.,Chicago,312-334-3688,rickbayless.com,"445 N. Clark St., Chicago","445 North Clark Street, Chicago, IL 60654, USA",41.890602,-87.630925
