# Tabular Data with Pandas

New data structures
* Series
* DataFrame

A Series is like a list/dict hybrid; every value can be reached two ways:
* by something like a list index, called an "integer position"
* by something like a dict key, called an "index"

"i" => integer position (this is the "i" in the iloc and iat accessors)

In [1]:
import pandas
pandas.Series

pandas.core.series.Series

In [2]:
import pandas as pd
pd.Series

pandas.core.series.Series

In [4]:
s = pd.Series({"w":9, "x":8, "y":7})
s

w    9
x    8
y    7
dtype: int64

In [5]:
s["z"] = 6
s

w    9
x    8
y    7
z    6
dtype: int64

In [6]:
dict(s)

{'w': 9, 'x': 8, 'y': 7, 'z': 6}

In [7]:
list(s)

[9, 8, 7, 6]

In [8]:
s.at["y"] # use index (loc is the more complete version)

7

In [10]:
s.iat[2] # int pos (iloc is the more complete version)

7

In [14]:
s.iloc[:2] # iloc slice ends are EXCLUSIVE

w    9
x    8
dtype: int64

In [16]:
s.loc[:"y"] # loc slice ends are INCLUSIVE

w    9
x    8
y    7
dtype: int64

In [17]:
s

w    9
x    8
y    7
z    6
dtype: int64

In [18]:
s[["w", "z"]]

w    9
z    6
dtype: int64

In [19]:
t = pd.Series([9,8,7,6])
t

0    9
1    8
2    7
3    6
dtype: int64

In [23]:
t.loc[1]

8

In [22]:
t.iloc[1]

8

In [24]:
u = t.iloc[2:]
u

2    7
3    6
dtype: int64

In [32]:
# u[-1] # is it an int pos or an index?  when the index contains ints, pandas treats -1 as an index label, so this raises KeyError (u has labels 2 and 3 only)

In [34]:
u.iat[-1]

6

In [33]:
s.iloc[-1] # last value by int pos; plain s[-1] only works by falling back to int pos on a str index (deprecated since pandas 2.1), so be explicit

6

# Element-wise Operations

In [35]:
s = pd.Series({"w":9, "x":8, "y":7, "z":6})
t = pd.Series({"x":100, "y":0})

In [37]:
s

w    9
x    8
y    7
z    6
dtype: int64

In [38]:
t

x    100
y      0
dtype: int64

In [41]:
s + t # element-wise addition, aligned on index; labels missing from either Series give NaN (so the result is float64)

w      NaN
x    108.0
y      7.0
z      NaN
dtype: float64

In [42]:
[1, 2, 3] + [7, 8, 9] # sequence concatenation

[1, 2, 3, 7, 8, 9]

In [43]:
s * t

w      NaN
x    800.0
y      0.0
z      NaN
dtype: float64

In [45]:
s / 10

w    0.9
x    0.8
y    0.7
z    0.6
dtype: float64

# Boolean Indexing (aka Fancy Indexing)

In [46]:
s = pd.Series([-3, 5, 2, 10, 8, -9])
s

0    -3
1     5
2     2
3    10
4     8
5    -9
dtype: int64

In [49]:
s.loc[[2, 4, 5]]

2    2
4    8
5   -9
dtype: int64

In [50]:
b = pd.Series([True, True, False, False, True, True])
b

0     True
1     True
2    False
3    False
4     True
5     True
dtype: bool

In [51]:
s[b]

0   -3
1    5
4    8
5   -9
dtype: int64

In [54]:
b = s > 0
b

0    False
1     True
2     True
3     True
4     True
5    False
dtype: bool

In [55]:
s[b]

1     5
2     2
3    10
4     8
dtype: int64

In [57]:
s[s > 0] # give me all the positive numbers

1     5
2     2
3    10
4     8
dtype: int64

In [58]:
s[s < 0]

0   -3
5   -9
dtype: int64

In [62]:
x = 6
x % 2 == 1

False

In [67]:
s[s % 2 == 1] # give me all the odd numbers (negatives too: % takes the sign of the divisor, so -3 % 2 is 1)

0   -3
1    5
5   -9
dtype: int64

In [74]:
# get all words that are entirely upper case
t = pd.Series(["APPLE", "banana", "Cat", "DOG", "elephant"])
t

0       APPLE
1      banana
2         Cat
3         DOG
4    elephant
dtype: object

In [73]:
t.str.upper()

0       APPLE
1      BANANA
2         CAT
3         DOG
4    ELEPHANT
dtype: object

In [76]:
t[t == t.str.upper()]

0    APPLE
3      DOG
dtype: object

In [77]:
t[t == t.str.lower()]

1      banana
4    elephant
dtype: object

In [79]:
# want all values between 1 and 6 (inclusive)
s

0    -3
1     5
2     2
3    10
4     8
5    -9
dtype: int64

In [80]:
s[s >= 1]

1     5
2     2
3    10
4     8
dtype: int64

In [82]:
s[s <= 6]

0   -3
1    5
2    2
5   -9
dtype: int64

In [86]:
s[(s >= 1) & (s <= 6)] # & is element-wise "and" for pandas; the parentheses are required because & binds tighter than >= and <=

1    5
2    2
dtype: int64

In [89]:
# want all values less than 1, or greater than 6
s[(s < 1) | (s > 6)] # | is element-wise "or" for pandas; parenthesize each comparison because | binds tighter than < and >

0    -3
3    10
4     8
5    -9
dtype: int64