# Lesson 18a: Pandas - Data Series

## Import libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math

## Basics

In [13]:
# A plain Python list of city names:

cities = ["Paris", "Berlin", "London", "Warsaw"]
cities

['Paris', 'Berlin', 'London', 'Warsaw']

In [14]:
# Converting a list into a Series (a single column of data):

pd.Series(cities)

# Note that the index (0, 1, 2, ...) is generated automatically.

0     Paris
1    Berlin
2    London
3    Warsaw
dtype: object

In [15]:
# A tuple can be converted into a Series in the same way as a list:
prime_numbers = (2,3,5,7,11,13,17,19,23)
pd.Series(prime_numbers)

0     2
1     3
2     5
3     7
4    11
5    13
6    17
7    19
8    23
dtype: int64

In [16]:
# Dictionary (or hash table):

Spielberg_filmography = {"Jaws": 1975, "1941": 1979, "Indiana Jones and the Raiders of the Lost Ark": 1981,
                        "E. T. the Extra-Terrestial": 1982}
pd.Series(Spielberg_filmography)

# Note that there is no automatic integer index here: the dictionary keys become the index labels.

Jaws                                             1975
1941                                             1979
Indiana Jones and the Raiders of the Lost Ark    1981
E. T. the Extra-Terrestial                       1982
dtype: int64

## Attributes of Series

Attributes are the properties of a given object that we want to read. 
To access an attribute we use: series.attribute

In [17]:
# Store the series under a name so that its attributes can be inspected in the next cells:
cities_series = pd.Series(cities)
cities_series

0     Paris
1    Berlin
2    London
3    Warsaw
dtype: object

In [19]:
# Examples of attributes:

cities_series.size  # number of elements in the series

4

In [20]:
cities_series.is_unique  # True when no value is repeated in the series

True

In [21]:
# "is_monotonic" was deprecated in pandas 1.5 and removed in pandas 2.0;
# "is_monotonic_increasing" is the explicit equivalent.
# It is False here because the city names are not sorted in ascending order.
cities_series.is_monotonic_increasing

False

In [22]:
cities_series.index  # the index object that holds the row labels

RangeIndex(start=0, stop=4, step=1)

In [23]:
cities_series.values  # the stored data as a NumPy array, without the index

array(['Paris', 'Berlin', 'London', 'Warsaw'], dtype=object)

## Methods on Series

Methods are used to perform operations on an object or to produce a transformed version of it. To call a method we use: series.method()

In [25]:
# Declare the object: a tuple of increasing integers converted into a series.

monotonic_list = (1,4,22,57)
monotonic_series = pd.Series(monotonic_list)
monotonic_series

0     1
1     4
2    22
3    57
dtype: int64

In [26]:
# Examples of methods:
monotonic_series.sum()  # sum of all values

84

In [27]:
monotonic_series.min()  # smallest value

1

In [28]:
monotonic_series.max()  # largest value

57

In [29]:
monotonic_series.mean()  # arithmetic mean; note that the result is a float

21.0

In [32]:
monotonic_series.keys()  # Returns the index of the series (here the same RangeIndex that ".index" shows).

RangeIndex(start=0, stop=4, step=1)

In [33]:
monotonic_series.tolist()  # Returns the values of the series as a plain Python list.

[1, 4, 22, 57]

In [35]:
monotonic_series.add(10)  # Returns a new series with 10 added to each element.

0    11
1    14
2    32
3    67
dtype: int64

In [36]:
# Methods such as add() do not modify the original series; they return a new object with the result.
# To keep the result of a method, assign it to a new name:

new_series = monotonic_series.add(10)
new_series

0    11
1    14
2    32
3    67
dtype: int64

In [41]:
currencies = ["eur", "usd", "eur", "pln", "eur"]
countries = ["Germany", "USA", "Portugal", "Poland", "Frnace"]

# Positional form: the first argument is the data and the second one is the index,
# so each currency becomes the index label of its country. The order matters here.
cur_countries = pd.Series(countries, currencies)

# Keyword form: builds the same series. With named arguments the order does not matter,
# so this is equivalent to pd.Series(data=countries, index=currencies).
cur_countries2 = pd.Series(index=currencies, data=countries)

cur_countries2

eur     Germany
usd         USA
eur    Portugal
pln      Poland
eur      Frnace
dtype: object

In [42]:
# By using Shift+Tab you can get access to the parameters that a given method has.

## Selecting data by imposing a condition

### Condition - where

In [44]:
# By using a condition (with the method "where()") we work on the values and not on the index.

numbers = (3,5,7,14,36)

# Note that a comparison does not work on a plain tuple or list: "numbers > 10" raises an error.
# It does work on a series, where the comparison is applied to every element:

numbers_series = pd.Series(numbers)

numbers_series > 10

0    False
1    False
2    False
3     True
4     True
dtype: bool

In [45]:
# where() keeps the values that satisfy the condition and replaces all the others with NaN.
# Because NaN is a float, the dtype of the result becomes float64.
numbers_series.where(numbers_series > 10)

0     NaN
1     NaN
2     NaN
3    14.0
4    36.0
dtype: float64

In [47]:
# The parameter "other" sets the replacement value used where the condition is False
# (with an integer replacement the dtype stays int64):
numbers_series.where(numbers_series > 10, other = 0)

0     0
1     0
2     0
3    14
4    36
dtype: int64

In [48]:
numbers_series.where(numbers_series > 10).dropna() 
# dropna() removes the NaN entries, i.e. all values that do not satisfy the condition.
# Note that this is done by chaining a second method onto the result of the first one.
# Note also that the original object is not changed.

3    14.0
4    36.0
dtype: float64

In [50]:
# To change the original object, assign the result back to the same name.
# The parameter "inplace=True" is deliberately not used here: filling with NaN forces
# an int64 -> float64 upcast, and pandas 2.1+ deprecates silent upcasting in in-place operations.
numbers_series = numbers_series.where(numbers_series > 10)
numbers_series

0     NaN
1     NaN
2     NaN
3    14.0
4    36.0
dtype: float64

In [57]:
# and then we can drop the undefined values:
numbers_series.dropna(inplace = True)
numbers_series

# Note that although methods can be chained one after the other,
# 2 methods with "inplace=True" cannot be combined: an in-place call modifies the object
# and returns None, so there is nothing for the next method to act on.

3    14.0
4    36.0
dtype: float64

In [75]:
# Two conditions can be combined by building each one as a boolean series
# and passing their combination to the method "where()".

n = (1, 3, 6, 9, 15, 33, 58)
n_series = pd.Series(n)

n1 = (n_series > 10)        # values greater than 10
n2 = (n_series % 2 == 1)    # odd values

# Note: the conditions have to be joined with the operator "&" (which works on bits,
# element by element); the Python keyword "and" does not work on series.
n_series.where(n1 & n2)


0     NaN
1     NaN
2     NaN
3     NaN
4    15.0
5    33.0
6     NaN
dtype: float64

### Condition - between

In [83]:
# between() returns a boolean series that is True for the values inside the given range.
# The lower bound is included: the value 3 is kept in the result below.
n_between = n_series.between(3,12)
n_to_show = n_series.where(n_between).dropna()
n_to_show

1    3.0
2    6.0
3    9.0
dtype: float64

## Filtering

In [56]:
# Filtering - selecting by the index labels of the series rather than by its values.

numbers_series.filter(items=[3])

# Note that the original index label (3) is preserved in the result; the index is not renumbered from 0.

3    14.0
dtype: float64