# Learning the Pandas Library

In [1]:
import pandas as pd
pd.__version__

'1.4.4'

## Series

In [None]:
# pandas Series are 1D indexed arrays of data
mySeries = pd.Series([0, 1, 2, 4.2, "hi"])

print("mySeries:", mySeries, end="\n\n")

print("values:", mySeries.values, end="\n\n")

print("index:", mySeries.index)
print("index-start:", mySeries.index.start, end="\n\n")

print("access index 0:", mySeries[0], end="\n\n")

print("access range 0-1:", mySeries[0:2].values, end="\n\n")

print("reverse mySeries:", mySeries[::-1].values, end="\n\n")

## we can explicitly list indices, so they can be thought of as a python dictionary
mySeries = pd.Series([0, 1, 2, 4.2, "hi"], index=[2, 4, 6, 8, 'a'])

print("explicitly indexed:", mySeries)

print("access index 'a':", mySeries['a'])

## DataFrames

In [11]:
# DataFrames are two dimensional arrays, with flexible columns and rows

ice_cream_price_dict = {'Chocolate': 2.4, 'Vanilla': 2.1, 'Coffee': 2.6, 'Coconut': 1.8}
ice_cream_prices = pd.Series(ice_cream_price_dict)

ice_cream_popularity_dict = {'Chocolate': 82, 'Vanilla': 94, 'Coffee': 71, 'Coconut': 57}
ice_cream_popularities = pd.Series(ice_cream_popularity_dict)

ice_cream = pd.DataFrame({'price': ice_cream_prices, 'popularity': ice_cream_popularities})

print(ice_cream, end="\n\n")
print("indexes:", ice_cream.index, end="\n\n")
print("columns:", ice_cream.columns, end="\n\n")
print("prices:", ice_cream['price'], end="\n\n")
print("popularity:", ice_cream['popularity'], end="\n\n")

# if keys are missing from a column, then it will be filled in by NaN
# DataFrames can be created from:
#   - from a single Series object
#   - from a list of dicts
#   - from a dictionary of Series objects
#   - from a two-dimesnional NumPy array
#   - from a NumPy structured array

           price  popularity
Chocolate    2.4          82
Vanilla      2.1          94
Coffee       2.6          71
Coconut      1.8          57

indexes: Index(['Chocolate', 'Vanilla', 'Coffee', 'Coconut'], dtype='object')

columns: Index(['price', 'popularity'], dtype='object')

prices: Chocolate    2.4
Vanilla      2.1
Coffee       2.6
Coconut      1.8
Name: price, dtype: float64

popularity: Chocolate    82
Vanilla      94
Coffee       71
Coconut      57
Name: popularity, dtype: int64



## Index

In [23]:
# pandas Index is an immutable array/ordered set

ind = pd.Index([1, 2, 3, 5, 8])
print(ind)
print("index 0:", ind[0])
print("size:", ind.size)
print("shape:", ind.shape)

print()

ind1 = pd.Index([1, 4, 8])
print("intersection:", ind.intersection(ind1))
print("union:", ind.union(ind1))
print("symmetric difference:", ind.difference(ind1))

Int64Index([1, 2, 3, 5, 8], dtype='int64')
index 0: 1
size: 5
shape: (5,)

intersection: Int64Index([1, 8], dtype='int64')
union: Int64Index([1, 2, 3, 4, 5, 8], dtype='int64')
symmetric difference: Int64Index([2, 3, 5], dtype='int64')


## Combining Data

In [1]:
## use for Series and DataFrames 
# pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
#           keys=None, levels=None, names=None, verify_integrity=False,
#           copy=True)
## it preserves indices, however, even if they may repeat. use verify_integrity, keys, or ignore_index to prevent this

## joining:
#pd.merge()


## aggregates:
# count()	Total number of items
# first(), last()	First and last item
# mean(), median()	Mean and median
# min(), max()	Minimum and maximum
# std(), var()	Standard deviation and variance
# mad()	Mean absolute deviation
# prod()	Product of all items
# sum()	Sum of all items
# df.groupby('key') (split, apply, combine)
# aggregate()
# apply()
# pivot_table()