In [1]:
# pandas is used to process and prepare data before it is fed into an AI model

import pandas as pd

# A pandas Series is a one-dimensional, array-like object whose values carry index labels.
# Here it is built from a label -> value mapping; mixing ints and strings yields dtype object.
groceries = pd.Series({"eggs": 20, "apples": 6, "milk": "Yes", "bread": "No"})

In [2]:
# Plain-text rendering of the Series: index labels on the left, values on the right, dtype last
print(groceries)

eggs       20
apples      6
milk      Yes
bread      No
dtype: object


In [3]:
# Shape of the Series: a one-element tuple holding the number of entries
groceries.shape

(4,)

In [4]:
# Number of dimensions (axes) of the Series
groceries.ndim

1

In [7]:
# Total number of elements stored in the Series
groceries.size

4

In [8]:
# The index object holding the labels attached to the values
groceries.index

Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')

In [9]:
# `in` on a Series checks the index labels (keys), not the stored values;
# 'banana' is not one of the labels
"banana" in groceries

False

In [10]:
# 'bread' is one of the index labels, so the membership test succeeds
"bread" in groceries

True

In [17]:
## Accessing and Deleting Elements in Pandas Series

# --- Label-based access ---------------------------------------------------

# A single index label returns the scalar stored under that label
print('How many eggs do we need to buy:', groceries['eggs'])
print()

# A list of index labels returns a sub-Series with just those entries
print('Do we need milk and bread:\n', groceries[['milk', 'bread']])
print()

# .loc is the explicit label-based accessor
print('How many eggs and apples do we need to buy:\n', groceries.loc[['eggs', 'apples']])
print()

# --- Position-based access ------------------------------------------------
# Integer positions always go through .iloc. Using plain [] with integers on a
# Series that has string labels is deprecated in pandas 2.x (FutureWarning) and
# is slated to be treated as a label lookup, which would raise KeyError here.

# Several positions at once
print('How many eggs and apples do we need to buy:\n', groceries.iloc[[0, 1]])
print()

# A negative position counts from the end
print('Do we need bread:\n', groceries.iloc[[-1]])
print()

# A single position returns a scalar
print('How many eggs do we need to buy:', groceries.iloc[0])
print()

# Another multi-position selection
print('Do we need milk and bread:\n', groceries.iloc[[2, 3]])

How many eggs do we need to buy: 20

Do we need milk and bread:
 milk     Yes
bread     No
dtype: object

How many eggs and apples do we need to buy:
 eggs      20
apples     6
dtype: object

How many eggs and apples do we need to buy:
 eggs      20
apples     6
dtype: object

Do we need bread:
 bread    No
dtype: object

How many eggs do we need to buy: 20

Do we need milk and bread:
 milk     Yes
bread     No
dtype: object


In [21]:
# Modifying data

# Show the list as the kernel currently holds it (on a re-run this already
# contains the modified value)
print('Original Grocery List:\n', groceries)

# Overwrite the value stored under the 'eggs' label with 2
groceries.loc['eggs'] = 2

# Blank separator line, then the list after the assignment
print()
print('Modified Grocery List:\n', groceries)

Original Grocery List:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object

Modified Grocery List:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object


In [23]:
# Deleting items from Pandas Series

# Start by showing the grocery list as it currently stands
print('Original Grocery List:\n', groceries)
print()

# drop() without inplace returns a new Series that lacks the given label ...
print('We remove apples (out of place):\n', groceries.drop(labels='apples'))
print()

# ... while the Series it was called on still contains 'apples'
print('Grocery List after removing apples out of place:\n', groceries)

Original Grocery List:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object

We remove apples (out of place):
 eggs       2
milk     Yes
bread     No
dtype: object

Grocery List after removing apples out of place:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object


In [24]:
# We display the original grocery list
print('Original Grocery List:\n', groceries)

# We remove apples from our grocery list in place by setting the inplace keyword to True.
# errors='ignore' keeps this cell re-runnable: once 'apples' has been removed, a
# second execution would otherwise raise KeyError because the label no longer exists.
groceries.drop('apples', inplace=True, errors='ignore')

# When we remove elements in place the original Series is modified. To see this
# we display our grocery list again
print()
print('Grocery List after removing apples in place:\n', groceries)

Original Grocery List:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object

Grocery List after removing apples in place:
 eggs       2
milk     Yes
bread     No
dtype: object


In [29]:
## Arithmetic Operations on Pandas Series

# A second Series holding a fruits-only grocery list, built from a label -> quantity mapping
fruits = pd.Series({'apples': 10, 'oranges': 6, 'bananas': 3})

print('Original grocery list of fruits:\n ', fruits)
print()

# Arithmetic operators act element-wise: the scalar 2 is added to every entry
print('fruits + 2:\n', fruits + 2)

Original grocery list of fruits:
  apples     10
oranges     6
bananas     3
dtype: int64

fruits + 2:
 apples     12
oranges     8
bananas     5
dtype: int64


In [30]:
# Element-wise subtraction; the label now matches the operation being shown
print('fruits - 2:\n', fruits - 2)

fruits + 2:
 apples     8
oranges    4
bananas    1
dtype: int64


In [32]:
# Element-wise multiplication; the label now matches the operation being shown
print('fruits * 2:\n', fruits * 2)

fruits + 2:
 apples     20
oranges    12
bananas     6
dtype: int64


In [33]:
# Element-wise true division (result is float); the label now matches the operation being shown
print('fruits / 2:\n', fruits / 2)

fruits + 2:
 apples     5.0
oranges    3.0
bananas    1.5
dtype: float64


In [39]:
# We apply different mathematical functions to all elements of fruits.
# NumPy functions applied to a Series work element-wise and return a Series
# that keeps the original index labels.
import numpy as np

# We print fruits for reference
print('Original grocery list of fruits:\n', fruits)

# e raised to the power of each element
print()
print('EXP(X) = \n', np.exp(fruits))

Original grocery list of fruits:
 apples     10
oranges     6
bananas     3
dtype: int64

EXP(X) = 
 apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64


In [36]:
# Element-wise square root; the label now names the function actually applied
print('SQRT(X) = \n', np.sqrt(fruits))

EXP(X) = 
 apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64


In [40]:
# We raise all elements of fruits to the power of 2 (element-wise; index labels are kept)
print('POW(X,2) =\n',np.power(fruits,2)) 

POW(X,2) =
 apples     100
oranges     36
bananas      9
dtype: int64


In [54]:
# Pandas also allows us to apply arithmetic operations to selected items only.
# None of the expressions below assigns back to fruits, so fruits itself is unchanged.

print('Original grocery list of fruits:\n ', fruits)
print()

# We add 2 only to the bananas (a single label selects a scalar)
print('Amount of bananas + 2 = ', fruits['bananas'] + 2)
print()

# We subtract 2 from apples (position 0, selected with iloc)
print('Amount of apples - 2 = ', fruits.iloc[0] - 2)
print()

# We multiply apples and oranges by 2 (a list of labels selects a sub-Series)
print('We double the amount of apples and oranges:\n', fruits[['apples', 'oranges']] * 2)
print()

# We divide apples and oranges by 2 (same selection via loc; division yields floats)
print('We half the amount of apples and oranges:\n', fruits.loc[['apples', 'oranges']] / 2)


Original grocery list of fruits:
  apples     10
oranges     6
bananas     3
dtype: int64

Amount of bananas + 2 =  5

Amount of apples - 2 =  8

We double the amount of apples and oranges:
 apples     20
oranges    12
dtype: int64

We half the amount of apples and oranges:
 apples     5.0
oranges    3.0
dtype: float64


In [9]:
import pandas as pd
# Planet names and their distances from the Sun, kept as parallel lists
# (distances presumably in millions of km -- TODO confirm units)
planets = ['Earth','Saturn', 'Mars','Venus', 'Jupiter']
distance_from_sun = [149.6, 1433.5, 227.9, 108.2, 778.6]

# Label every distance with its planet name
dist_planets = pd.Series(distance_from_sun, index=planets)
print(dist_planets)
print("\n")

# Divide every distance by 18
# (presumably the speed of light in millions of km per minute, giving light
# travel time in minutes -- TODO confirm)
time_light = dist_planets.div(18)
print(time_light)
print("\n")

# Boolean mask: keep only the planets whose value is below 40
is_close = time_light.lt(40)
close_planets = time_light.loc[is_close]
print(close_planets)
print("\n")

Earth       149.6
Saturn     1433.5
Mars        227.9
Venus       108.2
Jupiter     778.6
dtype: float64


Earth       8.311111
Saturn     79.638889
Mars       12.661111
Venus       6.011111
Jupiter    43.255556
dtype: float64


Earth     8.311111
Mars     12.661111
Venus     6.011111
dtype: float64




In [22]:
## Pandas DataFrame

# One Series per person, each built from an item-label -> value mapping.
# The two Series deliberately have different (partly overlapping) labels.
items = {
    'Bob': pd.Series({'bike': 245, 'pants': 25, 'watch': 55}),
    'Alice': pd.Series({'book': 40, 'glasses': 110, 'bike': 500, 'pants': 45}),
}

print(type(items))

# A dictionary of Series becomes a DataFrame: dictionary keys turn into columns,
# the union of the Series labels becomes the row index, and combinations that
# are absent from a Series are filled with NaN
shopping_carts = pd.DataFrame(items)

# Last expression of the cell, so the DataFrame is rendered
shopping_carts

<class 'dict'>


Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [20]:
# Create a dictionary of pandas Series without explicit index labels.
# Each Series then gets the default integer index 0, 1, 2, ...
data = {'Bob': pd.Series([245, 25, 55]),
        'Alice': pd.Series([40,110, 500,45])
       }

# Create a DataFrame: rows are aligned on the integer index, and Bob's shorter
# Series leaves a NaN in the last row
df = pd.DataFrame(data)
# Display df (last expression of the cell)
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [23]:
# Print some information about shopping_carts:
# shape is (rows, columns), ndim is the number of axes, and size is rows * columns
print('shopping_carts has shape:', shopping_carts.shape)
print('shopping_carts has dimension:', shopping_carts.ndim)
print('shopping_carts has a total of:', shopping_carts.size, 'elements')
print()
# .values exposes the underlying data as a NumPy array (missing entries show as nan)
print('The data in shopping_carts is:\n', shopping_carts.values)
print()
# Row labels
print('The row index in shopping_carts is:', shopping_carts.index)
print()
# Column labels
print('The column index in shopping_carts is:', shopping_carts.columns)

shopping_carts has shape: (5, 2)
shopping_carts has dimension: 2
shopping_carts has a total of: 10 elements

The data in shopping_carts is:
 [[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

The row index in shopping_carts is: Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

The column index in shopping_carts is: Index(['Bob', 'Alice'], dtype='object')


In [25]:
# create DataFrame with just Bob's data (columns= picks which dictionary keys to keep)
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])

# create DataFrame with item selected for both Alice and Bob (index= picks which row labels to keep)
sel_shopping_cart = pd.DataFrame(items, index=['pants', 'book'])

# A cell renders only its last bare expression, so a bare `bob_shopping_cart`
# in the middle of the cell is never shown. display() is provided by the
# IPython/Jupyter kernel and renders each frame explicitly.
display(bob_shopping_cart)
display(sel_shopping_cart)


Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


In [27]:
# We create a DataFrame that only has selected items for Alice:
# index= restricts the rows to the given labels, columns= restricts the columns
alice_sel_shopping_cart = pd.DataFrame(items, index = ['glasses', 'bike'], columns = ['Alice'])
# Display the result (last expression of the cell)
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500
