In [2]:
#Write a Python program to perform the following using the three fundamental Pandas datastructures: the Series, DataFrame, and Index.

import pandas as pd

In [25]:
#a) Series as generalized NumPy array

data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=['a', 'b', 'c', 'd'])
data


a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [15]:
#b) Series as specialized dictionary
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [24]:
#c) Constructing Series objects
import numpy as np   
import pandas as pd   
Series=pd.Series(data=[2,4,6,8,10,12,14])      
print("index:",Series.index)
print("shape:",Series.shape)
print("dtype:",Series.dtype)
print("size:",Series.size)
print("empty:",Series.empty)
print("hasnans:",Series.hasnans)
print("nbytes:",Series.nbytes)
print("ndim:",Series.ndim)


index: RangeIndex(start=0, stop=7, step=1)
shape: (7,)
dtype: int64
size: 7
empty: False
hasnans: False
nbytes: 56
ndim: 1


In [19]:
#d) DataFrame as a generalized NumPy array
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [16]:
states = pd.DataFrame({'population': population,
                       'area': area})
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [17]:
#e) DataFrame as specialized dictionary

states['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [31]:
#f) Constructing DataFrame objects:
#i. From a single Series object.
pd.DataFrame(population, columns=['population'])


Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [32]:
#ii. From a list of dicts.
data = [{'a': i, 'b': 2 * i}
        for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [33]:
#iii. From a dictionary of Series objects
pd.DataFrame({'population': population,
              'area': area})


Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [34]:
#iv. From a two-dimensional NumPy array.
pd.DataFrame(np.random.rand(3, 2),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.983789,0.317771
b,0.691952,0.490791
c,0.851536,0.421588


In [38]:
#v. From a NumPy structured array.
A = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8')])
print(A)
pd.DataFrame(A)

[(0, 0.) (0, 0.) (0, 0.)]


Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


In [44]:
ind = pd.Index([2, 3, 5, 7, 11])

#g) Index as immutable array

print(ind[1])
print(ind[::2])
ind[1] = 0



3
Int64Index([2, 5, 11], dtype='int64')


TypeError: Index does not support mutable operations

In [52]:
#h) Index as ordered set.
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])
print("intersection:",indA & indB)
print(" union:",indA | indB)
print("symmetric difference",indA ^ indB  )


intersection: Int64Index([3, 5, 7], dtype='int64')
 union: Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
symmetric difference Int64Index([1, 2, 9, 11], dtype='int64')


  print("intersection:",indA & indB)
  print(" union:",indA | indB)
  print("symmetric difference",indA ^ indB  )


In [53]:
#i) Data Selection in Series:
import pandas as pd

#i. Series as dictionary
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [54]:
#ii. Series as one-dimensional array
# slicing by explicit index
data['a':'c']


a    0.25
b    0.50
c    0.75
dtype: float64

In [55]:
#iii. Indexers: loc, iloc, and ix
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [57]:
# explicit index when indexing
print(data[1])
# implicit index when slicing
data[1:3]

a


3    b
5    c
dtype: object

In [61]:
#Loc
print(data.loc[1])
data.loc[1:3]

a


1    a
3    b
dtype: object

In [62]:
#iloc
print(data.iloc[1])
data.iloc[1:3]

b


3    b
5    c
dtype: object

In [3]:
#j) Data Selection in DataFrame
        #i. DataFrame as a dictionary
area = pd.Series({'California': 423967, 'Texas': 695662,
                  'New York': 141297, 'Florida': 170312,
                  'Illinois': 149995})
pop = pd.Series({'California': 38332521, 'Texas': 26448193,
                 'New York': 19651127, 'Florida': 19552860,
                 'Illinois': 12882135})
data = pd.DataFrame({'area':area, 'pop':pop})
data

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [20]:
#ii. DataFrame as two-dimensional array
import numpy as np
import pandas as pd

df = pd.DataFrame({'color': ['red', 'blue', 'black'] * 2, 
                   'vehicle': ['car', 'truck'] * 3, 
                   'value': np.arange(1,7)})
df

Unnamed: 0,color,vehicle,value
0,red,car,1
1,blue,truck,2
2,black,car,3
3,red,truck,4
4,blue,car,5
5,black,truck,6
