In [1]:
import numpy as np
import pandas as pd

In [2]:
from pandas import Series, DataFrame

SERIES: a one-dimensional, array-like object containing a sequence of values of the same type (the types are similar to NumPy's) and an associated array of data labels, called its index.

In [3]:
# Build a Series from a plain list; with no index given, pandas supplies 0..n-1.
obj = pd.Series(data=[4, 7, -5, 3])

In [4]:
# Display the Series: index labels on the left, values on the right.
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [5]:
# .array exposes the values held by the Series as a pandas array object.
obj.array

<PandasArray>
[4, 7, -5, 3]
Length: 4, dtype: int64

In [6]:
# .index holds the labels; no index was passed at construction, so it is a RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# Same values as obj, but each entry gets an explicit string label.
obj2 = pd.Series(data=[4, 7, -5, 3], index=list("dbac"))

In [8]:
# Display obj2 with its custom string labels.
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [9]:
# Boolean-mask filtering keeps only the positive entries; labels stay attached to values.
obj2[obj2 > 0]

d    4
b    7
c    3
dtype: int64

In [10]:
# Scalar multiplication is applied element-wise and keeps the index unchanged.
obj2 * 2

d     8
b    14
a   -10
c     6
dtype: int64

In [11]:
# `in` tests membership against the index labels, much like keys of a dict.
"b" in obj2

True

In [12]:
# "e" is not one of obj2's labels, so this evaluates to False.
"e" in obj2


False

In [13]:
# State name -> number mapping, used below to build a Series from a dict.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)

In [14]:
# A dict converts directly to a Series: keys become the index, values the data.
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [15]:
# Round-trip: convert the Series back into a plain dict.
obj3.to_dict()

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

In [16]:
# Labels to request from sdata: "California" is not a key there, and "Utah" is omitted.
states = [
    "California",
    "Ohio",
    "Oregon",
    "Texas",
]

In [17]:
# An explicit index selects and orders the dict entries by those labels;
# a label with no matching key ends up as NaN.
obj4 = pd.Series(data=sdata, index=states)

In [18]:
# Display obj4; the NaN entry makes the values float64 instead of int64.
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [19]:
# pd.isna returns a boolean Series that is True where the value is missing.
pd.isna(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [20]:
# Show obj3 again so its labels can be compared with obj4's.
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [21]:
# Show obj4 again so its labels can be compared with obj3's.
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [22]:
# Arithmetic aligns on index labels; a label that lacks a value in either operand yields NaN.
obj3+obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [23]:
# Name the index and the Series itself; both names show up in the repr.
obj4.index.name = "state"
obj4.name = "population"

In [24]:
# Display obj4 again: the index name heads the labels and the Series name is in the footer.
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [27]:
# Assigning to .index overwrites the labels in place; the values are untouched.
obj.index = ["Bob", "Steve", "Jeff", "Ryan"]
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

DATAFRAME: A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns, each of which can be a different value type (numeric, string, Boolean, etc.). The DataFrame has both a row and column index; it can be thought of as a dictionary of Series all sharing the same index.


In [28]:
# Column-oriented input: each key becomes a column and each equal-length list
# supplies that column's values.
data = dict(
    state=["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
frame = pd.DataFrame(data=data)

In [29]:
# Display the frame; rows get a default integer index.
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [30]:
# Columns are laid out in the order requested. A requested column that is not a
# key of the dict ("debt") appears with missing values in the result.
frame2 = pd.DataFrame(data=data, columns=["year", "state", "pop", "debt"])
frame2


Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,
5,2003,Nevada,3.2,


In [32]:
# The column labels are themselves stored in an Index object.
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [33]:
# A DataFrame column can be retrieved as a Series either with dictionary-like
# notation (used here) or with dot-attribute notation.

frame2["state"]

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [35]:
# Attribute-style access returns the same Series as frame2["state"].
frame2.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [37]:
# .loc selects a row by its index label; the row comes back as a Series keyed by column name.
frame2.loc[1]

year     2001
state    Ohio
pop       1.7
debt      NaN
Name: 1, dtype: object

In [38]:
# Columns can be modified by assignment. The empty "debt" column can be given a
# scalar value (copied to every row, as here) or an array of values.
frame2["debt"] = 16.5


In [39]:
# Display frame2 to confirm that every row of "debt" now holds 16.5.
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,16.5
1,2001,Ohio,1.7,16.5
2,2002,Ohio,3.6,16.5
3,2001,Nevada,2.4,16.5
4,2002,Nevada,2.9,16.5
5,2003,Nevada,3.2,16.5


In [41]:
# Assigning a Series aligns it on the DataFrame's index: rows 2, 4 and 5 receive
# values and every other row of "debt" becomes NaN.
val = pd.Series(data=[-1.2, -1.5, -1.7], index=[2, 4, 5])
frame2["debt"] = val
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,-1.2
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,-1.5
5,2003,Nevada,3.2,-1.7


In [42]:
# Assigning to a column that doesn't exist creates it; here a boolean flag
# marking the rows whose state is "Ohio".
frame2["eastern"] = frame2["state"].eq("Ohio")
frame2

Unnamed: 0,year,state,pop,debt,eastern
0,2000,Ohio,1.5,,True
1,2001,Ohio,1.7,,True
2,2002,Ohio,3.6,-1.2,True
3,2001,Nevada,2.4,,False
4,2002,Nevada,2.9,-1.5,False
5,2003,Nevada,3.2,-1.7,False


In [43]:
# `del` removes the column in place, like deleting a key from a dict;
# the column listing confirms "eastern" is gone.
del frame2["eastern"]
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [45]:
# Nested dict: outer keys become the columns, inner keys become the row index.
# Nevada has no entry for 2000, so that cell is NaN.
populations = {
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
    "Nevada": {2001: 2.4, 2002: 2.9},
}
frame3 = pd.DataFrame(data=populations)
frame3

Unnamed: 0,Ohio,Nevada
2000,1.5,
2001,1.7,2.4
2002,3.6,2.9


In [46]:
# .T transposes the frame, swapping rows and columns.
frame3.T

Unnamed: 0,2000,2001,2002
Ohio,1.5,1.7,3.6
Nevada,,2.4,2.9


In [47]:
# An explicit index overrides the inner-dict keys: 2000 is dropped, and 2003,
# which has no data in either inner dict, is NaN in both columns.
pd.DataFrame(populations, index=[2001, 2002, 2003])

Unnamed: 0,Ohio,Nevada
2001,1.7,2.4
2002,3.6,2.9
2003,,


In [48]:
# Name both axes of frame3: the row index holds years and the columns hold states.
# Both names are set on frame3, the frame displayed on the last line.
frame3.index.name = "year"
frame3.columns.name = "state"
frame3

Unnamed: 0_level_0,Ohio,Nevada
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1.5,
2001,1.7,2.4
2002,3.6,2.9


In [49]:
# Convert the frame's values to a two-dimensional NumPy array.
frame3.to_numpy()

array([[1.5, nan],
       [1.7, 2.4],
       [3.6, 2.9]])

In [51]:
# If the DataFrame's columns have different data types, the dtype of the returned
# array is chosen to accommodate all of the columns (object in this case).
frame2.to_numpy()

array([[2000, 'Ohio', 1.5, nan],
       [2001, 'Ohio', 1.7, nan],
       [2002, 'Ohio', 3.6, -1.2],
       [2001, 'Nevada', 2.4, nan],
       [2002, 'Nevada', 2.9, -1.5],
       [2003, 'Nevada', 3.2, -1.7]], dtype=object)

INDEX OBJECTS:

In [52]:
# Build a small labelled Series and keep a handle on its Index object.
obj = pd.Series(data=np.arange(3), index=list("abc"))
index = obj.index
index

Index(['a', 'b', 'c'], dtype='object')

In [53]:
# Slicing an Index returns another Index.
index[1:]

Index(['b', 'c'], dtype='object')

In [54]:
# Index objects are immutable, so item assignment raises TypeError. The error is
# the point of this cell, so catch it and show the message rather than letting
# the exception abort a Restart & Run All.
try:
    index[1] = "d"
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: Index does not support mutable operations

In [56]:
# Create an Index first, then hand it to the Series constructor as its labels.
labels = pd.Index(np.arange(3))
obj2 = pd.Series(data=[1.5, -2.5, 0], index=labels)
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

In [57]:
# Identity check: is the Series' index the very same Index object passed in, not a copy?
obj2.index is labels

True