# Other notes on DataFrame

The index labels the rows and distinguishes one row from another, while the column names label the columns and distinguish one column from another.



In [1]:
import numpy as np
import pandas as pd

In [11]:
# Build a small labelled table as one 2-D array: the first row carries the
# column labels and the first column carries the row labels.
# Because strings and integers are mixed, NumPy stores everything under a
# single string dtype, so the numbers end up as text.
header_row = ['', 'Col1', 'Col2', 'Col3']
table_rows = [
    ['Row1', 1, 2, 3],
    ['Row2', 4, 5, 6],
    ['Row3', 7, 8, 9],
]
data = np.array([header_row] + table_rows)
data

array([['', 'Col1', 'Col2', 'Col3'],
       ['Row1', '1', '2', '3'],
       ['Row2', '4', '5', '6'],
       ['Row3', '7', '8', '9']], dtype='<U4')

In [12]:
# Take the labelled array apart: the top row (minus its first cell) gives the
# column names, the left column (minus its first cell) gives the row index,
# and everything else is the table body.
col_labels = data[0, 1:]
row_labels = data[1:, 0]
body = data[1:, 1:]
df = pd.DataFrame(body, index=row_labels, columns=col_labels)
df

Unnamed: 0,Col1,Col2,Col3
Row1,1,2,3
Row2,4,5,6
Row3,7,8,9


In [13]:
# An unlabelled 2-D NumPy array with two rows and three columns.
my_2darr = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
my_2darr

array([[1, 2, 3],
       [4, 5, 6]])

In [14]:
# Plain dict: integer keys mapped to two-element lists of digit strings.
my_dict = dict([
    (1, ['1', '3']),
    (2, ['1', '2']),
    (3, ['2', '4']),
])
my_dict

{1: ['1', '3'], 2: ['1', '2'], 3: ['2', '4']}

In [29]:
# Single-column frame: four values under column 'A', with row labels 0..3.
df0 = pd.DataFrame([1, 2, 3, 4], index=range(4), columns=['A'])
df0

Unnamed: 0,A
0,1
1,2
2,3
3,4


In [32]:
# Two-column frame: each inner list is one row, giving columns 'A' and 'B'
# and row labels 0..3.
ab_rows = [
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
]
df1 = pd.DataFrame(ab_rows, index=range(len(ab_rows)), columns=['A', 'B'])
df1

Unnamed: 0,A,B
0,1,2
1,2,3
2,3,4
3,4,5


In [33]:
# Country -> capital lookup; the cells below turn it into a DataFrame and a Series.
info = {
    "Belgium": "Brussels",
    "India": "New Delhi",
    "United Kingdom": "London",
    "United States": "Washington",
}
info

{'Belgium': 'Brussels',
 'India': 'New Delhi',
 'United Kingdom': 'London',
 'United States': 'Washington'}

In [39]:
# Each (country, capital) pair becomes one row. No column names are given,
# so the two columns get the default integer labels 0 and 1.
country_capital_pairs = list(info.items())
df_info = pd.DataFrame(country_capital_pairs)
df_info

Unnamed: 0,0,1
0,Belgium,Brussels
1,India,New Delhi
2,United Kingdom,London
3,United States,Washington


In [40]:
# No index was passed when df_info was built, so it carries a default
# integer RangeIndex.
df_info.index

RangeIndex(start=0, stop=4, step=1)

### Create Series from dict.

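The keys of the dictionary become the index of the Series. Older pandas versions sorted the keys automatically; since pandas 0.23 (on Python 3.6+) the dictionary's insertion order is preserved instead. In the example below the keys are already in alphabetical order, so both behaviours give the same result.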

In [34]:
# Build a Series straight from the dict: each key becomes an index label and
# each value the entry stored under that label.
my_series = pd.Series(data=info)
my_series

Belgium             Brussels
India              New Delhi
United Kingdom        London
United States     Washington
dtype: object

In [38]:
# The index of the Series holds the dictionary keys.
my_series.index

Index(['Belgium', 'India', 'United Kingdom', 'United States'], dtype='object')

In [54]:
# 3x4 table of floats; the last row has missing values (NaN) in its first and
# last cells, which the cells below use to compare len() and count().
nan_values = np.array([
    [1, 2, 3, 4],
    [4, 5, 6, 7],
    [np.nan, 9, 2, np.nan],
])
df2 = pd.DataFrame(nan_values, columns=['c1', 'c2', 'c3', 'c4'])
df2

Unnamed: 0,c1,c2,c3,c4
0,1.0,2.0,3.0,4.0
1,4.0,5.0,6.0,7.0
2,,9.0,2.0,


In [55]:
# Dimensions of the frame as a (rows, columns) tuple.
df2.shape

(3, 4)

In [56]:
# Height of the frame: the number of rows, including rows that contain NaN.

len(df2)

3

In [58]:
# count() returns the number of non-NaN entries in column 'c1', so it is
# smaller than len(df2) whenever the column has missing values.

df2['c1'].count()

2

In [59]:
# Column labels as a plain Python list.
list(df2.columns)

['c1', 'c2', 'c3', 'c4']

In [60]:
# Positional scalar lookup: row 0, column 0, in a single indexer call.
# A chained `df2.iloc[0][0]` would apply the second `[0]` to a Series labelled
# 'c1'..'c4', relying on the deprecated integer-key positional fallback.
df2.iloc[0, 0]

1.0

In [61]:
# Label-based scalar lookup: row label 0, column 'c2', in a single .loc call
# rather than chaining [] on the intermediate row Series.
df2.loc[0, 'c2']

2.0

In [62]:
# Label-based access to a single scalar: row label 0, column 'c3'.
df2.at[0, 'c3']

3.0

In [64]:
df2.iat[0, 1] # position-based access to a single scalar: row 0, column 1

2.0