In [115]:
import numpy as np
import pandas as pd

## How to create dataframe

In [116]:
# Raw table as a 2-D string array: row 0 carries the column labels and
# column 0 carries the row labels; the remaining cells are the values.
data = np.array([
    ["", "col1", "col2"],
    ["row1", "1", "2"],
    ["row2", "3", "4"],
])

In [117]:
# The first row of `data` supplies the column labels, the first column
# supplies the row labels, and everything else is the table body.
print(
    pd.DataFrame(
        index=data[1:, 0],
        columns=data[0, 1:],
        data=data[1:, 1:],
    )
)

     col1 col2
row1    1    2
row2    3    4


data[0:,0:] means all rows starting from 0 and all columns starting from 0. 

In [118]:
# Wrap the entire array (header row and label column included) and attach
# explicit row labels a-c and integer column labels 1-3.
data_f = pd.DataFrame(
    data=data[0:, 0:],
    index=list("abc"),
    columns=[1, 2, 3],
)
data_f.head()

Unnamed: 0,1,2,3
a,,col1,col2
b,row1,1,2
c,row2,3,4


In [119]:
# A 2-D NumPy array as DataFrame input: each inner list becomes one row,
# and the row/column labels default to 0, 1, 2, ...
my_2darray = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(pd.DataFrame(data=my_2darray))



   0  1  2
0  1  2  3
1  4  5  6


In [120]:
# A dictionary as DataFrame input: each key becomes a column label and
# each list becomes that column's values.
my_dict = {
    1: ['1', '3'],
    2: ['1', '2'],
    3: ['2', '4'],
}
print(pd.DataFrame(data=my_dict))


   1  2  3
0  1  1  2
1  3  2  4


In [121]:
# An existing DataFrame as DataFrame input: a single column 'A' holding
# 4..7, indexed 0..3, is passed straight back to the constructor.
my_df = pd.DataFrame(
    [4, 5, 6, 7],
    index=range(4),
    columns=['A'],
)
print(pd.DataFrame(data=my_df))



   A
0  4
1  5
2  6
3  7


In [122]:
# A Series as DataFrame input: the Series index (country names) becomes
# the row labels and the values (capitals) fill a single column.
my_series = pd.Series(
    {
        "United Kingdom": "London",
        "India": "New Delhi",
        "United States": "Washington",
        "Belgium": "Brussels",
    }
)
print(pd.DataFrame(data=my_series))


                         0
Belgium           Brussels
India            New Delhi
United Kingdom      London
United States   Washington


Notice that each bracketed list in the NumPy array is displayed as a separate row, whereas when a dictionary is converted into a DataFrame each bracketed list is displayed as a column.
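Notice also that when a Series is converted into a DataFrame, the Series index labels become the row labels and the single column receives the default label 0. In the output above the rows appear in alphabetical order rather than in the order the dictionary was written (recent pandas versions preserve the dictionary's insertion order instead).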

## How To Select an Index or Column From a Pandas DataFrame

In [123]:
# Sample 3x3 frame with columns a, b, c; the selection examples that
# follow all operate on it.
df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=list("abc"),
)
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [124]:
# Each lookup below selects one scalar with a single indexer call (row and
# column given together). The chained forms df2.iloc[0][1] / df2.loc[1]['b']
# return the same values, but they first build an intermediate row Series,
# and an integer key in the second bracket of iloc[0][1] relies on
# positional fallback that recent pandas versions deprecate.

# Using `iloc[]`: integer positions (row 0, column 1)
print(df2.iloc[0, 1])

# Using `loc[]`: labels (row label 1, column label 'b')
print(df2.loc[1, 'b'])

# Using `at[]`: label-based access to a single value
print(df2.at[0,'c'])

# Using `iat[]`: position-based access to a single value
print(df2.iat[0,0])

2
5
3
1


Most of the time we only use iloc (integer-position-based indexing) and loc (label-based indexing). You can access values either by their label or by their position in the index or columns.
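When you want to select complete rows or columns with iloc and loc there is a slight difference: the row selector and the column selector go together inside a single pair of square brackets, and `:` stands for "all rows" or "all columns". Single values can be selected either with that same single-bracket form (e.g. df2.loc[1, 'b']) or by chaining a second bracket (e.g. df2.loc[1]['b']).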

In [125]:
# Whole column "b" selected by label; ":" selects every row.
df2.loc[:, "b"]

0    2
1    5
2    8
Name: b, dtype: int32

In [126]:
# Whole column at integer position 1 (the second column); ":" selects every row.
df2.iloc[:, 1]

0    2
1    5
2    8
Name: b, dtype: int32

In [127]:
# Label-based slice of column "a": rows up to and including label 2.
df2.loc[:2, "a"]

0    1
1    4
2    7
Name: a, dtype: int32

In [128]:
# Position-based slice of the first column: row positions 0 and 1
# (the stop position 2 is excluded).
df2.iloc[:2, 0]

0    1
1    4
Name: a, dtype: int32

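One thing to note is that with position-based indexing (df2.iloc) the end of the slice is excluded: df2.iloc[:2, 0] returns the rows at positions 0 and 1 but not the row at position 2. With label-based indexing (df2.loc) the end label is included, so df2.loc[:2, "a"] returns the rows labelled 0, 1 and 2.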

##  How To Add an Index, Row or Column to a Pandas DataFrame

Adding an index to a DataFrame:
If you don't specify which column to use as the index, the DataFrame automatically gets a numeric index running from 0 up to the number of rows minus one.
However, you can set any of your columns as the index by using set_index.



In [129]:
# Promote column "c" to the row index, modifying df2 itself instead of
# returning a new frame.
df2.set_index(keys="c", inplace=True)

In [130]:
df2


Unnamed: 0_level_0,a,b
c,Unnamed: 1_level_1,Unnamed: 2_level_1
3,1,2
6,4,5
9,7,8


We have to pass inplace=True so that the change is applied to the df2 DataFrame itself; otherwise set_index returns a new DataFrame indexed by column c and leaves the original DataFrame as it is.

## Resetting the index to default 

In [131]:
# Return to the default 0..n-1 index; drop=True discards the old index
# values instead of re-inserting them as a column.
df2 = df2.reset_index(drop=True, level=0)


In [132]:
df2

Unnamed: 0,a,b
0,1,2
1,4,5
2,7,8


### Adding a column in dataframe

In [133]:
# Assigning a list to a column label that does not exist yet adds it as a
# new column.
df2["c"] = [3, 6, 9]
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [138]:
# Add column "d" through .loc. The values are strings, and the Series
# (default index 0, 1, 2) is matched to df2's rows by index label.
df2.loc[:, "d"] = pd.Series(data=["5", "6", "7"])

In [139]:
df2

Unnamed: 0,a,b,c,d
0,1.0,2.0,3.0,5.0
1,4.0,5.0,6.0,6.0
2,7.0,8.0,9.0,7.0
3,1.0,2.0,3.0,


## Adding rows to a dataframe

In [140]:
# Assigning to a row label that does not exist yet (3) appends a new row;
# the list supplies one value for each of the columns a-d.
df2.loc[3, :] = [1, 2, 3, 4]


In [141]:
df2

Unnamed: 0,a,b,c,d
0,1.0,2.0,3.0,5
1,4.0,5.0,6.0,6
2,7.0,8.0,9.0,7
3,1.0,2.0,3.0,4


## How to delete rows  and columns

In [142]:
# Drop the column with label 'd' (axis "columns"), modifying df2 in place.
df2.drop(labels="d", axis="columns", inplace=True)


In [143]:
df2

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0
3,1.0,2.0,3.0


#Drop row with index 3

In [144]:
# df2.index[3] looks up the label of the row at position 3; that label is
# then dropped from df2 in place.
df2.drop(labels=df2.index[3], axis="index", inplace=True)

In [145]:
df2

Unnamed: 0,a,b,c
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0


## How to format data in dataframes