# Pandas DataFrame
* A DataFrame is a two dimensional data structure where the data is arranged in a tabular format in rows and columns.
* Features of Dataframe
  * Columns can be of different data types
  * Size of dataframe can be changed
  * Axes(rows and columns) are labeled
  * Arithmetic operations can be performed on rows and columns

# DataFrame Creation
NOTE : Note that every column of the data frame is a pandas Series.

## Creating a dataframe from a list

In [1]:
import pandas as pd
list_1  = [1, 2, 3, 4]
# As no column name is passed, by default it returns ‘0’ as column name
df = pd.DataFrame(data=list_1)
print(df)

   0
0  1
1  2
2  3
3  4


## DataFrame with column name

In [2]:
import numpy as np
arr_2 = np.random.randint(10, 50, size=(2, 3))
print(arr_2)
df = pd.DataFrame(arr_2, columns=['A', 'B', 'C'])
print(df)

[[17 32 33]
 [48 36 48]]
    A   B   C
0  17  32  33
1  48  36  48


## Creating a dataframe from a dictionary

In [3]:
import numpy as np
dict_3 = {'one': [11, 22, 33, 44], 
         'two': [1, 2, 3, 4]}
df_2 = pd.DataFrame(dict_3)
df_2

Unnamed: 0,one,two
0,11,1
1,22,2
2,33,3
3,44,4


## Changing Index while creating dataframe

In [4]:
arr_2 = np.random.randint(10, 50, size=(2, 3))
print(arr_2)
df_1 = pd.DataFrame(arr_2, columns=['A', 'B', 'C'], index=['index_D', 'index_E'])
print(df_1)

[[11 20 18]
 [27 44 36]]
          A   B   C
index_D  11  20  18
index_E  27  44  36


# Reading data from csv file


In [5]:
df=pd.read_csv('data/bigmarket1.csv')
df

df.rename(
    columns={"Month": "A", "Store": "B", "Sales": "C"},
    inplace=True,
)
df

df.columns= ["X", "Y", "Z"]
df.set_axis(["P", "Q", "R"], axis="columns", inplace=True)
df



Unnamed: 0,P,Q,R
0,Jan,A,31037
1,Jan,B,20722
2,Jan,C,24557
3,Jan,D,34649
4,Jan,E,29795
5,Feb,A,29133
6,Feb,B,22695
7,Feb,C,28312
8,Feb,D,31454
9,Feb,E,46267


In [27]:
df=pd.read_csv('data/bigmarket.txt', sep='\t')
df

Unnamed: 0,Month,Store,Sales
0,Jan,A,31037
1,Jan,B,20722
2,Jan,C,24557
3,Jan,D,34649
4,Jan,E,29795
5,Feb,A,29133
6,Feb,B,22695
7,Feb,C,28312
8,Feb,D,31454
9,Feb,E,46267


In [39]:
df.iloc[1:3, 1:2]

Unnamed: 0,Store
1,B
2,C


# Reading data from xlsx file

In [7]:
df=pd.read_excel('data/Supermarket.xlsx')
df.tail(3)

Unnamed: 0,Day,Store,Percentage
22,Friday,C,77
23,Friday,D,89
24,Friday,E,68


# Reading data from json file



In [8]:
df=pd.read_json('data/bigmarket.json')
df

Unnamed: 0,Month,Store,Sales
0,Jan,A,31037
1,Jan,B,20722
2,Jan,C,24557
3,Jan,D,34649
4,Jan,E,29795
5,Feb,A,29133
6,Feb,B,22695
7,Feb,C,28312
8,Feb,D,31454
9,Feb,E,46267


In [9]:
## Read from 1 sheet in excel file
df=pd.read_excel('data/sales_transactions.xlsx', sheet_name='Sheet1')
df

Unnamed: 0,account,name,order,sku,quantity,unit price,ext price
0,383080,Will LLC,10001,B1-20000,7,33.69,235.83
1,383080,Will LLC,10001,B1-86481,3,35.99,107.97
2,412290,Jerde-Hilpert,10005,S1-06532,48,55.82,2679.36
3,412290,Jerde-Hilpert,10005,S1-47412,44,78.91,3472.04
4,412290,Jerde-Hilpert,10005,S1-27722,36,25.42,915.12
5,218895,Kulas Inc,10006,S1-27722,32,95.66,3061.12
6,218895,Kulas Inc,10006,B1-33087,23,22.55,518.65
7,218895,Kulas Inc,10006,B1-20000,-1,72.18,-72.18


In [10]:
import json
data = json.load(open('data/bigmarket.json','r'))
data

[{'Month': 'Jan', 'Store': 'A', 'Sales': 31037},
 {'Month': 'Jan', 'Store': 'B', 'Sales': 20722},
 {'Month': 'Jan', 'Store': 'C', 'Sales': 24557},
 {'Month': 'Jan', 'Store': 'D', 'Sales': 34649},
 {'Month': 'Jan', 'Store': 'E', 'Sales': 29795},
 {'Month': 'Feb', 'Store': 'A', 'Sales': 29133},
 {'Month': 'Feb', 'Store': 'B', 'Sales': 22695},
 {'Month': 'Feb', 'Store': 'C', 'Sales': 28312},
 {'Month': 'Feb', 'Store': 'D', 'Sales': 31454},
 {'Month': 'Feb', 'Store': 'E', 'Sales': 46267},
 {'Month': 'March', 'Store': 'A', 'Sales': 32961},
 {'Month': 'March', 'Store': 'B', 'Sales': 26451},
 {'Month': 'March', 'Store': 'C', 'Sales': 47814},
 {'Month': 'March', 'Store': 'D', 'Sales': 36069},
 {'Month': 'March', 'Store': 'E', 'Sales': 31874},
 {'Month': 'Apr', 'Store': 'A', 'Sales': 27253},
 {'Month': 'Apr', 'Store': 'B', 'Sales': 40241},
 {'Month': 'Apr', 'Store': 'C', 'Sales': 47488},
 {'Month': 'Apr', 'Store': 'D', 'Sales': 25432},
 {'Month': 'Apr', 'Store': 'E', 'Sales': 33880},
 {'Month':

In [16]:
df=pd.read_html('data/Supermarket.html')
df

[    Unnamed: 0          A      B           C
 0            1        Day  Store  Percentage
 1            2     Monday      A          79
 2            3     Monday      B          81
 3            4     Monday      C          74
 4            5     Monday      D          77
 5            6     Monday      E          66
 6            7    Tuesday      A          78
 7            8    Tuesday      B          86
 8            9    Tuesday      C          89
 9           10    Tuesday      D          97
 10          11    Tuesday      E          86
 11          12  Wednesday      A          81
 12          13  Wednesday      B          87
 13          14  Wednesday      C          84
 14          15  Wednesday      D          94
 15          16  Wednesday      E          82
 16          17   Thursday      A          80
 17          18   Thursday      B          83
 18          19   Thursday      C          81
 19          20   Thursday      D          88
 20          21   Thursday      E 

In [15]:
df2=df[3:11]
df2

[]