In [3]:
from pathlib import Path

import numpy as np
import pandas as pd

In [4]:
# Seed NumPy's legacy global RNG so the same integers are drawn on every run.
np.random.seed(100)
# 5 rows x 3 columns of integers drawn from [1, 100); the upper bound is exclusive.
arr = np.random.randint(low=1, high=100, size=(5, 3))
arr

array([[ 9, 25, 68],
       [88, 80, 49],
       [11, 95, 53],
       [99, 54, 67],
       [99, 15, 35]], dtype=int32)

#### Create a pandas dataframe and define column and row names

In [5]:
# No index/columns are passed, so pandas labels rows and columns 0, 1, 2, ... n.
df = pd.DataFrame(data=arr)
df

Unnamed: 0,0,1,2
0,9,25,68
1,88,80,49
2,11,95,53
3,99,54,67
4,99,15,35


In [6]:
# Confirm that the object built from the NumPy array is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

#### Creating our custom row and column Names

In [7]:
# Custom labels: one per row (weekdays) and one per column of the 5x3 array.
row_name = ["Mon", "Tue", "Wed", "Thu", "Fri"]
col_name = ["A", "B", "C"]


In [8]:
# Rebuild the frame from the same array, this time with explicit row and column labels.
df = pd.DataFrame(data=arr, index=row_name, columns=col_name)
df

Unnamed: 0,A,B,C
Mon,9,25,68
Tue,88,80,49
Wed,11,95,53
Thu,99,54,67
Fri,99,15,35


### Creating a DataFrame from Dictionary

In [9]:
# Each dictionary key becomes a column label; each list supplies that column's values.
mydict = {
    'Jan': [1, 2, 3, 4, 5],
    'Feb': [6, 7, 8, 9, 10],
    'Mar': [11, 12, 13, 14, 15],
}

# The index list labels the five rows; its length must match the length of each column list.
df = pd.DataFrame(
    data=mydict,
    index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
)
df

Unnamed: 0,Jan,Feb,Mar
Mon,1,6,11
Tue,2,7,12
Wed,3,8,13
Thu,4,9,14
Fri,5,10,15


#### We can create a DataFrame from files (CSV, Excel)

In [10]:
# Load the CSV into a DataFrame; the relative path is resolved against the
# current working directory, so the file must sit one level above it.
df = pd.read_csv('../bios.csv')
# Displaying a large frame shows only its first and last rows, with "..." in between.
df

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25
...,...,...,...,...,...,...,...,...,...,...
145495,149222,Polina Luchnikova,2002-01-30,Serov,Sverdlovsk,RUS,ROC,167.0,61.0,
145496,149223,Valeriya Merkusheva,1999-09-20,Moskva (Moscow),Moskva,RUS,ROC,168.0,65.0,
145497,149224,Yuliya Smirnova,1998-05-08,Kotlas,Arkhangelsk,RUS,ROC,163.0,55.0,
145498,149225,André Foussard,1899-05-19,Niort,Deux-Sèvres,FRA,France,166.0,,1986-03-18


#### Use `.head()` to see only the first 5 rows and `.tail()` to see only the last 5 rows

In [11]:
# With no argument, head() returns the first 5 rows.
df.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [12]:
# With no argument, tail() returns the last 5 rows.
df.tail()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
145495,149222,Polina Luchnikova,2002-01-30,Serov,Sverdlovsk,RUS,ROC,167.0,61.0,
145496,149223,Valeriya Merkusheva,1999-09-20,Moskva (Moscow),Moskva,RUS,ROC,168.0,65.0,
145497,149224,Yuliya Smirnova,1998-05-08,Kotlas,Arkhangelsk,RUS,ROC,163.0,55.0,
145498,149225,André Foussard,1899-05-19,Niort,Deux-Sèvres,FRA,France,166.0,,1986-03-18
145499,149814,Bill Phillips,1913-07-15,Dulwich Hill,New South Wales,AUS,Australia,,,2003-10-20


#### `.shape` gives the dimensions of a DataFrame as a (number of rows, number of columns) tuple

In [13]:
# shape is an attribute (no parentheses) holding the (rows, columns) tuple.
df.shape

(145500, 10)

#### `.values` returns the DataFrame's data as a NumPy array, because every DataFrame is built on top of NumPy arrays (columns of mixed types come back with `dtype=object`)

In [18]:
# Returns the frame's data as a 2-D NumPy array, without the row and column labels.
# The pandas documentation recommends DataFrame.to_numpy() over .values in new code.
df.values

array([[1, 'Jean-François Blanchy', '1886-12-12', ..., nan, nan,
        '1960-10-02'],
       [2, 'Arnaud Boetsch', '1969-04-01', ..., 183.0, 76.0, nan],
       [3, 'Jean Borotra', '1898-08-13', ..., 183.0, 76.0, '1994-07-17'],
       ...,
       [149224, 'Yuliya Smirnova', '1998-05-08', ..., 163.0, 55.0, nan],
       [149225, 'André Foussard', '1899-05-19', ..., 166.0, nan,
        '1986-03-18'],
       [149814, 'Bill Phillips', '1913-07-15', ..., nan, nan,
        '2003-10-20']], dtype=object)

#### We can also import text files, but we must specify the separator correctly

In [35]:
# Build the path from the current user's home directory instead of hard-coding
# one account name, so the cell is not tied to a single user's machine and the
# notebook does not expose a username.
hello_path = Path.home() / 'OneDrive' / 'Desktop' / 'Hello.txt'
# read_table splits on tabs by default; sep=',' makes it split fields on commas instead.
df = pd.read_table(hello_path, sep=',')
df

Unnamed: 0,I,New,You,We,King


#### We can also import text from the clipboard

In [36]:
# Parse the text currently on the system clipboard, splitting fields on a single space.
# NOTE(review): the result depends on whatever was copied before this cell ran,
# so the output is not reproducible on re-run; copy the intended text first.
df = pd.read_clipboard(sep=' ')
df

Unnamed: 0,####,Unnamed: 1


#### pandas supports various file formats, such as CSV, Excel, JSON, HTML, SQL, Parquet, ORC, Feather, Stata, SAS, Pickle, HDF5, Clipboard, and XML.