In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("stock_data.csv")
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


# Skipping Row

In [5]:
    #it will skip the first line
                                #header = 1 will also work the same
df = pd.read_csv("stock_data.csv", skiprows = 1)
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [7]:
#if you dont have a header 
                            #will make header but default values
df = pd.read_csv("stock_data.csv", header = None)
df

Unnamed: 0,0,1,2,3,4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


# Providing Header Names

In [8]:
#if you want to provide names to your header 
                            #will make header but default values
df = pd.read_csv("stock_data.csv", header = None, names = ['col1','col2', 'col3','col4'])
df

Unnamed: 0,col1,col2,col3,col4
tickers,eps,revenue,price,people
GOOGL,27.82,87,845,larry page
WMT,4.61,484,65,n.a.
MSFT,-1,85,64,bill gates
RIL,not available,50,1023,mukesh ambani
TATA,5.6,-1,n.a.,ratan tata


# Reading nRows

In [9]:
#if you want to read specific row
df = pd.read_csv("stock_data.csv", nrows = 3)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


# Handling no data / messy data

In [11]:
#if you have cells where you dont have data like 
df = pd.read_csv("stock_data.csv")
df
#check row (index) 3 and eps column and also row 1 and people columns

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [17]:
                      #so whenever it will get values like not available and n.a. in csv
                                #it will replace them with NaN
df = pd.read_csv("stock_data.csv", na_values = ["not available", "n.a."])
df
#if you wnat to replace any faulty or messy data to NaN, you can use it

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


In [24]:
#so we have -1 value in revenue that is faulty value
#we can apply same thing as above for replacing it with with NaN
#but -1 of eps will also get replaced and we dont want that 
#so we can pass a dictionary to the navalues

                                #so whenever it will get values like not available and n.a. in eps column of csv
                                #and also whenever it will get not av..... and included -1 in revenue column 
                                #whenever it will get values like not available and n.a. in price column of csv
                                # whenever it will get values like not available and n.a. in people column of csv

                                #it will replace them with NaN
df = pd.read_csv("stock_data.csv", na_values = {
    "eps" : ["not available", "n.a."],
    "revenue" : ["not available", "n.a.", "-1"],
    "price" : ["not available", "n.a."],
    "people" : ["not available", "n.a."]
})
df
#if you wnat to replace any faulty or messy data to NaN, you can use it

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


# Writing to a CSV File

In [29]:
df = pd.read_csv("stock_data.csv", na_values = {
    "eps" : ["not available", "n.a."],
    "revenue" : ["not available", "n.a.", "-1"],
    "price" : ["not available", "n.a."],
    "people" : ["not available", "n.a."]
})
#so to write above dataframe to a csv file
                    #column having values 0, 1, 2 ,3 and 3 will not be written
df.to_csv("new.csv", index = False)
#this will create a new.csv in our directory

In [35]:
#if you want to write specific columns to the file
df.to_csv('new_spec_col.csv', index = False, columns = ['tickers', 'eps'])

In [36]:
#now if you read above file
df = pd.read_csv("new_spec_col.csv")
df

Unnamed: 0,tickers,eps
0,GOOGL,27.82
1,WMT,4.61
2,MSFT,-1.0
3,RIL,
4,TATA,5.6


In [38]:
#to skip headers (names of columns while writing)
df.to_csv('no_header.csv', index = False, header = False)

In [39]:
#now if you read above file
df = pd.read_csv("no_header.csv")
df

Unnamed: 0,GOOGL,27.82
0,WMT,4.61
1,MSFT,-1.0
2,RIL,
3,TATA,5.6
