## Read and Parse CSV

In [4]:
import csv

## read file

In [7]:
# Files handed to the csv module should be opened with newline='' so that the
# csv reader, not the file object, decides how line endings are handled.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)

    # csv.reader returns a reader object, not the rows themselves: printing it
    # only shows its repr, so we need to loop over it to get the data.
    print(csv_reader)

<_csv.reader object at 0x7958ff58aa40>


In [8]:
# Open with newline='' (as the csv docs recommend) so that quoted fields
# containing embedded line breaks are still parsed as part of a single row.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)

    # Each iteration yields one row as a list of strings; the header row is
    # just the first of those rows.
    for line in csv_reader:
        print(line)

['first_name', 'last_name', 'email']
['John', 'Doe', 'john-doe@bogusemail.com']
['Mary', 'Smith-Robinson', 'maryjacobs@bogusemail.com']
['Dave', 'Smith', 'davesmith@bogusemail.com']
['Jane', 'Stuart', 'janestuart@bogusemail.com']
['Tom', 'Wright', 'tomwright@bogusemail.com']
['Steve', 'Robinson', 'steverobinson@bogusemail.com']
['Nicole', 'Jacobs', 'nicolejacobs@bogusemail.com']
['Jane', 'Wright', 'janewright@bogusemail.com']
['Jane', 'Doe', 'janedoe@bogusemail.com']
['Kurt', 'Wright', 'kurtwright@bogusemail.com']
['Kurt', 'Robinson', 'kurtrobinson@bogusemail.com']
['Jane', 'Jenkins', 'janejenkins@bogusemail.com']
['Neil', 'Robinson', 'neilrobinson@bogusemail.com']
['Tom', 'Patterson', 'tompatterson@bogusemail.com']
['Sam', 'Jenkins', 'samjenkins@bogusemail.com']
['Steve', 'Stuart', 'stevestuart@bogusemail.com']
['Maggie', 'Patterson', 'maggiepatterson@bogusemail.com']
['Maggie', 'Stuart', 'maggiestuart@bogusemail.com']
['Jane', 'Doe', 'janedoe@bogusemail.com']
['Steve', 'Patterson', '

## print 1st col only

In [9]:
# Open with newline='' (as the csv docs recommend) so the csv reader handles
# line endings itself.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)

    for line in csv_reader:
        # A blank line in the file is yielded as an empty list; skip it rather
        # than failing with IndexError on line[0].
        if not line:
            continue
        print(line[0])  # index 0 is the first column of the row

first_name
John
Mary
Dave
Jane
Tom
Steve
Nicole
Jane
Jane
Kurt
Kurt
Jane
Neil
Tom
Sam
Steve
Maggie
Maggie
Jane
Steve
Dave
Sam
Kurt
Sam
Jane
Dave
Sam
Tom
Jane
Maggie
Mary
Neil
Corey
Steve
Jane
John
Neil
Corey
Corey
Mary
Jane
Travis
John
Travis


## skip first row of column names

In [10]:
# Open with newline='' (as the csv docs recommend) so the csv reader handles
# line endings itself.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)

    # Advance the reader by one row to discard the header before looping.
    # The None default keeps an empty file from raising StopIteration.
    next(csv_reader, None) # can be understood better after reading about generators

    for line in csv_reader:
        print(line)

['John', 'Doe', 'john-doe@bogusemail.com']
['Mary', 'Smith-Robinson', 'maryjacobs@bogusemail.com']
['Dave', 'Smith', 'davesmith@bogusemail.com']
['Jane', 'Stuart', 'janestuart@bogusemail.com']
['Tom', 'Wright', 'tomwright@bogusemail.com']
['Steve', 'Robinson', 'steverobinson@bogusemail.com']
['Nicole', 'Jacobs', 'nicolejacobs@bogusemail.com']
['Jane', 'Wright', 'janewright@bogusemail.com']
['Jane', 'Doe', 'janedoe@bogusemail.com']
['Kurt', 'Wright', 'kurtwright@bogusemail.com']
['Kurt', 'Robinson', 'kurtrobinson@bogusemail.com']
['Jane', 'Jenkins', 'janejenkins@bogusemail.com']
['Neil', 'Robinson', 'neilrobinson@bogusemail.com']
['Tom', 'Patterson', 'tompatterson@bogusemail.com']
['Sam', 'Jenkins', 'samjenkins@bogusemail.com']
['Steve', 'Stuart', 'stevestuart@bogusemail.com']
['Maggie', 'Patterson', 'maggiepatterson@bogusemail.com']
['Maggie', 'Stuart', 'maggiestuart@bogusemail.com']
['Jane', 'Doe', 'janedoe@bogusemail.com']
['Steve', 'Patterson', 'stevepatterson@bogusemail.com']
['Dav

## write new csv file (e.g. with a different delimiter)

In [27]:
# Copy two columns of names.csv into a new tab-separated file.
# Both files are opened with newline='' as the csv docs require: without it,
# the writer's '\r\n' row terminator is translated to '\r\r\n' on Windows,
# which shows up as a blank line after every row.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)

    with open('names_new.csv', 'w', newline='') as wf:
        csv_writer = csv.writer(wf, delimiter='\t')

        for line in csv_reader:
            # Keep only the columns at index 1 and 2 (the first column is dropped).
            csv_writer.writerow(line[1:3])

## read csv with the correct delimiter

In [28]:
# Read the tab-separated file back. The reader must be told the delimiter that
# was used when writing, otherwise each line comes back as one single field.
# newline='' lets the csv reader handle line endings itself.
with open('names_new.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f, delimiter='\t')

    for line in csv_reader:
        print(line)

['last_name', 'email']
['Doe', 'john-doe@bogusemail.com']
['Smith-Robinson', 'maryjacobs@bogusemail.com']
['Smith', 'davesmith@bogusemail.com']
['Stuart', 'janestuart@bogusemail.com']
['Wright', 'tomwright@bogusemail.com']
['Robinson', 'steverobinson@bogusemail.com']
['Jacobs', 'nicolejacobs@bogusemail.com']
['Wright', 'janewright@bogusemail.com']
['Doe', 'janedoe@bogusemail.com']
['Wright', 'kurtwright@bogusemail.com']
['Robinson', 'kurtrobinson@bogusemail.com']
['Jenkins', 'janejenkins@bogusemail.com']
['Robinson', 'neilrobinson@bogusemail.com']
['Patterson', 'tompatterson@bogusemail.com']
['Jenkins', 'samjenkins@bogusemail.com']
['Stuart', 'stevestuart@bogusemail.com']
['Patterson', 'maggiepatterson@bogusemail.com']
['Stuart', 'maggiestuart@bogusemail.com']
['Doe', 'janedoe@bogusemail.com']
['Patterson', 'stevepatterson@bogusemail.com']
['Smith', 'davesmith@bogusemail.com']
['Wilks', 'samwilks@bogusemail.com']
['Jefferson', 'kurtjefferson@bogusemail.com']
['Stuart', 'samstuart@bogus

## dictionary reader - other type of reader

Each row is returned as a dictionary keyed by column name. The header row is used for the keys, so it is no longer printed as a data row.

In [29]:
# Open with newline='' (as the csv docs recommend) so the csv reader handles
# line endings itself.
with open('names.csv', 'r', newline='') as f:

    # DictReader takes its keys from the first row of the file, so every row
    # it yields maps column name -> value.
    csv_reader = csv.DictReader(f)

    for line in csv_reader:
        print(line)

{'first_name': 'John', 'last_name': 'Doe', 'email': 'john-doe@bogusemail.com'}
{'first_name': 'Mary', 'last_name': 'Smith-Robinson', 'email': 'maryjacobs@bogusemail.com'}
{'first_name': 'Dave', 'last_name': 'Smith', 'email': 'davesmith@bogusemail.com'}
{'first_name': 'Jane', 'last_name': 'Stuart', 'email': 'janestuart@bogusemail.com'}
{'first_name': 'Tom', 'last_name': 'Wright', 'email': 'tomwright@bogusemail.com'}
{'first_name': 'Steve', 'last_name': 'Robinson', 'email': 'steverobinson@bogusemail.com'}
{'first_name': 'Nicole', 'last_name': 'Jacobs', 'email': 'nicolejacobs@bogusemail.com'}
{'first_name': 'Jane', 'last_name': 'Wright', 'email': 'janewright@bogusemail.com'}
{'first_name': 'Jane', 'last_name': 'Doe', 'email': 'janedoe@bogusemail.com'}
{'first_name': 'Kurt', 'last_name': 'Wright', 'email': 'kurtwright@bogusemail.com'}
{'first_name': 'Kurt', 'last_name': 'Robinson', 'email': 'kurtrobinson@bogusemail.com'}
{'first_name': 'Jane', 'last_name': 'Jenkins', 'email': 'janejenkins@

This is easier to read, and we don't have to scroll up to see which column a value belongs to.

Accessing a column by its name is easy too.

In [24]:
# Open with newline='' (as the csv docs recommend) so the csv reader handles
# line endings itself.
with open('names.csv', 'r', newline='') as f:

    csv_reader = csv.DictReader(f)

    for line in csv_reader:
        # Look the value up by column name instead of by positional index.
        print(line['first_name'])

John
Mary
Dave
Jane
Tom
Steve
Nicole
Jane
Jane
Kurt
Kurt
Jane
Neil
Tom
Sam
Steve
Maggie
Maggie
Jane
Steve
Dave
Sam
Kurt
Sam
Jane
Dave
Sam
Tom
Jane
Maggie
Mary
Neil
Corey
Steve
Jane
John
Neil
Corey
Corey
Mary
Jane
Travis
John
Travis


## dictionary writer - other type of writer

For `DictWriter` we have to provide the field names.

In [30]:
# Copy names.csv to a tab-separated file using the dictionary reader/writer.
# Both files are opened with newline='' as the csv docs require: without it,
# the writer's '\r\n' row terminator is translated to '\r\r\n' on Windows,
# which shows up as a blank line after every row.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.DictReader(f)

    with open('names_new.csv', 'w', newline='') as wf:
        fieldnames = ['first_name', 'last_name', 'email'] # we need to specify fieldnames to dictwriter

        csv_writer = csv.DictWriter(wf, fieldnames=fieldnames, delimiter='\t')
        csv_writer.writeheader()# to include header specifying column names

        # Each row is a dict; DictWriter writes its values in fieldnames order.
        for line in csv_reader:
            csv_writer.writerow(line)

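## remove a certain column

- don't list it in `fieldnames`
- `del` it from each row before writing (by default `DictWriter` raises `ValueError` for a key that is not in `fieldnames`)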

In [31]:
# Write a tab-separated copy of names.csv without the email column.
# Both files are opened with newline='' as the csv docs require: without it,
# the writer's '\r\n' row terminator is translated to '\r\r\n' on Windows,
# which shows up as a blank line after every row.
with open('names.csv', 'r', newline='') as f:
    csv_reader = csv.DictReader(f)

    with open('names_new.csv', 'w', newline='') as wf:
        fieldnames = ['first_name', 'last_name'] # we need to specify fieldnames to dictwriter

        csv_writer = csv.DictWriter(wf, fieldnames=fieldnames, delimiter='\t')
        csv_writer.writeheader()# to include header specifying column names

        for line in csv_reader:
            # The row still carries 'email', and DictWriter's default
            # extrasaction='raise' rejects keys missing from fieldnames with
            # ValueError, so drop the key before writing. (Passing
            # extrasaction='ignore' to DictWriter is the alternative.)
            del line['email']
            csv_writer.writerow(line)