#你的任务是逐行阅读输入 DATAFILE 文件，对于前 10 行（不包括标题），
#根据“,”拆分每行，然后为每行创建一个字典，
#键是字段的标题，值是该字段在该行中的值。
#函数 parse_file 应该返回一个字典列表，
#文件中的每个数据行是一个列表项。
#字段名称和值不应该包含多余的空白，例如空格或换行字符。
#你可以使用 Python 字符串方法strip() 删除多余的空白。
#对于这道练习，你只需解析前 10 行数据，
#所以返回的列表应该有 10 项！

In [10]:
import os
import pprint
import csv

# Directory holding the data file; the empty string makes the path built with
# os.path.join relative to the current working directory.
DATADIR = ""
# File name of the CSV data set to parse.
DATAFILE = "beatles-diskography.csv"

def parse_file(datafile, max_rows=10):
    """Parse the first data rows of a comma-separated file into dictionaries.

    The first line of the file is treated as the header row. Every following
    line is split on "," and its values are paired with the header names;
    leading/trailing whitespace (including the newline) is stripped from
    both the names and the values.

    Args:
        datafile: Path of the file to read.
        max_rows: Maximum number of data rows to parse (default 10).

    Returns:
        A list containing one dict per parsed data row, in file order.

    Note:
        Lines are split naively on ","; quoted fields that contain commas
        are not handled. Values beyond the number of header columns are
        ignored instead of raising IndexError.
    """
    data = []

    with open(datafile, "r") as f:
        # Strip the header names once rather than once per cell.
        header = [name.strip() for name in f.readline().split(",")]

        for line in f:
            if len(data) >= max_rows:
                break

            values = [value.strip() for value in line.split(",")]
            # zip stops at the shorter sequence, so a row with more fields
            # than the header cannot index past the end of the header.
            data.append(dict(zip(header, values)))

    return data

def test():
    """Check parse_file against the known first and tenth rows, then print.

    Parses the file at DATADIR/DATAFILE, asserts that rows 0 and 9 equal the
    expected dictionaries, and prints the full parsed list.
    """
    expected_first = {
        'Title': 'Please Please Me',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '22 March 1963',
        'US Chart Position': '-',
        'RIAA Certification': 'Platinum',
        'BPI Certification': 'Gold',
    }
    expected_tenth = {
        'Title': '',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '10 July 1964',
        'US Chart Position': '-',
        'RIAA Certification': '',
        'BPI Certification': 'Gold',
    }

    parsed = parse_file(os.path.join(DATADIR, DATAFILE))

    assert parsed[0] == expected_first
    assert parsed[9] == expected_tenth
    print(parsed)

In [11]:
test()

[{'Title': 'Please Please Me', 'Released': '22 March 1963', 'Label': 'Parlophone(UK)', 'UK Chart Position': '1', 'US Chart Position': '-', 'BPI Certification': 'Gold', 'RIAA Certification': 'Platinum'}, {'Title': 'With the Beatles', 'Released': '22 November 1963', 'Label': 'Parlophone(UK)', 'UK Chart Position': '1', 'US Chart Position': '-', 'BPI Certification': 'Platinum', 'RIAA Certification': 'Gold'}, {'Title': 'Beatlemania! With the Beatles', 'Released': '25 November 1963', 'Label': 'Capitol(CAN)', 'UK Chart Position': '-', 'US Chart Position': '-', 'BPI Certification': '', 'RIAA Certification': ''}, {'Title': 'Introducing... The Beatles', 'Released': '10 January 1964', 'Label': 'Vee-Jay(US)', 'UK Chart Position': '-', 'US Chart Position': '2', 'BPI Certification': '', 'RIAA Certification': ''}, {'Title': 'Meet the Beatles!', 'Released': '20 January 1964', 'Label': 'Capitol(US)', 'UK Chart Position': '-', 'US Chart Position': '1', 'BPI Certification': '', 'RIAA Certification': '5xP

In [3]:
import os
import pprint
import csv

# Directory holding the data file; the empty string makes the path built with
# os.path.join relative to the current working directory.
DATADIR = ""
# File name of the CSV data set to parse.
DATAFILE = "beatles-diskography.csv"

def parse_csv(datafile):
    """Read every data row of a CSV file into a list of dictionaries.

    The first row of the file supplies the field names; each later row
    becomes one mapping from field name to the value in that row, as
    produced by csv.DictReader.

    Args:
        datafile: Path of the CSV file to read.

    Returns:
        A list with one row mapping per data row, in file order.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise "\r\n" embedded in quoted fields is
    # rewritten by universal-newline translation before csv sees it.
    with open(datafile, "r", newline="") as sd:
        return list(csv.DictReader(sd))

if __name__ == "__main__":
    # Parse the whole data file and verify two known rows before printing.
    datafile = os.path.join(DATADIR, DATAFILE)
    d = parse_csv(datafile)

    firstline = {
        'Title': 'Please Please Me',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '22 March 1963',
        'US Chart Position': '-',
        'RIAA Certification': 'Platinum',
        'BPI Certification': 'Gold',
    }
    tenthline = {
        'Title': '',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '10 July 1964',
        'US Chart Position': '-',
        'RIAA Certification': '',
        'BPI Certification': 'Gold',
    }

    assert d[0] == firstline
    assert d[9] == tenthline

    # pprint lays the rows out one field per line, which is easier to read.
    pprint.pprint(d)

[OrderedDict([('Title', 'Please Please Me'),
              ('Released', '22 March 1963'),
              ('Label', 'Parlophone(UK)'),
              ('UK Chart Position', '1'),
              ('US Chart Position', '-'),
              ('BPI Certification', 'Gold'),
              ('RIAA Certification', 'Platinum')]),
 OrderedDict([('Title', 'With the Beatles'),
              ('Released', '22 November 1963'),
              ('Label', 'Parlophone(UK)'),
              ('UK Chart Position', '1'),
              ('US Chart Position', '-'),
              ('BPI Certification', 'Platinum'),
              ('RIAA Certification', 'Gold')]),
 OrderedDict([('Title', 'Beatlemania! With the Beatles'),
              ('Released', '25 November 1963'),
              ('Label', 'Capitol(CAN)'),
              ('UK Chart Position', '-'),
              ('US Chart Position', '-'),
              ('BPI Certification', ''),
              ('RIAA Certification', '')]),
 OrderedDict([('Title', 'Introducing... The Beatles')