In [6]:
import xml.etree.ElementTree as ET
import pandas as pd
import glob

In [7]:
def get_filing_data(filing):
    """Flatten one filing element into a single flat dict.

    The result starts as a copy of the filing element's own attributes.
    The attributes of any direct ``Registrant`` or ``Client`` child are then
    merged in (later children overwrite earlier keys on a name clash), and a
    direct ``GovernmentEntities`` child contributes a ``'GovernmentEntities'``
    key holding the list of ``GovEntityName`` attribute values of its
    children. Children with any other tag are ignored.

    Parameters
    ----------
    filing : xml.etree.ElementTree.Element
        The element to flatten.

    Returns
    -------
    dict
        A new dict; the element's ``attrib`` mapping is not modified.

    Raises
    ------
    KeyError
        If a child of ``GovernmentEntities`` has no ``GovEntityName`` attribute.
    """
    record = {}
    record.update(filing.attrib)

    for node in filing:
        tag = node.tag
        if tag in ('Registrant', 'Client'):
            # Registrant/Client details live on attributes; fold them into the row.
            record.update(node.attrib)
        elif tag == 'GovernmentEntities':
            entity_names = []
            for entity in node:
                entity_names.append(entity.attrib['GovEntityName'])
            record['GovernmentEntities'] = entity_names

    return record

def get_dataframe(filename):
    """Parse one XML file of filings into a DataFrame indexed by ``ID``.

    Every direct child of the document root is flattened with
    ``get_filing_data`` and becomes one row. The ``ID`` field is used as the
    index, and the ``Received`` column is converted to datetimes using the
    strict format ``%Y-%m-%dT%H:%M:%S.%f``.

    Parameters
    ----------
    filename
        Path (or file object) handed straight to ``ET.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per child of the root element.
    """
    root = ET.parse(filename).getroot()

    records = []
    for filing in root:
        records.append(get_filing_data(filing))

    frame = pd.DataFrame.from_records(records, index='ID')
    frame['Received'] = pd.to_datetime(
        frame['Received'],
        format='%Y-%m-%dT%H:%M:%S.%f',
    )
    return frame

In [8]:
# Load every XML file in the working directory and stack the per-file frames.
# glob.glob() yields paths in arbitrary, filesystem-dependent order, so the
# paths are sorted to keep the row order of the combined frame (and of anything
# exported from it) reproducible from run to run.
df = pd.concat([get_dataframe(f) for f in sorted(glob.glob('*.xml'))])
df

Unnamed: 0_level_0,Address,AffiliatedOrgsURL,Amount,ClientCountry,ClientID,ClientName,ClientPPBCountry,ClientPPBState,ClientState,ContactFullname,...,Received,RegistrantCountry,RegistrantID,RegistrantName,RegistrantPPBCountry,RegistrationEffectiveDate,SelfFiler,TerminationEffectiveDate,Type,Year
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5DD13201-9246-4282-B294-31E1A5FD45DC,"3259 Nebo Road\r\nBoulder, CO 80302",,,UNDETERMINED,1000610,FIBERFORGE INC,UNDETERMINED,,,RICHARD E BRADSHAW,...,2008-07-09 22:37:28,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER (NO ACTIVITY),2008
844C6943-B298-42AE-9A24-1CD6064A00CE,"525 Second Street, NE\r\nWashington, DC 20002",,40000,USA,277,Mass Development,USA,MASSACHUSETTS,MASSACHUSETTS,Steven Wolfe,...,2008-07-09 22:39:29,USA,53584,Steven Wolfe Associates,USA,,FALSE,,YEAR-END REPORT,2006
B7537D89-C57E-4833-83FF-F78F7563E58B,"3259 Nebo Road\r\nBoulder, CO 80302",,,UNDETERMINED,280,ADVANCING ENERGY TECHNOLOGIES,UNDETERMINED,,,RICHARD E BRADSHAW,...,2008-07-09 22:40:30,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER REPORT,2008
20281ABA-FB79-4D64-96C5-E5C8DD07C1C7,"3259 Nebo Road\r\nBoulder, CO 80302",,,UNDETERMINED,253,Barrier Wear,UNDETERMINED,,,Richard E Bradshaw,...,2008-07-09 22:42:31,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER TERMINATION,2008
55237ADC-4779-4468-B213-5E8AF5360BCC,"525 Second Street, NE\r\nWashington, DC 20002",,40000,USA,277,Mass Development,USA,MASSACHUSETTS,MASSACHUSETTS,Steven Wolfe,...,2008-07-09 22:44:32,USA,53584,Steven Wolfe Associates,USA,,FALSE,,MID-YEAR REPORT,2007
AF42351A-F798-4779-98FA-047D27E1B9E2,"525 Second Street, NE\r\nWashington, DC 20002",,40000,USA,277,Mass Development,USA,MASSACHUSETTS,MASSACHUSETTS,Steven Wolfe,...,2008-07-09 22:48:33,USA,53584,Steven Wolfe Associates,USA,,FALSE,,YEAR-END REPORT,2007
40BE03F5-3983-404B-8A5F-D49091C199EC,"3259 Nebo Road\r\nBoulder, CO 80302",,,UNDETERMINED,315,FIDELITY ENGINEERING INC,UNDETERMINED,,,RICHARD E BRADSHAW,...,2008-07-09 22:51:34,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER REPORT,2008
5DA6F75E-3B20-4F83-89EF-FF8C347E53EA,"3259 Nebo Road\r\nBoulder, CO 80302",,10000,UNDETERMINED,1000607,FLAGSHIP ENTERPRISE CENTER,UNDETERMINED,,,RICHARD BRADSHAW,...,2008-07-09 23:00:34,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER REPORT,2008
6308EC22-DC07-4A95-AA46-B294493D51DC,"525 Second Street, NE\r\nWashington, DC 20002",,20000,USA,277,Mass Development,USA,MASSACHUSETTS,MASSACHUSETTS,Steven Wolfe,...,2008-07-09 23:02:35,USA,53584,Steven Wolfe Associates,USA,,FALSE,,FIRST QUARTER REPORT,2008
9F2D20FF-6C7E-4F29-B600-C4F22C2E30F9,"3259 Nebo Road\r\nBoulder, CO 80302",,,USA,1003275,INANOVATION LLC,USA,,TEXAS,RICHARD E BRADSHAW,...,2008-07-09 23:08:36,USA,36812,"Stirling Strategic Services, LLC",USA,,FALSE,,SECOND QUARTER REPORT,2008


In [17]:
# Write the combined filings frame `df` to 'my.senate.csv' (a path relative to
# the current working directory).
# NOTE(review): this cell's execution count (17) is not contiguous with the
# cell that builds `df` (8) -- confirm `df` was not altered by cells that have
# since been removed before trusting the exported file.
df.to_csv('my.senate.csv')