In [15]:
import pandas as pd
import matplotlib
%matplotlib inline 
import string
pd.set_option('display.max_colwidth', -1)

In [16]:
df = pd.read_csv('data/records_bibs.csv', delimiter='|')
publishers = pd.read_csv('data/publishers.csv', delimiter='\t')
publishers = publishers.set_index('affiliation')

In [17]:
publishers.head()

Unnamed: 0_level_0,country,Number
affiliation,Unnamed: 1_level_1,Unnamed: 2_level_1
Routledge Taylor Francis Group,United States,1068
Cambridge University Press,United States,456
Oxford University Press,United States,327
Palgrave Macmillan,United States,182
Sage Publ,United States,160


In [18]:
def clean_data():
    publishers_uppers  = [w.upper() for w in publishers.index.values.tolist()]
    publishers.index  = publishers_uppers
    
    #fixing years
    df.at[257, 'year'] = 2014
    df.at[368, 'year'] = 2012
    df.at[690, 'year'] = 1840
    df.at[851, 'year'] = 2006
    df.at[857, 'year'] = 2013
    df.at[859, 'year'] = 2012
    df.at[863, 'year'] = 2012
    df.at[873, 'year'] = 2000
    df.at[1143, 'year'] = 2000
    df.at[995, 'year'] = 2015
    
    #fixing types
    for index, row in df.iterrows():
        if row['type'] == 'BOOK':
            df.at[index, 'type'] = 'book'
        elif row['type'] == 'incollection' or row['type']=='conference':
            df.at[index, 'type'] = 'inproceedings'
            
    #fixing journals
    for index, row in df.iterrows():
        if type(row['journal']) == float:
            continue
        if row['journal'] == 'EU-US Relations: Repairing the Transatlantic Rift: Kastellorizo Papers':
            df.at[index, 'journal'] = ''
        row['journal'] = row['journal'].replace('\&','&')
        row['journal'] = row['journal'].replace('&','and')
        df.at[index, 'journal'] = row['journal'].title().replace('Of', 'of')
        
    #fixing publishers
    for index, row in df.iterrows():
        publisher = row['publisher']
        if type(publisher) == float:
            continue
        new_publisher = []
        exclude = set(string.punctuation)
        publisher = ''.join(ch for ch in  publisher if ch not in exclude)
        publisher = publisher.upper()
        if 'ROUTLEDGE' in publisher or 'TAYLOR AND FRANCIS' in publisher:
            publisher  = 'ROUTLEDGE TAYLOR FRANCIS GROUP'
        if 'BLACKWELL' in publisher:
            publisher = 'WILEY - BLACKWELL PUBL'
        for w in publisher.split():
            w = w.replace('\&', 'and')
            w = w.replace('&', 'and')
            if w in ['UNIVERSIDAD','UNIVERSITY']:
                w = 'UNIV'
            if w in ['PUBLISHING', 'PUBLISHER', 'PUBLICATIONS', 'PUBLISHERS']:
                w = 'PUBL'
            if w == 'AMER':
                w = 'AMERICAN'
            if w == 'PERGAMONELSEVIER':
                w = 'PERGAMON ELSEVIER'
            if w not in ['INC', 'LTD']:  
                new_publisher.append(w)
        new_publisher = (' '.join(new_publisher)).replace('UNIV', 'UNIVERSITY')   
        df.at[index, 'publisher'] = new_publisher.title().replace('Of', 'of')
        if new_publisher in publishers.index and type(publishers.loc[new_publisher]['country']) != float:
            df.at[index, 'country_publication'] = publishers.loc[new_publisher]['country']
    
    #fixing affiliations
    for index, row in df.iterrows():
        affiliation = row['affiliation']
        try:
            if type(affiliation) == float:
                continue
            new_affiliation = []
            exclude = set(string.punctuation)
            affiliation = ''.join(ch for ch in  affiliation if ch not in exclude)
            affiliation = affiliation.upper()
            for w in affiliation.split():
                w = w.replace('\&', 'AND')
                w = w.replace('&', 'AND')
                if w == 'UNIVERSIDAD' or w == 'UNIVERSITY':
                    w = 'UNIV'
                new_affiliation.append(w)
            new_affiliation = ' '.join(new_affiliation)
            if 'LONDON' in new_affiliation:
                    new_affiliation  = 'UNIV LONDON'
            new_affiliation = new_affiliation.replace('N CAROLINA', 'NORTH CAROLINA')   
            new_affiliation = new_affiliation.replace('UNIV', 'UNIVERSITY')   
            df.at[index, 'affiliation'] = new_affiliation.title().replace('Of', 'of')
        except:
            print(index, new_affiliation)

In [19]:
clean_data()

In [20]:
import csv
df.to_csv('data/cleaned_records_bibs.csv', sep='|')

In [21]:
'THE STATIONERY OFFICE' in publishers.index

False

In [22]:
df

Unnamed: 0,link,country_publication,number-of-cited-references,affiliation,research-areas,publisher,type,volume,keywords-plus,keywords,...,id,categories,issn,affiliation_pais,cited-references,abstract,source,journal,author,title
0,http://play.google.com/books/reader?id=AzXDAwAAQBAJ&hl=&printsec=frontcover&source=public,,,,,The Stationery office,book,,,,...,AzXDAwAAQBAJ,,"['9780108554506', '0108554503']",,,"Negotiations on an historic trade deal between the European Union and the United States are losing momentum and must be revived. The Transatlantic Trade and Investment Partnership, or TTIP, is the most ambitious trade and investment pact ever attempted and an opportunity to revitalise the relationship between Europe and the US. As the EU and US combined account for nearly half of world GDP, the potential gains from such a deal could be substantial - as much as £100bn a year to the EU and £80bn to the US according to some estimates. While getting rid of the remaining tariffs on transatlantic trade is important, some 80 per cent of the potential gains will be derived from the removal of non-tariff barriers, a point of particular significance for SMEs. A successful TTIP will be of benefit to the rest of the world by stimulating world trade in general and encouraging progress on other multilateral trade initiatives, including encouraging China's participation. TTIP should also not be a closed shop and that there should be provision to allow third countries to participate. But without more political impetus from Washington and the big EU member states and without a concerted campaign to make the public in all the countries involved more aware of the potential benefits, the opportunity could be lost. If that happens the EU and the US are unlikely to be in the same position of influence in the world economy next time around",Google Books,Google Books,['Stationery Office (Great Britain)'],HL 179 - The Transatlantic Trade and Investment Partnership
1,http://play.google.com/books/reader?id=qkh1AwAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,Routledge Taylor Francis Group,book,,,,...,qkh1AwAAQBAJ,,"['9781135010935', '1135010935']",,,"In today’s complex and interconnected world, scholars of international relations seek to better understand challenges spurred by intensified global communication and interaction. The complex connectedness of modern society and politics compels us to investigate the pattern of interconnections among actors who inhabit social and political spaces. Gabriella Paár-Jákli's study aims to advance theory and practice by examining the networks used by specialists in North America and Europe to achieve their policy goals in the area of science and technology. Her book suggests that to overcome policy problems transnationally, three critical factors should be considered. First, as science and technology policy becomes increasingly critical to resolving global issues, it should be regarded as an integral element of the foreign policy process. Second, as liberal international relations theory argues, the increasing role of NGOs must be taken seriously alongside states as vital agents of policy reform. Third, as transatlantic relations remain center to maintaining the global order, they must be reconsidered. Paar-Jakli assesses the role of digital networks as facilitators of regional cooperation. Utilizing various techniques of social network analysis, her research indicates an active and structurally discernible network in cyberspace among transatlantic organizations, and demonstrates the role of virtual networks as facilitators of cooperative arrangements in transatlantic relations. Paár-Jákli's original research uses social network analysis to investigate transatlantic cooperation, a new approach that will be noteworthy to network and transatlantic scholars as well as policymakers.",Google Books,Google Books,['Gabriella Paár-Jákli'],Networked Governance and Transatlantic Relations
2,http://play.google.com/books/reader?id=EWJ2AwAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,Routledge Taylor Francis Group,book,,,,...,EWJ2AwAAQBAJ,,"['9781135010928', '1135010927']",,,"In today’s complex and interconnected world, scholars of international relations seek to better understand challenges spurred by intensified global communication and interaction. The complex connectedness of modern society and politics compels us to investigate the pattern of interconnections among actors who inhabit social and political spaces. Gabriella Paár-Jákli's study aims to advance theory and practice by examining the networks used by specialists in North America and Europe to achieve their policy goals in the area of science and technology. Her book suggests that to overcome policy problems transnationally, three critical factors should be considered. First, as science and technology policy becomes increasingly critical to resolving global issues, it should be regarded as an integral element of the foreign policy process. Second, as liberal international relations theory argues, the increasing role of NGOs must be taken seriously alongside states as vital agents of policy reform. Third, as transatlantic relations remain center to maintaining the global order, they must be reconsidered. Paar-Jakli assesses the role of digital networks as facilitators of regional cooperation. Utilizing various techniques of social network analysis, her research indicates an active and structurally discernible network in cyberspace among transatlantic organizations, and demonstrates the role of virtual networks as facilitators of cooperative arrangements in transatlantic relations. Paár-Jákli's original research uses social network analysis to investigate transatlantic cooperation, a new approach that will be noteworthy to network and transatlantic scholars as well as policymakers.",Google Books,Google Books,['Gabriella Paar-Jakli'],Networked Governance and Transatlantic Relations
3,http://play.google.com/books/reader?id=hwaMAwAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,University of Alabama Press,book,,,,...,hwaMAwAAQBAJ,,"['9780817357788', '0817357785']",,,"A Confluence of Transatlantic Networks: Elites, Capitalism, and Confederate Migration to Brazil is a study in Atlantic world history that examines the qualitative nature of capitalism's processes through the lens of social networks.",Google Books,Google Books,['Laura Jarnagin'],A Confluence of Transatlantic Networks
4,http://play.google.com/books/reader?id=JrU5AwAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,Springer,book,,,,...,JrU5AwAAQBAJ,,"['9781137345752', '1137345756']",,,"Beliefs held by US and European elites about unregulated markets and a currency union without fiscal union led to a transatlantic crisis unmatched in severity since the Great Depression. Leading scholars of elites analyze how elites have responded to the crisis, are altered by it and what this 'hour of elites' means for democracy.",Google Books,Google Books,"['H. Best', 'J. Higley']",Political Elites in the Transatlantic Crisis
5,http://play.google.com/books/reader?id=Z8LJBAAAQBAJ&hl=&printsec=frontcover&source=public,Germany,,,,Transcript Verlag,book,,,,...,Z8LJBAAAQBAJ,,"['9783839422731', '3839422736']",,,"From Josephine Baker's performances in the 1920s to the 1970s solidarity campaigns for Angela Davis, from Audre Lorde as »mother« of the Afro-German movement in the 1980s to the literary stardom of 1993 Nobel Laureate Toni Morrison, Germans have actively engaged with African American women's art and activism throughout the 20th century. The discursive strategies that have shaped the (West) German reactions to African American women's social activism and cultural work are examined in this study, which proposes not only a nuanced understanding of »African Americanizations« as a form of cultural exchange but also sheds new light on the role of African American culture for (West) German society, culture, and national identity.",Google Books,Google Books,['Katharina Gerund'],Transatlantic Cultural Exchange
6,http://play.google.com/books/reader?id=gkyAAgAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,Suny Press,book,,,,...,gkyAAgAAQBAJ,,"['9781438450254', '1438450257']",,,"Tracks the influence of Italian cinema on American film from the postwar period to the present. In The Transatlantic Gaze, Mary Ann McDonald Carolan documents the sustained and profound artistic impact of Italian directors, actors, and screenwriters on American film. Working across a variety of genres, including neorealism, comedy, the Western, and the art film, Carolan explores how and why American directors from Woody Allen to Quentin Tarantino have adapted certain Italian trademark techniques and motifs. Allen’s To Rome with Love (2012), for example, is an homage to the genius of Italian filmmakers, and to Federico Fellini in particular, whose Lo sceicco bianco/The White Sheik (1952) also resonates with Allen’s The Purple Rose of Cairo (1985) as well as with Neil LaBute’s Nurse Betty (2000). Tarantino’s Kill Bill saga (2003, 2004) plays off elements of Sergio Leone’s spaghetti Western C’era una volta il West/Once Upon a Time in the West (1968), a transatlantic conversation about the Western that continues in Tarantino’s Oscar-winning Django Unchained (2012). Lee Daniels’s Precious (2009) and Spike Lee’s Miracle at St. Anna (2008), meanwhile, demonstrate that the neorealism of Roberto Rossellini and Vittorio De Sica, which arose from the political and economic exigencies of postwar Italy, is an effective vehicle for critiquing social issues such as poverty and racism in a contemporary American context. The book concludes with an examination of American remakes of popular Italian films, a comparison that offers insight into the similarities and differences between the two cultures and the transformations in genre, both subtle and obvious, that underlie this form of cross-cultural exchange.",Google Books,Google Books,['Mary Ann McDonald Carolan'],The Transatlantic Gaze
7,http://play.google.com/books/reader?id=6tJTAgAAQBAJ&hl=&printsec=frontcover&source=public,United States,,,,Suny Press,book,,,,...,6tJTAgAAQBAJ,,"['9781438450261', '1438450265']",,,"Tracks the influence of Italian cinema on American film from the postwar period to the present. In The Transatlantic Gaze, Mary Ann McDonald Carolan documents the sustained and profound artistic impact of Italian directors, actors, and screenwriters on American film. Working across a variety of genres, including neorealism, comedy, the Western, and the art film, Carolan explores how and why American directors from Woody Allen to Quentin Tarantino have adapted certain Italian trademark techniques and motifs. Allen’s To Rome with Love (2012), for example, is an homage to the genius of Italian filmmakers, and to Federico Fellini in particular, whose Lo sceicco bianco/The White Sheik (1952) also resonates with Allen’s The Purple Rose of Cairo (1985) as well as with Neil LaBute’s Nurse Betty (2000). Tarantino’s Kill Bill saga (2003, 2004) plays off elements of Sergio Leone’s spaghetti Western C’era una volta il West/Once Upon a Time in the West (1968), a transatlantic conversation about the Western that continues in Tarantino’s Oscar-winning Django Unchained (2012). Lee Daniels’s Precious (2009) and Spike Lee’s Miracle at St. Anna (2008), meanwhile, demonstrate that the neorealism of Roberto Rossellini and Vittorio De Sica, which arose from the political and economic exigencies of postwar Italy, is an effective vehicle for critiquing social issues such as poverty and racism in a contemporary American context. The book concludes with an examination of American remakes of popular Italian films, a comparison that offers insight into the similarities and differences between the two cultures and the transformations in genre, both subtle and obvious, that underlie this form of cross-cultural exchange.",Google Books,Google Books,['Mary Ann McDonald Carolan'],"Transatlantic Gaze, The"
8,http://play.google.com/books/reader?id=2zyeAgAAQBAJ&hl=&printsec=frontcover&source=public,United Kingdom,,,,Oup Oxford,book,,,,...,2zyeAgAAQBAJ,,"['9780191511264', '0191511269']",,,"William James and the Transatlantic Conversation focuses on the American philosopher and psychologist William James (1842-1910) and his engagements with European thought, together with the multidisciplinary reception of his work on both sides of the Atlantic since his death. James's encounters with European thinkers and ideas ran throughout his early life and across his distinguished international career, in which he participated in a number of transatlantic conversations in science, philosophy, psychology, religion, ethics, and literature. This volume explores and extends these conversations by drawing together twelve scholars from a range of disciplines on both sides of the Atlantic to assess James's work in all its variety, to trace his multidisciplinary reception across the twentieth century, and to evaluate his legacy in the twenty-first century. The first half of the book considers James's many intellectual influences and the second half focuses on A Pluralistic Universe (1909), the published text of his 1908 Hibbert Lectures at Oxford University, as a key text for assessing James's transatlantic conversations. The pluralistic transatlantic currents addressed in the first part of the volume enable a fuller understanding of James's philosophy of pluralism that forms the explicit focus for the second part. Taken as a collection, the volume is unique in scholarship on James in generating transatlantic, interdisciplinary, and cross-generational dialogues, and it repositions James as an important international thinker and arguably the most distinctive American intellectual figure of the nineteenth and twentieth centuries.",Google Books,Google Books,"['Martin Halliwell', 'Joel D. S. Rasmussen']",William James and the Transatlantic Conversation
9,http://play.google.com/books/reader?id=95yaAgAAQBAJ&hl=&printsec=frontcover&source=public,Germany,,,,Internet Gesellschaft Collaboratory,book,,,,...,95yaAgAAQBAJ,,"['9783000446481', '3000446486']",,,"Now available online: The Transatlantic Colossus: Global Contributions to Broaden the Debate on the EU-US Free Trade Agreement (2014), a publication from the Berlin Forum on Global Politics (BFoGP) in collaboration with the Internet & Society Collaboratory and FutureChallenges.org of the Bertelsmann Stiftung. The free trade agreement (TAFTA | TTIP) currently being negotiated between the United States and the European Union has the potential to significantly impact the lives of people on both sides of the Atlantic and across the world. Because it is crucial to broaden the debate on this topic of global importance, the Berlin Forum on Global Politics decided to send out an international call for papers in order to collect a strong plurality of views on TAFTA | TTIP as part of the Collaboratory's Initiative on ""Globalization and the Internet"". The result is an open knowledge publication, freely accessible under its Creative Commons (BY) license, which includes 22 articles written by a multitude of well-informed global stakeholders, members of civil society, academia, think tanks, consumer and activist groups, and business organizations.",Google Books,Google Books,"['Daniel Cardoso, Philani Mthembu, Marc Venhaus, Miguelángel Verde Garrido']",The Transatlantic Colossus
