# h2_ENB_gen
Find all interactions in the original dataset for one issue or a range of issues.

In [1]:
import urllib.request
import re
from bs4 import BeautifulSoup
from dateutil.parser import parse
import csv
import pandas as pd
import requests
import nltk
import itertools
import collections
from collections import Counter
import import_ipynb
import c1_extract_text as c1 
import a1_tools as tools
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

importing Jupyter notebook from c1_extract_text.ipynb
importing Jupyter notebook from a1_tools.ipynb
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/tatianacogne/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/tatianacogne/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [2]:
# Lookup tables used by get_key() and add_id() further down:
#   DICTIONARY     - iterated as key -> sequence of accepted name spellings,
#   DICTIONARY_NUM - indexed by those same keys to obtain the numeric id,
#   NAMES          - used only for membership tests on entity names.
DICTIONARY, DICTIONARY_NUM, NAMES = tools.create_dict()

In [3]:
def _read_lines(path):
    """Return every line of ``path`` with newline characters removed."""
    # The context manager closes the handle; the previous bare open() calls
    # left one handle open per read.
    with open(path) as handle:
        return [line.replace('\n', '') for line in handle]


# Every entity name recognised in a sentence (one per line of the file).
ENTITIES = _read_lines('Files/entities_interactions.txt')
# Link words/phrases treated as markers of support or of opposition.
SUPPORTS_LINKS = ['with','and','for the','on behalf of the','supported by','speaking for the','for several']
OPPOSITION_LINKS= ['opposed by','while','opposed by the']
# POS tags kept by find_patterns(); tokens with any other tag are dropped.
LIST_TAGS = ['IN', 'CC', 'NN', 'NNP', 'JJ','NNPS','MD','VBP','VB','VBZ','VBD','RB','VBN','PRP', 'NNS']
# Party groupings in three spellings: upper case, title case and as written in the file.
_GROUPING_NAMES = _read_lines('Files/party_groupings.txt')
PARTY_GROUPINGS = sorted(set([s.upper() for s in _GROUPING_NAMES] + [s.title() for s in _GROUPING_NAMES] + _GROUPING_NAMES))
# Entities that are not one of the groupings above.
PARTIES = sorted(set(ENTITIES).difference(set(PARTY_GROUPINGS)))

In [4]:
def extract_s2e_issue_number(issue_number):
    """Return the sentences of one issue that mention at least two distinct entities.

    Parameters
    ----------
    issue_number :
        Identifier passed unchanged to ``c1.extract_sentences_for_one_issue``.

    Returns
    -------
    list
        The sentences, in their original order, whose tokens contain more
        than one distinct member of ``ENTITIES``.
    """
    sentences = c1.extract_sentences_for_one_issue(issue_number)

    # Build the lookup set once rather than once per sentence.
    known_entities = set(ENTITIES)

    sentences_s2 = []
    for s in sentences:
        # Tokenize so that entity names can be matched as whole tokens.
        tokens = tools.tokenize_sentence(s, False)

        # Strictly more than one distinct entity: a single entity (even if
        # repeated) cannot form an interaction.
        if len(known_entities.intersection(tokens)) > 1:
            sentences_s2.append(s)

    return sentences_s2

In [5]:
def find_pos_tagged_s2e(list_s2e):
    """POS-tag every sentence of ``list_s2e`` with NLTK.

    Each sentence is first stripped of literal backslash-t sequences and of
    parenthesised text, then tokenized with ``tools.tokenize_sentence`` and
    tagged. Returns one list of (token, tag) pairs per input sentence.
    """
    def _clean(text):
        # Remove the two-character sequence "\t", then anything between "(" and ")".
        without_tabs = text.replace('\\t','')
        return re.sub(r'\([^)]*\)', '', without_tabs)

    return [
        nltk.pos_tag(tools.tokenize_sentence(_clean(raw), False))
        for raw in list_s2e
    ]

In [6]:
def find_patterns(pos_tagged, list_tags):
    """Keep the words of the first tagged sentence whose tag is in ``list_tags``.

    Only ``pos_tagged[0]`` is inspected; any further sentences are ignored.
    Returns the kept words in their original order.
    """
    first_sentence = pos_tagged[0]
    groups = []
    for pair in first_sentence:
        if pair[1] in list_tags:
            groups.append(pair[0])
    return groups

In [7]:
def find_1g(groups):
    """Return the distinct entities found among ``groups`` as a list.

    Commas are removed from every word before it is compared with ``ENTITIES``.
    The result comes from a set, so its order is not defined.
    """
    without_commas = [word.replace(',','') for word in groups]
    matches = set(without_commas).intersection(set(ENTITIES))
    return list(matches)

In [8]:
def find_2g(groups, opp):
    """Split ``groups`` at the first occurrence of ``opp`` and extract entities on each side.

    Returns a pair of entity lists: those before the link and those after it
    (the link word itself belongs to neither side).
    """
    split_at = groups.index(opp)
    before = groups[:split_at]
    after = groups[split_at + 1:]
    return find_1g(before), find_1g(after)

In [9]:
def detect_groups_cooperations(groups):
    """Group the entities of a filtered sentence into one or two camps.

    Parameters
    ----------
    groups : list
        Words of the sentence (as produced by ``find_patterns``).

    Returns
    -------
    tuple
        ``([g1, g2], [opp])`` when an opposition link is present, where ``g1``
        and ``g2`` are the entity lists before and after the link ``opp``;
        otherwise ``([g1], [])`` with every entity of the sentence in ``g1``.
    """
    # Take the opposition link that appears first in the sentence. The former
    # `list(set(...))[0]` picked an arbitrary one when several were present,
    # so the split point could change from one run to the next.
    opp = next((word for word in groups if word in OPPOSITION_LINKS), None)

    # Case 1 : opposition between two groups
    if opp is not None:
        g1, g2 = find_2g(groups, opp)
        return [g1, g2], [opp]

    # Case 2 : only support
    return [find_1g(groups)], []

In [10]:
def find_coalitions(groups, sentence, opposition_index):
    """Drop entities that merely speak for a party grouping.

    A token that belongs to the current group, is followed by one of the
    "speaking for" links and then by a member of ``PARTY_GROUPINGS`` is
    removed from that group, and both the token and the link are deleted
    from the sentence text.

    Parameters
    ----------
    groups : list of list
        Entity groups of the sentence.
    sentence : str
        Sentence text; an edited copy is returned.
    opposition_index : list
        Empty, or a one-element list holding the character offset of the
        opposition link in ``sentence``. When present, text is only deleted
        before that offset. The list itself is not modified.

    Returns
    -------
    tuple
        ``(updated_groups, updated_sentence)``.
    """
    links = ['for the','for several','on behalf of the','speaking for the','on behalf of','for','speaking for','for a number of members of the', 'speaking on behalf of the']
    group_updates = []

    # Track the cut position locally: once text has been removed in front of
    # the opposition link, the caller's original offset no longer points at it.
    cut = opposition_index[0] if len(opposition_index) != 0 else None

    for group in groups:
        set_group = set(group)
        # Re-tokenize for each group: the sentence may have been edited while
        # the previous group was processed.
        token = tools.tokenize_sentence(sentence, False)

        if set(token).intersection(set(links)) != set():
            for i in range(len(token) - 2):
                if token[i] in group and token[i+1] in links and token[i+2] in PARTY_GROUPINGS:
                    # discard(), not remove(): the same speaker may match more
                    # than once in a sentence, and remove() would raise
                    # KeyError on the second match.
                    set_group.discard(token[i])

                    if cut is not None:
                        head = sentence[:cut].replace(token[i+1], '').replace(token[i], '')
                        tail = sentence[cut:]
                        sentence = ' '.join([head, tail])
                        # The tail now starts right after the inserted space.
                        cut = len(head) + 1
                    else:
                        sentence = sentence.replace(token[i+1], '').replace(token[i], '')

        group_updates.append(list(set_group))

    return group_updates, sentence

In [11]:
def remove_from_concern_of(groups, sentence):
    """Drop entities introduced by "from" / "concern(s) of" links.

    For each group the sentence is tokenized; whenever a link token is
    directly followed by an entity, that entity (and the next token as well,
    if it is an entity too) is removed from the group and its text is deleted
    from the sentence. Returns ``(updated_groups, updated_sentence)``.
    """
    links = ['from','from the','concerns of the','concern of']

    def _is_entity(word):
        return word in PARTIES or word in PARTY_GROUPINGS

    group_updates = []
    for group in groups:
        # Tokens are taken from the sentence as it stands when this group starts.
        token = tools.tokenize_sentence(sentence,False)

        if set(token).intersection(set(links)) != set():
            for i in range(len(token) - 2):
                first, second = token[i+1], token[i+2]
                if not (_is_entity(first) and token[i] in links):
                    continue

                if _is_entity(second):
                    group = [g for g in group if g != second]
                    sentence = sentence.replace(second,'')

                group = [g for g in group if g != first]
                sentence = sentence.replace(first,'')

        group_updates.append(group)

    return group_updates, sentence


In [12]:
def behalf_of(sentence, group_cooperation, link):
    """Build the interactions of type "behalf" for one sentence.

    Entities of ``group_cooperation`` found before the link form side A; the
    remaining ones form side B. One ``(A, b, ['behalf','cooperation'])`` tuple
    is produced per member ``b`` of side B (A upper-cased), followed by the
    ``tools.rSubset`` result for side B. The combined list is returned sorted.
    """
    words = tools.tokenize_sentence(sentence, True)
    link_position = words.index(list(link)[0])

    country_A = set(words[:link_position]).intersection(set(group_cooperation))
    countries_B = set(group_cooperation).difference(set(country_A))

    # The speaker is looked up lazily, so an empty side B never touches side A.
    tuples = [
        (list(country_A)[0].upper(), member, ['behalf','cooperation'])
        for member in countries_B
    ]
    tuples += tools.rSubset(list(countries_B),['agreement','cooperation'])

    return sorted(tuples)


In [13]:
def supported_by(sentence, group_cooperation, link ):
    """Build the interactions of type "support" for one sentence.

    Entities of ``group_cooperation`` found before the link form side A; the
    remaining ones form side B. One ``(b, A, ['support','cooperation'])`` tuple
    is produced per member ``b`` of side B (A upper-cased), followed by the
    ``tools.rSubset`` result for side B. The combined list is returned sorted.
    """
    words = tools.tokenize_sentence(sentence, True)
    link_position = words.index(list(link)[0])

    country_A = set(words[:link_position]).intersection(set(group_cooperation))
    countries_B = set(group_cooperation).difference(set(country_A))

    # The supported entity is looked up lazily, so an empty side B never touches side A.
    tuples = [
        (member, list(country_A)[0].upper(), ['support','cooperation'])
        for member in countries_B
    ]
    tuples += tools.rSubset(list(countries_B),['agreement','cooperation'])

    return sorted(tuples)

In [14]:
def check_link(link, sentence, group_cooperation):
    """Tell whether entities of the group appear on both sides of the link.

    The right-hand side starts at the link token itself. Returns a bool.
    """
    words = tools.tokenize_sentence(sentence, True)
    position = words.index(list(link)[0])

    left_side = set(words[:position]).intersection(set(group_cooperation))
    right_side = set(words[position:]).intersection(set(group_cooperation))

    if left_side == set():
        return False
    return right_side != set()

In [15]:
def coop(sentence, group_cooperation):
    """Classify the cooperation expressed in ``sentence`` and build its tuples.

    Parameters
    ----------
    sentence : str
        Sentence text, tokenized with ``tools.tokenize_sentence(sentence, True)``.
    group_cooperation : list
        Entities taking part in the cooperation.

    Returns
    -------
    list
        The result of ``behalf_of`` when a "behalf" link has entities on both
        sides, else of ``supported_by`` when a "support" link does, else the
        plain ``tools.rSubset`` agreement/cooperation result for the group.
    """
    token = tools.tokenize_sentence(sentence, True)

    # 'speaking for the' used to carry a trailing space here, which made it
    # differ from the spelling used in SUPPORTS_LINKS and find_coalitions.
    behalf = ['speaking for','on behalf of','speaking for the','on behalf of the']
    behalf = set(token).intersection(set(behalf))

    support = ['supported by','supported by the']
    support = set(token).intersection(set(support))

    # "Behalf" takes precedence over "support"; a link only counts when
    # entities of the group stand on both of its sides.
    if behalf != set() and check_link(behalf, sentence, group_cooperation):
        return behalf_of(sentence, group_cooperation, behalf)

    if support != set() and check_link(support, sentence, group_cooperation):
        return supported_by(sentence, group_cooperation, support)

    return tools.rSubset(group_cooperation,['agreement','cooperation'])

In [16]:
def opposed_by(sentence, group_cooperation, link):
    """Build the interactions of type "opposition" for one sentence.

    Parameters
    ----------
    sentence : str
        Sentence text containing the opposition link.
    group_cooperation : list
        Two entity lists: the group before the link and the group after it.
    link : collection
        Opposition link(s) found in the sentence; its first element is used.

    Returns
    -------
    list
        One ``(b, a, ['opposition'])`` tuple (upper-cased names) for every
        pair of an entity ``a`` of the first group and ``b`` of the second,
        followed by the cooperation tuples found inside each group.
    """
    link_word = list(link)[0]
    countries_A = group_cooperation[0]
    countries_B = group_cooperation[1]

    # Every member of the second group opposes every member of the first.
    tuples = [
        (second.upper(), first.upper(), ['opposition'])
        for first, second in itertools.product(countries_A, countries_B)
    ]

    # Members of the same camp cooperate with each other. Split once, at the
    # first occurrence of the link, and hand coop() a plain string - the same
    # type it receives from its other callers - instead of a one-element list.
    before, _, after = sentence.partition(link_word)

    if len(countries_A) != 1:
        tuples += coop(before, countries_A)

    if len(countries_B) != 1:
        tuples += coop(after, countries_B)

    return tuples

In [17]:
def criticized_by(sentence, group_cooperation, link):
    """Build the interactions of type "criticism" for one sentence.

    Parameters
    ----------
    sentence : str
        Sentence text containing the criticism link.
    group_cooperation : list
        List whose first element is the entity group of the sentence.
    link : collection
        Criticism link(s) found in the sentence; its first element is used.

    Returns
    -------
    list
        One ``(a, b, ['criticism'])`` tuple (upper-cased names) for every
        entity ``a`` before the link and ``b`` after it, followed by the
        cooperation tuples found on each side of the link.

    Raises
    ------
    ValueError
        If the link is not among the tokens of ``sentence``.
    """
    token = tools.tokenize_sentence(sentence, True)
    link_word = list(link)[0]
    index = token.index(link_word)
    countries_A = list(set(group_cooperation[0]).intersection(set(token[:index])))
    countries_B = list(set(group_cooperation[0]).intersection(set(token[index+1:])))

    # Every entity before the link criticizes every entity after it.
    tuples = [
        (critic.upper(), target.upper(), ['criticism'])
        for critic, target in itertools.product(countries_A, countries_B)
    ]

    # Entities on the same side of the link cooperate with each other.
    # Split the sentence itself: the former `sentence[0].split(...)` split
    # only its first character, so the right-hand part never existed and
    # indexing it raised IndexError.
    before, _, after = sentence.partition(link_word)

    if len(countries_A) != 1:
        tuples += coop(before, countries_A)

    if len(countries_B) != 1:
        tuples += coop(after, countries_B)

    return tuples

In [18]:
def remove_double_s(sentences):
    """Split sentences that look like two independent clauses joined by "and".

    A sentence is split when its filtered tokens contain the five-token
    window: entity, verb, "and", entity, verb. Each such sentence is replaced
    by the text before the first verb of the window and the text after it.

    Returns a list of the untouched sentences plus the split parts, with
    duplicates removed; the original order is not preserved.
    """
    tags_wanted = ['VBD','MD']
    words_wanted = ['and']
    s2 = []
    sentences_filtered = []
    s_to_filter = []
    s_filtered = []
    # Sentences still untouched; a sentence leaves this set once it is scheduled for splitting.
    set_sentences = set(sentences)
    for s in sentences:
        pos_tagged = find_pos_tagged_s2e([s])

        
        
        # Two reduced views of the sentence: (entities | "and" | VBD/MD tags)
        # and (parties | "and" | VBD tags only).
        filtered = [x for x in pos_tagged[0] if x[0] in ENTITIES or x[0] in words_wanted or x[1] in tags_wanted]
        filtered_only_VBD = [x for x in pos_tagged[0] if x[0] in PARTIES or x[0] in words_wanted or x[1]== 'VBD']


        if(len(filtered_only_VBD)>= 5):
            for i in range(len(filtered_only_VBD)-4):
                # Window: entity, VBD, "and", party, VBD/MD. `s in set_sentences`
                # ensures a sentence is scheduled (and removed) only once.
                if( filtered_only_VBD[i][0] in ENTITIES and filtered_only_VBD[i+1][1]=='VBD' and filtered_only_VBD[i+2][0] == 'and' and  filtered_only_VBD[i+3][0] in PARTIES and  filtered_only_VBD[i+4][1]in tags_wanted and s in set_sentences):

                    s_to_filter.append((s,filtered_only_VBD[i+1]))
                    set_sentences.remove(s)

        if(len(filtered)>= 5):
            for i in range(len(filtered)-4):
                # Same window on the broader view: entity, VBD/MD, "and", entity, VBD/MD.
                if( filtered[i][0] in ENTITIES and filtered[i+1][1] in tags_wanted and filtered[i+2][0] == 'and' and  filtered[i+3][0] in ENTITIES and  filtered[i+4][1]in tags_wanted and s in set_sentences):

                    s_to_filter.append((s,filtered[i+1]))
                    set_sentences.remove(s)

    sentences = list(set_sentences) 
    for s in s_to_filter:

        # s is (sentence, (verb, tag)); cut at the first occurrence of the verb text.
        index = s[0].index(s[1][0])
        s1 = s[0][:index]
        # NOTE(review): only one character is skipped, so the second part keeps
        # the verb minus its first letter - confirm this is intended.
        s2 = s[0][index+1:]
        sentences.append(s1)
        sentences.append(s2)
    
    return list(set(sentences))

In [19]:
def remove_on_99s_from_programme(sentences):
    """Rewrite sentences in which an entity is mentioned without interacting.

    Three patterns are looked for between consecutive tokens:
    "on <entity>", "<entity> 92s" / "<entity> Programme" and
    "proposed by (the) <entity>". A sentence showing one of them is rebuilt
    from its tokens, leaving out the marker words listed in ``note`` and the
    token that directly precedes the matched entity.

    Returns a list of the rebuilt sentences plus the untouched ones, with
    duplicates removed; the original order is not preserved.
    """
    set_sentences = set(sentences)
    s_to_filter = []
    s_filtered = []
    note = ['92s','Programme','proposed by the','proposed by']
    for s in sentences:

        pos_tagged = find_pos_tagged_s2e([s])[0]

        for i in range(len(pos_tagged)-1):
            word = pos_tagged[i][0]
            following = pos_tagged[i+1][0]

            # `s in set_sentences` makes sure a sentence is scheduled only once.
            if(word == 'on' and following in ENTITIES and s in set_sentences):

                s_to_filter.append((s,following))
                set_sentences.remove(s)

            if((following == '92s' or following == 'Programme') and word in ENTITIES and s in set_sentences):

                s_to_filter.append((s,word))
                set_sentences.remove(s)

            # Both spellings of the link must be tested on the token *before*
            # the entity; the former test of 'proposed by' against the entity
            # token itself could never succeed.
            if((word == 'proposed by the' or word == 'proposed by') and following in ENTITIES and s in set_sentences):

                s_to_filter.append((s.replace (following,''),following))
                set_sentences.remove(s)


    x = ' '
    for s in s_to_filter:
        # s is (sentence text, matched entity).
        token = tools.tokenize_sentence(s[0],False)
        tokens = []
        # NOTE(review): the loop stops one token early, so the last token of
        # the sentence is never copied - confirm this is intended.
        for i in range(len(token)-1):
            if(token[i] not in note and  not (token[i+1]==s[1])):
                tokens.append(token[i])

        s_f = x.join(tokens)
        s_filtered.append(s_f)


    s_filtered += list(set_sentences)

    return list(set(s_filtered))

In [20]:
def check_doubles(sentence, group_cooperation):
    """Collapse entities that appear twice in a group under different casing.

    When a group holds names that are equal once upper-cased, the title-cased
    spellings are dropped from the group and deleted from the sentence text.
    Groups without such duplicates are passed through unchanged.
    Returns ``(updated_groups, updated_sentence)``.
    """
    gc_new = []

    for g in group_cooperation:
        upper_names = [e.upper() for e in g]

        # No case-insensitive duplicate: keep the group as it is.
        if len(set(upper_names)) == len(g):
            gc_new.append(g)
            continue

        g_title = [e.title() for e in g]
        gc_new.append(list(set(g).difference(set(g_title))))

        # Erase the dropped spellings from the sentence as well.
        for r in list(set(g).intersection(set(g_title))):
            sentence = sentence.replace(r,'')

    return gc_new, sentence

In [21]:
def find_inversions(sentences):
    """Rewrite inverted sentences into the "<entity> supported by <entity>" form.

    Two kinds of inversion are handled:

    * the sentence contains "Supported by" / "Opposed by" followed by two
      entities, each optionally preceded by "the";
    * the sentence contains "supported (the) <entity>" in the middle, the
      supporters being the entities mentioned before the verb.

    Every matching sentence is replaced by its rewritten form(s).

    Returns a list of the rewritten sentences plus the untouched ones, with
    duplicates removed; the original order is not preserved.
    """
    set_sentences = set(sentences)
    s_filtered = []
    verbs_int = ['Supported','Opposed']
    entity_set = set(ENTITIES)

    def _supporters_before(words, end):
        # Distinct entities mentioned before position `end`, in order of
        # appearance (the former set-based version had no defined order).
        return list(dict.fromkeys(w for w in words[:end] if w in entity_set))

    for s in sentences:
        pos_tagged = find_pos_tagged_s2e([s])[0]
        words = [pair[0] for pair in pos_tagged]

        # Inversion introduced by the verb: "<Verb> by [the] A [the] B".
        for i in range(len(pos_tagged)-5):
            if not (words[i] in verbs_int and words[i+1] == 'by'):
                continue
            verb = words[i].lower()

            # discard() is used throughout instead of remove(): a sentence can
            # match several times (or appear twice in the input), and remove()
            # would raise KeyError on the second hit.

            # verb + entity + entity
            if(words[i+2] in entity_set and words[i+3] in entity_set):
                s_filtered.append(words[i+2] + " " + verb + " by" + ' ' + words[i+3])
                set_sentences.discard(s)

            # verb + the + entity + entity
            if(words[i+2] == 'the' and words[i+3] in entity_set and words[i+4] in entity_set):
                s_filtered.append(words[i+3] + " " + verb + " by" + ' ' + words[i+4])
                set_sentences.discard(s)

            # verb + entity + the + entity
            if(words[i+2] in entity_set and words[i+3] == 'the' and words[i+4] in entity_set):
                s_filtered.append(words[i+2] + " " + verb + " by" + ' ' + words[i+4])
                set_sentences.discard(s)

            # verb + the + entity + the + entity
            if(words[i+2] == 'the' and words[i+3] in entity_set and words[i+4] == 'the' and words[i+5] in entity_set):
                s_filtered.append(words[i+3] + " " + verb + " by" + ' ' + words[i+5])
                set_sentences.discard(s)

        # Inversion in the middle of the sentence: "... supported [the] A".
        for i in range(len(words)-3):
            if words[i] != 'supported':
                continue

            if(words[i+1] == 'the' and words[i+2] in entity_set):
                supporters = _supporters_before(words, i)
                s_filtered.append(words[i+2] + " " + 'supported by' + ' ' + ' '.join(supporters))
                set_sentences.discard(s)

            if(words[i+1] in entity_set):
                supporters = _supporters_before(words, i)
                s_filtered.append(words[i+1] + " " + 'supported by' + ' ' + ' '.join(supporters))
                set_sentences.discard(s)

    s_filtered += list(set_sentences)

    return list(set(s_filtered))


In [22]:
def check_cooperation(sentence, entities):
    """Tell whether the two earliest entities of the sentence stand close together.

    Parameters
    ----------
    sentence : str
        Sentence text, tokenized with ``tools.tokenize_sentence(sentence, True)``.
    entities : list
        Entities expected to occur among the tokens of the sentence.

    Returns
    -------
    bool
        True when the two entities that occur first are adjacent tokens or are
        separated by exactly one token. False when fewer than two entities
        are given, since a single entity cannot cooperate with anyone.

    Raises
    ------
    ValueError
        If one of the entities is not among the tokens of the sentence.
    """
    # Guard first: with fewer than two entities there is no second position
    # to compare, and indexing it used to raise IndexError.
    if len(entities) < 2:
        return False

    words = tools.tokenize_sentence(sentence, True)
    positions = sorted(words.index(a) for a in entities)

    return positions[1] == 1 + positions[0] or positions[1] == positions[0] + 2

In [23]:
def check_interaction(group_cooperation):
    """Tell whether the groups describe an interaction.

    True when there are exactly two groups, or when the first group holds
    more than one entity.
    """
    if len(group_cooperation) == 2:
        return True
    first_group = group_cooperation[0]
    return len(first_group) > 1

In [24]:
def remove_representent(pos_tagged):
    """Drop entities that only act as representative of a party grouping.

    Looks for "<entity> for <grouping>" or "<entity> for the <grouping>" in
    the (word, tag) pairs of one sentence. Every pair equal to a matched
    entity or link pair is then removed, wherever it occurs in the sentence.

    Returns ``([remaining_pairs], rebuilt_sentence)`` where the sentence is
    the remaining words joined with single spaces.
    """
    entities_repr = []

    for i in range(len(pos_tagged) - 2):
        speaker, link, target = pos_tagged[i], pos_tagged[i+1], pos_tagged[i+2]
        if speaker[0] in ENTITIES and link[0] in ('for', 'for the') and target[0] in PARTY_GROUPINGS:
            entities_repr.extend([speaker, link])

    remaining = [pair for pair in pos_tagged if pair not in entities_repr]
    sentences_updated = ' '.join(pair[0] for pair in remaining)

    return [remaining], sentences_updated
    

In [25]:
def write_line(issue_number, x):
    """Turn one interaction tuple into a flat output row.

    ``x`` is ``(country_a, country_b, interaction_types, sentence)``. The row
    is ``[issue_number, country_a, country_b, <flags>, sentence]`` where the
    flags are 0/1 values in the order behalf, support, agreement, opposition,
    criticism, cooperation. An interaction type outside that list adds an
    extra flag at the end.
    """
    flags = dict.fromkeys(
        ['behalf', 'support', 'agreement', 'opposition', 'criticism', 'cooperation'], 0
    )
    for kind in x[2]:
        flags[kind] = 1

    return [issue_number, x[0], x[1], *flags.values(), x[3]]

In [26]:
def get_key(val):
    """Return the numeric id of the entity spelled ``val``.

    ``DICTIONARY`` is scanned for an entry whose list of spellings contains
    ``val``; the key of that entry is then looked up in ``DICTIONARY_NUM``.
    When several entries match, the last one wins.

    Raises
    ------
    KeyError
        If no entry of ``DICTIONARY`` lists ``val``, or if the matching key
        has no id in ``DICTIONARY_NUM`` (``val`` is printed first in that case).
    """
    missing = object()
    key_country = missing

    for key, value in DICTIONARY.items():
        if (len(value) == 1 and val == value[0]) or (len(value) > 1 and val in value):
            # No break: the last matching entry is kept.
            key_country = key

    # Previously an unknown name left `key_country` unassigned and surfaced
    # as an UnboundLocalError; report it explicitly instead.
    if key_country is missing:
        raise KeyError('no entity key found for ' + repr(val))

    if key_country not in DICTIONARY_NUM:
        print(val)
    return DICTIONARY_NUM[key_country]

In [27]:
def add_id(interactions_):
    """Append the numeric ids of both countries to every interaction row.

    Each row is expected to hold the two country names at positions 1 and 2.
    The ids are appended in place (country A first, then country B); 9999 is
    used, and a message printed, when a name is not found in ``NAMES``.

    Returns the same list object that was passed in.
    """
    def _resolve_id(name):
        # Same lookup for both countries: try the name as written, then its
        # upper-case form (which takes precedence when both are known).
        found = 9999
        if name in NAMES:
            found = get_key(name)
        if name.upper() in NAMES:
            found = get_key(name.upper())
        return found

    for i in interactions_:
        c_a = i[1]
        c_b = i[2]

        id_ca = _resolve_id(c_a)
        # Country B used to look up its upper-case form even when only the
        # original spelling was known; it now follows the rule of country A.
        id_cb = _resolve_id(c_b)

        if(id_ca == 9999 ):
            print('not added c_a :', repr(c_a))

        if(id_cb == 9999 ):
            print('not added c_b :', repr(c_b))

        i.append(id_ca)
        i.append(id_cb)

    return interactions_

In [28]:
def create_df(cooperations, issue_number):
    """Assemble the interaction rows of one issue into a DataFrame.

    Parameters
    ----------
    cooperations : list of list
        Rows laid out as ``[issue, country_a, country_b, behalf, support,
        agreement, opposition, criticism, cooperation, sentence, id_a, id_b]``.
        Rows that do not have exactly 12 fields are ignored.
    issue_number :
        Issue identifier, used only when no row remains.

    Returns
    -------
    pandas.DataFrame
        One line per kept row. When no row remains, an empty frame with the
        same columns is returned.
    """
    # A complete row has 12 fields: issue, two names, six flags, the sentence and two ids.
    cooperations = [i for i in cooperations if len(i) == 12]

    if len(cooperations) > 0:
        dict_issue = {
            'type': 'generated',
            'issue': cooperations[0][0],
            'id_ca': [x[10] for x in cooperations],
            'id_cb': [x[11] for x in cooperations],
            'Country A': [x[1].upper() for x in cooperations],
            'Country B': [x[2].upper() for x in cooperations],
            'behalf': [x[3] for x in cooperations],
            'support': [x[4] for x in cooperations],
            'agreement': [x[5] for x in cooperations],
            'opposition': [x[6] for x in cooperations],
            'criticism': [x[7] for x in cooperations],
            'cooperation': [x[8] for x in cooperations],
            'sentences': [x[9] for x in cooperations],
        }
    else:
        # Same columns, in the same order, as the populated frame. The two
        # country columns were missing here, so an issue without interactions
        # produced a frame with a different schema.
        dict_issue = {
            'type': 'generated',
            'issue': issue_number,
            'id_ca': 1111,
            'id_cb': 1111,
            'Country A': [],
            'Country B': [],
            'behalf': [],
            'support': [],
            'agreement': [],
            'opposition': [],
            'criticism': [],
            'cooperation': [],
            'sentences': [],
        }

    return pd.DataFrame(dict_issue)

In [29]:
def interactions(issue_number):
    """Find every interaction of one issue by chaining all the filters.

    Parameters
    ----------
    issue_number :
        Issue identifier, forwarded to ``extract_s2e_issue_number`` and
        written into every output row.

    Returns
    -------
    tuple
        ``(df, sentences_int)``: the DataFrame built by ``create_df`` and the
        list of sentences in which at least one interaction was found. The
        sentences are the versions rebuilt by ``remove_representent``, not
        the raw text.
    """
    
    sentences = extract_s2e_issue_number(issue_number)


    # Pre-processing filters: rewrite or split sentences so that the
    # interaction detection below sees a simpler structure.

    sentences = remove_on_99s_from_programme(sentences)
    sentences = remove_double_s(sentences)
    sentences = find_inversions(sentences)


    interactions_ = []
    sentences_int = []

    # Iterating over a set: duplicates are skipped and the processing order
    # (hence the row order of the result) is not defined.
    for sentence_original in set(sentences):
        
        interactions = []
        # Use NLTK to POS-tag the sentence and keep only the wanted tags
        pos_tagged = find_pos_tagged_s2e([sentence_original])
        
        # From here on `sentence_original` is the sentence rebuilt without the
        # "<entity> for <grouping>" representatives.
        tags_filtered1, sentences_updated = remove_representent(pos_tagged[0])
        sentence_original = sentences_updated
        
        tags_filtered = find_patterns(tags_filtered1, LIST_TAGS)

        # Build the group(s) of entities that interact in the sentence, then
        # apply the filters that drop entities which do not really take part.
        group_cooperation, opp = detect_groups_cooperations(tags_filtered)
        
        # Character offset of the opposition link, if any; str.index raises
        # ValueError when the link is not a substring of the rebuilt sentence.
        if(len(opp)!=0):
            opposition_index = [sentence_original.index(opp[0])]
        else:
            opposition_index = []
        group_cooperation, sentence  = find_coalitions(group_cooperation, sentence_original, opposition_index)

        group_cooperation, sentence = remove_from_concern_of(group_cooperation ,sentence)
        
        
        group_cooperation, sentence = check_doubles(sentence, group_cooperation)
        
        # Check whether an interaction is still possible after all the filters
        interaction_bool = check_interaction(group_cooperation)

        if(interaction_bool):
            
            # Look for an opposition link in the filtered sentence
            token = tools.tokenize_sentence(sentence, True)
            opposition = ['opposed by','while','opposed by the']
            opposition = set(token).intersection(set(opposition))

            # Look for a criticism link
            criticism = ['criticized']
            criticism= set(token).intersection(set(criticism))
            

            # Priority order: opposition, then criticism, then plain cooperation
            if(opposition != set() and len(group_cooperation) ==2):
                interactions += opposed_by(sentence, group_cooperation, opposition)
            else:
                # Criticism only counts when entities stand on both sides of the link
                if(criticism != set() and check_link(criticism, sentence, group_cooperation[0])):
                    interactions += criticized_by(sentence, group_cooperation, criticism)         
                else :
                    
                    # The proximity check runs on the rebuilt sentence, while
                    # the cooperation itself is read from the filtered one.
                    if(check_cooperation(sentence_original, group_cooperation[0])):
                        # Find all the cooperations
                        
                        interactions += coop(sentence, group_cooperation[0])

 
            
            # Record the interactions, each paired with the sentence it came
            # from, when at least one was found
            if(len(interactions)!= 0):
                s = str(sentence_original)
                sentences_int.append(sentence_original)     
                interactions = [(x[0],x[1],x[2],s) for x in interactions]
                interactions_ += interactions

    # Build the DataFrame: flat rows, then the country ids, then the frame itself
    interactions_ = [ write_line(issue_number, x) for x in interactions_]
    interactions_ = add_id(interactions_)
    df = create_df(interactions_,issue_number)
    
    return df, sentences_int

In [30]:
def extract_interaction_range(list_issues):
    """Find the interactions of every issue listed in ``Files/list_meetings.csv``.

    Parameters
    ----------
    list_issues :
        Currently unused: the issues are always read from the CSV file.
        NOTE(review): confirm whether the argument should select the issues
        to process instead.

    Returns
    -------
    tuple
        ``(df, sentences)``: the per-issue DataFrames concatenated in
        ascending issue order, and the list of all sentences in which an
        interaction was found.
    """
    issues = tools.extract_from_csv_list_issues('Files/list_meetings.csv')

    # Collect the per-issue frames and concatenate once at the end; growing
    # the frame inside the loop copies all accumulated rows at every step.
    # The empty seed frame keeps the result an empty DataFrame when there is
    # no issue at all (pd.concat rejects an empty list).
    frames = [pd.DataFrame()]
    sentences = []
    for issue_number in sorted(issues):
        print(issue_number)
        df_temp, s = interactions(issue_number)
        frames.append(df_temp)
        sentences += s

    return pd.concat(frames), sentences

In [31]:
# Run the extraction. The range passed here is not used by
# extract_interaction_range, which reads its issue list from
# Files/list_meetings.csv.
df, s = extract_interaction_range(range(1,778))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
19
20
21
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_b : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_a : 'CAPE VERDE'
not added c_b : 'C

In [32]:
# Persist the result. The DataFrame index is written as the first column; it
# holds repeated labels because the per-issue frames are concatenated without
# being re-indexed.
df.to_csv('Text/interactions.csv')

In [33]:
# Display the generated interactions (one line per pair of countries and sentence).
df

Unnamed: 0,type,issue,id_ca,id_cb,Country A,Country B,behalf,support,agreement,opposition,criticism,cooperation,sentences
0,generated,1,148,14,NEW ZEALAND,AUSTRIA,0.0,1.0,0.0,0.0,0.0,1.0,Austria supported by New Zealand stated that c...
0,generated,2,75,165,EU,POLAND,0.0,0.0,1.0,0.0,0.0,1.0,The EU the US Australia Canada Japan and Polan...
1,generated,2,13,213,AUSTRALIA,US,0.0,0.0,1.0,0.0,0.0,1.0,The EU the US Australia Canada Japan and Polan...
2,generated,2,13,104,AUSTRALIA,JAPAN,0.0,0.0,1.0,0.0,0.0,1.0,The EU the US Australia Canada Japan and Polan...
3,generated,2,165,104,POLAND,JAPAN,0.0,0.0,1.0,0.0,0.0,1.0,The EU the US Australia Canada Japan and Polan...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
719,generated,776,172,29,SAUDI ARABIA,BRAZIL,0.0,0.0,1.0,0.0,0.0,1.0,Brazil with Saudi Arabia and Ecuador stressed ...
720,generated,776,29,172,BRAZIL,SAUDI ARABIA,0.0,0.0,1.0,0.0,0.0,1.0,Brazil with Saudi Arabia and Ecuador stressed ...
721,generated,776,66,172,ECUADOR,SAUDI ARABIA,0.0,0.0,1.0,0.0,0.0,1.0,Brazil with Saudi Arabia and Ecuador stressed ...
722,generated,776,153,37,NORWAY,CANADA,0.0,0.0,1.0,0.0,0.0,1.0,Canada and Norway underscored the need to avoi...
