# Train a model that extracts politician names and political parties using machine learning

## Library and data imports

In [1]:
import json
import random
import time

import spacy
from spacy.training.example import Example

In [2]:
def load_data(file):
    """Parse the UTF-8 encoded JSON file at ``file`` and return its content.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The Python object produced by decoding the file's JSON document.
    """
    with open(file, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

In [3]:
def save_data(file, data):
    """Serialize ``data`` as JSON (4-space indent) into the file at ``file``.

    The file is opened for writing with UTF-8 encoding, so any existing
    content is replaced.

    Args:
        file: Path of the file to write.
        data: JSON-serializable object to store.
    """
    with open(file, mode="w", encoding="utf-8") as handle:
        json.dump(data, fp=handle, indent=4)

In [7]:
# Load the annotated training examples from disk. train_spacy iterates this
# as (text, annotations) pairs, where annotations carries an "entities" list.
TRAINING_DATA_PATH = "data/political_training_data.json"
TRAIN_DATA = load_data(TRAINING_DATA_PATH)

## Model Training

In [5]:
def train_spacy(data, iterations):
    """Train a blank English spaCy pipeline containing a single NER component.

    Args:
        data: Sequence of ``(text, annotations)`` pairs. ``annotations`` is a
            dict passed to ``Example.from_dict``; each item of its
            ``"entities"`` list carries the entity label as its third element.
            The caller's sequence is not modified.
        iterations: Number of passes to make over the training examples.

    Returns:
        The trained spaCy ``Language`` pipeline.
    """
    # Work on a shallow copy so the per-iteration shuffle does not reorder
    # the caller's list in place.
    train_examples = list(data)
    nlp = spacy.blank("en")

    # Bind `ner` on both paths; previously it was only assigned when the pipe
    # had to be added, leaving it undefined otherwise.
    if "ner" in nlp.pipe_names:
        ner = nlp.get_pipe("ner")
    else:
        ner = nlp.add_pipe("ner", last=True)

    # Register every label found in the annotations with the NER component.
    # `or []` tolerates examples whose "entities" key is missing or None.
    for _, annotations in train_examples:
        for ent in annotations.get("entities") or []:
            ner.add_label(ent[2])

    # Disable every pipe except NER while training.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.select_pipes(disable=other_pipes):
        # spaCy v3 replacement for the deprecated nlp.begin_training().
        optimizer = nlp.initialize()

        for itn in range(iterations):
            print("Starting iteration " + str(itn))
            random.shuffle(train_examples)
            losses = {}

            for text, annotations in train_examples:
                doc = nlp.make_doc(text)
                example = Example.from_dict(doc, annotations)
                nlp.update(
                    [example],
                    drop=0.2,
                    sgd=optimizer,
                    losses=losses,
                )

            # `losses` accumulates over the whole pass, so report it once per
            # iteration rather than after every single example.
            print(losses)

    return nlp

In [9]:
# Train the NER model, save it to disk, and report how long the run took.
N_ITERATIONS = 30
MODEL_DIR = "political_ner_model"

# Fix the random seeds so the shuffling and weight initialisation are
# repeatable across "Restart & Run All" executions.
spacy.util.fix_random_seed(0)

# perf_counter is monotonic, so the elapsed time cannot be skewed by
# system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
nlp = train_spacy(TRAIN_DATA, N_ITERATIONS)
nlp.to_disk(MODEL_DIR)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

Starting iteration 0
{'ner': 855.1705154119645}
Starting iteration 1
{'ner': 285.5568032572871}
Starting iteration 2
{'ner': 195.22609341340802}
Starting iteration 3
{'ner': 153.4458233149475}
Starting iteration 4
{'ner': 117.24101123519048}
Starting iteration 5
{'ner': 95.64985042916463}
Starting iteration 6
{'ner': 73.59693354773833}
Starting iteration 7
{'ner': 73.87732284541163}
Starting iteration 8
{'ner': 91.8834652915355}
Starting iteration 9
{'ner': 58.89872064932659}
Starting iteration 10
{'ner': 53.42263842428859}
Starting iteration 11
{'ner': 36.071037430657185}
Starting iteration 12
{'ner': 69.4704300019846}
Starting iteration 13
{'ner': 62.06814824459397}
Starting iteration 14
{'ner': 31.50419510699183}
Starting iteration 15
{'ner': 47.610254634183214}
Starting iteration 16
{'ner': 46.03422127012681}
Starting iteration 17
{'ner': 31.637988605447045}
Starting iteration 18
{'ner': 30.36392552286308}
Starting iteration 19
{'ner': 55.88537462413585}
Starting iteration 20
{'ner

In [12]:
# Sample sentence that the saved NER model is run on in the next cell.
text = "I will not vote for Raila"

In [13]:
# Reload the pipeline that was saved to disk and apply it to the sample text.
nlp = spacy.load("political_ner_model")
doc = nlp(text)

# Show each recognised entity followed by its label, one per line.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Raila POLITICIAN
