In [1]:
import os

# The cells below read paths relative to the working directory (e.g. 'data/input'),
# so step up one level from the notebook's folder. The guard makes the cell
# idempotent: re-running it no longer climbs one more directory each time.
if not os.path.isdir('data/input'):
    os.chdir('..')

### 1. Read data

In [31]:
# Show the entries under data/input (path is relative to the current working directory).
os.listdir('data/input')

['cybersecurity-reports', 'muc4']

In [67]:
import json
from collections import OrderedDict

path = 'data/input/cybersecurity-reports/train_full.json'

# Pass the encoding explicitly so decoding does not depend on the platform's
# locale default (JSON interchange files are UTF-8).
with open(path, "r", encoding="utf-8") as doc_keys_file:
    # object_pairs_hook=OrderedDict keeps every JSON object's keys in file order.
    input_roles = json.load(doc_keys_file, object_pairs_hook=OrderedDict)

# Preview a single record: its 'doc' text and its 'roles' annotations.
input_roles['fireeye-1']

OrderedDict([('doc',
              'Chinese cyber espionage operators modified the software packages of a legitimate vendor, NetSarang Computer, allowing access to a broad range of industries and institutions that include financial services, transportation, telecommunications, energy, media, academic, retail, and gaming. The malicious actors embedded a backdoor named SHADOWPAD, which can access network data and download additional malware. Based on shared communications infrastructure and similar targeting and tactics, FireEye Threat Intelligence assesses this activity is related to a Chinese espionage team that previously targeted a gaming company in 2015.'),
             ('roles',
              OrderedDict([('adversary',
                            [['Chinese cyber espionage operators',
                              'malicious actors',
                              'Chinese espionage team']]),
                           ('capability', [['backdoor', 'SHADOWPAD']]),
                   

In [56]:
# Displays a shallow copy of the whole annotation dict; note this renders every loaded record in the cell output.
input_roles.copy()

OrderedDict([('fireeye-1',
              OrderedDict([('doc',
                            'Chinese cyber espionage operators modified the software packages of a legitimate vendor, NetSarang Computer, allowing access to a broad range of industries and institutions that include financial services, transportation, telecommunications, energy, media, academic, retail, and gaming. The malicious actors embedded a backdoor named SHADOWPAD, which can access network data and download additional malware. Based on shared communications infrastructure and similar targeting and tactics, FireEye Threat Intelligence assesses this activity is related to a Chinese espionage team that previously targeted a gaming company in 2015.'),
                           ('roles',
                            OrderedDict([('adversary',
                                          [['Chinese cyber espionage operators',
                                            'malicious actors',
                                       

### 2. Load tokenizer and preprocessor

In [44]:
import spacy
from transformers import AutoTokenizer

# Subword tokenizer for the BERT checkpoint (use_fast=False is passed through to from_pretrained).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=False)

# spaCy pipeline; it is what separates the paragraphs into sentences.
nlp = spacy.load("en_core_web_sm")

In [8]:
import random
from copy import deepcopy
import spacy
from transformers import AutoTokenizer

class Doc_Seq:
    """Bundle a document key with its tagged sequence and the source document."""

    def __init__(self, key, seq_tag_pair, doc):
        """
        :param key: identifier of the document
        :param seq_tag_pair: tagged token sequence for the document (stored as given)
        :param doc: the source document; presumably a spaCy ``Doc`` (anything exposing
            ``.text``), but a plain string is accepted as well
        """
        self.key = key
        self.seq_tag_pair = seq_tag_pair
        self.doc = doc

    def __repr__(self):
        # A repr must never raise: fall back to the object itself when `doc`
        # has no `.text` attribute (e.g. when it is a plain string).
        doc_text = getattr(self.doc, 'text', self.doc)
        return f'Doc_Seq(key={self.key}, doc={doc_text})'

def _tag_mention_spans(seq_tag_pair, mention_tokens, role):
    """Label every still-untagged occurrence of ``mention_tokens``, in place.

    :param seq_tag_pair: list of ``[subword, tag, word_idx]`` triples; tags are mutated
    :param mention_tokens: list of subword tokens making up one mention
    :param role: role name used to build the ``B-<role>`` / ``I-<role>`` tags
    :return: True if at least one occurrence was tagged, else False
    """
    span_len = len(mention_tokens)
    if span_len == 0:
        # A mention that tokenizes to nothing (e.g. an empty string) cannot match.
        return False

    tagged = False
    for span_start in range(len(seq_tag_pair) - span_len + 1):
        span = seq_tag_pair[span_start: span_start + span_len]
        if [item[0] for item in span] != mention_tokens:
            continue
        # Never overwrite a span that an earlier mention has already tagged.
        if any(item[1] != 'O' for item in span):
            continue
        span[0][1] = "B-" + role
        for item in span[1:]:
            item[1] = "I-" + role
        tagged = True
    return tagged


def create_sent_tagging(input_roles, test_set=False, bert_model='bert-base-uncased', keys_dict_none_empty=None,
                        tokenizer=None, nlp=None, verbose=True, seed=None):
    """
    Turn role-annotated documents into BIO-tagged subword sequences over sentence windows.

    Each document is split into sentences, grouped into windows of up to three
    sentences, and every window is tokenized into ``[subword, tag, word_idx]``
    triples where ``word_idx`` is the index of the spaCy token the subword came from.

    :param input_roles: ordered dict mapping a document key to a dict with
        ``"doc"`` (text) and ``"roles"`` (role -> list of entities, each entity a
        list of mention strings); it is deep-copied and not modified
    :param test_set: boolean to indicate whether we are processing train or test set;
        train windows slide one sentence at a time (overlapping), test windows do not overlap
    :param bert_model: tokenizer checkpoint to load when ``tokenizer`` is not given
    :param keys_dict_none_empty: if given, ONLY document keys contained in this
        collection are processed; all other keys are skipped
    :param tokenizer: optional pre-loaded tokenizer exposing ``tokenize(str) -> list``;
        avoids reloading the checkpoint on every call
    :param nlp: optional pre-loaded spaCy pipeline; defaults to ``en_core_web_sm``
    :param verbose: when True (default) print each tagged sequence as it is built
    :param seed: optional seed for the negative sampling; ``None`` keeps using the
        global ``random`` state
    :return: ``(all_examples_sample_neg, all_examples)``: the first holds every
        window with at least one tag plus an equally sized (at most) random sample
        of untagged windows, the second holds every window. Each example is a dict
        with keys ``doc_id``, ``seq`` and ``text``.
    """
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(bert_model, use_fast=False)
    if nlp is None:
        nlp = spacy.load("en_core_web_sm")

    doc_keys = deepcopy(input_roles)

    # turn doc_keys (entity) into doc_keys (mentions): one flat, de-duplicated
    # mention list per role
    for docid in doc_keys:
        roles = doc_keys[docid]["roles"]
        for role in roles:
            mentions = []
            for entity in roles[role]:
                for mention in entity:
                    if mention not in mentions:
                        mentions.append(mention)
            roles[role] = mentions

    num_sent_to_include = 3  # maximum number of sentences per window
    stride = num_sent_to_include if test_set else 1

    seqs_all_o = []
    seqs_not_all_o = []
    all_examples = []
    para_lens = []
    for key in doc_keys:

        # only keep the requested keys
        if keys_dict_none_empty is not None and key not in keys_dict_none_empty:
            continue

        doc = doc_keys[key]["doc"]
        tags_values = doc_keys[key]["roles"]

        # Tokenize every mention once per document instead of once per window.
        # Within a role, longer mentions come first so they are tagged before
        # any shorter mention contained in them (stable sort keeps ties in order).
        # NOTE(review): mentions are not lowercased here while the sequence is;
        # matching relies on the tokenizer lowercasing (as an uncased model does).
        tokenized_mentions = []
        for tag_anno in tags_values:
            values = tags_values[tag_anno]
            values.sort(key=len, reverse=True)
            for value in values:
                tokenized_mentions.append((tag_anno, list(tokenizer.tokenize(value))))

        # split the document into paragraphs, then each paragraph into sentences
        doc_sents = []
        for para in doc.split("\n\n"):
            para_doc = nlp(" ".join(para.split("\n")))
            para_sents = [sent.text for sent in para_doc.sents]
            doc_sents.extend(para_sents)
            para_lens.append(len(para_sents))

        # build the windows and annotate them
        for win_start in range(0, len(doc_sents), stride):
            win_end = min(win_start + num_sent_to_include, len(doc_sents))
            sequence = " ".join(doc_sents[win_start: win_end])

            # keep track of the spaCy token each subword belongs to
            seq_tag_pair = [
                [subword_token, 'O', tok.i]
                for tok in nlp(sequence.lower())
                for subword_token in tokenizer.tokenize(tok.text)
            ]

            all_o = True
            for tag_anno, mention_tokens in tokenized_mentions:
                if _tag_mention_spans(seq_tag_pair, mention_tokens, tag_anno):
                    all_o = False

            example = {'doc_id': key, 'seq': seq_tag_pair, 'text': sequence}
            all_examples.append(example)
            if all_o:
                seqs_all_o.append(example)
            else:
                seqs_not_all_o.append(example)
            if verbose:
                print(seq_tag_pair)

    # balance: keep at most as many untagged windows as there are tagged ones
    rng = random if seed is None else random.Random(seed)
    seqs_all_o_sample = rng.sample(seqs_all_o, min(len(seqs_not_all_o), len(seqs_all_o)))
    all_examples_sample_neg = seqs_not_all_o + seqs_all_o_sample
    if para_lens:
        # guarded: with no processed document there is nothing to average
        print("Average paragraph sent # :", sum(para_lens)/len(para_lens))

    return all_examples_sample_neg, all_examples
    
# Tag the loaded annotations in train mode (test_set defaults to False); returns the balanced sample and the full list of examples.
all_examples_sample_neg, all_examples = create_sent_tagging(input_roles)

[['chinese', 'B-adversary', 0], ['cyber', 'I-adversary', 1], ['espionage', 'I-adversary', 2], ['operators', 'I-adversary', 3], ['modified', 'O', 4], ['the', 'O', 5], ['software', 'O', 6], ['packages', 'O', 7], ['of', 'O', 8], ['a', 'O', 9], ['legitimate', 'B-victim', 10], ['vendor', 'I-victim', 11], [',', 'I-victim', 12], ['nets', 'B-victim', 13], ['##aran', 'I-victim', 13], ['##g', 'I-victim', 13], ['computer', 'I-victim', 14], [',', 'O', 15], ['allowing', 'O', 16], ['access', 'O', 17], ['to', 'O', 18], ['a', 'O', 19], ['broad', 'O', 20], ['range', 'O', 21], ['of', 'O', 22], ['industries', 'O', 23], ['and', 'O', 24], ['institutions', 'O', 25], ['that', 'O', 26], ['include', 'O', 27], ['financial', 'B-victim', 28], ['services', 'I-victim', 29], [',', 'O', 30], ['transportation', 'O', 31], [',', 'O', 32], ['telecommunications', 'O', 33], [',', 'O', 34], ['energy', 'O', 35], [',', 'O', 36], ['media', 'O', 37], [',', 'O', 38], ['academic', 'O', 39], [',', 'O', 40], ['retail', 'O', 41], ['

[['the', 'O', 0], ['manner', 'O', 1], ['in', 'O', 2], ['which', 'O', 3], ['this', 'B-capability', 4], ['system', 'I-capability', 5], ['operates', 'O', 6], ['suggests', 'O', 7], ['that', 'O', 8], ['each', 'O', 9], ['gt', 'O', 10], ['##ag', 'O', 10], ['value', 'O', 11], ['is', 'O', 12], ['associated', 'O', 13], ['with', 'O', 14], ['a', 'O', 15], ['different', 'O', 16], ['trick', 'B-adversary', 17], ['##bot', 'I-adversary', 17], ['customer', 'I-adversary', 18], ['.', 'O', 19], ['fire', 'O', 20], ['##eye', 'O', 20], ['threat', 'O', 21], ['intelligence', 'O', 22], ['has', 'O', 23], ['identified', 'O', 24], ['and', 'O', 25], ['clustered', 'O', 26], ['a', 'O', 27], ['number', 'O', 28], ['of', 'O', 29], ['trick', 'B-capability', 30], ['##bot', 'I-capability', 30], ['gt', 'O', 31], ['##ag', 'O', 31], ['values', 'O', 32], ['found', 'O', 33], ['in', 'O', 34], ['the', 'O', 35], ['configuration', 'O', 36], ['of', 'O', 37], ['this', 'B-capability', 38], ['system', 'I-capability', 39], ['and', 'O', 4

[['it', 'O', 0], ['consists', 'O', 1], ['of', 'O', 2], ['high', 'O', 3], ['-', 'O', 4], ['level', 'O', 5], ['analysis', 'O', 6], ['and', 'O', 7], ['is', 'O', 8], ['not', 'O', 9], ['meant', 'O', 10], ['to', 'O', 11], ['provide', 'O', 12], ['in', 'O', 13], ['-', 'O', 14], ['depth', 'O', 15], ['insights', 'O', 16], ['into', 'O', 17], ['the', 'O', 18], ['nu', 'O', 19], ['##ances', 'O', 19], ['of', 'O', 20], ['each', 'O', 21], ['incident', 'O', 22], ['.', 'O', 23], ['our', 'O', 24], ['methodology', 'O', 25], ['evaluate', 'O', 26], ['##s', 'O', 26], ['four', 'O', 27], ['categories', 'O', 28], [',', 'O', 29], ['which', 'O', 30], ['are', 'O', 31], ['target', 'O', 32], [',', 'O', 33], ['so', 'O', 34], ['##phi', 'O', 34], ['##stic', 'O', 34], ['##ation', 'O', 34], [',', 'O', 35], ['impact', 'O', 36], [',', 'O', 37], ['and', 'O', 38], ['affected', 'O', 39], ['equipment', 'O', 40], ['architecture', 'O', 41], ['based', 'O', 42], ['on', 'O', 43], ['the', 'O', 44], ['purdue', 'O', 45], ['model', 'O',

[['fire', 'O', 0], ['##eye', 'O', 0], ['threat', 'O', 1], ['intelligence', 'O', 2], ['also', 'O', 3], ['observed', 'O', 4], ['changes', 'O', 5], ['in', 'O', 6], ['the', 'O', 7], ['way', 'O', 8], ['em', 'B-capability', 9], ['##ote', 'I-capability', 9], ['##t', 'I-capability', 9], ['communicate', 'O', 10], ['##s', 'O', 10], ['through', 'O', 11], ['http', 'B-infrastructure', 12], ['post', 'I-infrastructure', 13], ['requests', 'I-infrastructure', 14], ['with', 'O', 15], ['its', 'O', 16], ['command', 'B-capability', 17], ['and', 'I-capability', 18], ['control', 'I-capability', 19], ['(', 'I-capability', 20], ['c', 'I-capability', 21], ['&', 'I-capability', 21], ['amp', 'I-capability', 21], [';', 'I-capability', 21], ['c', 'I-capability', 21], [')', 'I-capability', 22], ['servers', 'I-capability', 23], ['.', 'O', 24], ['these', 'O', 25], ['changes', 'O', 26], ['were', 'O', 27], ['likely', 'O', 28], ['made', 'O', 29], ['to', 'O', 30], ['evade', 'O', 31], ['host', 'O', 32], ['and', 'O', 33], [

[['the', 'O', 0], ['ina', 'O', 1], ['##ct', 'O', 1], ['##ivity', 'O', 1], ['of', 'O', 2], ['em', 'B-adversary', 3], ['##ote', 'I-adversary', 3], ['##t', 'I-adversary', 3], ['distribution', 'I-adversary', 4], ['operations', 'I-adversary', 5], ['is', 'O', 6], ['supported', 'O', 7], ['by', 'O', 8], ['evidence', 'O', 9], ['observed', 'O', 10], ['across', 'O', 11], ['the', 'O', 12], ['fire', 'O', 13], ['##eye', 'O', 13], ['sensor', 'O', 14], ['network', 'O', 15], ['and', 'O', 16], ['active', 'O', 17], ['monitoring', 'O', 18], ['of', 'O', 19], ['the', 'O', 20], ['em', 'B-capability', 21], ['##ote', 'I-capability', 21], ['##t', 'I-capability', 21], ['bot', 'I-capability', 22], ['##net', 'I-capability', 22], ['.', 'O', 23], ['the', 'O', 24], ['cause', 'O', 25], ['for', 'O', 26], ['this', 'O', 27], ['current', 'O', 28], ['ce', 'O', 29], ['##ssa', 'O', 29], ['##tion', 'O', 29], ['of', 'O', 30], ['activity', 'O', 31], ['is', 'O', 32], ['unclear', 'O', 33], [',', 'O', 34], ['and', 'O', 35], ['it',

[['the', 'O', 0], ['publication', 'O', 1], ['includes', 'O', 2], ['an', 'O', 3], ['explicit', 'O', 4], ['requirement', 'O', 5], ['to', 'O', 6], ['either', 'O', 7], ['segment', 'O', 8], ['operational', 'O', 9], ['technology', 'O', 10], ['(', 'O', 11], ['ot', 'O', 12], [')', 'O', 13], ['or', 'O', 14], ['ensure', 'O', 15], ['the', 'O', 16], ['devices', 'O', 17], ['are', 'O', 18], ['compliant', 'O', 19], ['with', 'O', 20], ['imposed', 'O', 21], ['cyber', 'O', 22], ['security', 'O', 23], ['requirements', 'O', 24], ['.', 'O', 25], ['the', 'O', 26], ['inclusion', 'O', 27], ['of', 'O', 28], ['ot', 'O', 29], ['-', 'O', 30], ['specific', 'O', 31], ['requirements', 'O', 32], ['in', 'O', 33], ['an', 'O', 34], ['information', 'O', 35], ['technology', 'O', 36], ['(', 'O', 37], ['it', 'O', 38], [')', 'O', 38], ['-', 'O', 38], ['dominate', 'O', 38], ['cyber', 'O', 39], ['security', 'O', 40], ['standard', 'O', 41], ['demonstrates', 'O', 42], ['the', 'O', 43], ['per', 'O', 44], ['##tine', 'O', 44], ['##

[['additional', 'O', 0], ['samples', 'O', 1], ['and', 'O', 2], ['related', 'O', 3], ['infrastructure', 'O', 4], ['were', 'O', 5], ['subsequently', 'O', 6], ['uncovered', 'O', 7], ['.', 'O', 8], ['this', 'O', 9], ['newest', 'O', 10], ['version', 'O', 11], ['of', 'O', 12], ['armed', 'B-capability', 13], ['##cl', 'I-capability', 13], ['##oud', 'I-capability', 13], ['contains', 'O', 14], ['anti', 'B-capability', 15], ['-', 'I-capability', 16], ['forensic', 'I-capability', 17], ['features', 'I-capability', 18], ['.', 'O', 19]]
[['this', 'O', 0], ['newest', 'O', 1], ['version', 'O', 2], ['of', 'O', 3], ['armed', 'B-capability', 4], ['##cl', 'I-capability', 4], ['##oud', 'I-capability', 4], ['contains', 'O', 5], ['anti', 'B-capability', 6], ['-', 'I-capability', 7], ['forensic', 'I-capability', 8], ['features', 'I-capability', 9], ['.', 'O', 10]]
[['after', 'O', 0], ['a', 'O', 1], ['brief', 'O', 2], ['ce', 'O', 3], ['##ssa', 'O', 3], ['##tion', 'O', 3], ['of', 'O', 4], ['q', 'B-capability', 5

[['observed', 'O', 0], ['recipients', 'O', 1], ['included', 'O', 2], ['individuals', 'B-victim', 3], ['at', 'I-victim', 4], ['financial', 'I-victim', 5], ['organizations', 'I-victim', 6], ['based', 'I-victim', 7], ['in', 'I-victim', 8], ['russia', 'I-victim', 9], ['and', 'I-victim', 10], ['slovakia', 'I-victim', 11], ['.', 'O', 12], ['based', 'O', 13], ['on', 'O', 14], ['the', 'O', 15], ['use', 'O', 16], ['of', 'O', 17], ['overlapping', 'O', 18], ['tactics', 'O', 19], [',', 'O', 20], ['techniques', 'O', 21], [',', 'O', 22], ['and', 'O', 23], ['procedures', 'O', 24], ['(', 'O', 25], ['tt', 'O', 26], ['##ps', 'O', 26], [')', 'O', 27], [',', 'O', 28], ['we', 'O', 29], ['assess', 'O', 30], ['with', 'O', 31], ['moderate', 'O', 32], ['confidence', 'O', 33], ['that', 'O', 34], ['the', 'O', 35], ['recent', 'O', 36], ['kay', 'B-capability', 37], ['##sl', 'I-capability', 37], ['##ice', 'I-capability', 37], ['/', 'I-capability', 38], ['cool', 'I-capability', 39], ['##pants', 'I-capability', 39], 

[['alternative', 'O', 0], ['mit', 'O', 1], ['##iga', 'O', 1], ['##tions', 'O', 1], [',', 'O', 2], ['such', 'O', 3], ['as', 'O', 4], ['fire', 'O', 5], ['##wall', 'O', 5], ['rules', 'O', 6], [',', 'O', 7], ['intrusion', 'O', 8], ['detection', 'O', 9], ['signatures', 'O', 10], [',', 'O', 11], ['or', 'O', 12], ['os', 'O', 13], ['-', 'O', 14], ['version', 'O', 15], ['upgrades', 'O', 16], [',', 'O', 17], ['can', 'O', 18], ['be', 'O', 19], ['deployed', 'O', 20], ['as', 'O', 21], ['counter', 'O', 22], ['##me', 'O', 22], ['##as', 'O', 22], ['##ures', 'O', 22], ['in', 'O', 23], ['the', 'O', 24], ['absence', 'O', 25], ['of', 'O', 26], ['patches', 'O', 27], ['.', 'O', 28]]
[['on', 'O', 0], ['july', 'O', 1], ['29', 'O', 2], [',', 'O', 3], ['2019', 'O', 4], [',', 'O', 5], ['capital', 'B-victim', 6], ['one', 'I-victim', 7], ['announced', 'O', 8], ['a', 'O', 9], ['data', 'O', 10], ['security', 'O', 11], ['incident', 'O', 12], ['in', 'O', 13], ['which', 'O', 14], ['an', 'O', 15], ['unauthorized', 'B-ad

[['we', 'O', 0], ['sur', 'O', 1], ['##mise', 'O', 1], ['##d', 'O', 1], ['that', 'O', 2], ['the', 'O', 3], ['network', 'O', 4], ['was', 'O', 5], ['being', 'O', 6], ['operated', 'O', 7], ['by', 'O', 8], ['a', 'O', 9], ['marketing', 'O', 10], ['firm', 'O', 11], ['in', 'O', 12], ['india', 'O', 13], ['on', 'O', 14], ['behalf', 'O', 15], ['of', 'O', 16], ['actors', 'O', 17], ['seeking', 'O', 18], ['to', 'O', 19], ['support', 'O', 20], ['uae', 'O', 21], ['political', 'O', 22], ['interests', 'O', 23], ['.', 'O', 24], ['we', 'O', 25], ['have', 'O', 26], ['continued', 'O', 27], ['to', 'O', 28], ['identify', 'O', 29], ['additional', 'O', 30], ['activity', 'O', 31], ['that', 'O', 32], ['we', 'O', 33], ['believe', 'O', 34], ['is', 'O', 35], ['being', 'O', 36], ['conducted', 'O', 37], ['by', 'O', 38], ['this', 'O', 39], ['network', 'O', 40], [',', 'O', 41], ['including', 'O', 42], ['the', 'O', 43], ['lever', 'O', 44], ['##aging', 'O', 44], ['of', 'O', 45], ['persona', 'O', 46], ['##s', 'O', 46], ['o

[['a', 'O', 0], ['newly', 'B-capability', 1], ['identified', 'I-capability', 2], ['download', 'I-capability', 3], ['##er', 'I-capability', 3], [',', 'O', 4], ['referred', 'O', 5], ['to', 'O', 6], ['by', 'O', 7], ['fire', 'O', 8], ['##eye', 'O', 8], ['threat', 'O', 9], ['intelligence', 'O', 10], ['as', 'O', 11], ['bull', 'B-capability', 12], ['##z', 'I-capability', 12], ['##link', 'I-capability', 12], [',', 'O', 13], ['demonstrates', 'O', 14], ['a', 'O', 15], ['number', 'O', 16], ['of', 'O', 17], ['overlap', 'O', 18], ['##s', 'O', 18], ['with', 'O', 19], ['mal', 'O', 20], ['##ware', 'O', 20], ['developed', 'O', 21], ['and', 'O', 22], ['offered', 'O', 23], ['by', 'O', 24], ['bad', 'O', 25], ['##bu', 'O', 25], ['##ll', 'O', 25], ['##z', 'O', 25], ['##ven', 'O', 25], ['##om', 'O', 25], ['.', 'O', 26], ['during', 'O', 27], ['july', 'O', 28], ['2019', 'O', 29], [',', 'O', 30], ['two', 'O', 31], ['campaigns', 'O', 32], ['were', 'O', 33], ['identified', 'O', 34], ['using', 'O', 35], ['this', '

[['however', 'O', 0], [',', 'O', 1], ['the', 'O', 2], ['actors', 'O', 3], ['themselves', 'O', 4], ['may', 'O', 5], ['have', 'O', 6], ['been', 'O', 7], ['reassigned', 'O', 8], ['or', 'O', 9], ['relieved', 'O', 10], ['from', 'O', 11], ['their', 'O', 12], ['contracts', 'O', 13], ['.', 'O', 14], ['operators', 'O', 15], ['previously', 'O', 16], ['associated', 'O', 17], ['with', 'O', 18], ['apt', 'B-adversary', 19], ['##10', 'I-adversary', 19], ['may', 'I-adversary', 20], ['have', 'O', 21], ['returned', 'O', 22], ['as', 'O', 23], ['part', 'O', 24], ['of', 'O', 25], ['different', 'O', 26], ['apt', 'O', 27], ['missions', 'O', 28], ['working', 'O', 29], ['on', 'O', 30], ['behalf', 'O', 31], ['of', 'O', 32], ['the', 'O', 33], ['people', 'B-adversary', 34], ["'", 'I-adversary', 35], ['s', 'I-adversary', 35], ['republic', 'I-adversary', 36], ['of', 'I-adversary', 37], ['china', 'I-adversary', 38], ['ministry', 'I-adversary', 39], ['of', 'I-adversary', 40], ['state', 'I-adversary', 41], ['security'

[['based', 'O', 0], ['on', 'O', 1], ['overlap', 'O', 2], ['##s', 'O', 2], ['in', 'O', 3], ['the', 'O', 4], ['attacker', 'O', 5], ['infrastructure', 'O', 6], [',', 'O', 7], ['we', 'O', 8], ['assess', 'O', 9], ['with', 'O', 10], ['high', 'O', 11], ['confidence', 'O', 12], ['that', 'O', 13], ['this', 'O', 14], ['recent', 'O', 15], ['campaign', 'O', 16], ['was', 'O', 17], ['conducted', 'O', 18], ['by', 'O', 19], ['a', 'O', 20], ['mexican', 'B-adversary', 21], ['threat', 'I-adversary', 22], ['actor', 'I-adversary', 23], ['who', 'O', 24], ['has', 'O', 25], ['been', 'O', 26], ['active', 'O', 27], ['for', 'O', 28], ['more', 'O', 29], ['than', 'O', 30], ['four', 'O', 31], ['years', 'O', 32], ['.', 'O', 33], ['there', 'O', 34], ['are', 'O', 35], ['multiple', 'O', 36], ['indications', 'O', 37], ['that', 'O', 38], ['brazilian', 'B-adversary', 39], ['mal', 'I-adversary', 40], ['##ware', 'I-adversary', 40], ['developer', 'I-adversary', 41], ['(', 'I-adversary', 41], ['s', 'I-adversary', 41], [')', '

[['the', 'O', 0], ['emails', 'B-infrastructure', 1], ['leverage', 'O', 2], ['##d', 'O', 2], ['"', 'O', 3], ['youth', 'O', 4], ['in', 'O', 5], ['afghanistan', 'O', 6], ['"', 'O', 6], ['-', 'O', 6], ['related', 'O', 6], ['lure', 'O', 7], ['content', 'O', 8], ['and', 'O', 9], ['contained', 'O', 10], ['a', 'O', 11], ['malicious', 'B-capability', 12], ['word', 'I-capability', 13], ['document', 'I-capability', 14], ['with', 'O', 15], ['additional', 'O', 16], ['deco', 'O', 17], ['##y', 'O', 17], ['content', 'O', 18], ['to', 'O', 19], ['deliver', 'O', 20], ['a', 'O', 21], ['variant', 'O', 22], ['of', 'O', 23], ['x', 'B-capability', 24], ['##rat', 'I-capability', 24], ['mal', 'I-capability', 25], ['##ware', 'I-capability', 25], ['to', 'O', 26], ['the', 'O', 27], ['targeted', 'O', 28], ['systems', 'O', 29], ['.', 'O', 30], ['we', 'O', 31], ['currently', 'O', 32], ['lack', 'O', 33], ['sufficient', 'O', 34], ['information', 'O', 35], ['to', 'O', 36], ['attribute', 'O', 37], ['this', 'O', 38], ['ac

[['the', 'O', 0], ['command', 'B-capability', 1], ['and', 'I-capability', 2], ['control', 'I-capability', 3], ['(', 'I-capability', 4], ['c', 'I-capability', 5], ['&', 'I-capability', 5], ['amp', 'I-capability', 5], [';', 'I-capability', 5], ['c', 'I-capability', 5], [')', 'I-capability', 6], ['server', 'I-capability', 7], ['hosts', 'O', 8], ['the', 'O', 9], ['x', 'B-capability', 10], ['##key', 'I-capability', 10], ['##log', 'I-capability', 10], ['mal', 'I-capability', 11], ['##ware', 'I-capability', 11], [',', 'O', 12], ['a', 'B-capability', 13], ['key', 'I-capability', 14], ['##log', 'I-capability', 14], ['##ger', 'I-capability', 14], ['that', 'O', 15], ['is', 'O', 16], ['also', 'O', 17], ['capable', 'O', 18], ['of', 'O', 19], ['taking', 'O', 20], ['screens', 'O', 21], ['##hot', 'O', 21], ['##s', 'O', 21], ['.', 'O', 22], ['among', 'O', 23], ['the', 'O', 24], ['files', 'O', 25], ['obtained', 'O', 26], ['from', 'O', 27], ['the', 'O', 28], ['se', 'O', 29], ['##ver', 'O', 29], ['is', 'O

[['analysis', 'O', 0], ['of', 'O', 1], ['the', 'O', 2], ['mal', 'B-capability', 3], ['##ware', 'I-capability', 3], ['is', 'O', 4], ['ongoing', 'O', 5], ['and', 'O', 6], ['will', 'O', 7], ['be', 'O', 8], ['updated', 'O', 9], ['as', 'O', 10], ['additional', 'O', 11], ['information', 'O', 12], ['is', 'O', 13], ['uncovered', 'O', 14], ['.', 'O', 15], ['this', 'O', 16], ['observed', 'O', 17], ['activity', 'O', 18], ['is', 'O', 19], ['significant', 'O', 20], ['because', 'O', 21], ['it', 'O', 22], ['provides', 'O', 23], ['evidence', 'O', 24], ['of', 'O', 25], ['business', 'O', 26], ['relationships', 'O', 27], ['among', 'O', 28], ['criminal', 'O', 29], ['groups', 'O', 30], [',', 'O', 31], ['which', 'O', 32], ['may', 'O', 33], ['enable', 'O', 34], ['the', 'O', 35], ['groups', 'O', 36], ['to', 'O', 37], ['attain', 'O', 38], ['higher', 'O', 39], ['levels', 'O', 40], ['of', 'O', 41], ['technical', 'O', 42], ['so', 'O', 43], ['##phi', 'O', 43], ['##stic', 'O', 43], ['##ation', 'O', 43], ['or', 'O',

[['based', 'O', 0], ['on', 'O', 1], ['overlap', 'O', 2], ['##s', 'O', 2], ['in', 'O', 3], ['infrastructure', 'O', 4], ['and', 'O', 5], ['observed', 'O', 6], ['tactics', 'O', 7], [',', 'O', 8], ['techniques', 'O', 9], [',', 'O', 10], ['and', 'O', 11], ['procedures', 'O', 12], ['(', 'O', 13], ['tt', 'O', 14], ['##ps', 'O', 14], [')', 'O', 15], [',', 'O', 16], ['we', 'O', 17], ['assess', 'O', 18], ['with', 'O', 19], ['moderate', 'O', 20], ['confidence', 'O', 21], ['that', 'O', 22], ['this', 'O', 23], ['recent', 'O', 24], ['activity', 'O', 25], ['can', 'O', 26], ['be', 'O', 27], ['attributed', 'O', 28], ['to', 'O', 29], ['te', 'B-adversary', 30], ['##mp', 'I-adversary', 30], ['.', 'I-adversary', 30], ['meta', 'I-adversary', 30], ['##st', 'I-adversary', 30], ['##rik', 'I-adversary', 30], ['##e', 'I-adversary', 30], ['.', 'O', 31], ['the', 'O', 32], ['most', 'O', 33], ['recent', 'O', 34], ['campaign', 'O', 35], ['employed', 'O', 36], ['two', 'O', 37], ['new', 'O', 38], ['delivery', 'O', 39],

[['the', 'O', 0], ['campaign', 'O', 1], ['appeared', 'O', 2], ['to', 'O', 3], ['impact', 'O', 4], ['financial', 'B-victim', 5], ['services', 'I-victim', 6], ['organizations', 'I-victim', 7], ['based', 'I-victim', 8], ['in', 'I-victim', 9], ['southeast', 'I-victim', 10], ['asia', 'I-victim', 11], [',', 'O', 12], ['although', 'O', 13], ['targeting', 'O', 14], ['may', 'O', 15], ['have', 'O', 16], ['been', 'O', 17], ['broader', 'O', 18], ['than', 'O', 19], ['what', 'O', 20], ['was', 'O', 21], ['directly', 'O', 22], ['observed', 'O', 23], ['.', 'O', 24], ['we', 'O', 25], ['have', 'O', 26], ['high', 'O', 27], ['confidence', 'O', 28], ['that', 'O', 29], ['this', 'O', 30], ['recent', 'O', 31], ['campaign', 'O', 32], ['can', 'O', 33], ['be', 'O', 34], ['attributed', 'O', 35], ['to', 'O', 36], ['te', 'B-adversary', 37], ['##mp', 'I-adversary', 37], ['.', 'I-adversary', 37], ['splinter', 'I-adversary', 37], ['based', 'O', 38], ['on', 'O', 39], ['overlap', 'O', 40], ['##s', 'O', 40], ['in', 'O', 4

[['while', 'O', 0], ['we', 'O', 1], ['began', 'O', 2], ['to', 'O', 3], ['see', 'O', 4], ['limited', 'O', 5], ['em', 'B-capability', 6], ['##ote', 'I-capability', 6], ['##t', 'I-capability', 6], ['activity', 'O', 7], ['beginning', 'O', 8], ['on', 'O', 9], ['aug', 'O', 10], ['.', 'O', 11], ['21', 'O', 12], [',', 'O', 13], ['2019', 'O', 14], [',', 'O', 15], ['no', 'O', 16], ['spa', 'O', 17], ['##m', 'O', 17], ['campaigns', 'O', 18], ['had', 'O', 19], ['been', 'O', 20], ['identified', 'O', 21], ['.', 'O', 22], ['on', 'O', 23], ['sept', 'O', 24], ['.', 'O', 25], ['16', 'O', 26], [',', 'O', 27], ['2019', 'O', 28], [',', 'O', 29], ['fire', 'O', 30], ['##eye', 'O', 30], ['threat', 'O', 31], ['intelligence', 'O', 32], ['observed', 'O', 33], ['em', 'B-capability', 34], ['##ote', 'I-capability', 34], ['##t', 'I-capability', 34], ['resume', 'O', 35], ['spa', 'O', 36], ['##mming', 'O', 36], ['using', 'O', 37], ['both', 'O', 38], ['german', 'O', 39], ['and', 'O', 40], ['english', 'O', 41], ['lure', 

[['the', 'O', 0], ['b', 'O', 1], ['##l', 'O', 1], ['appears', 'O', 2], ['to', 'O', 3], ['be', 'O', 4], ['a', 'O', 5], ['legitimate', 'O', 6], ['news', 'O', 7], ['site', 'O', 8], ['that', 'O', 9], ['maintains', 'O', 10], ['pages', 'O', 11], ['in', 'O', 12], ['english', 'O', 13], [',', 'O', 14], ['chinese', 'O', 15], [',', 'O', 16], ['spanish', 'O', 17], [',', 'O', 18], ['portuguese', 'O', 19], [',', 'O', 20], ['and', 'O', 21], ['vietnamese', 'O', 22], ['.', 'O', 23], ['we', 'O', 24], ['have', 'O', 25], ['observed', 'O', 26], ['indications', 'O', 27], ['that', 'O', 28], ['the', 'O', 29], ['b', 'O', 30], ['##l', 'O', 30], ['is', 'O', 31], ['linked', 'O', 32], ['to', 'O', 33], ['the', 'O', 34], ['epoch', 'O', 35], ['times', 'O', 36], ['and', 'O', 37], ['to', 'O', 38], ['the', 'O', 39], ['sound', 'O', 40], ['of', 'O', 41], ['hope', 'O', 42], ['radio', 'O', 43], ['network', 'O', 44], [',', 'O', 45], ['media', 'O', 46], ['organizations', 'O', 47], ['that', 'O', 48], ['open', 'O', 49], ['sourc

[['fire', 'O', 0], ['##eye', 'O', 0], ['threat', 'O', 1], ['intelligence', 'O', 2], ['&', 'O', 3], ['n', 'O', 4], ['##bs', 'O', 4], ['##p', 'O', 4], [';', 'O', 4], ['identified', 'O', 4], ['multiple', 'O', 5], ['modified', 'B-capability', 6], ['tri', 'I-capability', 7], ['##ton', 'I-capability', 7], ['framework', 'I-capability', 8], ['ex', 'I-capability', 9], ['##ec', 'I-capability', 9], ['##utable', 'I-capability', 9], ['##s', 'I-capability', 9], ['uploaded', 'I-capability', 10], ['to', 'O', 11], ['a', 'O', 12], ['mal', 'O', 13], ['##ware', 'O', 13], ['analysis', 'O', 14], ['sand', 'O', 15], ['##box', 'O', 15], ['in', 'O', 16], ['september', 'O', 17], ['2019', 'O', 18], ['.', 'O', 19], ['while', 'O', 20], ['the', 'O', 21], ['modified', 'O', 22], ['ex', 'O', 23], ['##ec', 'O', 23], ['##utable', 'O', 23], ['##s', 'O', 23], ['were', 'O', 24], ['apparently', 'O', 25], ['uploaded', 'O', 26], ['by', 'O', 27], ['a', 'O', 28], ['researcher', 'O', 29], [',', 'O', 30], ['their', 'O', 31], ['rel

[['fire', 'O', 0], ['##eye', 'O', 0], ['threat', 'O', 1], ['intelligence', 'O', 2], ['discovered', 'O', 3], ['infrastructure', 'O', 4], ['leverage', 'O', 5], ['##d', 'O', 5], ['by', 'O', 6], ['a', 'O', 7], ['suspected', 'B-adversary', 8], ['iran', 'I-adversary', 9], ['-', 'I-adversary', 10], ['nexus', 'I-adversary', 11], ['actor', 'I-adversary', 12], ['to', 'O', 13], ['scan', 'O', 14], ['or', 'O', 15], ['compromise', 'O', 16], ['targets', 'O', 17], ['in', 'O', 18], ['the', 'O', 19], ['u', 'O', 20], ['.', 'O', 20], ['s', 'O', 20], ['.', 'O', 21], [',', 'O', 22], ['israel', 'O', 23], [',', 'O', 24], ['saudi', 'O', 25], ['arabia', 'O', 26], [',', 'O', 27], ['united', 'O', 28], ['arab', 'O', 29], ['emirates', 'O', 30], ['(', 'O', 31], ['uae', 'O', 32], [')', 'O', 33], [',', 'O', 34], ['and', 'O', 35], ['others', 'O', 36], ['.', 'O', 37], ['the', 'O', 38], ['actor', 'B-adversary', 39], ["'", 'I-adversary', 40], ['s', 'I-adversary', 40], ['activities', 'O', 41], ['were', 'O', 42], ['primaril

[['exploit', 'O', 1], ['granted', 'O', 2], ['attackers', 'O', 3], ['access', 'O', 4], ['to', 'O', 5], ['browser', 'B-infrastructure', 6], ["'", 'I-infrastructure', 7], ['s', 'I-infrastructure', 7], ['locals', 'I-infrastructure', 8], ['##tor', 'I-infrastructure', 8], ['##age', 'I-infrastructure', 8], ['in', 'O', 10], ['reality', 'O', 11], [',', 'O', 12], ['when', 'O', 13], ['users', 'O', 14], ['clicked', 'O', 15], ['the', 'O', 16], ['links', 'O', 17], [',', 'O', 18], ['malicious', 'B-capability', 19], ['java', 'I-capability', 20], ['##script', 'I-capability', 20], ['code', 'I-capability', 21], ['contained', 'O', 22], ['in', 'O', 23], ['the', 'O', 24], ['html', 'B-infrastructure', 25], ['files', 'I-infrastructure', 26], ['would', 'O', 27], ['execute', 'O', 28], ['and', 'O', 29], ['steal', 'O', 30], ['data', 'O', 31], ['from', 'O', 32], ['the', 'O', 33], ['user', 'O', 34], ["'", 'O', 35], ['s', 'O', 35], ['browser', 'O', 36], ['locals', 'O', 37], ['##tor', 'O', 37], ['##age', 'O', 37], ['

[['while', 'O', 1], ['many', 'O', 2], ['variants', 'O', 3], ['of', 'O', 4], ['ce', 'B-capability', 5], ['##rber', 'I-capability', 5], ['have', 'O', 6], ['been', 'O', 7], ['released', 'O', 8], ['over', 'O', 9], ['time', 'O', 10], [',', 'O', 11], ['this', 'O', 12], ['"', 'B-capability', 13], ['feature', 'I-capability', 14], ['"', 'I-capability', 15], ['has', 'O', 17], ['always', 'O', 18], ['remained', 'O', 19], ['the', 'O', 20], ['same', 'O', 21], ['.', 'O', 22], ['that', 'O', 23], ['is', 'O', 24], ['until', 'O', 25], ['today', 'O', 26], [',', 'O', 27], ['when', 'O', 28], ['both', 'O', 29], ['ems', 'O', 30], ['##iso', 'O', 30], ['##ft', 'O', 30], ['researcher', 'O', 31], ['sarah', 'O', 32], [',', 'O', 33], ['otherwise', 'O', 34], ['known', 'O', 35], ['as', 'O', 36], ['xx', 'O', 38], ['##to', 'O', 38], ['##ffe', 'O', 38], ['##ex', 'O', 38], ['##x', 'O', 38], [',', 'O', 39], ['and', 'O', 40], ['swift', 'O', 42], ['##ons', 'O', 42], ['##ec', 'O', 42], ['##urity', 'O', 42], ['found', 'O', 44

[['the', 'O', 1], ['standard', 'O', 2], [',', 'O', 3], ['which', 'O', 4], ['works', 'O', 5], ['by', 'O', 6], ['deploy', 'O', 7], ['##ing', 'O', 7], ['a', 'O', 8], ['new', 'O', 9], ['mac', 'O', 10], ['address', 'O', 11], ['to', 'O', 12], ['a', 'O', 13], ['device', 'O', 14], ['in', 'O', 15], ['order', 'O', 16], ['to', 'O', 17], ['break', 'O', 18], ['down', 'O', 19], ['user', 'O', 20], ['tracking', 'O', 21], ['attempts', 'O', 22], [',', 'O', 23], ['is', 'O', 24], ['still', 'O', 25], ['under', 'O', 26], ['development', 'O', 27], ['at', 'O', 28], ['the', 'O', 29], ['ieee', 'O', 30], ['(', 'O', 31], ['institute', 'O', 32], ['of', 'O', 33], ['electrical', 'O', 34], ['and', 'O', 35], ['electronics', 'O', 36], ['engineers', 'O', 37], [')', 'O', 38], ['and', 'O', 39], ['has', 'O', 40], ['already', 'O', 41], ['passed', 'O', 42], ['a', 'O', 43], ['few', 'O', 44], ['security', 'O', 45], ['tests', 'O', 46], ['.', 'O', 47]]
[['the', 'O', 0], ['standard', 'O', 1], [',', 'O', 2], ['which', 'O', 3], ['w

[['willem', 'O', 0], ['de', 'O', 1], ['gr', 'O', 2], ['##oot', 'O', 2], [',', 'O', 3], ['a', 'O', 4], ['dutch', 'O', 5], ['security', 'O', 6], ['expert', 'O', 7], [',', 'O', 8], ['is', 'O', 9], ['asking', 'O', 10], ['owners', 'O', 11], ['of', 'O', 12], ['defunct', 'O', 13], ['or', 'O', 14], ['soon', 'O', 15], ['-', 'O', 16], ['to', 'O', 17], ['-', 'O', 18], ['be', 'O', 19], ['-', 'O', 20], ['dead', 'O', 21], ['online', 'O', 22], ['stores', 'O', 23], ['to', 'O', 24], ['donate', 'O', 25], ['their', 'O', 26], ['domains', 'B-infrastructure', 27], ['so', 'O', 28], ['he', 'O', 29], ['can', 'O', 30], ['set', 'O', 31], ['up', 'O', 32], ['honey', 'B-infrastructure', 33], ['##pot', 'I-infrastructure', 33], ['##s', 'I-infrastructure', 33], ['and', 'O', 34], ['track', 'O', 35], ['credit', 'B-capability', 36], ['card', 'I-capability', 37], ['stealing', 'I-capability', 38], ['mal', 'I-capability', 39], ['##ware', 'I-capability', 39], ['and', 'O', 40], ['other', 'O', 41], ['types', 'O', 42], ['of', '

[['some', 'O', 0], ['of', 'O', 1], ['the', 'O', 2], ['malicious', 'B-capability', 3], ['android', 'I-capability', 4], ['mine', 'I-capability', 5], ['##craft', 'I-capability', 5], ['mod', 'I-capability', 6], ['##s', 'I-capability', 6], ['(', 'O', 7], ['es', 'O', 8], ['##et', 'O', 8], [')', 'O', 9], ['news', 'O', 11], ['of', 'O', 12], ['malicious', 'B-capability', 13], ['android', 'I-capability', 14], ['apps', 'I-capability', 15], ['hosted', 'O', 16], ['on', 'O', 17], ['the', 'O', 18], ['google', 'B-infrastructure', 19], ['play', 'I-infrastructure', 20], ['store', 'I-infrastructure', 21], ['does', 'O', 22], ['n', 'O', 23], ["'", 'O', 23], ['t', 'O', 23], ['seem', 'O', 24], ['to', 'O', 25], ['stop', 'O', 26], ['coming', 'O', 27], ['these', 'O', 28], ['days', 'O', 29], [',', 'O', 30], ['as', 'O', 31], ['es', 'O', 32], ['##et', 'O', 32], ['and', 'O', 33], ['z', 'O', 34], ['##sca', 'O', 34], ['##ler', 'O', 34], ['researchers', 'O', 35], ['recently', 'O', 36], ['disclosed', 'O', 37], ['they',

[['the', 'O', 1], ['data', 'O', 2], ['from', 'O', 3], ['ava', 'O', 4], ['##st', 'O', 4], ["'", 'O', 5], ['s', 'O', 5], ['report', 'O', 6], ['was', 'O', 7], ['gathered', 'O', 8], ['via', 'O', 9], ['av', 'O', 10], ['##g', 'O', 10], ['tune', 'O', 11], ['##up', 'O', 11], ['’', 'O', 12], ['s', 'O', 12], ['automatic', 'O', 13], ['software', 'O', 14], ['update', 'O', 15], ['##r', 'O', 15], ['feature', 'O', 16], [',', 'O', 17], ['therefore', 'O', 18], [',', 'O', 19], ['it', 'O', 20], ['also', 'O', 21], ['included', 'O', 22], ['details', 'O', 23], ['on', 'O', 24], ['the', 'O', 25], ['version', 'O', 26], ['number', 'O', 27], ['of', 'O', 28], ['each', 'O', 29], ['software', 'O', 30], ['package', 'O', 31], ['.', 'O', 32], ['more', 'O', 34], ['than', 'O', 35], ['half', 'O', 36], ['of', 'O', 37], ['active', 'O', 38], ['software', 'O', 39], ['install', 'O', 40], ['##s', 'O', 40], ['are', 'O', 41], ['out', 'O', 42], ['-', 'O', 43], ['of', 'O', 44], ['-', 'O', 45], ['date', 'O', 46]]
[['the', 'O', 0], 

[['a', 'O', 0], ['bot', 'B-capability', 1], ['##net', 'I-capability', 1], ['specialized', 'O', 2], ['in', 'O', 3], ['gift', 'O', 4], ['card', 'O', 5], ['fraud', 'O', 6], ['is', 'O', 7], ['using', 'O', 8], ['the', 'O', 9], ['infrastructure', 'B-infrastructure', 10], ['of', 'I-infrastructure', 11], ['nearly', 'I-infrastructure', 12], ['1', 'I-infrastructure', 13], [',', 'I-infrastructure', 13], ['000', 'I-infrastructure', 13], ['websites', 'I-infrastructure', 14], ['to', 'O', 15], ['check', 'O', 16], ['the', 'O', 17], ['balance', 'O', 18], ['of', 'O', 19], ['several', 'O', 20], ['types', 'O', 21], ['of', 'O', 22], ['electronic', 'O', 23], ['gift', 'O', 24], ['cards', 'O', 25], ['in', 'O', 26], ['order', 'O', 27], ['to', 'O', 28], ['def', 'O', 29], ['##ra', 'O', 29], ['##ud', 'O', 29], ['legitimate', 'O', 30], ['card', 'O', 31], ['owners', 'O', 32], ['.', 'O', 33], ['di', 'O', 35], ['##sti', 'O', 35], ['##l', 'O', 35], ['networks', 'O', 36], [',', 'O', 37], ['a', 'O', 38], ['cyber', 'O', 

[['march', 'O', 1], ['18th', 'O', 2], ['2017', 'O', 3]]
[['march', 'O', 0], ['18th', 'O', 1], ['2017', 'O', 2]]
[['during', 'O', 0], ['the', 'O', 1], ['past', 'O', 2], ['year', 'O', 3], [',', 'O', 4], ['let', 'O', 5], ["'", 'O', 6], ['s', 'O', 6], ['en', 'O', 7], ['##cr', 'O', 7], ['##yp', 'O', 7], ['##t', 'O', 7], ['has', 'O', 8], ['issued', 'O', 9], ['a', 'O', 10], ['total', 'O', 11], ['of', 'O', 12], ['15', 'O', 13], [',', 'O', 13], ['270', 'O', 13], ['ss', 'B-infrastructure', 14], ['##l', 'I-infrastructure', 14], ['certificates', 'I-infrastructure', 15], ['that', 'O', 16], ['contained', 'O', 17], ['the', 'O', 18], ['word', 'O', 19], ['"', 'O', 20], ['pay', 'O', 21], ['##pal', 'O', 21], ['"', 'O', 22], ['in', 'O', 23], ['the', 'O', 24], ['domain', 'O', 25], ['name', 'O', 26], ['or', 'O', 27], ['the', 'O', 28], ['certificate', 'O', 29], ['identity', 'O', 30], ['.', 'O', 31], ['of', 'O', 33], ['these', 'O', 34], [',', 'O', 35], ['approximately', 'O', 36], ['14', 'O', 37], [',', 'O', 3

[['in', 'O', 0], ['february', 'O', 1], ['2015', 'O', 2], [',', 'O', 3], ['google', 'O', 4], ['shut', 'O', 5], ['down', 'O', 6], ['most', 'O', 7], ['of', 'O', 8], ['the', 'O', 9], ['desktop', 'O', 10], ['and', 'O', 11], ['mobile', 'O', 12], ['clients', 'O', 13], ['and', 'O', 14], ['recommended', 'O', 15], ['users', 'O', 16], ['to', 'O', 17], ['move', 'O', 18], ['to', 'O', 19], ['its', 'O', 20], ['newer', 'O', 21], ['google', 'O', 22], ['hang', 'O', 23], ['##outs', 'O', 23], ['service', 'O', 24], ['.', 'O', 25], ['in', 'O', 27], ['spite', 'O', 28], ['of', 'O', 29], ['the', 'O', 30], ['announcement', 'O', 31], [',', 'O', 32], ['google', 'O', 33], ['left', 'O', 34], ['talk', 'O', 35], ['server', 'O', 36], ['running', 'O', 37], [',', 'O', 38], ['and', 'O', 39], ['many', 'O', 40], ['users', 'O', 41], ['just', 'O', 42], ['switched', 'O', 43], ['to', 'O', 44], ['x', 'O', 45], ['##mp', 'O', 45], ['##p', 'O', 45], ['clients', 'O', 46], ['like', 'O', 47], ['adi', 'O', 48], ['##um', 'O', 48], [','

[['the', 'O', 1], ['post', 'O', 2], ['sharing', 'O', 3], ['the', 'O', 4], ['information', 'O', 5], ['was', 'O', 6], ['spotted', 'O', 7], ['by', 'O', 8], ['a', 'O', 9], ['researcher', 'O', 10], ['at', 'O', 11], ['under', 'O', 12], ['the', 'O', 13], ['breach', 'O', 14], [',', 'O', 15], ['a', 'O', 16], ['company', 'O', 17], ['that', 'O', 18], ['monitors', 'O', 19], ['the', 'O', 20], ['cyber', 'O', 21], ['##cr', 'O', 21], ['##ime', 'O', 21], ['space', 'O', 22], ['and', 'O', 23], ['currently', 'O', 24], ['working', 'O', 25], ['on', 'O', 26], ['releasing', 'O', 27], ['a', 'O', 28], ['new', 'O', 29], ['service', 'O', 30], ['aiming', 'O', 31], ['to', 'O', 32], ['provide', 'O', 33], ['companies', 'O', 34], ['intelligence', 'O', 35], ['about', 'O', 36], ['potential', 'O', 37], ['breach', 'O', 38], ['##es', 'O', 38], ['.', 'O', 39]]
[['the', 'O', 0], ['post', 'O', 1], ['sharing', 'O', 2], ['the', 'O', 3], ['information', 'O', 4], ['was', 'O', 5], ['spotted', 'O', 6], ['by', 'O', 7], ['a', 'O', 8]

[['va', 'O', 1], ['##de', 'O', 1], ['secure', 'O', 2], ["'", 'O', 3], ['s', 'O', 3], ['phi', 'O', 5], ['##sher', 'O', 5], ['##s', 'O', 5], ["'", 'O', 6], ['favorites', 'O', 7], ['report', 'O', 9], ['for', 'O', 10], ['q', 'O', 11], ['##4', 'O', 11], ['2019', 'O', 12], ['highlights', 'O', 13], ['the', 'O', 14], ['25', 'O', 15], ['most', 'O', 16], ['imp', 'O', 17], ['##erson', 'O', 17], ['##ated', 'O', 17], ['brands', 'O', 18], ['in', 'O', 19], ['phi', 'O', 20], ['##shing', 'O', 20], ['attacks', 'O', 21], ['with', 'O', 22], ['the', 'O', 23], ['list', 'O', 24], ['being', 'O', 25], ['compiled', 'O', 26], ['after', 'O', 27], ['examining', 'O', 28], ['phi', 'B-capability', 29], ['##shing', 'I-capability', 29], ['ur', 'I-capability', 30], ['##ls', 'I-capability', 30], ['detected', 'O', 31], ['by', 'O', 32], ['va', 'O', 33], ['##de', 'O', 33], ['secure', 'O', 34], ["'", 'O', 35], ['s', 'O', 35], ['technology', 'O', 36], ['.', 'O', 37], ['"', 'O', 39], ['lever', 'O', 40], ['##aging', 'O', 40], [

[['starting', 'O', 1], ['today', 'O', 2], ['with', 'O', 3], ['the', 'O', 4], ['windows', 'O', 5], ['10', 'O', 6], ['mail', 'O', 7], ['and', 'O', 8], ['calendar', 'O', 9], ['icons', 'O', 10], [',', 'O', 11], ['microsoft', 'O', 12], ['is', 'O', 13], ['slowly', 'O', 14], ['releasing', 'O', 15], ['new', 'O', 16], ['updated', 'O', 17], ['and', 'O', 18], ['colorful', 'O', 19], ['icons', 'O', 20], ['to', 'O', 21], ['users', 'O', 22], ['running', 'O', 23], ['windows', 'O', 24], ['10', 'O', 25], ['insider', 'O', 26], ['builds', 'O', 27]]
[['starting', 'O', 0], ['today', 'O', 1], ['with', 'O', 2], ['the', 'O', 3], ['windows', 'O', 4], ['10', 'O', 5], ['mail', 'O', 6], ['and', 'O', 7], ['calendar', 'O', 8], ['icons', 'O', 9], [',', 'O', 10], ['microsoft', 'O', 11], ['is', 'O', 12], ['slowly', 'O', 13], ['releasing', 'O', 14], ['new', 'O', 15], ['updated', 'O', 16], ['and', 'O', 17], ['colorful', 'O', 18], ['icons', 'O', 19], ['to', 'O', 20], ['users', 'O', 21], ['running', 'O', 22], ['windows', '

[['"', 'O', 1], ['outside', 'O', 2], ['of', 'O', 3], ['its', 'O', 4], ['google', 'O', 5], ['education', 'O', 6], ['platform', 'O', 7], [',', 'O', 8], ['google', 'O', 9], ['forbid', 'O', 10], ['##s', 'O', 10], ['children', 'O', 11], ['under', 'O', 12], ['the', 'O', 13], ['age', 'O', 14], ['of', 'O', 15], ['13', 'O', 16], ['in', 'O', 17], ['the', 'O', 18], ['united', 'O', 19], ['states', 'O', 20], ['from', 'O', 21], ['having', 'O', 22], ['their', 'O', 23], ['own', 'O', 24], ['google', 'O', 25], ['accounts', 'O', 26], ['.', 'O', 27]]
[['"', 'O', 0], ['outside', 'O', 1], ['of', 'O', 2], ['its', 'O', 3], ['google', 'O', 4], ['education', 'O', 5], ['platform', 'O', 6], [',', 'O', 7], ['google', 'O', 8], ['forbid', 'O', 9], ['##s', 'O', 9], ['children', 'O', 10], ['under', 'O', 11], ['the', 'O', 12], ['age', 'O', 13], ['of', 'O', 14], ['13', 'O', 15], ['in', 'O', 16], ['the', 'O', 17], ['united', 'O', 18], ['states', 'O', 19], ['from', 'O', 20], ['having', 'O', 21], ['their', 'O', 22], ['own'

[['to', 'O', 1], ['resolve', 'O', 2], ['these', 'O', 3], ['performance', 'O', 4], ['issues', 'O', 5], [',', 'O', 6], ['microsoft', 'O', 7], ['has', 'O', 8], ['been', 'O', 9], ['testing', 'O', 10], ['a', 'O', 11], ['new', 'O', 12], ['mit', 'O', 13], ['##iga', 'O', 13], ['##tion', 'O', 13], ['technique', 'O', 14], ['called', 'O', 15], ['re', 'O', 16], ['##tp', 'O', 16], ['##olin', 'O', 16], ['##e', 'O', 16], ['in', 'O', 17], ['windows', 'O', 18], ['insider', 'O', 19], ['preview', 'O', 20], ['builds', 'O', 21], ['since', 'O', 22], ['build', 'O', 23], ['1827', 'O', 24], ['##2', 'O', 24], ['.', 'O', 25], ['this', 'O', 27], ['mit', 'O', 28], ['##iga', 'O', 28], ['##tion', 'O', 28], ['has', 'O', 29], ['recently', 'O', 30], ['been', 'O', 31], ['back', 'O', 32], ['##port', 'O', 32], ['##ed', 'O', 32], ['to', 'O', 34], ['windows', 'O', 35], ['10', 'O', 36], ['october', 'O', 37], ['2018', 'O', 38]]
[['this', 'O', 1], ['mit', 'O', 2], ['##iga', 'O', 2], ['##tion', 'O', 2], ['has', 'O', 3], ['recen

[['the', 'O', 1], ['screens', 'O', 2], ['##hot', 'O', 2], ['##s', 'O', 2], ['show', 'O', 3], ['that', 'O', 4], ['microsoft', 'O', 5], ["'", 'O', 6], ['s', 'O', 6], ['new', 'O', 7], ['edge', 'O', 8], ['will', 'O', 9], ['share', 'O', 10], ['a', 'O', 11], ['lot', 'O', 12], ['with', 'O', 13], ['google', 'O', 14], ["'", 'O', 15], ['s', 'O', 15], ['chrome', 'O', 16], ['on', 'O', 17], ['the', 'O', 18], ['user', 'O', 19], ['interface', 'O', 20], ['side', 'O', 21], [',', 'O', 22], ['but', 'O', 23], ['it', 'O', 24], ['will', 'O', 25], ['also', 'O', 26], ['come', 'O', 27], ['with', 'O', 28], ['a', 'O', 29], ['number', 'O', 30], ['of', 'O', 31], ['unique', 'O', 32], ['features', 'O', 33], ['such', 'O', 34], ['as', 'O', 35], ['an', 'O', 36], ['integrated', 'O', 37], ['news', 'O', 38], ['feed', 'O', 39], ['powered', 'O', 40], ['by', 'O', 41], ['microsoft', 'O', 42], ['news', 'O', 43], [',', 'O', 44], ['bing', 'O', 46], ['as', 'O', 47], ['the', 'O', 48], ['default', 'O', 49], ['search', 'O', 50], ['e

[['it', 'O', 1], ['should', 'O', 2], ['be', 'O', 3], ['noted', 'O', 4], ['that', 'O', 5], ['the', 'B-capability', 6], ['ransom', 'I-capability', 7], ['##ware', 'I-capability', 7], ['has', 'O', 8], ['been', 'O', 9], ['commonly', 'O', 10], ['called', 'O', 11], ['the', 'B-capability', 12], ['mail', 'I-capability', 13], ['##to', 'I-capability', 13], ['ransom', 'I-capability', 14], ['##ware', 'I-capability', 14], ['due', 'O', 15], ['to', 'O', 16], ['the', 'O', 17], ['app', 'O', 18], ['##ended', 'O', 18], ['extension', 'O', 19], [',', 'O', 20], ['but', 'O', 21], ['analysis', 'O', 22], ['of', 'O', 23], ['one', 'O', 24], ['of', 'O', 25], ['its', 'O', 26], ['dec', 'O', 27], ['##ry', 'O', 27], ['##pt', 'O', 27], ['##ors', 'O', 27], ['indicates', 'O', 29], ['that', 'O', 30], ['it', 'O', 31], ['is', 'O', 32], ['named', 'O', 33], ['net', 'B-capability', 34], ['##walker', 'I-capability', 34], ['.', 'O', 35], ['we', 'O', 37], ['will', 'O', 38], ['discuss', 'O', 39], ['this', 'O', 40], ['later', 'O', 

[['from', 'O', 1], ['bulb', 'O', 2], ['to', 'O', 3], ['bridge', 'O', 4], ['to', 'O', 5], ['network', 'O', 6], ['tracked', 'O', 8], ['as', 'O', 9], ['cv', 'O', 10], ['##e', 'O', 10], ['-', 'O', 10], ['2020', 'O', 10], ['-', 'O', 11], ['600', 'O', 12], ['##7', 'O', 12], [',', 'O', 13], ['the', 'O', 14], ['bug', 'O', 15], ['has', 'O', 16], ['a', 'O', 17], ['severity', 'O', 18], ['score', 'O', 19], ['of', 'O', 20], ['7', 'O', 21], ['.', 'O', 21], ['9', 'O', 21], ['out', 'O', 22], ['of', 'O', 23], ['10', 'O', 24], ['.', 'O', 25], ['it', 'O', 26], ['is', 'O', 27], ['a', 'O', 28], ['heap', 'O', 29], ['buffer', 'O', 30], ['over', 'O', 31], ['##flow', 'O', 31], ['that', 'O', 32], ['can', 'O', 33], ['be', 'O', 34], ['exploited', 'O', 35], ['remotely', 'O', 36], ['in', 'O', 37], ['philips', 'B-victim', 38], ['hue', 'I-victim', 39], ['bridge', 'O', 40], ['model', 'O', 41], ['2', 'O', 42], ['.', 'O', 42], ['x', 'O', 42], ['to', 'O', 43], ['execute', 'O', 44], ['arbitrary', 'O', 45], ['code', 'O', 4

[['the', 'O', 0], ['u', 'O', 1], ['.', 'O', 1], ['s', 'O', 1], ['.', 'O', 2], ['department', 'O', 3], ['of', 'O', 4], ['justice', 'O', 5], ['(', 'O', 6], ['do', 'O', 7], ['##j', 'O', 7], [')', 'O', 8], ['today', 'O', 9], ['issued', 'O', 10], ['a', 'O', 11], ['notification', 'O', 12], ['to', 'O', 13], ['raise', 'O', 14], ['awareness', 'O', 15], ['among', 'O', 16], ['victims', 'O', 17], ['of', 'O', 18], ['the', 'O', 19], ['quantum', 'O', 21], ['stress', 'O', 22], ['##er', 'O', 22], ['distributed', 'O', 23], ['denial', 'B-capability', 24], ['of', 'I-capability', 25], ['service', 'I-capability', 26], ['(', 'I-capability', 27], ['dd', 'I-capability', 28], ['##os', 'I-capability', 28], [')', 'I-capability', 29], ['for', 'O', 30], ['-', 'O', 31], ['hire', 'O', 32], ['service', 'O', 33], ['operated', 'O', 34], ['by', 'O', 35], ['david', 'B-adversary', 37], ['bu', 'I-adversary', 38], ['##kos', 'I-adversary', 38], ['##ki', 'I-adversary', 38], ['.', 'O', 39], ['dd', 'B-capability', 41], ['##os', 

[['the', 'O', 1], ['campaign', 'O', 2], ['uses', 'O', 3], ['a', 'O', 4], ['devi', 'O', 5], ['##ous', 'O', 5], ['method', 'O', 6], ['to', 'O', 7], ['get', 'O', 8], ['the', 'O', 9], ['potential', 'O', 10], ['victims', 'O', 11], ['to', 'O', 12], ['install', 'O', 13], ['the', 'O', 14], ['mal', 'O', 15], ['##ware', 'O', 15], ['on', 'O', 16], ['their', 'O', 17], ['devices', 'O', 18], [':', 'O', 19], ['it', 'O', 20], ['asks', 'O', 22], ['them', 'O', 23], ['to', 'O', 25], ['enable', 'O', 26], ['google', 'O', 28], ['play', 'O', 29], ['protect', 'O', 30], ['while', 'O', 31], ['actually', 'O', 32], ['di', 'O', 34], ['##sa', 'O', 34], ['##bling', 'O', 34], ['it', 'O', 35], ['after', 'O', 36], ['being', 'O', 37], ['granted', 'O', 38], ['permission', 'O', 39], ['##s', 'O', 39], ['on', 'O', 40], ['the', 'O', 41], ['device', 'O', 42], ['.', 'O', 43], ['to', 'O', 45], ['deliver', 'O', 46], ['the', 'O', 47], ['mal', 'O', 48], ['##ware', 'O', 48], [',', 'O', 49], ['the', 'B-adversary', 50], ['attackers',

[['please', 'O', 1], ['note', 'O', 2], ['that', 'O', 3], ['exam', 'O', 4], ['vo', 'O', 5], ['##ucher', 'O', 5], ['##s', 'O', 5], ['are', 'O', 6], ['not', 'O', 7], ['included', 'O', 8], ['with', 'O', 9], ['this', 'O', 10], ['deal', 'O', 11], ['.', 'O', 12], ['if', 'O', 14], ['cisco', 'O', 15], ['certification', 'O', 16], ['##s', 'O', 16], ['are', 'O', 17], ['not', 'O', 19], ['what', 'O', 20], ['you', 'O', 21], ['are', 'O', 22], ['looking', 'O', 23], ['for', 'O', 24], [',', 'O', 25], ['we', 'O', 26], ['also', 'O', 27], ['have', 'O', 28], ['a', 'O', 29], ['the', 'O', 31], ['dock', 'O', 32], ['##er', 'O', 32], ['and', 'O', 33], ['ku', 'O', 34], ['##ber', 'O', 34], ['##net', 'O', 34], ['##es', 'O', 34], ['certification', 'O', 35], ['training', 'O', 36], ['bundle', 'O', 37], ['for', 'O', 38], ['$', 'O', 39], ['15', 'O', 40], ['.', 'O', 40], ['99', 'O', 40], ['and', 'O', 41], ['a', 'O', 42], ['com', 'O', 44], ['##pt', 'O', 44], ['##ia', 'O', 44], ['it', 'O', 45], ['certification', 'O', 46], [

[['a', 'B-capability', 0], ['modular', 'I-capability', 1], ['mal', 'I-capability', 2], ['##ware', 'I-capability', 2], ['with', 'O', 3], ['worm', 'B-capability', 4], ['capabilities', 'I-capability', 5], ['exploits', 'O', 7], ['known', 'O', 8], ['vu', 'O', 9], ['##ln', 'O', 9], ['##era', 'O', 9], ['##bilities', 'O', 9], ['in', 'O', 10], ['servers', 'B-infrastructure', 11], ['running', 'O', 12], ['elastic', 'O', 13], ['##sea', 'O', 13], ['##rch', 'O', 13], [',', 'O', 14], ['had', 'O', 15], ['##oop', 'O', 15], [',', 'O', 16], ['red', 'O', 17], ['##is', 'O', 17], [',', 'O', 18], ['spring', 'O', 19], [',', 'O', 20], ['web', 'O', 21], ['##logic', 'O', 21], [',', 'O', 22], ['think', 'O', 23], ['##ph', 'O', 23], ['##p', 'O', 23], [',', 'O', 24], ['and', 'O', 25], ['sql', 'O', 26], ['##ser', 'O', 26], ['##ver', 'O', 26], ['to', 'O', 27], ['spread', 'O', 28], ['from', 'O', 29], ['one', 'B-infrastructure', 30], ['server', 'I-infrastructure', 31], ['to', 'O', 32], ['another', 'O', 33], ['and', 'O',

[['"', 'O', 1], ['with', 'O', 2], ['this', 'O', 3], ['update', 'O', 4], [',', 'O', 5], ['you', 'O', 6], ['can', 'O', 7], ['block', 'O', 8], ['access', 'O', 9], ['2016', 'O', 10], ['from', 'O', 11], ['executing', 'O', 12], ['visual', 'O', 13], ['basic', 'O', 14], ['for', 'O', 15], ['applications', 'O', 16], ['(', 'O', 17], ['v', 'O', 18], ['##ba', 'O', 18], [')', 'O', 19], ['code', 'O', 20], ['if', 'O', 21], ['the', 'O', 22], ['file', 'O', 23], ['that', 'O', 24], ['contains', 'O', 25], ['v', 'O', 26], ['##ba', 'O', 26], ['code', 'O', 27], ['is', 'O', 28], ['identified', 'O', 29], ['as', 'O', 30], ['being', 'O', 31], ['from', 'O', 32], ['the', 'O', 33], ['internet', 'O', 34], ['.', 'O', 35], ['"', 'O', 36]]
[['"', 'O', 0], ['with', 'O', 1], ['this', 'O', 2], ['update', 'O', 3], [',', 'O', 4], ['you', 'O', 5], ['can', 'O', 6], ['block', 'O', 7], ['access', 'O', 8], ['2016', 'O', 9], ['from', 'O', 10], ['executing', 'O', 11], ['visual', 'O', 12], ['basic', 'O', 13], ['for', 'O', 14], ['app

[['north', 'B-adversary', 0], ['korean', 'I-adversary', 1], ['backed', 'I-adversary', 2], ['hacking', 'I-adversary', 3], ['groups', 'I-adversary', 4], ['were', 'O', 5], ['behind', 'O', 6], ['multiple', 'O', 7], ['cyber', 'O', 8], ['##att', 'O', 8], ['##ack', 'O', 8], ['##s', 'O', 8], ['impact', 'O', 10], ['##ing', 'O', 10], ['financial', 'B-victim', 12], ['institutions', 'I-victim', 13], ['and', 'I-victim', 14], ['crypt', 'I-victim', 15], ['##oc', 'I-victim', 15], ['##ur', 'I-victim', 15], ['##ren', 'I-victim', 15], ['##cy', 'I-victim', 15], ['exchanges', 'I-victim', 16], ['as', 'O', 17], ['detailed', 'O', 18], ['in', 'O', 19], ['a', 'O', 20], ['report', 'O', 21], ['issued', 'O', 22], ['by', 'O', 23], ['a', 'O', 24], ['panel', 'O', 25], ['of', 'O', 26], ['experts', 'O', 27], ['for', 'O', 29], ['the', 'O', 30], ['united', 'O', 31], ['nations', 'O', 32], ['(', 'O', 33], ['un', 'O', 34], [')', 'O', 35], ['security', 'O', 37], ['council', 'O', 38], ['.', 'O', 39], ['according', 'O', 41], [

[['french', 'O', 0], ['security', 'O', 1], ['researcher', 'O', 2], ['robert', 'O', 3], ['baptiste', 'O', 4], ['found', 'O', 5], ['the', 'O', 6], ['api', 'O', 7], ['of', 'O', 8], ['the', 'O', 9], ['63', 'O', 11], ['##red', 'O', 11], ['safe', 'O', 12], ['mobile', 'O', 14], ['application', 'O', 15], ['known', 'O', 16], ['as', 'O', 17], ['"', 'O', 18], ['ye', 'O', 19], ['##lp', 'O', 19], ['for', 'O', 20], ['conservatives', 'O', 21], ['"', 'O', 22], ['wide', 'O', 24], ['open', 'O', 25], [',', 'O', 26], ['with', 'O', 27], ['no', 'O', 28], ['authentication', 'O', 29], ['needed', 'O', 30], ['to', 'O', 31], ['access', 'O', 32], ['and', 'O', 33], ['view', 'O', 34], ['the', 'O', 35], ['data', 'O', 36], ['stored', 'O', 37], ['within', 'O', 38], ['the', 'O', 39], ['app', 'O', 40], ["'", 'O', 41], ['s', 'O', 41], ['database', 'O', 43], ['.', 'O', 44], ['according', 'O', 46], ['to', 'O', 47], ['its', 'O', 48], ['official', 'O', 49], ['description', 'O', 50], [',', 'O', 51], ['63', 'O', 52], ['##red',

[['this', 'O', 1], ['issue', 'O', 2], [',', 'O', 3], ['nicknamed', 'O', 4], ['a', 'O', 5], ['"', 'B-capability', 6], ['web', 'I-capability', 7], ['cache', 'I-capability', 8], ['deception', 'I-capability', 9], ['attack', 'I-capability', 10], [',', 'I-capability', 11], ['"', 'I-capability', 12], ['was', 'O', 13], ['discovered', 'O', 14], ['by', 'O', 15], ['israeli', 'O', 16], ['security', 'O', 17], ['researcher', 'O', 18], ['om', 'O', 19], ['##er', 'O', 19], ['gil', 'O', 20], [',', 'O', 21], ['who', 'O', 22], ['says', 'O', 23], ['it', 'O', 24], ['affects', 'O', 25], ['many', 'O', 26], ['online', 'O', 27], ['services', 'O', 28], ['.', 'O', 29]]
[['this', 'O', 0], ['issue', 'O', 1], [',', 'O', 2], ['nicknamed', 'O', 3], ['a', 'O', 4], ['"', 'B-capability', 5], ['web', 'I-capability', 6], ['cache', 'I-capability', 7], ['deception', 'I-capability', 8], ['attack', 'I-capability', 9], [',', 'I-capability', 10], ['"', 'I-capability', 11], ['was', 'O', 12], ['discovered', 'O', 13], ['by', 'O', 1

[['we', 'O', 0], ['are', 'O', 1], ['experiencing', 'O', 2], ['an', 'O', 3], ['out', 'O', 4], ['##age', 'O', 4], ['from', 'O', 5], ['our', 'O', 6], ['service', 'O', 7], ['provider', 'O', 8], [',', 'O', 9], ['@', 'O', 10], ['aw', 'O', 10], ['##sc', 'O', 10], ['##lou', 'O', 10], ['##d', 'O', 10], ['-', 'O', 11], ['please', 'O', 12], ['be', 'O', 13], ['patient', 'O', 14], ['as', 'O', 15], ['we', 'O', 16], ['await', 'O', 17], ['their', 'O', 18], ['resolution', 'O', 19], ['.', 'O', 20], ['—', 'O', 22], ['my', 'O', 23], ['##foot', 'O', 23], ['##ball', 'O', 23], ['##now', 'O', 23], ['.', 'O', 23], ['com', 'O', 23], ['(', 'O', 24], ['@', 'O', 25], ['my', 'O', 25], ['##foot', 'O', 25], ['##ball', 'O', 25], ['##now', 'O', 25], [')', 'O', 26], ['february', 'O', 27], ['28', 'O', 28], [',', 'O', 29], ['2017', 'O', 30], ['im', 'O', 32], ['##gur', 'O', 32], ['website', 'O', 33], ['returning', 'O', 34], ['blank', 'O', 35], ['web', 'O', 36], ['page', 'O', 37]]
[['—', 'O', 1], ['my', 'O', 2], ['##foot', 

[['yesterday', 'O', 0], ['microsoft', 'O', 1], ['released', 'O', 2], ['insider', 'O', 3], ['preview', 'O', 4], ['build', 'O', 5], ['150', 'O', 6], ['##46', 'O', 6], ['for', 'O', 8], ['pc', 'O', 9], ['to', 'O', 10], ['insider', 'O', 11], ['##s', 'O', 11], ['in', 'O', 12], ['the', 'O', 13], ['fast', 'O', 14], ['ring', 'O', 15], [',', 'O', 16], ['for', 'O', 17], ['the', 'O', 18], ['most', 'O', 19], ['part', 'O', 20], [',', 'O', 21], ['this', 'O', 22], ['release', 'O', 23], ['is', 'O', 24], ['a', 'O', 25], ['bug', 'O', 26], ['fix', 'O', 27], ['with', 'O', 28], ['some', 'O', 29], ['cosmetic', 'O', 30], ['changes', 'O', 31], ['.', 'O', 32], ['the', 'O', 33], ['most', 'O', 34], ['notable', 'O', 35], ['new', 'O', 36], ['feature', 'O', 37], [',', 'O', 38], ['though', 'O', 39], [',', 'O', 40], ['is', 'O', 41], ['the', 'O', 42], ['introduction', 'O', 43], ['of', 'O', 44], ['application', 'O', 45], ['installation', 'O', 46], ['control', 'O', 47], [',', 'O', 48], ['which', 'O', 49], ['allows', 'O',

[['according', 'O', 1], ['to', 'O', 2], ['google', 'O', 3], [',', 'O', 4], ['the', 'O', 5], ['chrome', 'O', 6], ['settings', 'O', 7], ['changes', 'O', 8], ['for', 'O', 9], ['which', 'O', 10], ['safe', 'O', 11], ['brows', 'O', 12], ['##ing', 'O', 12], ['will', 'O', 13], ['show', 'O', 14], ['notification', 'O', 15], ['##s', 'O', 15], ['on', 'O', 16], ['mac', 'O', 17], ['##os', 'O', 17], ['include', 'O', 18], ['the', 'O', 19], ['"', 'O', 20], ['manipulation', 'O', 21], ['of', 'O', 22], ['chrome', 'O', 23], ['user', 'O', 24], ['settings', 'O', 25], [',', 'O', 26], ['specifically', 'O', 27], ['the', 'O', 28], ['start', 'O', 29], ['page', 'O', 30], [',', 'O', 31], ['home', 'O', 32], ['page', 'O', 33], [',', 'O', 34], ['and', 'O', 35], ['default', 'O', 36], ['search', 'O', 37], ['engine', 'O', 38], ['.', 'O', 39], ['"', 'O', 40], ['google', 'O', 42], ['recently', 'O', 43], ['released', 'O', 44], ['a', 'O', 45], ['special', 'O', 46], ['api', 'O', 47], ['named', 'O', 48], ['the', 'O', 49], ['ch

[['among', 'O', 1], ['the', 'O', 2], ['websites', 'B-infrastructure', 3], ['actively', 'O', 4], ['leaking', 'O', 5], ['customers', 'B-victim', 6], ["'", 'O', 7], ['sensitive', 'B-infrastructure', 8], ['data', 'I-infrastructure', 9], ['to', 'O', 10], ['an', 'O', 11], ['unauthorized', 'O', 12], ['third', 'O', 13], ['party', 'O', 14], ['is', 'O', 15], ['fi', 'B-victim', 16], ['##la', 'I-victim', 16], ['uk', 'O', 17], [',', 'O', 18], ['likely', 'O', 19], ['since', 'O', 20], ['november', 'O', 21], ['2018', 'O', 22], [',', 'O', 23], ['when', 'O', 24], ['it', 'O', 25], ['was', 'O', 26], ['compromised', 'O', 27], ['.', 'O', 28]]
[['among', 'O', 0], ['the', 'O', 1], ['websites', 'B-infrastructure', 2], ['actively', 'O', 3], ['leaking', 'O', 4], ['customers', 'B-victim', 5], ["'", 'O', 6], ['sensitive', 'B-infrastructure', 7], ['data', 'I-infrastructure', 8], ['to', 'O', 9], ['an', 'O', 10], ['unauthorized', 'O', 11], ['third', 'O', 12], ['party', 'O', 13], ['is', 'O', 14], ['fi', 'B-victim', 15

[['their', 'O', 0], ['analysis', 'O', 1], ['unearthed', 'O', 2], ['the', 'O', 3], ['fact', 'O', 4], ['that', 'O', 5], ['around', 'O', 7], ['60', 'O', 8], ['%', 'O', 9], ['of', 'O', 10], ['all', 'O', 11], ['microsoft', 'B-infrastructure', 12], ['office', 'I-infrastructure', 13], ['365', 'I-infrastructure', 14], ['and', 'I-infrastructure', 15], ['g', 'I-infrastructure', 16], ['suite', 'I-infrastructure', 17], ['tenants', 'O', 18], ['have', 'O', 19], ['been', 'O', 20], ['targeted', 'O', 21], ['using', 'O', 22], ['im', 'B-capability', 23], ['##ap', 'I-capability', 23], ['-', 'I-capability', 24], ['based', 'I-capability', 25], ['password', 'I-capability', 26], ['-', 'I-capability', 27], ['spraying', 'I-capability', 28], ['attacks', 'O', 29], ['and', 'O', 30], [',', 'O', 31], ['as', 'O', 32], ['a', 'O', 33], ['direct', 'O', 34], ['result', 'O', 35], [',', 'O', 36], ['approximately', 'O', 37], ['25', 'O', 38], ['%', 'O', 39], ['of', 'O', 40], ['g', 'B-infrastructure', 41], ['suite', 'I-infras

[['when', 'O', 0], ['the', 'O', 1], ['data', 'O', 2], ['breach', 'O', 3], ['was', 'O', 4], ['discovered', 'O', 5], [',', 'O', 6], ['the', 'O', 7], ['school', 'O', 8], ['launched', 'O', 9], ['an', 'O', 10], ['investigation', 'O', 11], ['and', 'O', 12], ['discovered', 'O', 13], ['that', 'O', 14], ['unauthorized', 'O', 15], ['changes', 'O', 16], ['to', 'O', 17], ['student', 'B-infrastructure', 18], ["'", 'I-infrastructure', 19], ['s', 'I-infrastructure', 19], ['grades', 'I-infrastructure', 20], ['and', 'I-infrastructure', 21], ['attendance', 'I-infrastructure', 22], ['records', 'I-infrastructure', 23], ['were', 'O', 24], ['made', 'O', 25], ['.', 'O', 26], ['the', 'O', 27], ['statement', 'O', 28], ['further', 'O', 29], ['states', 'O', 30], ['that', 'O', 31], ['this', 'O', 32], ['breach', 'O', 33], ['"', 'O', 34], ['appears', 'O', 35], ['to', 'O', 36], ['be', 'O', 37], ['limited', 'O', 38], ['to', 'O', 39], ['the', 'O', 40], ['high', 'O', 41], ['school', 'O', 42], ['.', 'O', 43], ['"', 'O',

[['in', 'O', 1], ['one', 'O', 2], ['instance', 'O', 3], [',', 'O', 4], ['researchers', 'O', 5], ['from', 'O', 6], ['check', 'O', 8], ['point', 'O', 9], ['research', 'O', 10], ['were', 'O', 11], ['able', 'O', 12], ['to', 'O', 13], ['find', 'O', 14], ['an', 'O', 15], ['ultrasound', 'O', 16], ['machine', 'O', 17], ['running', 'O', 18], ['a', 'O', 19], ['windows', 'B-infrastructure', 21], ['2000', 'I-infrastructure', 22], ['os', 'I-infrastructure', 23], ['which', 'O', 24], ['no', 'O', 25], ['longer', 'O', 26], ['receives', 'O', 27], ['security', 'O', 28], ['patches', 'O', 29], [',', 'O', 30], ['leaving', 'O', 31], ['the', 'O', 32], ['machine', 'O', 33], ['vulnerable', 'O', 34], ['to', 'O', 35], ['a', 'O', 36], ['multitude', 'O', 37], ['of', 'O', 38], ['attacks', 'O', 39], ['.', 'O', 40]]
[['in', 'O', 0], ['one', 'O', 1], ['instance', 'O', 2], [',', 'O', 3], ['researchers', 'O', 4], ['from', 'O', 5], ['check', 'O', 7], ['point', 'O', 8], ['research', 'O', 9], ['were', 'O', 10], ['able', 'O'

[['after', 'O', 0], ['the', 'O', 1], ['april', 'O', 2], ['release', 'O', 3], [',', 'O', 4], ['n', 'O', 5], ['##vid', 'O', 5], ['##ia', 'O', 5], ['will', 'O', 6], ['no', 'O', 7], ['longer', 'O', 8], ['include', 'O', 9], ['support', 'O', 10], ['for', 'O', 11], ['3d', 'O', 12], ['vision', 'O', 13], [',', 'O', 14], ['but', 'O', 15], ['will', 'O', 16], ['continue', 'O', 17], ['supporting', 'O', 18], ['any', 'O', 19], ['critical', 'O', 21], ['driver', 'O', 22], ['issues', 'O', 23], ['that', 'O', 24], ['arise', 'O', 25], ['in', 'O', 26], ['release', 'O', 27], ['41', 'O', 28], ['##8', 'O', 28], ['through', 'O', 29], ['april', 'O', 30], ['2020', 'O', 31], ['.', 'O', 32], ['"', 'O', 34], ['following', 'O', 35], ['the', 'O', 36], ['posting', 'O', 37], ['of', 'O', 38], ['the', 'O', 39], ['final', 'O', 40], ['driver', 'O', 41], ['from', 'O', 42], ['release', 'O', 43], ['41', 'O', 44], ['##8', 'O', 44], ['in', 'O', 45], ['april', 'O', 46], ['2019', 'O', 47], [',', 'O', 48], ['ge', 'O', 49], ['##forc

[['b', 'O', 0], ['##lee', 'O', 0], ['##ping', 'O', 0], ['##com', 'O', 0], ['##put', 'O', 0], ['##er', 'O', 0], ['was', 'O', 1], ['first', 'O', 2], ['notified', 'O', 3], ['about', 'O', 4], ['the', 'O', 5], ['ya', 'B-capability', 6], ['##tron', 'I-capability', 6], ['ra', 'I-capability', 7], ['##as', 'I-capability', 7], ['by', 'O', 8], ['a', 'O', 10], ['security', 'O', 11], ['researcher', 'O', 12], ['who', 'O', 13], ['goes', 'O', 14], ['by', 'O', 15], ['the', 'O', 16], ['name', 'O', 17], ['a', 'O', 18], ['shadow', 'O', 19], ['.', 'O', 20], ['since', 'O', 21], ['then', 'O', 22], [',', 'O', 23], ['the', 'O', 24], ['actor', 'B-adversary', 25], ['behind', 'I-adversary', 26], ['this', 'I-adversary', 27], ['ransom', 'I-adversary', 28], ['##ware', 'I-adversary', 28], ['has', 'O', 30], ['strangely', 'O', 31], ['been', 'O', 32], ['promoting', 'O', 33], ['the', 'B-capability', 34], ['service', 'I-capability', 35], ['by', 'O', 36], ['t', 'O', 37], ['##wee', 'O', 37], ['##ting', 'O', 37], ['to', 'O',

[['also', 'O', 0], [',', 'O', 1], ['"', 'O', 2], ['the', 'O', 3], ['department', 'O', 4], ['can', 'O', 5], ['not', 'O', 6], ['confirm', 'O', 7], ['that', 'O', 8], ['any', 'O', 9], ['clients', 'B-victim', 10], ['’', 'O', 11], ['personal', 'O', 12], ['information', 'O', 13], ['was', 'O', 14], ['acquired', 'O', 15], ['from', 'O', 16], ['its', 'O', 17], ['email', 'B-infrastructure', 18], ['system', 'I-infrastructure', 19], ['or', 'O', 20], ['used', 'O', 21], ['inappropriate', 'O', 22], ['##ly', 'O', 22], ['.', 'O', 23]]
[['during', 'O', 0], ['the', 'O', 1], ['second', 'O', 2], ['day', 'O', 3], ['of', 'O', 4], ['p', 'O', 5], ['##wn', 'O', 5], ['##2', 'O', 5], ['##own', 'O', 5], ['vancouver', 'O', 6], ['2019', 'O', 7], [',', 'O', 8], ['competitors', 'O', 9], ['successfully', 'O', 10], ['p', 'O', 11], ['##wn', 'O', 11], ['##ed', 'O', 11], ['the', 'O', 12], ['mo', 'B-infrastructure', 13], ['##zi', 'I-infrastructure', 13], ['##lla', 'I-infrastructure', 13], ['fire', 'I-infrastructure', 14], ['#

[['word', 'B-infrastructure', 0], ['##press', 'I-infrastructure', 0], ['websites', 'I-infrastructure', 1], ['using', 'O', 2], ['un', 'B-infrastructure', 3], ['##pa', 'I-infrastructure', 3], ['##tched', 'I-infrastructure', 3], ['social', 'I-infrastructure', 4], ['warfare', 'I-infrastructure', 5], ['installations', 'I-infrastructure', 6], ['(', 'O', 7], ['v', 'O', 8], ['##3', 'O', 8], ['.', 'O', 8], ['5', 'O', 8], ['.', 'O', 8], ['1', 'O', 8], ['and', 'O', 9], ['v', 'O', 10], ['##3', 'O', 10], ['.', 'O', 10], ['5', 'O', 10], ['.', 'O', 10], ['2', 'O', 10], [')', 'O', 11], ['are', 'O', 12], ['exposed', 'O', 13], ['to', 'O', 14], ['attacks', 'O', 15], ['abu', 'O', 16], ['##sing', 'O', 16], ['a', 'O', 17], ['stored', 'O', 19], ['cross', 'O', 20], ['-', 'O', 21], ['site', 'O', 22], ['script', 'O', 23], ['##ing', 'O', 23], ['(', 'O', 24], ['x', 'O', 25], ['##ss', 'O', 25], [')', 'O', 26], ['vulnerability', 'O', 27], ['fixed', 'O', 28], ['in', 'O', 29], ['the', 'O', 30], ['3', 'O', 31], ['.', 

[['according', 'O', 0], ['to', 'O', 1], ['dave', 'O', 3], ['gr', 'O', 4], ['##och', 'O', 4], ['##ock', 'O', 4], ['##i', 'O', 4], [',', 'O', 5], ['a', 'O', 6], ['senior', 'O', 7], ['program', 'O', 8], ['manager', 'O', 9], ['at', 'O', 10], ['microsoft', 'O', 11], [',', 'O', 12], ['microsoft', 'O', 13], ['plans', 'O', 14], ['on', 'O', 15], ['adding', 'O', 16], ['a', 'O', 17], ['graph', 'O', 18], ['##ing', 'O', 18], ['mode', 'O', 19], ['to', 'O', 20], ['the', 'O', 21], ['windows', 'O', 22], ['10', 'O', 24], ['cal', 'O', 25], ['##cula', 'O', 25], ['##tor', 'O', 25], ['in', 'O', 26], ['order', 'O', 27], ['to', 'O', 28], ['better', 'O', 29], ['aid', 'O', 30], ['students', 'O', 31], ['who', 'O', 32], ['are', 'O', 33], ['learning', 'O', 34], ['how', 'O', 35], ['to', 'O', 36], ['graph', 'O', 37], ['in', 'O', 38], ['algebra', 'O', 39], ['.', 'O', 40], ['"', 'O', 42], ['high', 'O', 43], ['school', 'O', 44], ['algebra', 'O', 45], ['is', 'O', 46], ['the', 'O', 47], ['gateway', 'O', 48], ['to', 'O', 

[['as', 'O', 0], ['further', 'O', 1], ['detailed', 'O', 2], ['by', 'O', 3], ['bea', 'O', 4], ['##zle', 'O', 4], ['##y', 'O', 4], ["'", 'O', 5], ['s', 'O', 5], ['2019', 'O', 7], ['breach', 'O', 8], ['briefing', 'O', 9], ['report', 'O', 10], [',', 'O', 11], ['the', 'O', 12], ['highest', 'O', 13], ['ransom', 'O', 14], ['demanded', 'O', 15], ['from', 'O', 16], ['its', 'O', 17], ['ins', 'O', 18], ['##ured', 'O', 18], ['##s', 'O', 18], ['was', 'O', 19], ['of', 'O', 20], ['$', 'O', 21], ['8', 'O', 22], ['.', 'O', 22], ['5', 'O', 22], ['million', 'O', 23], ['or', 'O', 24], ['3', 'O', 25], [',', 'O', 25], ['000', 'O', 25], ['bit', 'O', 26], ['##co', 'O', 26], ['##in', 'O', 26], [',', 'O', 27], ['while', 'O', 28], ['the', 'O', 29], ['highest', 'O', 31], ['ransom', 'O', 32], ['paid', 'O', 33], ['by', 'O', 34], ['one', 'O', 35], ['of', 'O', 36], ['its', 'O', 37], ['clients', 'O', 38], ['was', 'O', 39], ['of', 'O', 40], ['$', 'O', 41], ['93', 'O', 42], ['##5', 'O', 42], [',', 'O', 42], ['000', 'O',

[['in', 'O', 0], ['an', 'O', 1], ['advisory', 'O', 2], ['titled', 'O', 3], ['"', 'O', 4], ['management', 'O', 5], ['alert', 'O', 6], ['–', 'O', 7], ['fe', 'B-infrastructure', 8], ['##ma', 'I-infrastructure', 8], ['did', 'O', 9], ['not', 'O', 10], ['safeguard', 'O', 11], ['disaster', 'B-victim', 12], ['survivors', 'I-victim', 13], ['’', 'O', 14], ['sensitive', 'O', 15], ['personally', 'O', 16], ['identifiable', 'O', 17], ['information', 'O', 18], ['"', 'O', 19], [',', 'O', 20], ['it', 'O', 21], ['is', 'O', 22], ['disclosed', 'O', 23], ['that', 'O', 24], ['fe', 'B-infrastructure', 25], ['##ma', 'I-infrastructure', 25], ['did', 'O', 26], ['not', 'O', 27], ['appropriately', 'O', 28], ['safeguard', 'O', 29], ['personal', 'B-infrastructure', 31], ['information', 'I-infrastructure', 32], ['of', 'O', 33], ['survivors', 'B-victim', 34], [',', 'O', 35], ['including', 'O', 36], ['bank', 'O', 37], ['account', 'O', 38], ['information', 'O', 39], [',', 'O', 40], ['and', 'O', 41], ['provided', 'O', 4

[['the', 'O', 0], ['vulnerability', 'O', 1], ['patch', 'O', 2], ['##ed', 'O', 2], ['in', 'O', 3], ['word', 'O', 4], ['##press', 'O', 4], ['5', 'O', 5], ['.', 'O', 5], ['1', 'O', 5], ['.', 'O', 5], ['1', 'O', 5], ['would', 'O', 6], ['make', 'O', 7], ['it', 'O', 8], ['possible', 'O', 9], ['for', 'O', 10], ['bad', 'B-adversary', 11], ['actors', 'I-adversary', 12], ['to', 'O', 13], ['take', 'O', 14], ['over', 'O', 15], ['websites', 'B-infrastructure', 16], ['using', 'O', 17], ['a', 'O', 18], ['cross', 'O', 19], ['-', 'O', 20], ['site', 'O', 21], ['request', 'O', 22], ['forge', 'O', 23], ['##ry', 'O', 23], ['(', 'O', 24], ['cs', 'O', 25], ['##rf', 'O', 25], [')', 'O', 26], ['vulnerability', 'O', 27], ['by', 'O', 28], ['lu', 'O', 30], ['##ring', 'O', 30], ['a', 'O', 31], ['logged', 'O', 32], ['on', 'O', 33], ['administrator', 'O', 34], ['into', 'O', 35], ['visiting', 'O', 36], ['a', 'O', 37], ['malicious', 'B-infrastructure', 38], ['website', 'I-infrastructure', 39], ['containing', 'O', 40],

[['a', 'O', 0], ['chinese', 'B-adversary', 1], ['threat', 'I-adversary', 2], ['group', 'I-adversary', 3], ['was', 'I-adversary', 4], ['using', 'O', 5], ['hacking', 'O', 6], ['tools', 'O', 7], ['developed', 'O', 8], ['by', 'O', 9], ['the', 'O', 10], ['nsa', 'B-adversary', 11], ['more', 'O', 12], ['than', 'O', 13], ['a', 'O', 14], ['year', 'O', 15], ['before', 'O', 16], ['shadow', 'B-adversary', 17], ['broker', 'I-adversary', 18], ['##s', 'I-adversary', 18], ['leaked', 'O', 19], ['them', 'O', 20], ['in', 'O', 21], ['april', 'O', 22], ['2017', 'O', 23], [',', 'O', 24], ['tools', 'O', 25], ['that', 'O', 26], ['were', 'O', 27], ['later', 'O', 28], ['used', 'O', 29], ['in', 'O', 30], ['highly', 'O', 31], ['destructive', 'O', 32], ['attacks', 'O', 33], ['such', 'O', 34], ['as', 'O', 35], ['the', 'O', 36], ['wanna', 'B-capability', 37], ['##cr', 'I-capability', 37], ['##y', 'I-capability', 37], ['ransom', 'I-capability', 38], ['##ware', 'I-capability', 38], ['campaign', 'O', 40], ['from', 'O',

[['in', 'O', 0], ['a', 'O', 1], ['blog', 'O', 2], ['post', 'O', 3], ['released', 'O', 4], ['in', 'O', 5], ['conjunction', 'O', 6], ['with', 'O', 7], ['ms', 'O', 8], ['build', 'O', 9], [',', 'O', 10], ['microsoft', 'O', 11], ['stated', 'O', 12], ['that', 'O', 13], ['the', 'O', 14], ['mac', 'O', 15], ['##os', 'O', 15], ['version', 'O', 16], ['would', 'O', 17], ['be', 'O', 18], ['released', 'O', 19], ['soon', 'O', 20], ['.', 'O', 21], ['"', 'O', 23], ['if', 'O', 24], ['you', 'O', 25], ['’', 'O', 26], ['re', 'O', 26], ['not', 'O', 27], ['on', 'O', 28], ['windows', 'O', 29], ['10', 'O', 30], [',', 'O', 31], ['"', 'O', 32], ['stated', 'O', 33], ['microsoft', 'O', 34], ['in', 'O', 35], ['a', 'O', 36], ['blog', 'O', 37], ['post', 'O', 38], [',', 'O', 39], ['"', 'O', 41], ['do', 'O', 42], ['n', 'O', 43], ['’', 'O', 43], ['t', 'O', 43], ['worry', 'O', 44], ['―', 'O', 44], ['we', 'O', 44], ['’', 'O', 44], ['re', 'O', 44], ['looking', 'O', 45], ['forward', 'O', 46], ['to', 'O', 47], ['sharing', 'O

[['the', 'O', 0], ['37', 'B-victim', 2], [',', 'I-victim', 2], ['900', 'I-victim', 2], ['ko', 'I-victim', 3], ['##ol', 'I-victim', 3], ['king', 'I-victim', 4], ['shop', 'I-victim', 5], ['member', 'I-victim', 6], ['records', 'O', 7], ['contained', 'O', 8], ['personally', 'O', 9], ['identifiable', 'O', 10], ['information', 'O', 11], ['(', 'O', 12], ['pi', 'O', 13], ['##i', 'O', 13], [')', 'O', 14], ['such', 'O', 15], ['as', 'O', 16], ['"', 'O', 17], ['emails', 'B-infrastructure', 18], [',', 'O', 19], ['password', 'O', 20], ['##s', 'O', 20], ['(', 'O', 21], ['access', 'O', 22], ['to', 'O', 23], ['the', 'O', 24], ['portal', 'O', 25], [')', 'O', 26], [',', 'O', 27], ['names', 'O', 28], [',', 'O', 29], ['phones', 'O', 30], [',', 'O', 31], ['do', 'O', 32], ['##b', 'O', 32], [',', 'O', 33], ['vo', 'O', 34], ['##ucher', 'O', 34], ['codes', 'O', 35], [',', 'O', 36], ['links', 'O', 37], ['to', 'O', 38], ['the', 'O', 39], ['externally', 'O', 40], ['stored', 'O', 41], ['certificates', 'O', 42], [',

[['the', 'O', 0], ['startup', 'O', 1], ['company', 'O', 2], ['strive', 'O', 3], ['##s', 'O', 3], ['to', 'O', 4], ['allow', 'O', 5], ['security', 'O', 6], ['and', 'O', 7], ['network', 'O', 8], ['teams', 'O', 9], ['to', 'O', 10], ['work', 'O', 11], ['smarter', 'O', 12], ['by', 'O', 14], ['taking', 'O', 15], ['advantage', 'O', 16], ['of', 'O', 17], ['the', 'O', 18], ['huge', 'O', 19], ['quantities', 'O', 20], ['of', 'O', 21], ['real', 'O', 22], ['-', 'O', 23], ['time', 'O', 24], ['data', 'O', 25], ['network', 'O', 26], ['devices', 'O', 27], ['generate', 'O', 28], ['on', 'O', 29], ['a', 'O', 30], ['regular', 'O', 31], ['basis', 'O', 32], ['to', 'O', 33], ['automatically', 'O', 34], ['identify', 'O', 35], ['and', 'O', 36], ['block', 'O', 37], ['malicious', 'O', 38], ['traffic', 'O', 39], ['as', 'O', 40], ['a', 'O', 41], ['network', 'O', 42], ['defense', 'O', 43], ['measure', 'O', 44], ['.', 'O', 45], ['more', 'O', 47], ['exactly', 'O', 48], [',', 'O', 49], ['net', 'B-infrastructure', 50], [

[['microsoft', 'O', 0], ['announced', 'O', 1], ['the', 'O', 2], ['addition', 'O', 3], ['of', 'O', 4], ['live', 'O', 6], ['response', 'O', 7], ['capabilities', 'O', 8], ['to', 'O', 9], ['its', 'O', 10], ['microsoft', 'B-infrastructure', 11], ['defender', 'I-infrastructure', 12], ['advanced', 'I-infrastructure', 13], ['threat', 'I-infrastructure', 14], ['protection', 'I-infrastructure', 15], ['(', 'I-infrastructure', 16], ['microsoft', 'I-infrastructure', 17], ['defender', 'I-infrastructure', 18], ['atp', 'I-infrastructure', 19], [')', 'I-infrastructure', 20], ['enabling', 'O', 21], ['security', 'O', 22], ['operation', 'O', 23], ['teams', 'O', 24], ['to', 'O', 25], ['perform', 'O', 26], ['system', 'B-infrastructure', 27], ['forensic', 'I-infrastructure', 28], ['analysis', 'I-infrastructure', 29], ['remotely', 'O', 30], ['.', 'O', 31], ['microsoft', 'B-infrastructure', 33], ['defender', 'I-infrastructure', 34], ['atp', 'I-infrastructure', 35], ['is', 'O', 36], ['a', 'O', 37], ['security',

[['for', 'O', 1], ['more', 'O', 2], ['information', 'O', 3], ['on', 'O', 4], ['how', 'O', 5], ['these', 'O', 6], ['vu', 'O', 7], ['##ln', 'O', 7], ['##era', 'O', 7], ['##bilities', 'O', 7], ['work', 'O', 8], [',', 'O', 9], ['who', 'O', 10], ['discovered', 'O', 11], ['them', 'O', 12], [',', 'O', 13], ['and', 'O', 14], ['how', 'O', 15], ['you', 'O', 16], ['can', 'O', 17], ['test', 'O', 18], ['if', 'O', 19], ['you', 'O', 20], ['are', 'O', 21], ['vulnerable', 'O', 22], [',', 'O', 23], ['you', 'O', 24], ['can', 'O', 25], ['read', 'O', 26], ['our', 'O', 27], ['new', 'O', 29], ['rid', 'B-capability', 30], ['##l', 'I-capability', 30], ['and', 'I-capability', 31], ['fallout', 'I-capability', 32], ['attacks', 'I-capability', 33], ['impact', 'O', 34], ['all', 'O', 35], ['modern', 'O', 36], ['intel', 'O', 37], ['cpu', 'O', 38], ['##s', 'O', 38], ['article', 'O', 40], ['.', 'O', 41]]
[['for', 'O', 0], ['more', 'O', 1], ['information', 'O', 2], ['on', 'O', 3], ['how', 'O', 4], ['these', 'O', 5], ['v

[['it', 'O', 1], ['has', 'O', 2], ['been', 'O', 3], ['discovered', 'O', 4], ['that', 'O', 5], ['google', 'O', 6], ['is', 'O', 7], ['hiding', 'O', 8], ['three', 'O', 9], ['google', 'O', 10], ['pay', 'O', 11], ['privacy', 'O', 12], ['settings', 'O', 13], ['unless', 'O', 14], ['you', 'O', 15], ['access', 'O', 16], ['the', 'O', 17], ['service', 'O', 18], ["'", 'O', 19], ['s', 'O', 19], ['settings', 'O', 20], ['screen', 'O', 21], ['through', 'O', 22], ['a', 'O', 23], ['special', 'O', 24], ['ur', 'O', 25], ['##l', 'O', 25], ['.', 'O', 26], ['these', 'O', 27], ['settings', 'O', 28], ['allow', 'O', 29], ['you', 'O', 30], ['to', 'O', 31], ['restrict', 'O', 32], ['whether', 'O', 33], ['google', 'O', 34], ['pay', 'O', 35], ['shares', 'O', 36], ['your', 'O', 37], ['credit', 'O', 38], ['##worth', 'O', 38], ['##iness', 'O', 38], [',', 'O', 39], ['personal', 'O', 40], ['information', 'O', 41], [',', 'O', 42], ['or', 'O', 43], ['google', 'O', 44], ['pay', 'O', 45], ['account', 'O', 46], ['information'

[['"', 'O', 1], ['i', 'O', 2], ['represent', 'O', 3], ['brave', 'O', 4], [',', 'O', 5], ['a', 'O', 6], ['rapidly', 'O', 7], ['growing', 'O', 8], ['internet', 'O', 9], ['browser', 'O', 10], ['based', 'O', 11], ['in', 'O', 12], ['san', 'O', 13], ['francisco', 'O', 14], ['.', 'O', 15], ['brave', 'O', 16], ['’', 'O', 17], ['s', 'O', 17], ['ceo', 'O', 18], [',', 'O', 19], ['brendan', 'O', 20], ['e', 'O', 21], ['##ich', 'O', 21], [',', 'O', 22], ['is', 'O', 23], ['the', 'O', 24], ['inventor', 'O', 25], ['of', 'O', 26], ['java', 'O', 27], ['##script', 'O', 27], [',', 'O', 28], ['and', 'O', 29], ['co', 'O', 30], ['-', 'O', 31], ['founded', 'O', 32], ['mo', 'O', 33], ['##zi', 'O', 33], ['##lla', 'O', 33], ['/', 'O', 34], ['fire', 'O', 35], ['##fo', 'O', 35], ['##x', 'O', 35], ['.', 'O', 36]]
[['"', 'O', 0], ['i', 'O', 1], ['represent', 'O', 2], ['brave', 'O', 3], [',', 'O', 4], ['a', 'O', 5], ['rapidly', 'O', 6], ['growing', 'O', 7], ['internet', 'O', 8], ['browser', 'O', 9], ['based', 'O', 10]

[['radio', 'B-capability', 1], ['bal', 'I-capability', 2], ['##ou', 'I-capability', 2], ['##ch', 'I-capability', 2], ['(', 'O', 3], ['or', 'O', 4], ['rb', 'B-capability', 5], ['music', 'I-capability', 6], [')', 'O', 7], ['included', 'O', 8], ['functionality', 'O', 9], ['from', 'O', 10], ['ah', 'B-capability', 11], ['##my', 'I-capability', 11], ['##th', 'I-capability', 11], ['android', 'I-capability', 12], ['rat', 'I-capability', 13], [',', 'O', 14], ['an', 'B-capability', 15], ['open', 'I-capability', 16], ['-', 'I-capability', 17], ['source', 'I-capability', 18], ['project', 'I-capability', 19], ['software', 'I-capability', 20], ['that', 'O', 21], ['became', 'O', 22], ['public', 'O', 23], ['in', 'O', 24], ['late', 'O', 25], ['2017', 'O', 26], ['.', 'O', 27], ['since', 'O', 28], ['its', 'O', 29], ['open', 'O', 30], ['release', 'O', 31], [',', 'O', 32], ['several', 'O', 33], ['malicious', 'O', 34], ['mobile', 'O', 35], ['apps', 'O', 36], ['borrowed', 'O', 37], ['its', 'O', 38], ['code',

[['out', 'O', 1], ['of', 'O', 2], ['the', 'O', 3], ['three', 'O', 4], ['flaws', 'O', 5], ['impact', 'O', 6], ['##ing', 'O', 6], ['the', 'O', 7], ['cisco', 'O', 8], ['small', 'O', 9], ['business', 'O', 10], ['220', 'O', 11], ['series', 'O', 12], ['smart', 'O', 13], ['switches', 'O', 14], [',', 'O', 15], ['two', 'O', 16], ['are', 'O', 17], ['critical', 'B-capability', 18], ['remote', 'I-capability', 19], ['code', 'I-capability', 20], ['execution', 'I-capability', 21], ['(', 'O', 22], ['cv', 'O', 23], ['##e', 'O', 23], ['-', 'O', 23], ['2019', 'O', 23], ['-', 'O', 24], ['1913', 'O', 25], [')', 'O', 26], ['and', 'O', 27], ['authentication', 'B-capability', 28], ['bypass', 'I-capability', 29], ['security', 'I-capability', 30], ['(', 'O', 31], ['cv', 'O', 32], ['##e', 'O', 32], ['-', 'O', 32], ['2019', 'O', 32], ['-', 'O', 33], ['1912', 'O', 34], [')', 'O', 35], ['issues', 'O', 36], ['.', 'O', 37], ['an', 'B-adversary', 39], ['una', 'I-adversary', 40], ['##uth', 'I-adversary', 40], ['##ent',

[['as', 'O', 0], ['a', 'O', 1], ['result', 'O', 2], [',', 'O', 3], ['matt', 'O', 5], ['decided', 'O', 6], ['to', 'O', 7], ['show', 'O', 8], ['how', 'O', 9], ['the', 'O', 10], ['vulnerability', 'O', 11], ['could', 'O', 12], ['be', 'O', 13], ['exploited', 'O', 14], ['and', 'O', 15], ['published', 'O', 16], ['a', 'O', 17], ['po', 'O', 18], ['##c', 'O', 18], ['where', 'O', 20], ['he', 'O', 21], ['gets', 'O', 22], ['a', 'O', 23], ['command', 'O', 24], ['prompt', 'O', 25], ['window', 'O', 26], ['running', 'O', 27], ['with', 'O', 28], ['system', 'O', 30], ['account', 'O', 31], ['privileges', 'O', 32], [',', 'O', 33], ['the', 'O', 34], ['highest', 'O', 35], ['for', 'O', 36], ['a', 'O', 37], ['user', 'O', 38], ['on', 'O', 39], ['windows', 'O', 40], ['.', 'O', 41], ['fast', 'O', 43], ['forward', 'O', 44], ['a', 'O', 45], ['few', 'O', 46], ['days', 'O', 47], ['and', 'O', 48], ['valve', 'B-victim', 49], ['published', 'O', 50], ['a', 'O', 51], ['patch', 'O', 52], ['for', 'O', 54], ['the', 'O', 55],

[['this', 'O', 0], ['comes', 'O', 1], ['after', 'O', 2], ['being', 'O', 3], ['in', 'O', 4], ['##ert', 'O', 4], ['since', 'O', 5], ['the', 'O', 6], ['beginning', 'O', 7], ['of', 'O', 8], ['june', 'O', 9], ['.', 'O', 10], ['although', 'O', 12], ['it', 'O', 13], ['started', 'O', 14], ['as', 'O', 15], ['a', 'O', 16], ['banking', 'B-capability', 17], ['trojan', 'I-capability', 18], ['in', 'O', 19], ['2014', 'O', 20], [',', 'O', 21], ['em', 'B-capability', 22], ['##ote', 'I-capability', 22], ['##t', 'I-capability', 22], ['changed', 'O', 23], ['its', 'O', 24], ['course', 'O', 25], ['to', 'O', 26], ['becoming', 'O', 27], ['a', 'O', 28], ['bot', 'O', 29], ['##net', 'O', 29], ['that', 'O', 30], ['delivers', 'O', 31], ['various', 'O', 32], ['mal', 'B-capability', 33], ['##ware', 'I-capability', 33], ['strains', 'O', 34], ['.', 'O', 35]]
[['although', 'O', 1], ['it', 'O', 2], ['started', 'O', 3], ['as', 'O', 4], ['a', 'O', 5], ['banking', 'B-capability', 6], ['trojan', 'I-capability', 7], ['in', '

[['"', 'O', 1], ['apart', 'O', 2], ['from', 'O', 3], ['a', 'O', 4], ['few', 'O', 5], ['pun', 'O', 6], ['##ct', 'O', 6], ['##uation', 'O', 6], ['errors', 'O', 7], ['and', 'O', 8], ['the', 'O', 9], ['missing', 'O', 10], ['space', 'O', 11], ['before', 'O', 12], ['the', 'O', 13], ['word', 'O', 14], ['‘', 'O', 15], ['please', 'O', 16], ['’', 'O', 17], [',', 'O', 18], ['this', 'O', 19], ['message', 'O', 20], ['is', 'O', 21], ['clean', 'O', 22], [',', 'O', 23], ['clear', 'O', 24], ['and', 'O', 25], ['low', 'O', 26], ['-', 'O', 27], ['key', 'O', 28], ['enough', 'O', 29], ['not', 'O', 30], ['to', 'O', 31], ['raise', 'O', 32], ['instant', 'O', 33], ['alarm', 'O', 34], ['bells', 'O', 35], [',', 'O', 36], ['"', 'O', 37], ['details', 'O', 38], ['so', 'O', 39], ['##ph', 'O', 39], ['##os', 'O', 39], ["'", 'O', 40], ['paul', 'O', 42], ['duck', 'O', 43], ['##lin', 'O', 43], ['who', 'O', 45], ['analyzed', 'O', 46], ['the', 'O', 47], ['campaign', 'O', 48], ['.', 'O', 49]]
[['"', 'O', 0], ['apart', 'O', 1

[['besides', 'O', 1], ['the', 'O', 2], ['blue', 'O', 3], ['##tooth', 'O', 3], ['speakers', 'O', 4], ['no', 'O', 5], ['longer', 'O', 6], ['working', 'O', 7], ['properly', 'O', 8], [',', 'O', 9], ['users', 'O', 10], ['will', 'O', 11], ['also', 'O', 12], ['notice', 'O', 13], ['a', 'O', 14], ['microsoft', 'O', 16], ['blue', 'O', 17], ['##tooth', 'O', 17], ['a2', 'O', 18], ['##dp', 'O', 18], ['source', 'O', 19], ['entry', 'O', 20], ['under', 'O', 21], ['the', 'O', 22], ['sound', 'O', 24], [',', 'O', 25], ['video', 'O', 26], ['and', 'O', 27], ['game', 'O', 28], ['controllers', 'O', 29], ['node', 'O', 30], ['displaying', 'O', 31], ['a', 'O', 32], ['yellow', 'O', 33], ['ex', 'O', 34], ['##cl', 'O', 34], ['##ama', 'O', 34], ['##tion', 'O', 34], ['mark', 'O', 35], ['icon', 'O', 36], ['in', 'O', 37], ['device', 'O', 38], ['manager', 'O', 39], ['.', 'O', 40]]
[['besides', 'O', 0], ['the', 'O', 1], ['blue', 'O', 2], ['##tooth', 'O', 2], ['speakers', 'O', 3], ['no', 'O', 4], ['longer', 'O', 5], ['wo

In [9]:
# Preview the first three tagged examples. As the output below shows, each
# example is a dict with a 'doc_id' and a 'seq' list of
# [word-piece, BIO role tag, index] triples.
# NOTE(review): the third element looks like the index of the source word
# (word-pieces of one word share it) -- confirm against src.preprocessing.
all_examples[:3]

[{'doc_id': 'fireeye-1',
  'seq': [['chinese', 'B-adversary', 0],
   ['cyber', 'I-adversary', 1],
   ['espionage', 'I-adversary', 2],
   ['operators', 'I-adversary', 3],
   ['modified', 'O', 4],
   ['the', 'O', 5],
   ['software', 'O', 6],
   ['packages', 'O', 7],
   ['of', 'O', 8],
   ['a', 'O', 9],
   ['legitimate', 'B-victim', 10],
   ['vendor', 'I-victim', 11],
   [',', 'I-victim', 12],
   ['nets', 'B-victim', 13],
   ['##aran', 'I-victim', 13],
   ['##g', 'I-victim', 13],
   ['computer', 'I-victim', 14],
   [',', 'O', 15],
   ['allowing', 'O', 16],
   ['access', 'O', 17],
   ['to', 'O', 18],
   ['a', 'O', 19],
   ['broad', 'O', 20],
   ['range', 'O', 21],
   ['of', 'O', 22],
   ['industries', 'O', 23],
   ['and', 'O', 24],
   ['institutions', 'O', 25],
   ['that', 'O', 26],
   ['include', 'O', 27],
   ['financial', 'B-victim', 28],
   ['services', 'I-victim', 29],
   [',', 'O', 30],
   ['transportation', 'O', 31],
   [',', 'O', 32],
   ['telecommunications', 'O', 33],
   [',', 'O'

### Test function

In [1]:
import json
import os
from collections import OrderedDict

# The notebook is started one level below the project root, but every path
# below (and the `src` package) is relative to the root. Only step up when the
# data directory is not reachable yet, so that re-running this cell does not
# climb one directory higher each time.
if not os.path.isdir('data/input'):
    os.chdir('..')

# Must come after the chdir above: `src` is resolved relative to the project root.
from src.preprocessing import create_sent_tagging

path = 'data/input/cybersecurity-reports/train_full.json'
# JSON is UTF-8; be explicit so typographic quotes/dashes in the reports are
# decoded identically on every platform.
with open(path, "r", encoding="utf-8") as doc_keys_file:
    # OrderedDict keeps documents and roles in file order.
    input_roles = json.load(doc_keys_file, object_pairs_hook=OrderedDict)

# create_sent_tagging returns two collections of tagged examples and prints the
# average number of sentences per paragraph as a side effect (see output below).
all_examples_sample_neg, all_examples = create_sent_tagging(input_roles)
all_examples_sample_neg[:3]

Average paragraph sent # : 3.515695067264574


[{'doc_id': 'fireeye-1',
  'seq': [['chinese', 'B-adversary', 0],
   ['cyber', 'I-adversary', 1],
   ['espionage', 'I-adversary', 2],
   ['operators', 'I-adversary', 3],
   ['modified', 'O', 4],
   ['the', 'O', 5],
   ['software', 'O', 6],
   ['packages', 'O', 7],
   ['of', 'O', 8],
   ['a', 'O', 9],
   ['legitimate', 'B-victim', 10],
   ['vendor', 'I-victim', 11],
   [',', 'I-victim', 12],
   ['nets', 'B-victim', 13],
   ['##aran', 'I-victim', 13],
   ['##g', 'I-victim', 13],
   ['computer', 'I-victim', 14],
   [',', 'O', 15],
   ['allowing', 'O', 16],
   ['access', 'O', 17],
   ['to', 'O', 18],
   ['a', 'O', 19],
   ['broad', 'O', 20],
   ['range', 'O', 21],
   ['of', 'O', 22],
   ['industries', 'O', 23],
   ['and', 'O', 24],
   ['institutions', 'O', 25],
   ['that', 'O', 26],
   ['include', 'O', 27],
   ['financial', 'B-victim', 28],
   ['services', 'I-victim', 29],
   [',', 'O', 30],
   ['transportation', 'O', 31],
   [',', 'O', 32],
   ['telecommunications', 'O', 33],
   [',', 'O'

### Preprocess all files

In [2]:
# Directory (with trailing slash) that holds the raw annotated report splits.
path = "data/input/cybersecurity-reports/"

# Splits to preprocess, one JSON file each.
filenames = [
    'dev_full.json',
    'test.json',
    'train_full.json',
]

In [3]:
# Resolve the dependency once instead of re-importing on every iteration.
from src.preprocessing import create_sent_tagging

# Make sure the target directory exists so the first run does not fail.
output_dir = 'data/processed/'
os.makedirs(output_dir, exist_ok=True)

for filename in filenames:
    # Read explicitly as UTF-8 to match the encoding used for the output below.
    with open(path + filename, "r", encoding='utf-8') as doc_keys_file:
        input_roles = json.load(doc_keys_file, object_pairs_hook=OrderedDict)

    # Prints "Average paragraph sent # : ..." once per file as a side effect.
    all_examples_sample_neg, all_examples = create_sent_tagging(input_roles)

    # Write one JSON object per line ('<name>.json' -> '<name>.jsonl').
    # Open with 'w', not 'a': appending would duplicate every record each time
    # this cell is re-run.
    with open(output_dir + filename + 'l', 'w', encoding='utf-8') as f:
        for line in all_examples_sample_neg:
            f.write(json.dumps(line, ensure_ascii=False) + '\n')

Average paragraph sent # : 3.6363636363636362
Average paragraph sent # : 3.090909090909091
Average paragraph sent # : 3.515695067264574
