# CAPEC Data Analysis
__Ivan Ulloa - 1/29/2021__

- This notebook imports an XML file containing CAPEC information.<br>
- The 4 fields used are ID, Description, Relations to other attacks and Relations to CWE.<br>
- The resulting descriptions are stored in CAPEC.txt for further processing with AutoPhrase.

In [1]:
import xml.etree.ElementTree as ET
import xmltodict
import json
import os
import pandas as pd
import spacy

import pprint

# Pretty-printer configured with a 4-space indent
pp = pprint.PrettyPrinter(indent=4)

# English spaCy pipeline (tokenizer, tagger, parser, NER and word vectors)
nlp = spacy.load("en_core_web_lg")

##  Load CAPEC dataset and save to JSON

In [2]:
# Read the CAPEC catalog from disk and keep both the tree and its root element
tree = ET.parse('data/1000.xml')
xml_data = tree.getroot()

# Re-serialize the root element to UTF-8 bytes; adjust `encoding` here if a
# different output encoding is required
xmlstr = ET.tostring(element=xml_data, method='xml', encoding='utf-8')

In [3]:
# Turn the serialized XML into nested Python mappings
capec = xmltodict.parse(xmlstr)

# Keep a JSON copy of the parsed catalog next to the source data
with open('data/capec_data.json', 'w') as out_file:
    json.dump(capec, out_file)

In [4]:
# Plain-dict copy built from a fresh parse of the serialized XML.
# Dict unpacking is used instead of calling dict(...): a later cell rebinds the
# name `dict` to a data mapping, so dict(...) raises TypeError when this cell
# is re-run in the same kernel.
data_dict = {**xmltodict.parse(xmlstr)}

In [5]:
## Extract ID, descriptions, CAPEC relationships, and CWE relationships

In [6]:
def _related_entries(pattern, group_key, item_key):
    """Return ``pattern[group_key][item_key]``, or the string ``'None'`` if absent.

    A missing key raises ``KeyError``; a group that is present but not a
    mapping (e.g. ``None``) raises ``TypeError``. Both mean "no related
    entries" and yield the ``'None'`` placeholder. Any other exception
    propagates instead of being silently swallowed.
    """
    try:
        return pattern[group_key][item_key]
    except (KeyError, TypeError):
        return 'None'


# Look the attack-pattern collection up once instead of on every access.
attack_patterns = capec['ns0:Attack_Pattern_Catalog']['ns0:Attack_Patterns']['ns0:Attack_Pattern']
if not isinstance(attack_patterns, list):
    # xmltodict yields a single mapping (not a list) when an element occurs
    # only once; wrap it so the loop below handles both shapes.
    attack_patterns = [attack_patterns]

# One entry per attack pattern, kept in catalog order across all four lists.
ID = []
Desc = []
Rel_CAPEC = []
Rel_CWE = []
for pattern in attack_patterns:
    ID.append(pattern['@ID'])
    Desc.append(pattern['ns0:Description'])
    Rel_CAPEC.append(
        _related_entries(pattern, 'ns0:Related_Attack_Patterns', 'ns0:Related_Attack_Pattern')
    )
    Rel_CWE.append(
        _related_entries(pattern, 'ns0:Related_Weaknesses', 'ns0:Related_Weakness')
    )

# NOTE(review): this name shadows the builtin `dict` for the rest of the
# kernel session. It is kept because the following cell builds the DataFrame
# from `dict`; rename both together when that cell can be changed too.
dict = {'ID': ID, 'Description': Desc, 'rel_CAPEC': Rel_CAPEC, 'rel_CWE': Rel_CWE}

In [7]:
# Tabulate the extracted columns; `dict` here is the column mapping assembled
# in the previous cell (it shadows the builtin of the same name)
CAPEC_df = pd.DataFrame(data=dict)
CAPEC_df

Unnamed: 0,ID,Description,rel_CAPEC,rel_CWE
0,1,"In applications, particularly web applications...","[{'@Nature': 'ChildOf', '@CAPEC_ID': '122'}, {...","[{'@CWE_ID': '276'}, {'@CWE_ID': '285'}, {'@CW..."
1,10,This attack pattern involves causing a buffer ...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '100'}","[{'@CWE_ID': '120'}, {'@CWE_ID': '302'}, {'@CW..."
2,100,Buffer Overflow attacks target improper or mis...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '123'}","[{'@CWE_ID': '120'}, {'@CWE_ID': '119'}, {'@CW..."
3,101,An attacker can use Server Side Include (SSI) ...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '253'}","[{'@CWE_ID': '97'}, {'@CWE_ID': '74'}, {'@CWE_..."
4,102,Session sidejacking takes advantage of an unen...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '593'}","[{'@CWE_ID': '294'}, {'@CWE_ID': '522'}, {'@CW..."
...,...,...,...,...
522,94,This type of attack targets the communication ...,,"[{'@CWE_ID': '300'}, {'@CWE_ID': '290'}, {'@CW..."
523,95,This attack targets the WSDL interface made av...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '54'}",{'@CWE_ID': '538'}
524,96,An application typically makes calls to functi...,"{'@Nature': 'ChildOf', '@CAPEC_ID': '603', 'ns...","[{'@CWE_ID': '589'}, {'@CWE_ID': '227'}]"
525,97,Cryptanalysis is a process of finding weakness...,"[{'@Nature': 'ChildOf', '@CAPEC_ID': '192'}, {...","[{'@CWE_ID': '327'}, {'@CWE_ID': '1240'}, {'@C..."


In [8]:
# Create corpus from CAPEC descriptions: one line of text per description
LIMIT = 527  # maximum number of descriptions added to the corpus


def _description_text(desc):
    """Return the text one parsed description contributes, or None to skip it.

    * A plain string is used verbatim.
    * An empty value (e.g. ``None``) contributes nothing.
    * Anything else is indexed with ``'html:p'`` and its first paragraph is
      used. When ``'html:p'`` holds a single string rather than a list, the
      whole string is returned; indexing it with ``[0]`` would keep only its
      first character.

    Raises:
        KeyError: if a non-empty mapping has no ``'html:p'`` entry.
    """
    if isinstance(desc, str):
        return desc
    if not desc:
        return None
    paragraphs = desc['html:p']
    if isinstance(paragraphs, str):
        return paragraphs
    # assumes paragraph entries are plain strings - TODO confirm; nested markup
    # would surface as a TypeError in the join below
    return paragraphs[0]


selected = Desc[:LIMIT]
counter = len(selected)  # number of descriptions examined
corpus = ''.join(
    text + '\n'
    for text in map(_description_text, selected)
    if text is not None
)

In [9]:
# Run the loaded spaCy pipeline over the whole corpus string in a single call
doc = nlp(corpus)

In [10]:
# Fixed-width table of every token: 50 columns for the text, then 10 each for
# the part-of-speech tag and the dependency label
dash = '-' * 80
row_template = "{:<50}{:<10}{:<10}"

print(dash)
print(row_template.format("TEXT", "POS", "DEP"))
print(dash)
for token in doc:
    print(row_template.format(token.text, token.pos_, token.dep_))

--------------------------------------------------------------------------------
TEXT                                              POS       DEP       
--------------------------------------------------------------------------------
In                                                ADP       prep      
applications                                      NOUN      pobj      
,                                                 PUNCT     punct     
particularly                                      ADV       advmod    
web                                               NOUN      compound  
applications                                      NOUN      appos     
,                                                 PUNCT     punct     
access                                            NOUN      nsubjpass 
to                                                ADP       prep      
functionality                                     NOUN      pobj      
is                                                AUX    

bypass                                            VERB      ccomp     
access                                            NOUN      compound  
control                                           NOUN      dobj      
and/or                                            CCONJ     nsubj     
execute                                           VERB      ROOT      
functionality                                     NOUN      dobj      
not                                               PART      neg       
intended                                          VERB      acl       
by                                                ADP       agent     
the                                               DET       det       
interface                                         NOUN      compound  
implementation                                    NOUN      pobj      
,                                                 PUNCT     punct     
possibly                                          ADV       advmod    
compro

processing                                        NOUN      compound  
cycles                                            NOUN      conj      
,                                                 PUNCT     punct     
or                                                CCONJ     cc        
other                                             ADJ       amod      
resources                                         NOUN      conj      
.                                                 PUNCT     punct     
This                                              DET       det       
attack                                            NOUN      nsubj     
does                                              AUX       aux       
not                                               PART      neg       
attempt                                           VERB      ROOT      
to                                                PART      aux       
force                                             VERB      xcomp     
this  

the                                               DET       det       
value                                             NOUN      pobj      
of                                                ADP       prep      
the                                               DET       det       
message                                           NOUN      pobj      
they                                              PRON      nsubj     
are                                               AUX       aux       
protecting                                        VERB      relcl     
.                                                 PUNCT     punct     
Hash                                              NOUN      compound  
codes                                             NOUN      nsubj     
are                                               AUX       ROOT      
a                                                 DET       det       
common                                            ADJ       amod      
checks

with                                              ADP       prep      
a                                                 DET       det       
Phishing                                          NOUN      nmod      
(                                                 PUNCT     punct     
CAPEC-98                                          NOUN      nmod      
)                                                 PUNCT     punct     
attack                                            NOUN      pobj      
tailored                                          VERB      acl       
to                                                ADP       prep      
a                                                 DET       det       
category                                          NOUN      pobj      
of                                                ADP       prep      
users                                             NOUN      pobj      
in                                                ADP       prep      
order 

(                                                 PUNCT     punct     
if                                                SCONJ     mark      
an                                                DET       det       
application                                       NOUN      compound  
locks                                             VERB      parataxis 
because                                           SCONJ     mark      
it                                                PRON      nsubj     
unexpectedly                                      ADV       advmod    
failed                                            VERB      advcl     
to                                                PART      aux       
be                                                AUX       auxpass   
granted                                           VERB      xcomp     
access                                            NOUN      dobj      
)                                                 PUNCT     punct     
or    

.                                                 PUNCT     punct     

                                                 SPACE               
A                                                 DET       det       
Principal                                         ADJ       compound  
Spoof                                             NOUN      nsubj     
is                                                AUX       ROOT      
a                                                 DET       det       
form                                              NOUN      attr      
of                                                ADP       prep      
Identity                                          NOUN      compound  
Spoofing                                          NOUN      pobj      
where                                             ADV       advmod    
an                                                DET       det       
adversary                                         NOUN      nsubj     
preten

that                                              DET       mark      
scripting                                         NOUN      nsubjpass 
is                                                AUX       auxpass   
not                                               PART      neg       
expected                                          VERB      ccomp     
.                                                 PUNCT     punct     
The                                               DET       det       
adversary                                         NOUN      nsubj     
tricks                                            VERB      ROOT      
the                                               DET       det       
victim                                            NOUN      dobj      
into                                              ADP       prep      
accessing                                         VERB      pcomp     
a                                                 DET       det       
URL   

becoming                                          VERB      pcomp     
unstable                                          ADJ       acomp     
,                                                 PUNCT     punct     
freezing                                          VERB      conj      
,                                                 PUNCT     punct     
or                                                CCONJ     cc        
crashing                                          VERB      conj      
.                                                 PUNCT     punct     
However                                           ADV       advmod    
it                                                PRON      nsubj     
may                                               VERB      aux       
be                                                AUX       ROOT      
possible                                          ADJ       acomp     
to                                                PART      aux       
cause 

,                                                 PUNCT     punct     
in                                                ADP       prep      
an                                                DET       det       
attempt                                           NOUN      pobj      
to                                                PART      aux       
mislead                                           VERB      acl       
an                                                DET       det       
audit                                             NOUN      dobj      
of                                                ADP       prep      
the                                               DET       det       
log                                               NOUN      compound  
file                                              NOUN      pobj      
or                                                CCONJ     cc        
cover                                             VERB      conj      
tracks

be                                                AUX       ROOT      
virtually                                         ADV       advmod    
any                                               DET       det       
crafted                                           VERB      amod      
packet                                            NOUN      attr      
whatsoever                                        ADV       advmod    
,                                                 PUNCT     punct     
provided                                          VERB      prep      
the                                               DET       det       
adversary                                         NOUN      pobj      
can                                               VERB      aux       
identify                                          VERB      conj      
a                                                 DET       det       
functional                                        ADJ       amod      
host  

of                                                ADP       prep      
sync                                              NOUN      pobj      
or                                                CCONJ     cc        
disallowed                                        VERB      conj      
by                                                ADP       agent     
the                                               DET       det       
TCB                                               PROPN     pobj      
)                                                 PUNCT     punct     
and                                               CCONJ     cc        
detect                                            VERB      conj      
closed                                            ADJ       amod      
ports                                             NOUN      dobj      
via                                               ADP       prep      
RST                                               NOUN      compound  
packet

the                                               DET       det       
values                                            NOUN      compound  
parameters                                        NOUN      dobj      
without                                           ADP       prep      
properly                                          ADV       advmod    
validation                                        NOUN      pobj      
them                                              PRON      dobj      
first                                             ADV       advmod    
and                                               CCONJ     cc        
generates                                         VERB      conj      
the                                               DET       det       
HTML                                              NOUN      compound  
code                                              NOUN      dobj      
that                                              DET       nsubjpass 
will  

"                                                 PUNCT     punct     
virtual                                           ADJ       amod      
sale                                              NOUN      dobj      
"                                                 PUNCT     punct     
of                                                ADP       prep      
rare                                              ADJ       amod      
items                                             NOUN      pobj      
.                                                 PUNCT     punct     
As                                                SCONJ     mark      
other                                             ADJ       amod      
users                                             NOUN      nsubj     
enter                                             VERB      advcl     
the                                               DET       det       
event                                             NOUN      dobj      
,     

single                                            ADJ       amod      
card                                              NOUN      nsubj     
may                                               VERB      aux       
allow                                             VERB      ROOT      
access                                            NOUN      dobj      
to                                                ADP       prep      
a                                                 DET       det       
corporate                                         ADJ       amod      
office                                            NOUN      compound  
complex                                           NOUN      pobj      
shared                                            VERB      acl       
by                                                ADP       agent     
multiple                                          ADJ       amod      
companies                                         NOUN      pobj      
.     

of                                                ADP       prep      
the                                               DET       det       
relationship                                      NOUN      pobj      
between                                           ADP       prep      
the                                               DET       det       
adversary                                         NOUN      pobj      
and                                               CCONJ     cc        
themselves                                        PRON      conj      
.                                                 PUNCT     punct     
This                                              DET       det       
goal                                              NOUN      nsubj     
is                                                AUX       ROOT      
to                                                PART      aux       
persuade                                          VERB      xcomp     
the   

potentially                                       ADV       advmod    
replace                                           VERB      relcl     
,                                                 PUNCT     punct     
modify                                            VERB      conj      
or                                                CCONJ     cc        
insert                                            VERB      conj      
code                                              NOUN      compound  
files                                             NOUN      dobj      
containing                                        VERB      acl       
malicious                                         ADJ       amod      
logic                                             NOUN      dobj      
.                                                 PUNCT     punct     
If                                                SCONJ     mark      
an                                                DET       det       
advers

that                                              DET       nsubj     
uses                                              VERB      relcl     
a                                                 DET       det       
data                                              NOUN      compound  
leak                                              NOUN      dobj      
from                                              ADP       prep      
an                                                DET       det       
improperly                                        ADV       advmod    
implemented                                       VERB      amod      
decryption                                        NOUN      compound  
routine                                           NOUN      pobj      
to                                                PART      aux       
completely                                        ADV       advmod    
subvert                                           VERB      xcomp     
the   

or                                                CCONJ     cc        
datablock                                         NOUN      conj      
that                                              DET       nsubj     
causes                                            VERB      relcl     
the                                               DET       det       
recipient                                         NOUN      nsubj     
to                                                PART      aux       
believe                                           VERB      ccomp     
that                                              SCONJ     mark      
the                                               DET       det       
message                                           NOUN      nsubjpass 
or                                                CCONJ     cc        
datablock                                         NOUN      conj      
was                                               AUX       auxpass   
genera

,                                                 PUNCT     punct     
system                                            NOUN      conj      
or                                                CCONJ     cc        
network                                           NOUN      conj      
.                                                 PUNCT     punct     
Using                                             VERB      csubj     
this                                              DET       det       
knowledge                                         NOUN      dobj      
may                                               VERB      aux       
often                                             ADV       advmod    
pave                                              VERB      ROOT      
the                                               DET       det       
way                                               NOUN      dobj      
for                                               ADP       prep      
more  

victim                                            NOUN      compound  
manufacturer                                      NOUN      pobj      
with                                              ADP       prep      
the                                               DET       det       
intent                                            NOUN      pobj      
of                                                ADP       prep      
implanting                                        VERB      pcomp     
malware                                           NOUN      dobj      
allowing                                          NOUN      dobj      
for                                               ADP       prep      
attack                                            NOUN      compound  
control                                           NOUN      pobj      
of                                                ADP       prep      
the                                               DET       det       
victim

,                                                 PUNCT     punct     
X0                                                NOUN      appos     
=                                                 SYM       punct     
P.                                                PROPN     ROOT      
Then                                              ADV       advmod    
the                                               DET       det       
hash                                              NOUN      compound  
chain                                             NOUN      nsubjpass 
of                                                ADP       prep      
length                                            NOUN      pobj      
n                                                 NOUN      advmod    
for                                               ADP       prep      
the                                               DET       det       
original                                          ADJ       amod      
passwo

attacker                                          NOUN      nsubj     
identifies                                        VERB      ROOT      
a                                                 DET       det       
HTTP                                              PROPN     npadvmod  
Get                                               AUX       compound  
URL                                               NOUN      dobj      
such                                              ADJ       amod      
as                                                SCONJ     prep      
http://victimsite                                 PROPN     pobj      
/                                                 SYM       punct     
updateOrder                                       NOUN      appos     
,                                                 PUNCT     punct     
which                                             DET       nsubj     
calls                                             VERB      relcl     
out   

to                                                PART      aux       
use                                               VERB      xcomp     
no                                                DET       det       
encryption                                        NOUN      dobj      
(                                                 PUNCT     punct     
A5/0                                              PROPN     compound  
mode                                              NOUN      appos     
)                                                 PUNCT     punct     
or                                                CCONJ     cc        
to                                                PART      aux       
use                                               VERB      conj      
easily                                            ADV       advmod    
breakable                                         ADJ       amod      
encryption                                        NOUN      dobj      
(     

that                                              DET       nsubj     
impersonates                                      VERB      relcl     
a                                                 DET       det       
different                                         ADJ       amod      
entity                                            NOUN      dobj      
,                                                 PUNCT     punct     
and                                               CCONJ     cc        
then                                              ADV       advmod    
associates                                        VERB      conj      
a                                                 DET       det       
process                                           NOUN      nmod      
/                                                 SYM       punct     
thread                                            NOUN      dobj      
to                                                ADP       prep      
that  

via                                               ADP       prep      
exposed                                           VERB      amod      
configuration                                     NOUN      nmod      
and                                               CCONJ     cc        
properties                                        NOUN      conj      
files                                             NOUN      pobj      
that                                              DET       nsubj     
contain                                           VERB      relcl     
system                                            NOUN      compound  
passwords                                         NOUN      dobj      
,                                                 PUNCT     punct     
database                                          NOUN      compound  
connection                                        NOUN      compound  
strings                                           NOUN      conj      
,     

same                                              ADJ       amod      
character                                         NOUN      dobj      
which                                             DET       nsubj     
leads                                             VERB      relcl     
to                                                PART      aux       
filter                                            VERB      xcomp     
problems                                          NOUN      dobj      
and                                               CCONJ     cc        
opens                                             VERB      conj      
avenues                                           NOUN      dobj      
to                                                PART      aux       
attack                                            VERB      relcl     
.                                                 PUNCT     punct     

                                                 SPACE               
This  

In [11]:
# Tabulate the named entities spaCy recognised in `doc`:
# entity text in a 50-character column, entity label in a 10-character column.
dash = "-" * 80
header = "{:<50}{:<10}".format("TEXT", "ENTITY")
print(dash, header, dash, sep="\n")
for ent in doc.ents:
    # One row per entity span, left-aligned to match the header columns.
    print("{:<50}{:<10}".format(ent.text, ent.label_))

--------------------------------------------------------------------------------
TEXT                                              ENTITY    
--------------------------------------------------------------------------------
ACL                                               ORG       
Injection                                         PRODUCT   
Cross Site Scripting                              PRODUCT   
SSI                                               ORG       
one                                               CARDINAL  
UI                                                ORG       
UI                                                ORG       
clickjacked                                       PERSON    
UI                                                ORG       
UI                                                ORG       
ActiveX                                           ORG       
one                                               CARDINAL  
Restful Privilege Escalation                 

Cross-Site Scripting                              ORG       
XSS                                               ORG       
first                                             ORDINAL   
Flash                                             ORG       
Flash                                             PERSON    
Flash                                             PERSON    
Flash                                             PERSON    
Micro                                             ORG       
Micro                                             ORG       
SQL                                               ORG       
Cross-Site Scripting                              ORG       
XSS                                               ORG       
XSS                                               PRODUCT   
CAPEC                                             ORG       
1                                                 CARDINAL  
Flash                                             ORG       
Flash                   

In [12]:
# Syntax analysis: collect the distinct noun-chunk strings found in `doc`
# and pretty-print them as a set.
print("Noun phrases:")
noun_phrases = {chunk.text for chunk in doc.noun_chunks}
pp.pprint(noun_phrases)

Noun phrases:
{   '"Log Injection-Tampering-Forging',
    '"No such domain',
    '"Restful Privilege Escalation',
    '"ScRiPt',
    '"Script',
    '"deliveries',
    '"dsquery',
    '"fishing',
    '"flash" based malware or malicious logic',
    '"forgot password',
    '"javascript',
    '"myInput&new_param=myValue',
    '"net localgroup',
    '"net user',
    '"onload() / onerror()" javascript events',
    '"password',
    '"require" call',
    '"rides',
    '"script',
    '"tasklist/svc',
    '%',
    '%s',
    "' users",
    "'Ping' scanning",
    "'evil twin' attacks",
    "'guard' sensitive functionality",
    "'half-open' scanning",
    "'magstripe",
    "'spoofing' techniques",
    '(%HEX-CODE',
    '() function',
    '(8-bit UCS/Unicode Transformation Format',
    '(AS',
    '(CDN',
    '(DLL) injection',
    '(DOM',
    '(FQDNs',
    '(ISN',
    '(Integrated Development Environment',
    '(LAC',
    '(LM',
    '(NTLM) authentication protocols',
    '(Radio Frequency Identific

    'a communication protocol implementation',
    'a communications protocol',
    'a complete path',
    'a complete semantic quality',
    'a complex format',
    'a comprehensive port scan',
    'a compromised system',
    'a compromised web site',
    'a compromised website',
    'a computer',
    'a computer system',
    'a computer technician',
    'a computer virus',
    'a concealed iframe',
    'a configuration management system',
    'a configuration/environment manipulation attack',
    'a connection',
    'a connection reset packet',
    'a consequence',
    'a consistent case',
    'a controlled manner',
    'a conversation',
    'a copper coil',
    'a copy',
    'a corporate office complex',
    'a cost perspective',
    'a counterfeit activity',
    'a counterfeit component',
    'a counterfeit site',
    'a couple',
    'a coworker',
    'a crafted URL',
    'a crafted link',
    'a crash',
    'a credential',
    'a credential prompt',
    'a credit card company',
  

    'an unlocked position',
    'an unsafe way',
    'an unscrupulous backend application',
    'an unstable state',
    'an untrusted zone',
    'an update or replacement procedure',
    'an update request',
    'an update server',
    'analysis',
    'analysis techniques',
    'and/or dots',
    'and/or information',
    'anomalous packet traits',
    'another HTTP Request',
    'another attack pattern',
    'another client',
    'another individual',
    'another language',
    'another process',
    'another vector',
    'another website',
    'anti-theft devices',
    'anticipation',
    'any ASCII data',
    'any IMAP or POP client',
    'any RFID chip-based device',
    'any SSL certificates',
    'any SYN packets',
    'any TCP header options',
    'any TCP segment',
    'any TCP stack',
    'any TCP/IP device',
    'any TCP/IP stack',
    'any ability',
    'any account',
    'any action',
    'any active TCP service',
    'any active connection',
    'any application',
    'a

    'location and network information',
    'locations',
    'lock',
    'lock picking',
    'lock picking instrument',
    'log access',
    'log files',
    'logging pages',
    'logging services',
    'logic',
    'logical functionality',
    'login credentials',
    'logins',
    'logon actions',
    'logon scripts',
    'logs and error messages',
    'longer messages',
    'loss',
    'lots',
    'low-latency',
    'lower or non-privileged accounts',
    'macro-like structures',
    'magnetic strip cards',
    'magnetic strip encoding methods',
    'magstripe cards',
    'magstripe technology',
    'mail servers',
    'mail-server commands',
    'malformed input',
    'malicious Javascript',
    'malicious XML payloads',
    'malicious actions',
    'malicious activities',
    'malicious actors',
    'malicious characters',
    'malicious classes',
    'malicious code',
    'malicious commands',
    'malicious content',
    'malicious content injection',
    'malicious data',
    

    'state sponsorship',
    'state-dependent factors',
    'stateful firewalls',
    'stateless firewall',
    'statements',
    'static or dynamic analysis techniques',
    'static text',
    'stolen / spoofed authentication credentials',
    'stolen Kerberos tickets',
    'stolen credentials',
    'storage',
    'store item pricing',
    'store price',
    'stored input data',
    'stored procedures',
    'story',
    'strange or typically unused ports',
    'strategic purposes',
    'string parameter',
    'string vulnerabilities',
    'strings',
    'strong authorization',
    'strong passwords',
    'structure',
    'structured data',
    'studies',
    'sub-component',
    'subcomponents',
    'subconscious programming',
    'subscribers',
    'subsequent attacks',
    'subsequent lookups',
    'subsequent processing',
    'subsequent processing steps',
    'subsequent requests',
    'substantially fewer packets',
    'substitute one item',
    'subtle ways',
    'successful aut

    'the protocol',
    'the protocol level',
    'the protocols',
    'the provided authentication token',
    'the provided data',
    'the provided resources',
    'the published options',
    'the published web site',
    'the purpose',
    'the purposes',
    'the quality',
    'the quality control',
    'the quantity',
    'the query',
    'the query string',
    'the quoted text',
    'the race',
    'the rainbow table',
    'the rainbow tables',
    'the range',
    'the rate',
    'the raw audio source',
    'the reader',
    'the reading',
    'the real content type',
    'the receipt',
    'the receiver',
    'the receiving end',
    'the receiving service',
    'the receiving web browser',
    'the recent attacks',
    'the recipient',
    'the recipient software',
    "the recipient's checksum calculation",
    "the recipient's email address",
    'the reference',
    'the referenced file',
    'the reflection libraries',
    'the registered domain',
    'the registry',
  

In [13]:
# Distinct lemmas of every token in `doc` whose part-of-speech tag is VERB.
print("Verbs:")
verb_lemmas = {token.lemma_ for token in doc if token.pos_ == "VERB"}
pp.pprint(verb_lemmas)

Verbs:
{   '-',
    'DEBUG=1',
    'IPC$.',
    'MAY',
    'RFC-950',
    'abstract',
    'abuse',
    'accelerate',
    'accept',
    'access',
    'accomplish',
    'accord',
    'account',
    'accumulate',
    'acheive',
    'achieve',
    'acquire',
    'act',
    'activate',
    'adapt',
    'add',
    'address',
    'adopt',
    'affect',
    'agree',
    'aid',
    'aim',
    'alert',
    'allocate',
    'allow',
    'allowlist',
    'alter',
    'alternate',
    'analyze',
    'anchor',
    'anticipate',
    'appear',
    'append',
    'apply',
    'approve',
    'arise',
    'arm',
    'array',
    'arrive',
    'articulate',
    'ask',
    'assemble',
    'assign',
    'assist',
    'associate',
    'assume',
    'attach',
    'attack',
    'attacker',
    'attempt',
    'attend',
    'attibute',
    'augment',
    'authenticate',
    'authorize',
    'automate',
    'avoid',
    'back',
    'backtrack',
    'base',
    'bear',
    'become',
    'begin',
    'behave',
    'b

In [18]:
# Persist the description corpus to CAPEC.txt (the file later fed to AutoPhrase).
# The context manager closes the handle even if the write raises, and the
# explicit UTF-8 encoding keeps the output independent of the platform's
# default locale encoding, which may not be able to represent every character.
with open("CAPEC.txt", "w", encoding="utf-8") as text_file:
    text_file.write(corpus)