In [1]:
import pandas as pd
import yaml
import requests

## Define assumptions

In [2]:
# Platforms to keep in the analysis; set to None to keep every platform.
my_platforms = [
    'Windows',
    'Linux',
    'Network',
]
# ATT&CK technique IDs to keep in the analysis; set to None to keep every technique.
my_techniques = [
    'T1204',
    'T1053',
    'T1543',
    'T1134',
    'T1112',
    'T1110',
    'T1003',
    'T1135',
    'T1046',
]

## Download and load the MITRE ATT&CK data sources mapping

In [3]:
# Techniques -> data components mapping published in the
# mitre-attack/attack-datasources repository (YAML file).
url = "https://raw.githubusercontent.com/mitre-attack/attack-datasources/main/docs/techniques_to_components_mapping.yaml"
# Bound the wait so a stalled connection cannot hang the kernel forever.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page as YAML.
response.raise_for_status()
# safe_load only builds plain Python objects, which is what we want for remote content.
mapping = yaml.safe_load(response.content)
df = pd.DataFrame(mapping)

In [4]:
# Peek at the first three rows of the freshly loaded mapping.
df.head(n=3)

Unnamed: 0,technique_id,x_mitre_is_subtechnique,technique,tactic,platform,data_source,data_component
0,T1553.006,True,Code Signing Policy Modification,[defense-evasion],"[Windows, macOS]",windows registry,windows registry key modification
1,T1553.006,True,Code Signing Policy Modification,[defense-evasion],"[Windows, macOS]",command,command execution
2,T1553.006,True,Code Signing Policy Modification,[defense-evasion],"[Windows, macOS]",process,process creation


## Do some pandas transformations

In [5]:
# Each row's 'platform' cell holds a list of platforms; emit one row per
# platform so rows can be filtered and counted per platform. The original
# index labels are repeated on the resulting rows.
df = df.explode(column='platform')

In [6]:
# Number of mapping rows per platform, largest first.
(
    df.groupby(['platform'])
    .size()
    .to_frame(name='size')
    .sort_values(by=['size'], ascending=False)
)

Unnamed: 0_level_0,size
platform,Unnamed: 1_level_1
Windows,1296
macOS,917
Linux,879
IaaS,213
Office 365,176
Containers,132
Google Workspace,121
SaaS,112
Azure AD,100
PRE,89


In [7]:
# Keep only the platforms listed in my_platforms; None or an empty list
# leaves df unfiltered.
if my_platforms:
    # Select rows with a boolean mask. The earlier where(...).dropna() form
    # first blanked non-matching rows to NaN and then dropped every row with
    # any NaN, which also discarded matching rows that had a missing value in
    # another column and upcast column dtypes along the way.
    df = df.loc[df["platform"].isin(my_platforms)]

In [8]:
# Keep only the techniques listed in my_techniques; None or an empty list
# leaves df unfiltered. Matching is exact on technique_id, so a sub-technique
# ID (the dotted form) is kept only if it is listed explicitly.
if my_techniques:
    # Select rows with a boolean mask. The earlier where(...).dropna() form
    # first blanked non-matching rows to NaN and then dropped every row with
    # any NaN, which also discarded matching rows that had a missing value in
    # another column and upcast column dtypes along the way.
    df = df.loc[df['technique_id'].isin(my_techniques)]

In [9]:
# Peek at the first five rows that survived the filters.
df.head(n=5)

Unnamed: 0,technique_id,x_mitre_is_subtechnique,technique,tactic,platform,data_source,data_component
1094,T1543,False,Create or Modify System Process,"[persistence, privilege-escalation]",Windows,service,service creation
1094,T1543,False,Create or Modify System Process,"[persistence, privilege-escalation]",Linux,service,service creation
1095,T1543,False,Create or Modify System Process,"[persistence, privilege-escalation]",Windows,service,service modification
1095,T1543,False,Create or Modify System Process,"[persistence, privilege-escalation]",Linux,service,service modification
1096,T1543,False,Create or Modify System Process,"[persistence, privilege-escalation]",Windows,process,process creation


In [10]:
# Technique x data-component matrix. Each cell counts the mapping rows for
# that pair; pairs with no rows are filled with 0.
df_pivoted = df.pivot_table(
    values="technique_id",
    index="technique",
    columns="data_component",
    aggfunc='count',
    fill_value=0,
)

## Results 🤓

In [11]:
def highlight_covered(count):
    """Return the CSS for one matrix cell: red when count > 0, white otherwise."""
    return "background-color: red" if count > 0 else "background-color: white"


styler = df_pivoted.style
# Styler.applymap was renamed to Styler.map in pandas 2.1 and the old name is
# deprecated; use the new name when it exists and fall back on older pandas.
(styler.map if hasattr(styler, "map") else styler.applymap)(highlight_covered)

data_component,active directory object access,active directory object modification,application log content,cloud service enumeration,command execution,container creation,container start,file access,file creation,file modification,image creation,instance creation,instance start,network connection creation,network traffic content,network traffic flow,os api execution,process access,process creation,process metadata,scheduled job creation,service creation,service modification,user account authentication,user account metadata,windows registry key access,windows registry key creation,windows registry key deletion,windows registry key modification
technique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
Access Token Manipulation,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0
Brute Force,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0
Create or Modify System Process,0,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,2,0,2,0,0,2,2,0,0,0,2,0,2
Modify Registry,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,1
Network Service Scanning,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
Network Share Discovery,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0
OS Credential Dumping,2,0,0,0,2,0,0,2,0,0,0,0,0,0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0
Scheduled Task/Job,0,0,0,0,2,2,0,0,2,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0
User Execution,0,0,2,0,2,2,2,0,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0


In [12]:
# Sum each data-component column over all techniques and show the ten
# components with the highest totals.
(
    df_pivoted
    .sum(axis=0)
    .to_frame(name='size')
    .sort_values(by=['size'], ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,size
data_component,Unnamed: 1_level_1
command execution,14
process creation,12
os api execution,8
file creation,6
network traffic content,4
network traffic flow,4
application log content,4
container creation,4
file modification,4
windows registry key creation,3
