# Parse HTTPParams

This notebook parses dataset with requests from https://github.com/Morzeux/HttpParamsDataset and saves it as JSON.

In [1]:
import pandas as pd

In [2]:
train_df = pd.read_csv('payload_train.csv')

In [3]:
train_df

Unnamed: 0,payload,length,attack_type,label
0,c/ caridad s/n,14,norm,norm
1,"campello, el",12,norm,norm
2,1442431887503330,16,norm,norm
3,nue37,5,norm,norm
4,tufts3@joll.rs,14,norm,norm
...,...,...,...,...
20707,"<!--#exec cmd=""/bin/echo '<script src'""--><!--...",110,xss,anom
20708,"<head><meta http-equiv=""content-type"" content=...",127,xss,anom
20709,"<script a="">"" id=xss src=""http://ha.ckers.org/...",63,xss,anom
20710,"<script ""a='>'"" id=xss src=""http://ha.ckers.or...",65,xss,anom


In [4]:
train = train_df[['payload', 'attack_type']]

In [5]:
train

Unnamed: 0,payload,attack_type
0,c/ caridad s/n,norm
1,"campello, el",norm
2,1442431887503330,norm
3,nue37,norm
4,tufts3@joll.rs,norm
...,...,...
20707,"<!--#exec cmd=""/bin/echo '<script src'""--><!--...",xss
20708,"<head><meta http-equiv=""content-type"" content=...",xss
20709,"<script a="">"" id=xss src=""http://ha.ckers.org/...",xss
20710,"<script ""a='>'"" id=xss src=""http://ha.ckers.or...",xss


In [6]:
test_df = pd.read_csv('payload_test.csv')

In [7]:
test_df

Unnamed: 0,payload,length,attack_type,label
0,40184,5,norm,norm
1,nuda drudes,11,norm,norm
2,"c/ del ferrocarril, 152,",24,norm,norm
3,2070765320009143,16,norm,norm
4,1902,4,norm,norm
...,...,...,...,...
10350,"geturl(""javascript:alert('xss')"")",33,xss,anom
10351,"<xml id=xss src=""http://ha.ckers.org/xsstest.x...",61,xss,anom
10352,<? echo('<scr)';,16,xss,anom
10353,"<script a="">"" '' id=xss src=""http://ha.ckers.o...",66,xss,anom


In [8]:
test = test_df[['payload', 'attack_type']]

In [9]:
full_df = pd.concat([train, test])

In [10]:
full_df

Unnamed: 0,payload,attack_type
0,c/ caridad s/n,norm
1,"campello, el",norm
2,1442431887503330,norm
3,nue37,norm
4,tufts3@joll.rs,norm
...,...,...
10350,"geturl(""javascript:alert('xss')"")",xss
10351,"<xml id=xss src=""http://ha.ckers.org/xsstest.x...",xss
10352,<? echo('<scr)';,xss
10353,"<script a="">"" '' id=xss src=""http://ha.ckers.o...",xss


In [11]:
full_df.to_json('HTTPParams.json', 'records')

In [12]:
import json

In [13]:
with open('HTTPParams.json', 'r') as f:
    custom = json.load(f)

In [14]:
custom

[{'payload': 'c/ caridad s/n', 'attack_type': 'norm'},
 {'payload': 'campello, el', 'attack_type': 'norm'},
 {'payload': '1442431887503330', 'attack_type': 'norm'},
 {'payload': 'nue37', 'attack_type': 'norm'},
 {'payload': 'tufts3@joll.rs', 'attack_type': 'norm'},
 {'payload': '22997112x', 'attack_type': 'norm'},
 {'payload': 'arenas de san juan', 'attack_type': 'norm'},
 {'payload': '19245', 'attack_type': 'norm'},
 {'payload': 'fennell', 'attack_type': 'norm'},
 {'payload': 'd50allecido', 'attack_type': 'norm'},
 {'payload': 'genny', 'attack_type': 'norm'},
 {'payload': '03248i367ca', 'attack_type': 'norm'},
 {'payload': 'grubel8@albeiteria.kw', 'attack_type': 'norm'},
 {'payload': '83497200r', 'attack_type': 'norm'},
 {'payload': 'martn de yeltes', 'attack_type': 'norm'},
 {'payload': '1769471856078209', 'attack_type': 'norm'},
 {'payload': 'cascabela', 'attack_type': 'norm'},
 {'payload': 'ludolfo', 'attack_type': 'norm'},
 {'payload': 'snerd@bwds.tj', 'attack_type': 'norm'},
 {'p