In [2]:
!pip install nfstream

Collecting nfstream
  Downloading nfstream-6.4.3-cp37-cp37m-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 15.0 MB/s 
Collecting dpkt>=1.9.7
  Downloading dpkt-1.9.7.2-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 60.1 MB/s 
[?25hCollecting psutil>=5.8.0
  Downloading psutil-5.9.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (280 kB)
[K     |████████████████████████████████| 280 kB 44.0 MB/s 
Installing collected packages: psutil, dpkt, nfstream
  Attempting uninstall: psutil
    Found existing installation: psutil 5.4.8
    Uninstalling psutil-5.4.8:
      Successfully uninstalled psutil-5.4.8
Successfully installed dpkt-1.9.7.2 nfstream-6.4.3 psutil-5.9.0


In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The cells below open captures via the relative path 'drive/MyDrive/...', so they
# only resolve while the working directory is /content (the parent of this mount point).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from nfstream import NFStreamer, NFPlugin
import pandas as pd
import pickle
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier

In [4]:
class Packet40Counter(NFPlugin):
  """NFStream plugin that counts, per flow, the packets whose IP size is exactly 40.

  The running count is kept in ``flow.udps.packet_with_40_ip_size`` and therefore
  shows up as an extra column when the streamer is exported with ``to_pandas()``.
  """

  def on_init(self, pkt, flow):
    """Seed the counter from the packet that creates the flow (1 on a match, else 0)."""
    flow.udps.packet_with_40_ip_size = 1 if pkt.ip_size == 40 else 0

  # The hook must be spelled exactly ``on_update``: NFStream only dispatches to the
  # hook names defined by NFPlugin, so the previous ``on_ipdate`` spelling was never
  # invoked and the counter stayed at the value assigned in on_init.
  def on_update(self, pkt, flow):
    """Increment the counter for every later packet of the flow with IP size 40."""
    if pkt.ip_size == 40:
      flow.udps.packet_with_40_ip_size += 1

# Flow columns of interest for the report (not referenced by the cells shown here).
col_list = [
    'src_ip', 'dst_ip',
    'bidirectional_packets', 'bidirectional_bytes',
    'application_name', 'application_category_name',
]

# Feature columns fed to the classifier by train_model / test_model.
train_col_list = [
    'bidirectional_packets',
    'bidirectional_bytes',
]

#1
def summary_data(filename) -> str:
  """Report whether a capture contains VPN traffic.

  Args:
    filename: path of the pcap file to parse with NFStreamer.

  Returns:
    If any flow has application_category_name equal to 'VPN': a markdown table of
    total bidirectional bytes per (application_name, application_category_name).
    Otherwise: a one-line message listing the application names that were seen.
  """
  flows = NFStreamer(filename, udps=Packet40Counter()).to_pandas()
  if flows['application_category_name'].eq('VPN').any():
    app_bytes = flows.groupby(['application_name', 'application_category_name'],
                              as_index=False, sort=True)['bidirectional_bytes'].sum()
    return f'Protocol and data for {filename}:\n' + f'{app_bytes.to_markdown()}'
  # The message used to be assembled as "only ," + ",".join(...), which produced a
  # stray comma right after "only"; join the names with ", " instead.
  seen_apps = ', '.join(map(str, flows['application_name'].unique()))
  return f'No VPN traffic detected in {filename}, only {seen_apps}'

#2
def print_info(filename) -> str:
  """Return a markdown table keyed by (src_ip, dst_ip, application_name).

  For every key the table shows the first observed packet count, byte count and
  application category among the flows parsed from ``filename``.
  """
  flows = NFStreamer(filename, udps=Packet40Counter()).to_pandas()
  group_keys = ['src_ip', 'dst_ip', 'application_name']
  shown_columns = ['bidirectional_packets', 'bidirectional_bytes', 'application_category_name']
  # aggfunc='first' keeps the first row's value of each column within a group.
  first_per_group = flows.pivot_table(index=group_keys, aggfunc='first')
  return first_per_group[shown_columns].to_markdown()

#3
def find_start_and_end_of_attaching(filename) -> str:
  """Return the capture's time span as two lines: 'Start date:...' and 'End date:...'.

  The start is the smallest bidirectional_first_seen_ms and the end the largest
  bidirectional_last_seen_ms over all flows; both are millisecond timestamps that
  are converted with datetime.fromtimestamp (i.e. rendered in local time).
  """
  flows = NFStreamer(filename, udps=Packet40Counter()).to_pandas()
  stamp_format = '%Y-%m-%d %H:%M:%S'
  started = datetime.fromtimestamp(flows['bidirectional_first_seen_ms'].min() / 1000.0)
  start_text = started.strftime(stamp_format)
  finished = datetime.fromtimestamp(flows['bidirectional_last_seen_ms'].max() / 1000.0)
  end_text = finished.strftime(stamp_format)
  return "Start date:" + start_text + "\nEnd date:" + end_text

#4
def print_useful_info(filename) -> str:
  """Return a markdown table of the median src2dst_duration_ms per (src_ip, application_name)."""
  flows = NFStreamer(filename, udps=Packet40Counter()).to_pandas()
  median_duration = flows.pivot_table(values='src2dst_duration_ms',
                                      index=['src_ip', 'application_name'],
                                      aggfunc='median')
  return median_duration.to_markdown()

# Demo run: print all four reports for a single capture.
# Each helper builds its own NFStreamer, so the pcap is parsed once per call (four times here).
if __name__ == '__main__':
  # Section header: "Checking for VPN traffic".
  print('Проверка наличия vpn трафика:\n')
  print(summary_data('drive/MyDrive/ipsec.pcap'))
  print('\n\n')
  # Section header: "Information by src_ip, dst_ip and application_name".
  print('Информация по src_ip, dst_ip и application_name:\n')
  print(print_info('drive/MyDrive/ipsec.pcap'))
  print('\n\n')
  # First/last timestamps seen in the capture.
  print(find_start_and_end_of_attaching('drive/MyDrive/ipsec.pcap'))
  print('\n\n')
  # Section header: "Useful information output" (median src->dst duration table).
  print('Вывод полезной информации:\n')
  print(print_useful_info('drive/MyDrive/ipsec.pcap'))


Проверка наличия vpn трафика:

Protocol and data for drive/MyDrive/ipsec.pcap:
|    | application_name   | application_category_name   |   bidirectional_bytes |
|---:|:-------------------|:----------------------------|----------------------:|
|  0 | DNS                | Network                     |                  3287 |
|  1 | DNS.Google         | Advertisement               |                 21613 |
|  2 | DNS.Google         | Cloud                       |                   890 |
|  3 | DNS.Google         | Web                         |                 55094 |
|  4 | DNS.GoogleServices | Web                         |                 11897 |
|  5 | DNS.YouTube        | Media                       |                 84112 |
|  6 | ICMP               | Network                     |                  1962 |
|  7 | IPsec              | VPN                         |              51865351 |
|  8 | LLMNR              | Network                     |                   888 |
|  9 | MDNS        

**Отчет**

In [5]:
# Captures (on the mounted Drive) that are scored by test_model and included in the report.
filenames = ['drive/MyDrive/' + stem + '.pcap'
             for stem in ('ipsec', 'ipsec2',
                          'wireguard', 'wireguard2',
                          'openvpn', 'openvpn2',
                          'novpn', 'wg&ovpn&novpn')]

#5
def train_model(fname, fmodel='model.pkl', random_state=None):
  """Fit a random-forest VPN / non-VPN flow classifier and pickle it to disk.

  Args:
    fname: path of the pcap file whose flows are used for training.
    fmodel: where the pickled model is written; defaults to 'model.pkl', the
      path that was previously hard-coded.
    random_state: forwarded to RandomForestClassifier; pass an int to make the
      fitted forest reproducible. The default (None) keeps the unseeded behaviour.

  Features are the columns named in train_col_list; a flow is labelled 1 when its
  application_category_name contains 'VPN', otherwise 0.
  """
  flows = NFStreamer(fname).to_pandas()
  X = flows[train_col_list]
  Y = flows["application_category_name"].apply(lambda x: 1 if 'VPN' in x else 0)
  model = RandomForestClassifier(random_state=random_state)
  model.fit(X, Y)
  with open(fmodel, 'wb') as file:
    pickle.dump(model, file)

def test_model(ftests, fmodel):
  """Print the accuracy of a pickled classifier on each capture in ``ftests``.

  Args:
    ftests: iterable of pcap paths to evaluate on.
    fmodel: path of the pickled model (as written by train_model).

  Features and labels are derived exactly as in train_model: train_col_list
  columns as inputs, and label 1 when application_category_name contains 'VPN'.
  """
  # Load the model once instead of re-reading the pickle for every capture.
  # NOTE: pickle.load can execute arbitrary code - only point fmodel at files
  # produced by this notebook.
  with open(fmodel, 'rb') as file:
    model = pickle.load(file)
  for ftest in ftests:
    flows = NFStreamer(ftest).to_pandas()
    Xtest = flows[train_col_list]
    Ytest = flows["application_category_name"].apply(lambda x: 1 if 'VPN' in x else 0)
    score = model.score(Xtest, Ytest)
    print('Точность модели на файле {fname}: {modelScore}'.format(fname=ftest, modelScore=score))

# Train on the mixed capture, then score the saved model on every capture in `filenames`.
# NOTE(review): `filenames` also contains the training capture ('wg&ovpn&novpn.pcap'),
# so the score printed for that file is measured on the training data itself.
train_model('drive/MyDrive/wg&ovpn&novpn.pcap')
test_model(filenames, 'model.pkl')
#/5

def create_report():
  """Write a markdown report covering every capture in ``filenames``.

  The report is written to drive/MyDrive/summary.md. For each capture it contains
  a horizontal rule, a title, and four sections: the VPN check, the per-endpoint
  table, the capture time span and the median-duration table.
  """
  # The report contains Cyrillic headings, so the encoding is fixed explicitly
  # instead of relying on the platform default.
  with open("drive/MyDrive/summary.md", 'w', encoding='utf-8') as md:
    for filename in filenames:
      md.write("-" * 20 + "\n\n")
      # ATX headings need a space after the '#' run; without it markdown renderers
      # following CommonMark show the line as plain text.
      md.write(f"# Отчёт {filename}\n\n")
      md.write("## Проверка наличия vpn трафика:\n\n" + summary_data(filename) + "\n\n")
      md.write("## Информация по src_ip, dst_ip и application_name:\n\n")
      md.write(print_info(filename) + "\n\n")
      md.write("## Начало и конец захвата трафика:\n\n")
      md.write(find_start_and_end_of_attaching(filename) + "\n\n")
      md.write("## Вывод полезной информации:\n\n")
      md.write(print_useful_info(filename) + "\n\n")

# Generate drive/MyDrive/summary.md for all captures listed in `filenames`.
create_report()

Точность модели на файле drive/MyDrive/ipsec.pcap: 1.0
Точность модели на файле drive/MyDrive/ipsec2.pcap: 1.0
Точность модели на файле drive/MyDrive/wireguard.pcap: 1.0
Точность модели на файле drive/MyDrive/wireguard2.pcap: 1.0
Точность модели на файле drive/MyDrive/openvpn.pcap: 1.0
Точность модели на файле drive/MyDrive/openvpn2.pcap: 0.9444444444444444
Точность модели на файле drive/MyDrive/novpn.pcap: 1.0
Точность модели на файле drive/MyDrive/wg&ovpn&novpn.pcap: 1.0
