In [None]:
''' Notes
Part 1: Initial process to capture the processes for the first time is added to the node profiler/agent.
Part 2: 2nd part will run and look for new processes running on existing nodes.

* process_scanner_v2.py is the client side process monitor that checks for new processes for a node.
* v2 of agent program captures processes first time(complete code on windows_client_v2.py and linux_client_v2.py
* server_process_monitor.py is running on server, notifying changes on client nodes.

https://www.tecmint.com/command-line-tools-to-monitor-linux-performance/
https://www.redhat.com/sysadmin/process-management-htopv
https://superuser.com/questions/809989/is-there-a-way-to-copy-text-in-htop#:~:text=Run%20your%20processes%2C%20switch%20to,without%20releasing%20Ctrl%20%2B%20Shift).
https://www.tecmint.com/save-top-command-output-to-a-file/
https://unix.stackexchange.com/questions/106847/what-does-aux-mean-in-ps-aux
https://superuser.com/questions/1224502/linux-command-for-exporting-output-to-a-csv-file-in-separated-columns

windows:
https://superuser.com/questions/914782/how-do-you-list-all-processes-on-the-command-line-in-windows
https://www.ghacks.net/2018/07/06/save-all-windows-processes-to-a-text-file/#:~:text=Just%20tap%20on%20Start%2C%20type,and%20number%2C%20and%20memory%20usage.

top, htop, atop, ps, iotop (disk read/writes), glances
top -p PID

Linux capture: 
ps aux | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11}' > linux_processes.csv # all columns
ps aux | awk '{print $2","$8","$1","$4}' > linux_processes.csv # selected columns
Windows Capture:
tasklist /v /FO csv > win_processes.csv
Count running processes: ps axu | wc -l

'''

In [2]:
''' PART 1'''
import pandas as pd 

data_linux = pd.read_csv("linux_processes.csv") 
print('Linux capture')
data_linux.head()

Linux capture


Unnamed: 0,USER,PID,%CPU,%MEM,VSZ,RSS,TTY,STAT,START,TIME,COMMAND
0,root,1,0.0,0.5,225916,5788,?,Ss,13:09,0:06,/sbin/init
1,root,2,0.0,0.0,0,0,?,S,13:09,0:00,[kthreadd]
2,root,3,0.0,0.0,0,0,?,I<,13:09,0:00,[rcu_gp]
3,root,4,0.0,0.0,0,0,?,I<,13:09,0:00,[rcu_par_gp]
4,root,6,0.0,0.0,0,0,?,I<,13:09,0:00,[kworker/0:0H-kb]


In [3]:
data_win = pd.read_csv("win_processes.csv") 
print('Windows capture')
data_win.head()

Windows capture


Unnamed: 0,Image Name,PID,Session Name,Session#,Mem Usage,Status,User Name,CPU Time,Window Title
0,System Idle Process,0,Services,0,8 K,Unknown,NT AUTHORITY\SYSTEM,236:46:35,
1,System,4,Services,0,"11,756 K",Unknown,,0:32:58,
2,Registry,148,Services,0,"71,788 K",Unknown,,0:00:02,
3,smss.exe,740,Services,0,"1,044 K",Unknown,,0:00:00,
4,csrss.exe,416,Services,0,"5,528 K",Unknown,,0:00:02,


In [4]:
subset_linux = data_linux[["PID", "COMMAND", "USER", "%MEM"]] # how much memory is being used in percentage
# print(subset_linux.dtypes) # column data types
print('Total memory in use', subset_linux['%MEM'].sum(), '% from', len(subset_linux), 'running processes.')
print('Linux processes', len(subset_linux))
subset_linux.head()

Total memory in use 79.0 % from 234 running processes.
Linux processes 234


Unnamed: 0,PID,COMMAND,USER,%MEM
0,1,/sbin/init,root,0.5
1,2,[kthreadd],root,0.0
2,3,[rcu_gp],root,0.0
3,4,[rcu_par_gp],root,0.0
4,6,[kworker/0:0H-kb],root,0.0


In [5]:
pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning
subset_win = data_win[["PID", "Image Name", "User Name", "Mem Usage"]] # Memory usage in KB
subset_win['User Name'] = subset_win['User Name'].fillna('-') # Fill N/A usernames with -
print('Windows processes', len(subset_win))
subset_win.head()

Windows processes 322


Unnamed: 0,PID,Image Name,User Name,Mem Usage
0,0,System Idle Process,NT AUTHORITY\SYSTEM,8 K
1,4,System,-,"11,756 K"
2,148,Registry,-,"71,788 K"
3,740,smss.exe,-,"1,044 K"
4,416,csrss.exe,-,"5,528 K"


In [6]:
# convert mem usage value from tasklist values
def memory_percentage(value):
    v = int(value.rstrip(' K').replace(",", "")) # remove trailing ' K', comma, and convert to int value memory usage
    from psutil import virtual_memory
    mem = virtual_memory() # get total memory. # print('Memory:', mem) # print memory tuples
    total_mem_kb = mem.total / 1000 # Bytes to KB
    return round((v / total_mem_kb * 100), 4)
    
# print('% =', memory_percentage('10,940 K')) # Test function
for i in range(len(subset_win)): # update mem usage values from Bytes to %
    # print(subset_win['Mem Usage'].values[i])
    pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
    subset_win['Mem Usage'].values[i] = memory_percentage(subset_win['Mem Usage'].values[i])

# subset_win.dtypes
subset_win['Mem Usage'] = pd.to_numeric(subset_win['Mem Usage'])
subset_win = subset_win.sort_values(by=['Mem Usage'], ascending=False) # Decending order
print('Total memory in use', subset_win['Mem Usage'].sum(), '% from', len(subset_win), 'running processes.')

print('Windows processes', len(subset_win))
subset_win.head()
# subset_win.dtypes

Total memory in use 28.240099999999998 % from 322 running processes.
Windows processes 322


Unnamed: 0,PID,Image Name,User Name,Mem Usage
57,3832,Memory Compression,-,2.7282
262,18808,OneDrive.exe,CHEETAH\AdEeL,1.7598
201,16224,chrome.exe,CHEETAH\AdEeL,1.086
33,2484,bdservicehost.exe,-,0.8869
220,20896,chrome.exe,CHEETAH\AdEeL,0.7675


In [7]:
pd.set_option("display.max_rows", None, "display.max_columns", None) # show all data
print('Windows subset', len(subset_win))
subset_win

Windows subset 322


Unnamed: 0,PID,Image Name,User Name,Mem Usage
57,3832,Memory Compression,-,2.7282
262,18808,OneDrive.exe,CHEETAH\AdEeL,1.7598
201,16224,chrome.exe,CHEETAH\AdEeL,1.086
33,2484,bdservicehost.exe,-,0.8869
220,20896,chrome.exe,CHEETAH\AdEeL,0.7675
153,14068,explorer.exe,CHEETAH\AdEeL,0.6784
203,16664,chrome.exe,CHEETAH\AdEeL,0.6769
305,19756,powershell_ise.exe,CHEETAH\AdEeL,0.5119
265,20640,chrome.exe,CHEETAH\AdEeL,0.4856
206,17184,chrome.exe,CHEETAH\AdEeL,0.4811


In [8]:
subset_win = subset_win.groupby(["Image Name", "User Name"]).size().reset_index(name="Processes")
subset_win = subset_win.sort_values(by=['Processes'], ascending=False, ignore_index=True) # Decending order

# make strings of rows to add in the db
win_process_list = []
for i in range(len(subset_win)): # update mem usage values from Bytes to %
    pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
    l = subset_win.iloc[i,:].apply(str).values
    win_process_list.append(",".join(l))

print('Windows process count', len(subset_win))

subset_win

Windows process count 145


Unnamed: 0,Image Name,User Name,Processes
0,svchost.exe,-,91
1,chrome.exe,CHEETAH\AdEeL,25
2,RuntimeBroker.exe,CHEETAH\AdEeL,13
3,cmd.exe,CHEETAH\AdEeL,7
4,conhost.exe,CHEETAH\AdEeL,7
5,svchost.exe,CHEETAH\AdEeL,7
6,VirtualBoxVM.exe,CHEETAH\AdEeL,6
7,python.exe,CHEETAH\AdEeL,5
8,slack.exe,CHEETAH\AdEeL,5
9,bdservicehost.exe,-,3


In [9]:
pd.set_option("display.max_rows", None, "display.max_columns", None) # show all data
subset_linux = subset_linux.sort_values(by=['%MEM'], ascending=False) # Decending order

print('Linux subset', len(subset_linux))
subset_linux

Linux subset 234


Unnamed: 0,PID,COMMAND,USER,%MEM
157,1951,/usr/bin/gnome-shell,osboxes,24.4
195,2232,nautilus-desktop,osboxes,6.5
142,1777,/usr/lib/xorg/Xorg,osboxes,5.4
219,13464,/usr/bin/nautilus,osboxes,5.3
227,14258,/usr/lib/gnome-terminal/gnome-terminal-server,osboxes,3.4
103,1569,/usr/bin/gnome-shell,gdm,3.0
222,13532,/usr/lib/gvfs/gvfsd-smb-browse,osboxes,1.8
223,13540,/usr/lib/gvfs/gvfsd-smb,osboxes,1.8
48,274,/lib/systemd/systemd-journald,root,1.5
206,2794,update-notifier,osboxes,1.2


In [10]:
subset_linux = subset_linux.groupby(["COMMAND", "USER"]).size().reset_index(name="Processes")
subset_linux = subset_linux.sort_values(by=['Processes'], ascending=False, ignore_index=True) # Decending order

# make strings of rows to add in the db
linux_process_list = []
for i in range(len(subset_linux)): # update mem usage values from Bytes to %
    # print(subset_linux['%MEM'].values[i])
    pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
    l = subset_linux.iloc[i,:].apply(str).values
    linux_process_list.append(",".join(l))

print('Linux process count', len(subset_linux))
subset_linux

Linux process count 218


Unnamed: 0,COMMAND,USER,Processes
0,/usr/bin/VBoxClient,osboxes,8
1,[ext4-rsv-conver],root,3
2,/bin/sh,osboxes,2
3,/usr/bin/dbus-daemon,osboxes,2
4,/usr/bin/dbus-daemon,gdm,2
5,/usr/sbin/kerneloops,kernoops,2
6,gdm-session-worker,root,2
7,/usr/bin/python3,root,2
8,avahi-daemon:,avahi,2
9,(sd-pam),gdm,1


In [11]:
l = subset_linux.iloc[0,:].apply(str).values
s = ",".join(l)
print(s)

/usr/bin/VBoxClient,osboxes,8


In [12]:
subset_linux.dtypes

COMMAND      object
USER         object
Processes     int64
dtype: object

In [13]:
# Check versions
import sys
print(sys.executable)
print(sys.version)
print(sys.version_info)
print(pd.show_versions()) 

C:\Users\malik_aa\anaconda3\python.exe
3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
sys.version_info(major=3, minor=11, micro=5, releaselevel='final', serial=0)





INSTALLED VERSIONS
------------------
commit           : 0f437949513225922d851e9581723d82120684a6
python           : 3.11.5.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.22631
machine          : AMD64
processor        : Intel64 Family 6 Model 165 Stepping 5, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : English_United States.1252

pandas           : 2.0.3
numpy            : 1.24.3
pytz             : 2023.3.post1
dateutil         : 2.8.2
setuptools       : 68.0.0
pip              : 23.3.1
Cython           : None
pytest           : 7.4.0
hypothesis       : None
sphinx           : 5.0.2
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : 4.9.3
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 3.1.2
IPython          : 8.15.0
pandas_datareader: None
bs4              : 4.12.2
bottleneck       : 1.

In [14]:
''' Process Monitor PART 1 v2.0 Windows, Linux, Mac
    complete code on windows_client_v2.py
'''

import psutil, json
import pandas as pd
procs = {p.pid: p.info for p in psutil.process_iter(['pid', 'name', 'username', 'exe', 'cpu_percent', 'memory_percent', 'cpu_times', 'num_threads'])}
group_processes = {}
for name, group in pd.DataFrame.from_records(list(procs.values())).groupby('name'):
    group_processes[name] = list(group[['pid', 'exe', 'username', 'num_threads', 'cpu_percent', 'memory_percent', 'cpu_times']].to_dict(orient='index').values())
    
print('Done.\nTotal', len(group_processes), 'running.')
print(json.dumps(group_processes, indent=2))



Done.
Total 129 running.
{
  "": [
    {
      "pid": 204,
      "exe": "",
      "username": null,
      "num_threads": 0,
      "cpu_percent": 0.0,
      "memory_percent": 0.22474169376923858,
      "cpu_times": [
        0.0,
        0.0,
        0.0,
        0.0
      ]
    }
  ],
  "AGMService.exe": [
    {
      "pid": 5488,
      "exe": "C:\\Program Files (x86)\\Common Files\\Adobe\\AdobeGCClient\\AGMService.exe",
      "username": null,
      "num_threads": 4,
      "cpu_percent": 0.0,
      "memory_percent": 0.050605364264819985,
      "cpu_times": [
        0.5625,
        0.265625,
        0.0,
        0.0
      ]
    }
  ],
  "AggregatorHost.exe": [
    {
      "pid": 9756,
      "exe": "C:\\Windows\\System32\\AggregatorHost.exe",
      "username": null,
      "num_threads": 3,
      "cpu_percent": 0.0,
      "memory_percent": 0.024208317078204047,
      "cpu_times": [
        0.0,
        0.015625,
        0.0,
        0.0
      ]
    }
  ],
  "ApplicationFrameHost.exe": [

In [15]:
''' PART 2: look for new processes running on existing nodes. WINDOWS '''
import socket, os

# Get hostname and hostIP
hostname = socket.gethostname() # getting the hostname by socket.gethostname() method
#ip_address = socket.gethostbyname(hostname) # getting the IP address using socket.gethostbyname() method
print(f" HostName: {hostname}") # printing the hostname and ip_address
# print(f" IP Address: {ip_address}") 
# IP to connect outside
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip_address = s.getsockname()[0] 
print(f" HostIP: {ip_address}")
s.close()

# Capture current processes
import pandas as pd 
pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
print(' Removing old files...', end=' ')
os.system('del win_apps_list.txt, win_node_config.txt, win_open_ports.txt, win_processes.csv') # remove if file exists.
print('Done.\n Capturing currently running processes...', end=' ')
os.system('cmd /c "tasklist /v /FO csv > win_processes.csv"') # Capture running processes.
import pandas as pd 
data_win = pd.read_csv("win_processes.csv") 
subset_win = data_win[["PID", "Image Name", "User Name", "Mem Usage"]] # Memory usage in KB
subset_win['User Name'] = subset_win['User Name'].fillna('-') # Fill N/A usernames with -
# convert mem usage value from tasklist values
def memory_percentage(value):
    v = int(value.rstrip(' K').replace(",", "")) # remove trailing ' K', comma, and convert to int value memory usage
    from psutil import virtual_memory
    mem = virtual_memory() # get total memory. # print('Memory:', mem) # print memory tuples
    total_mem_kb = mem.total / 1000 # Bytes to KB
    return round((v / total_mem_kb * 100), 4)
    
# print('% =', memory_percentage('10,940 K')) # Test function
for i in range(len(subset_win)): # update mem usage values from Bytes to %
    # print(subset_win['Mem Usage'].values[i])
    subset_win['Mem Usage'].values[i] = memory_percentage(subset_win['Mem Usage'].values[i])

# subset_win.dtypes
subset_win['Mem Usage'] = pd.to_numeric(subset_win['Mem Usage'])
subset_win = subset_win.sort_values(by=['Mem Usage'], ascending=False) # Decending order
print('Done. \n ** Total memory in use', round(subset_win['Mem Usage'].sum(), 2), '% from', len(subset_win), 'running processes. **')

# Prepare new dataframe (process, user, count)
subset_win = subset_win.groupby(["Image Name", "User Name"]).size().reset_index(name="Processes")
subset_win = subset_win.sort_values(by=['Processes'], ascending=False, ignore_index=True) # Decending order
# make strings of rows to add in the db
win_process_list = []
for i in range(len(subset_win)): # update mem usage values from Bytes to %
    pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
    l = subset_win.iloc[i,:].apply(str).values
    win_process_list.append(",".join(l))
        
print(' Retrieving previously recorded processes...', end=' ')

# Database connection and existing process retrieval...
import couchdb2, json, os
os.environ["PATH"] += os.pathsep + 'C:/Graphviz/bin' # 'C:/Graphviz2.38/bin'
db_user, db_pass, db_host, db_port, db_name = 'admin', 'Samsung_1234', '192.168.0.113', '5984', 'cyvid_nodes'
server = couchdb2.Server("http://%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port))

if server.up(): # if server is up and ready
    if db_name in server: # already existing database # if db.exists():
        db = server[db_name]
    else: # create database if does not exist
        server.create(db_name) # create database
else: # exit if server not running
    print('Server not responding, exiting program!')
    import sys
    sys.exit()

found_document = {}
db_process_list = []
for doc in db:
    # if doc['HostIP']==ip_address: # db ip for test host is on different network so using hostname instead, use this later
    if doc['HostName']==hostname: # disable this and use above line later.
        print('Done.')
        # print('Done.\n Host ('+hostname+') profile found with IP '+ip_address)
        found_document = doc # keeping found document data in found_doc
        db_process_list = doc['ProcessList']

# Check lengths of database and new lists 
print(' Recorded processes in the database: '+str(len(db_process_list))+'\n Currently running processes: '+str(len(win_process_list)))

# Remove Processes count from the strings inside lists (process, user, count)
sub_db_process_list, sub_win_process_list=[], []
for i in db_process_list:
    x = i.split(',')
    sub_db_process_list.append(x[0] + ',' + x[1])
for i in win_process_list:
    x = i.split(',')
    sub_win_process_list.append(x[0] + ',' + x[1])
    
# Process discovery mode
process_discovery_mode = True

# find the items that new not in the db process list and newly discovered during process scan process.
import numpy as np
process_not_found = np.setdiff1d(sub_win_process_list, sub_db_process_list) # elements in win_process_list NOT in db_process_list
len(process_not_found)

# find the newly discovered process item details from win_process_list
matching = []
for item in process_not_found:
    # print(item)
    matching.extend([s for s in win_process_list if item in s])

if len(matching) > 0:
    print('\n'+'\033[1m'+' ** New discovered processes ('+str(len(matching))+') **\n', matching, '\033[0m') # print process details

    # insert process entry to the database if process discovery mode is off
    if process_discovery_mode:
        print('\n Inserting to database...', end=' ')
        found_document['ProcessList'].extend(matching) # extend the list with new elements
        db.put(found_document)
        print('Done.\n')
    else:
        print(' Process discovery mode is off, newly detected processes are not added to the database\n')

else:
    print('\n ** No new process found. **\n')


 HostName: Office223
 HostIP: 129.108.144.73
 Removing old files... Done.
 Capturing currently running processes... Done. 
 ** Total memory in use 33.85 % from 289 running processes. **
 Retrieving previously recorded processes... 

ConnectTimeout: HTTPConnectionPool(host='192.168.0.113', port=5984): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x000001DA1C4494D0>, 'Connection to 192.168.0.113 timed out. (connect timeout=None)'))

In [None]:
''' PART 2: look for new processes running on existing nodes. LINUX ''' 
import socket, os

# Get hostname and hostIP
hostname = socket.gethostname() # getting the hostname by socket.gethostname() method
#ip_address = socket.gethostbyname(hostname) # getting the IP address using socket.gethostbyname() method
print(f" HostName: {hostname}") # printing the hostname and ip_address
# print(f" IP Address: {ip_address}") 
# IP to connect outside
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip_address = s.getsockname()[0] 
print(f" HostIP: {ip_address}")
s.close()

# Capture current processes
import pandas as pd 
pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
print(' Removing old files...', end=' ')
from os import path
if path.exists("linux_apps_list.txt"): os.system('rm linux_apps_list.txt')
if path.exists("linux_node_config.txt"): os.system('rm linux_node_config.txt')
if path.exists("linux_open_ports.txt"): os.system('rm linux_open_ports.txt')
if path.exists("linux_processes.csv"): os.system('rm linux_processes.csv')

print('Done.\n Capturing system information...', end=' ')
os.system('ps aux | awk \'{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11}\' > linux_processes.csv') # Capture running processes.
print('Done.')

data_linux = pd.read_csv("linux_processes.csv")
subset_linux = data_linux[["PID", "COMMAND", "USER", "%MEM"]] # how much memory is being used in percentage
print(' ** Total memory in use', subset_linux['%MEM'].sum(), '% from', len(subset_linux), 'running processes. **')
pd.set_option("display.max_rows", None, "display.max_columns", None) # show all data
subset_linux = subset_linux.sort_values(by=['%MEM'], ascending=False) # Decending order
subset_linux = subset_linux.groupby(["COMMAND", "USER"]).size().reset_index(name="Processes")
subset_linux = subset_linux.sort_values(by=['Processes'], ascending=False, ignore_index=True) # Decending order

# make strings of rows to add in the db
linux_process_list = []
for i in range(len(subset_linux)): # update mem usage values from Bytes to %
    # print(subset_linux['%MEM'].values[i])
    pd.options.mode.chained_assignment = None  # default='warn' # disable overwrite warning 
    l = subset_linux.iloc[i,:].apply(str).values
    linux_process_list.append(",".join(l))


print(' Retrieving previously recorded processes...', end=' ')

# Database connection and existing process retrieval...
import couchdb2, json, os
os.environ["PATH"] += os.pathsep + 'C:/Graphviz/bin' # 'C:/Graphviz2.38/bin'
db_user, db_pass, db_host, db_port, db_name = 'admin', 'Samsung_1234', '192.168.0.113', '5984', 'cyvid_nodes'
server = couchdb2.Server("http://%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port))

if server.up(): # if server is up and ready
    if db_name in server: # already existing database # if db.exists():
        db = server[db_name]
    else: # create database if does not exist
        server.create(db_name) # create database
else: # exit if server not running
    print('Server not responding, exiting program!')
    import sys
    sys.exit()

found_document = {}
db_process_list = []
for doc in db:
    # if doc['HostIP']==ip_address: # db ip for test host is on different network so using hostname instead, use this later
    if doc['HostName']==hostname: # disable this and use above line later.
        print('Done.')
        #print('Done.\n Host ('+hostname+') profile found with IP '+ip_address)
        found_document = doc # keeping found document data in found_doc
        db_process_list = doc['ProcessList']

# Check lengths of database and new lists 
print(' Recorded processes in the database: '+str(len(db_process_list))+'\n Currently running processes: '+str(len(linux_process_list)))

# Remove Processes count from the strings inside lists (process, user, count)
sub_db_process_list, sub_linux_process_list=[], []
for i in db_process_list:
    x = i.split(',')
    sub_db_process_list.append(x[0] + ',' + x[1])
for i in linux_process_list:
    x = i.split(',')
    sub_linux_process_list.append(x[0] + ',' + x[1])
    
# Process discovery mode
process_discovery_mode = True

# find the items that new not in the db process list and newly discovered during process scan process.
import numpy as np
process_not_found = np.setdiff1d(sub_linux_process_list, sub_db_process_list) # elements in linux_process_list NOT in db_process_list
len(process_not_found)

# find the newly discovered process item details from linux_process_list
matching = []
for item in process_not_found:
    # print(item)
    matching.extend([s for s in linux_process_list if item in s])

if len(matching) > 0:
    print('\n'+'\033[1m'+' ** New discovered processes ('+str(len(matching))+') **\n', matching, '\033[0m') # print process details

    # insert process entry to the database if process discovery mode is off
    if process_discovery_mode:
        print('\n Inserting to database...', end=' ')
        found_document['ProcessList'].extend(matching) # extend the list with new elements
        db.put(found_document)
        print('Done.\n')
    else:
        print(' Process discovery mode is off, newly detected processes are not added to the database\n')

else:
    print('\n ** No new process found. **\n')


In [None]:
''' Process Monitor PART 2 v2.0 Windows, Linux, Mac '''
# Capture currently running processes and match with recorded process, highlight and add newly found processes.
# https://pypi.org/project/psutil/

import socket, os, psutil, json

# Get hostname and hostIP
hostname = socket.gethostname() # getting the hostname by socket.gethostname() method
#ip_address = socket.gethostbyname(hostname) # getting the IP address using socket.gethostbyname() method
print('HostName:', hostname) # printing the hostname and ip_address
# print(f" IP Address: {ip_address}") 
# IP to connect outside
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip_address = s.getsockname()[0] 
print('HostIP:', ip_address)
s.close()

# Capture current processes...
import pandas as pd 
print(' Curently running processes...', end=' ')
current_processes, node_document, db_processes, new_processes = {}, {}, {}, {}

procs = {p.pid: p.info for p in psutil.process_iter(['pid', 'name', 'username', 'exe', 'cpu_percent', 'memory_percent', 'cpu_times', 'num_threads'])}
for name, group in pd.DataFrame.from_records(list(procs.values())).groupby('name'):
    current_processes[name] = list(group[['pid', 'exe', 'username', 'num_threads', 'cpu_percent', 'memory_percent', 'cpu_times']].to_dict(orient='index').values())

print(len(current_processes))
print(' Previously recorded processes...', end=' ')

# Database connection and existing process retrieval...
import couchdb2, json, os
os.environ["PATH"] += os.pathsep + 'C:/Graphviz/bin' # 'C:/Graphviz2.38/bin'
db_user, db_pass, db_host, db_port, db_name = 'admin', 'Samsung_1234', '192.168.0.113', '5984', 'cyvid_nodes'
server = couchdb2.Server("http://%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port))

if server.up(): # if server is up and ready
    if db_name in server: # already existing database # if db.exists():
        db = server[db_name]
    else: # create database if does not exist
        server.create(db_name) # create database
else: # exit if server not running
    print('Server not responding, exiting program!')
    import sys
    sys.exit()

for doc in db:
    # if doc['HostIP']==ip_address: # db ip for test host is on different network so using hostname instead, use this later
    if doc['HostName']==hostname: # disable this and use above line later.
        #print('Done.\n Host ('+hostname+') profile found with IP '+ip_address)
        node_document = doc # keeping found document data in found_doc
        try:
            db_processes = doc['RunningProcesses']
            new_processes = db_processes
        except KeyError:
            db_processes, new_processes = {}, {}

# Check lengths of database and new lists 
print(len(db_processes))

# Process discovery mode
process_discovery_mode, search_key_found = True, False
found_processes, found_apps = 0, 0

# Match newly discovered processes with previously recorded processes...
print(' Finding the difference...\n')
for i in current_processes:
    if i in db_processes: # find app name if present in db
        # print('key', i, 'present with', len(current_processes[i]), 'values currently, and', len(db_processes[i]), 'in db')
        for item_a in current_processes[i]: # looping through keys in the current process item
            search_key_found = False # force false for new key
            for item_b in db_processes[i]: # looping through keys in the db process item
                if item_a['pid'] == item_b['pid']:
                    # print('PID', item_a['pid'], 'found in db')
                    search_key_found = True
                    break
            if not search_key_found: # if key (PID) is not found
                print('\033[1m'+' ** New PID', item_a['pid'], '\033[0m'+'found under the application'+'\033[1m', i, '\033[0m'+'not recorded previously')
                found_processes += 1
                new_processes[i].append(item_a) # add PID

    else: # Key not found, a new process is found, log it.
        print('\033[1m'+' ## New application', i, '\033[0m'+'found (not recorded previously) with', len(current_processes[i]), 'processes:')
        doc = {i: current_processes[i]}
        print(json.dumps(doc, indent=2))
        found_apps += 1
        found_processes += len(current_processes[i])
        new_processes.update(doc) # remove [] from list, convert to dict, and add item

print('\033[1m'+'\n Found '+str(found_apps)+' new application(s) and '+str(found_processes)+' new process(es) among the '+str(len(new_processes))+' new discoveries'+'\033[0m')

if found_apps != 0 or found_processes != 0:
    if process_discovery_mode:
        print(' Saving new discoveries to database...', end=' ')
        # Update the node document
        node_document = {
                            '_id': node_document['_id'],
                            '_rev': node_document['_rev'],
                            'HostName': node_document['HostName'],
                            'HostIP': node_document['HostIP'],
                            'HostGateway': node_document['HostGateway'],
                            'HostOS': node_document['HostOS'],
                            'applications': node_document['applications'],
                            'DeviceType': node_document['DeviceType'],
                            'ControlPolicy': node_document['ControlPolicy'],
                            'AdversarialPolicy': node_document['AdversarialPolicy'],
                            'OpenPorts': node_document['OpenPorts'],
                            'ServicesProvided': node_document['ServicesProvided'],
                            'ServicesReceived': node_document['ServicesReceived'],
                            'RunningProcesses': new_processes    
                        }
        db.put(node_document)
        print('Done.')

In [None]:
# Serverside process monitor...

# Database connection and existing process retrieval...
import couchdb2, json, os, schedule, time, os
os.environ["PATH"] += os.pathsep + 'C:/Graphviz/bin' # 'C:/Graphviz2.38/bin'
db_user, db_pass, db_host, db_port, db_name = 'admin', 'Samsung_1234', '192.168.0.113', '5984', 'cyvid_nodes'
server = couchdb2.Server("http://%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port))
master_doc = {} # document that will contain nodes data


if server.up(): # if server is up and ready
    if db_name in server: # already existing database # if db.exists():
        db = server[db_name]
    else: # create database if does not exist
        server.create(db_name) # create database
else: # exit if server not running
    print('Server not responding, exiting program!')
    import sys
    sys.exit()

def load_db_master():
    t = time.localtime()
    current_time = time.strftime("%H:%M:%S", t)
    print('\n'+current_time, 'Fetching process data...')
    db = server[db_name]
    for doc in db:
        app_list, users = [], []
        try: # loading process list only for all nodes
            # print(doc['HostName'], 'processes in db\t\t', len(doc['RunningProcesses']))
            for key in doc['RunningProcesses']:
                app_list.append(key)
                for j in doc['RunningProcesses'][key]:
                    if j['username'] not in users:
                        users.append(j['username'])
            print('\n', doc['HostName']+':', len(doc['RunningProcesses']), 'processes running,\n', 'users:', users)
#             for item in users:
#                 print(item, end=' ')
            master_doc[doc['HostName']] = app_list
            # print(json.dumps(master_doc, indent=2))
            
        except KeyError:
            # print(doc['HostName'], 'processes in db \t 0')
            master_doc[doc['HostName']] = [] # add empty list when nothing found
    print()

# Find difference of twi lists
def Diff(li1, li2):
    return (list(list(set(li1)-set(li2)) + list(set(li2)-set(li1))))

def check_for_changes():
    t = time.localtime()
    current_time = time.strftime("%H:%M:%S", t)
    print(current_time, 'Looking for changes...', end=' ')
    found_change = False
    for doc2 in db:
        app_list = []
        try:
            for key in doc2['RunningProcesses']:
                app_list.append(key)
            # find changes in previous and current
            changes = Diff(master_doc[doc2['HostName']], app_list)
            
            if len(changes) > 0:
                found_change = True
                # '\033[1m'+' ** New PID', item_a['pid'], '\033[0m'
                print('\033[1m'+'** Changes found **', '\033[0m', '\n')
                print(doc2['HostName']+':', 'Previous', str(len(master_doc[doc2['HostName']]))+',', 'Current', 
                  str(len(app_list))+',', 'New', str(len(changes))+'.')
                print('New:', '\033[1m', changes, '\033[0m')
                
                master_doc.clear() # reset master doc to update it
                load_db_master() # reload new master doc, not to repeat changes each time
            
        except KeyError:
            # print('KeyError', doc2['HostName'])
            continue
    if not found_change: print('No change.')


t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print('\nScheduler started at', current_time)

load_db_master() 
# print(json.dumps(master_doc, indent=2))

check_for_changes()
# Every n minutes 
schedule.every(5).minutes.do(check_for_changes)

while True: 
    # Checks whether a scheduled task is pending to run or not 
    schedule.run_pending()
    time.sleep(1)

In [None]:
import couchdb2, json, os, schedule, time, os
os.environ["PATH"] += os.pathsep + 'C:/Graphviz/bin' # 'C:/Graphviz2.38/bin'
db_user, db_pass, db_host, db_port, db_name = 'admin', 'Samsung_1234', '192.168.0.113', '5984', 'cyvid_nodes'
server = couchdb2.Server("http://%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port))


if server.up(): # if server is up and ready
    if db_name in server: # already existing database # if db.exists():
        db = server[db_name]
    else: # create database if does not exist
        server.create(db_name) # create database
else: # exit if server not running
    print('Server not responding, exiting program!')
    import sys
    sys.exit()

db = server[db_name]
users = []
for node in db:
    users.clear()
    try:
        # print(len(node['RunningProcesses']), 'process in', node['HostName'])
        for i in node['RunningProcesses']:
            # print (i) # not needed for now
            for j in node['RunningProcesses'][i]:
                # print(j['username'])
                if j['username'] not in users:
                    users.append(j['username'])
        print('\nNode', node['HostName'], 'running', len(node['RunningProcesses']), 'processes.')
        print('Process users:', users)
    except KeyError:
        continue

In [16]:
for item in users:
    print(item, end=', ')


NameError: name 'users' is not defined

In [17]:
# difference of two lists
def Diff(li1, li2):
    return (list(list(set(li1)-set(li2)) + list(set(li2)-set(li1))))

a = ['adeel', 'nazia', 'hareem', 'malik']
b = ['adeel', 'malik']
Diff(a, b)

['nazia', 'hareem']

In [32]:
# Adding dictionary in dictionary
master_doc = {}

doc1 = {
    "_id": "111111",
    'name': 'test 1', 
    'level': 1
}
doc2 = {
    "_id": "222222",
    'name': 'test 2', 
    'level': 2
}

master_doc[doc1['_id']] = doc1
master_doc[doc2['_id']] = doc2
print(master_doc.get('222222')) # get key values from dictionary 

master_doc

{'_id': '222222', 'name': 'test 2', 'level': 2}


{'111111': {'_id': '111111', 'name': 'test 1', 'level': 1},
 '222222': {'_id': '222222', 'name': 'test 2', 'level': 2}}

In [35]:
doc3 = {
    'name': 'test 3', 
    'level': 3
}
# master_doc[doc1['_id'].update(doc3)
# for key,value in master_doc.items():
#     if key == '111111':
#          print(value)

if "111111" in master_doc:
    master_doc["111111"].update(doc3)

len(master_doc['111111'])

3