<a href="https://colab.research.google.com/github/yochana4/gitfiles/blob/main/VRV_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

from google.colab import files

# Ask the user for the log file through Colab's upload widget.
print("Upload the log file (sample.log)...")
uploaded = files.upload()

# files.upload() returns a {filename: bytes} mapping.  When a file with the
# same name already exists in the runtime, Colab stores the new upload under
# a de-duplicated name (e.g. "sample (2).log"), so code that opens the
# original name would keep reading the stale copy.  Writing the uploaded
# bytes back under the returned name makes the newest upload the one on disk.
for uploaded_name, uploaded_bytes in uploaded.items():
    with open(uploaded_name, 'wb') as fresh_copy:
        fresh_copy.write(uploaded_bytes)


import re
import csv
from collections import Counter


# Input log to analyse and the CSV report to produce.
LOG_FILE = 'sample.log'
OUTPUT_FILE = 'log_analysis_results.csv'
# An IP is reported as suspicious once its failed-login count exceeds this.
FAILED_LOGIN_THRESHOLD = 10


# Running tallies filled in while the log is scanned.
ip_requests = Counter()    # requests seen per client IP
endpoints = Counter()      # hits per requested path
failed_logins = Counter()  # failed-login lines per client IP


# Dotted-quad address at the very start of a line.
ip_regex = r'^(\d+\.\d+\.\d+\.\d+)'
# Path that follows the HTTP method inside the quoted request line.
endpoint_regex = r'"[A-Z]+\s(\/[^\s]*)'
# A failed login is either an HTTP 401 status or an explicit
# "Invalid credentials" message.  The status alternative is anchored to the
# closing quote of the request line so that a bare "401" elsewhere on the
# line (for example a response size of 401 bytes) is not miscounted.
failed_login_regex = r'"\s+401\b|Invalid credentials'

# Scan the log once, tallying requests per IP, hits per endpoint and
# failed-login lines per IP.
# Compile the patterns once up front instead of looking them up per line.
ip_pattern = re.compile(ip_regex)
endpoint_pattern = re.compile(endpoint_regex)
failed_login_pattern = re.compile(failed_login_regex)

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and replace undecodable bytes rather than aborting the
# whole analysis on one malformed line.
with open(LOG_FILE, 'r', encoding='utf-8', errors='replace') as file:
    for line in file:
        # Extract IP address
        ip_match = ip_pattern.search(line)
        ip_address = ip_match.group(1) if ip_match else None
        if ip_address is not None:
            ip_requests[ip_address] += 1

        # Extract endpoint
        endpoint_match = endpoint_pattern.search(line)
        if endpoint_match:
            endpoints[endpoint_match.group(1)] += 1

        # Detect failed logins; a failure can only be attributed to a client
        # when the line actually starts with an IP address.
        if ip_address is not None and failed_login_pattern.search(line):
            failed_logins[ip_address] += 1


# Pick the endpoint with the highest hit count as a (path, hits) pair.
# most_common(1) returns an empty list when no line yielded an endpoint
# (e.g. an empty log), so fall back to a placeholder pair instead of raising
# IndexError; the reporting code indexes [0] and [1] on this value.
top_endpoints = endpoints.most_common(1)
most_accessed_endpoint = top_endpoints[0] if top_endpoints else ('N/A', 0)


# Keep only the IPs whose failed-login count strictly exceeds the threshold.
suspicious_ips = {ip: count for ip, count in failed_logins.items() if count > FAILED_LOGIN_THRESHOLD}

# --- Console report -------------------------------------------------------

# Section 1: request totals per client IP, busiest first.
print("\nRequests per IP:")
for ip, count in ip_requests.most_common():
    print(f"{ip:<20} {count}")

# Section 2: the single most requested path and its hit count.
print("\nMost Frequently Accessed Endpoint:")
print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

# Section 3: IPs over the failed-login threshold, or a notice when none are.
print("\nSuspicious Activity Detected:")
if not suspicious_ips:
    print("No suspicious activity detected.")
else:
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20} {count}")


# Persist the three report sections to a single CSV file; an empty row
# separates one section from the next.
with open(OUTPUT_FILE, 'w', newline='') as csvfile:
    report = csv.writer(csvfile)

    # Section 1: request totals per IP, busiest first.
    report.writerows([
        ['Requests per IP'],
        ['IP Address', 'Request Count'],
    ])
    report.writerows(ip_requests.most_common())

    # Section 2: the most accessed endpoint and its hit count.
    report.writerows([
        [],
        ['Most Accessed Endpoint'],
        ['Endpoint', 'Access Count'],
        [most_accessed_endpoint[0], most_accessed_endpoint[1]],
    ])

    # Section 3: IPs flagged for excessive failed logins (may be empty).
    report.writerows([
        [],
        ['Suspicious Activity'],
        ['IP Address', 'Failed Login Count'],
    ])
    report.writerows(suspicious_ips.items())

print(f"\nResults saved to {OUTPUT_FILE}")

# Provide the CSV file for download: hand the generated report to Colab so it
# is offered to the user's browser.
# NOTE(review): `files` is already imported at the top of this cell, so this
# second import is redundant but harmless.
from google.colab import files
files.download(OUTPUT_FILE)


Upload the log file (sample.log)...


Saving sample.log to sample (2).log

Requests per IP:
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>