In [15]:
import re
import csv
from collections import Counter

# --- Configuration -----------------------------------------------------------
# Input log to analyse and the CSV report to produce.
LOG_FILE = "sample.log"
OUTPUT_FILE_CSV = "VRV_analysis_results.csv"

# An IP is reported as suspicious only when its failed-login count is strictly
# greater than this value.
LOGIN_FAILED_THRESHOLD = 10

# Regex for one access-log line. The adjacent raw-string pieces are joined by
# the compiler into a single pattern string.
PATTERN = (
    r'(?P<ip_address>\S+) '                              # first whitespace-free token
    r'.* '                                               # anything up to the quoted request
    r'"(?P<method>\S+) (?P<endpoint>\S+) HTTP/\S+" '     # "METHOD /path HTTP/x.y"
    r'(?P<status>\d+)'                                   # numeric status code
)

# --- Aggregation state -------------------------------------------------------
ip_requests = Counter()        # total requests, keyed by IP address
endpoint_requests = Counter()  # total requests, keyed by endpoint path
failed_logins = Counter()      # failed login attempts, keyed by IP address

# Processing the log file
# Compile the pattern once, outside the per-line loop.
log_line_re = re.compile(PATTERN)

# Decode explicitly instead of relying on the platform default encoding, and
# replace undecodable bytes rather than raising: a single stray byte in one
# log line would otherwise abort the whole analysis with UnicodeDecodeError.
with open(LOG_FILE, "r", encoding="utf-8", errors="replace") as file:
    for line in file:
        match = log_line_re.match(line)
        if not match:
            # Lines that do not fit PATTERN are skipped and not counted.
            continue

        ip = match.group("ip_address")
        endpoint = match.group("endpoint")
        status = int(match.group("status"))

        # Task 1: Count requests per IP Address
        ip_requests[ip] += 1

        # Count requests per endpoint
        endpoint_requests[endpoint] += 1

        # Task 3: Count failed login attempts. A request counts as a failed
        # login when it returned status 401 or when the raw line contains the
        # text "Invalid credentials".
        if status == 401 or "Invalid credentials" in line:
            failed_logins[ip] += 1

# Task 2: Determine the most accessed endpoint
# most_common(1) yields a list holding the single (endpoint, count) pair with
# the highest count, or an empty list when nothing was parsed; in the latter
# case fall back to the ("None", 0) placeholder.
most_accessed_endpoint = next(iter(endpoint_requests.most_common(1)), ("None", 0))

# Task 4: results to CSV
# The report is a single CSV file made of three titled sections, each laid out
# as: title row, blank row, column-header row, data rows.
with open(OUTPUT_FILE_CSV, "w", newline="") as report:
    out = csv.writer(report)

    # Section 1: request totals per IP, highest count first.
    out.writerows([
        ["REQUESTS PER IP ADDRESS"],
        [],
        ["IP ADDRESS", "REQUEST COUNTS"],
    ])
    out.writerows(ip_requests.most_common())

    # Section 2: the single most accessed endpoint.
    out.writerows([
        [],
        ["MOST ACCESSED ENDPOINT"],
        [],
        ["ENDPOINT", "ACCESS COUNTS"],
        [most_accessed_endpoint[0], most_accessed_endpoint[1]],
    ])

    # Section 3: IPs whose failed-login count is strictly above the threshold.
    out.writerows([
        [],
        ["SUSPICIOUS ACTIVITY"],
        [],
        ["IP ADDRESS", "LOGIN FAILED COUNTS"],
    ])
    flagged = [
        (addr, failures)
        for addr, failures in failed_logins.items()
        if failures > LOGIN_FAILED_THRESHOLD
    ]
    suspicious_found = bool(flagged)
    # A placeholder row keeps the section non-empty when nothing was flagged.
    out.writerows(flagged if suspicious_found else [["None", "None"]])

# Console report: prints the same three sections that are written to the CSV.
print("************************************")
print("REQUESTS PER IP-ADDRESS:")
print("************************************")

print(f"{'IP Address':<20} {'count'}")
print("************************************")

# Highest request count first.
for ip, count in ip_requests.most_common():
    print(f"{ip:<20} {count}")

print("\n***********************************")
# The value shown here is an endpoint path, so the heading says "Endpoint"
# (it previously read "IP-ADDRESS").
print("\nMost Accessed Endpoint:")
print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

print("\n**********************************")
print("\nSuspicious Activity Detected:")
print(f"{'IP Address':<20} {'Failed Attempts'}")
# Only IPs whose failed-login count is strictly above the threshold are shown.
suspicious_found = False
for ip, count in failed_logins.items():
    if count > LOGIN_FAILED_THRESHOLD:
        print(f"{ip:<20} {count}")
        suspicious_found = True
if not suspicious_found:
    # One placeholder row, formatted like the header (20-wide column, then a
    # space) so the columns line up; used whether there were no failed logins
    # at all or none above the threshold, matching the CSV report.
    print(f"{'None':<20} {'None'}")

print("\n*********************************")


************************************
REQUESTS PER IP-ADDRESS:
************************************
IP Address           count
************************************
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

***********************************

Most Accessed IP-ADDRESS :
/login (Accessed 13 times)

**********************************

Suspicious Activity Detected:
IP Address           Failed Attempts
None                None

*********************************
