**Log File Analysis in Python**

**Overview**
This notebook demonstrates log file analysis using Python.
It includes the following steps:
1. Counting requests per IP address.
2. Identifying the most frequently accessed endpoint.
3. Detecting suspicious activity based on failed login attempts.
4. Saving results to a CSV file.


In [2]:
from collections import Counter
from typing import List, Tuple, Dict
import csv

**1. Counting Requests Per IP Address**

In [3]:
def count_requests_per_ip(file_path: str) -> List[Tuple[str, int]]:
    """
    Counts the number of requests per IP address.

    The IP is taken to be the first whitespace-delimited field of each line.
    Blank or whitespace-only lines are skipped so they are not tallied as a
    bogus "IP" entry, and a line consisting of only an IP does not keep its
    trailing newline as part of the key.

    :param file_path: Path to the log file.
    :return: A list of (IP address, request count), sorted by request count
        in descending order.
    """
    ip_counter = Counter()

    with open(file_path, 'r') as file:
        for line in file:
            # split() with no separator drops surrounding whitespace (including
            # the newline); maxsplit=1 because only the first field is needed.
            fields = line.split(None, 1)
            if not fields:
                continue  # Blank line: nothing to count
            ip_counter[fields[0]] += 1

    # Return sorted by request count (descending)
    return ip_counter.most_common()

**2. Finding the Most Frequently Accessed Endpoint**

In [4]:
def find_most_frequent_endpoint(file_path: str) -> Tuple[str, int]:
    """
    Determines which endpoint appears most often in the log.

    The endpoint is read from the seventh single-space-separated field of
    each line; lines with fewer fields are treated as malformed and ignored.

    :param file_path: Path to the log file.
    :return: (endpoint, access count), or ("No Endpoint", 0) when no line
        yields an endpoint.
    """
    with open(file_path, 'r') as log:
        split_lines = (entry.split(' ') for entry in log)
        # Only lines with at least seven fields carry an endpoint at index 6
        hits = Counter(parts[6] for parts in split_lines if len(parts) > 6)

    # Nothing parsed: fall back to the placeholder result
    if not hits:
        return ("No Endpoint", 0)

    top = hits.most_common(1)
    return top[0]


**3. Detecting Suspicious Activity**

In [5]:
def detect_suspicious_activity(file_path: str, threshold: int = 10) -> Dict[str, int]:
    """
    Detects suspicious IPs with failed login attempts exceeding a threshold.

    A line counts as a failed login when its HTTP status field is exactly
    "401" or when it contains the text "Invalid credentials". The status is
    compared as a whole field rather than searched for as a substring, so
    values such as a response size of 1401 are not mistaken for a 401.

    :param file_path: Path to the log file.
    :param threshold: Threshold for failed login attempts; only IPs with
        strictly more failures than this are reported.
    :return: Dictionary of suspicious IPs with their failed login counts.
    """
    failed_attempts = Counter()

    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # Blank line: no IP to attribute anything to

            # Assumed layout: ip ident user [date tz] "method path proto" status size
            # which puts the status code at index 8.
            status = fields[8] if len(fields) > 8 else None

            if status == "401" or "Invalid credentials" in line:  # Indicators of failed login
                failed_attempts[fields[0]] += 1

    # Filter IPs exceeding the threshold
    return {ip: count for ip, count in failed_attempts.items() if count > threshold}


**4. Saving Results to CSV**

In [6]:
def save_results_to_csv(
    ip_counts: List[Tuple[str, int]],
    most_frequent_endpoint: Tuple[str, int],
    suspicious_ips: Dict[str, int],
    output_file: str = 'log_analysis_results.csv'
):
    """
    Writes the three analysis sections to a single CSV file.

    The file starts with a ["Section", "Data"] row, followed by the
    "Requests per IP", "Most Accessed Endpoint" and "Suspicious Activity"
    sections. Each section has a title row and a column-header row, and
    sections are separated by an empty row. A confirmation message is printed
    once the file has been closed.

    :param ip_counts: List of (IP address, request count).
    :param most_frequent_endpoint: (Endpoint, access count).
    :param suspicious_ips: Dictionary of suspicious IPs and counts.
    :param output_file: Path to the output CSV file.
    """
    separator = []  # An empty row is written as a blank line between sections

    with open(output_file, 'w', newline='') as handle:
        out = csv.writer(handle)

        # File header plus the per-IP request section
        out.writerows([
            ["Section", "Data"],
            ["Requests per IP"],
            ["IP Address", "Request Count"],
        ])
        out.writerows([address, total] for address, total in ip_counts)

        # Single-row section for the top endpoint
        out.writerows([
            separator,
            ["Most Accessed Endpoint"],
            ["Endpoint", "Access Count"],
            [most_frequent_endpoint[0], most_frequent_endpoint[1]],
        ])

        # Flagged IPs and their failed-login totals
        out.writerows([
            separator,
            ["Suspicious Activity"],
            ["IP Address", "Failed Login Count"],
        ])
        out.writerows([address, failures] for address, failures in suspicious_ips.items())

    print(f"Results saved to {output_file}")


**5. Main Function**

In [8]:
def main(file_path: str):
    """
    Runs every analysis step on one log file and reports progress.

    Counts requests per IP, finds the most accessed endpoint and detects
    suspicious IPs (using the detector's default threshold). A summary line
    is printed after each step, and all three results are then written to
    the default CSV output file.

    :param file_path: Path to the log file.
    """
    print("Processing log file...")

    # Requests per IP
    requests_by_ip = count_requests_per_ip(file_path)
    unique_ip_total = len(requests_by_ip)
    print(f"Processed {unique_ip_total} unique IP addresses.")

    # Most accessed endpoint, kept as a tuple because the CSV writer expects one
    top_endpoint = find_most_frequent_endpoint(file_path)
    endpoint_name = top_endpoint[0]
    endpoint_hits = top_endpoint[1]
    print(f"Most Accessed Endpoint: {endpoint_name} ({endpoint_hits} times)")

    # Suspicious activity
    flagged_ips = detect_suspicious_activity(file_path)
    print(f"Suspicious IPs detected: {len(flagged_ips)}")

    # Persist all results
    save_results_to_csv(requests_by_ip, top_endpoint, flagged_ips)

# Run the analysis on 'sample.log' (path is relative to the working directory).
log_file_path = "sample.log"
main(file_path=log_file_path)

Processing log file...
Processed 5 unique IP addresses.
Most Accessed Endpoint: /login (13 times)
Suspicious IPs detected: 0
Results saved to log_analysis_results.csv
