# WHOIS Domain Age Checker

This code needs to be checked and cited: https://github.com/stephensheridan/python-domain-age/blob/master/domain_age.py

In [None]:
# Author: Stephen Sheridan
# Date: 14/03/2019

# Whois example output : as can be seen whois output format is not standardised

# Domain:                  itb.ie
# Registration Date:       11-January-1999
# Domain Name: yahoo.com
# Creation Date: 1995-01-18T00:00:00-0800
# domain:        BIT-EXCHANGER.RU
# created:       2016-07-04T15:30:24Z
# Domain name:
#         naturesaid.co.uk
#     Relevant dates:
#         Registered on: 25-Oct-1999

# NOTE: cannot grep with word "created" as it appears in more than one place in whois info

import subprocess
from datetime  import *
from domain_tests import *
import dateutil.parser as p

# Sample domain kept for ad-hoc manual runs; `url1` is not referenced by any
# other code visible in this file. The commented-out line is an alternative sample.
#url1 = "itb.ie"
url1 = "night-fever.it"

def getDaysAlive(url):
    """
    Work out how long a domain has been alive from the created/registered
    date found in its whois record.

    The whois command is run with an argument list (no shell), so the domain
    string is never interpreted by a shell, and the interesting lines are
    selected in Python instead of being piped through grep.

    Parameters:
    url (String): domain to check

    Returns:
    datetime.timedelta: time elapsed between the registration date and now
                        (use ``.days`` to get whole days)
    int: -1 No match from whois - domain does not exist
         -2 whois didn't return what we expected - no reg date
            (also returned when the whois command cannot be started)
         -3 there was a problem parsing a valid date from the whois data

    Note1: This function is dependent on whois being available from the command line
    Note2: This function is also dependent on the dateutil library: pip install py-dateutil
    """
    # Case-sensitive substrings that mark the whois lines we care about
    # (same alternatives as the original grep -E filter).
    markers = (
        "Registration Date",
        "Registered on",
        "Creation Date",
        "created:",
        "Created:",
        "Registration Time:",
        "No match for domain",
    )

    # Run whois without a shell; communicate() reads all output and reaps the process.
    try:
        process = subprocess.Popen(["whois", url], stdout=subprocess.PIPE)
        raw_output, _unused = process.communicate()
    except OSError:
        # whois is missing or could not be started: no usable data
        return -2

    # whois output encoding is not standardised; never fail on odd bytes
    text = raw_output.decode("utf-8", "replace")
    matches = [line for line in text.splitlines()
               if any(marker in line for marker in markers)]

    # whois could not find a match for the domain - doesn't exist
    if any("No match for domain" in line for line in matches):
        return -1

    # Zero or one date entry found - two are expected (server followed by
    # creation date of the domain).
    # NOTE(review): a record with exactly one date line is rejected here, as in
    # the original code; confirm this is intended for registries that print one.
    if len(matches) <= 1:
        return -2

    # Try to parse a datetime object out of the string
    # NOTE: we are assuming that the last matching line will be the actual
    # registration/creation date of the domain in question: matches[-1]
    try:
        reg_date = p.parse(matches[-1].lower(), fuzzy=True)
    except Exception:
        # Best effort: any parser failure is reported as "could not parse",
        # without swallowing KeyboardInterrupt/SystemExit like a bare except.
        return -3

    # datetime.today() is naive; drop the parsed timezone so the two values
    # can be subtracted (the offset is discarded, not converted).
    today = datetime.today()
    reg_date = reg_date.replace(tzinfo=None)
    # Return the time alive (diff between dates)
    return today - reg_date


# Test the function with a list of domain names ...........
# `test_domains` is expected to be supplied by the star-import from
# domain_tests - TODO confirm.
# Only the -1 result (whois reported no match) is counted as a failure.
failed = []
for domain in test_domains:
    # Look the domain up once and reuse the result for the check and the output
    days_alive = getDaysAlive(domain)
    if days_alive == -1:
        failed.append(domain)
    print(domain + " days alive = " + str(days_alive))

# Single-argument print(...) calls behave the same on Python 2 and Python 3
print("No. of domain names tested: " + str(len(test_domains)))
print("No. of failures: " + str(len(failed)))

# HTTP Security Header Checker

Still needs to be developed.

# Basic Website security tool

Based on this website: https://www.freecodecamp.org/news/build-a-web-application-security-scanner-with-python/

SQL injection, cross-site scripting (XSS), sensitive information exposure, basic authentication weakness.

This code doesn't work in a Jupyter environment; it needs to be converted into a standalone Python script and experimented with.

In [3]:
!pip install requests beautifulsoup4 urllib3 colorama

Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting urllib3
  Downloading urllib3-2.6.3-py3-none-any.whl.metadata (6.9 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl.metadata (38 kB)
Collecting idna<4,>=2.5 (from requests)
  Downloading idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2026.1.4-py3-none-any.whl.metadata (2.5 kB)
Collecting soupsieve>=1.6.1 (from beautifulsoup4)
  Downloading soupsieve-2.8.3-py3-none-any.whl.metadata (4.6 kB)
Downloading requests-2.32.5-py3-none-any.whl (64 kB)
   ---------------------------------------- 0.0/64.7 kB ? eta -:--:--
   ---------------------------------------- 64.7/64.7 kB 3.4 MB/s eta 0:00:00
Downloading beautifulsoup4-4.14.3-py3-none-any.whl (107 kB)
   ------------


[notice] A new release of pip is available: 24.0 -> 26.0.1
[notice] To update, run: C:\Users\Josiah\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
#required packages 
import requests 
from bs4 import BeautifulSoup 
import urllib.parse 
import colorama 
import re 
from concurrent.futures import ThreadPoolExecutor 
import sys 
from typing import List, Dict, Set 

In [5]:
# building out the bread and butter class 
class WebSecurityScanner:
    """
    Crawl a target site and run simple heuristic security checks on each page.

    The scanner follows in-scope ``<a href>`` links up to ``max_depth`` and then,
    for every discovered URL, probes query parameters with SQL injection and XSS
    payloads and searches the page body for sensitive-looking data. Findings are
    collected in ``vulnerabilities`` and printed as they are found.
    """

    # Values substituted into each query parameter by check_sql_injection.
    SQL_PAYLOADS = ("'", "1' OR '1'='1", "' OR 1=1--", "' UNION SELECT NULL--")

    # Substrings looked for in the lower-cased response body after a SQL payload.
    # NOTE(review): 'sql' also matches ordinary page text, so false positives
    # are likely; consider more specific database error signatures.
    SQL_ERROR_MARKERS = ('sql', 'mysql', 'sqlite', 'postgresql', 'oracle')

    # Values substituted into each query parameter by check_xss.
    XSS_PAYLOADS = (
        "<script>alert('XSS')</script>",
        "<img src=x onerror=alert('XSS')>",
        "javascript:alert('XSS')",
    )

    # Regular expressions searched for in page bodies by check_sensitive_info.
    SENSITIVE_PATTERNS = {
        'email': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
        'phone': r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',
        'ssn': r'\b\d{3}-\d{2}-\d{4}\b',
        'api_key': r'api[_-]?key[_-]?([\'"|`])([a-zA-Z0-9]{32,45})\1'
    }

    def __init__(self, target_url: str, max_depth: int = 3,
                 timeout: float = 10.0, verify_tls: bool = True):
        """
        Initialize the security scanner with a target URL and maximum crawl depth.

        Args:
            target_url: The base URL to scan
            max_depth: Maximum depth for crawling links (default: 3)
            timeout: Seconds to wait for each HTTP request before giving up, so a
                stalled server cannot hang the scan (default: 10.0)
            verify_tls: Whether TLS certificates are verified. Applied to the
                shared session so crawling and all checks behave the same way;
                pass False to scan hosts with self-signed certificates
                (default: True)
        """
        self.target_url = target_url
        self.max_depth = max_depth
        self.timeout = timeout
        self.visited_urls: Set[str] = set()
        self.vulnerabilities: List[Dict] = []
        self.session = requests.Session()
        self.session.verify = verify_tls

        # Initialize colorama for cross-platform colored output
        colorama.init()

    def normalize_url(self, url: str) -> str:
        """Normalize the URL to prevent duplicate checks (drops query and fragment)."""
        parsed = urllib.parse.urlparse(url)
        return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

    def _in_scope(self, url: str) -> bool:
        """Return True when url is under target_url AND on the target's own host."""
        if not url.startswith(self.target_url):
            return False
        # A bare prefix test would accept e.g. https://example.com.evil.org for
        # the target https://example.com, so the host must match exactly too.
        target_host = urllib.parse.urlsplit(self.target_url).netloc
        return urllib.parse.urlsplit(url).netloc == target_host

    def _resolve_link(self, base_url: str, href: str) -> str:
        """Return the absolute, fragment-free in-scope URL for href, or '' to skip it."""
        try:
            joined = urllib.parse.urljoin(base_url, href)
            # '#section' variants point at the same document; crawl it only once
            absolute = urllib.parse.urldefrag(joined).url
            return absolute if self._in_scope(absolute) else ""
        except ValueError:
            # Malformed href (urllib could not parse it): ignore this link only
            return ""

    @staticmethod
    def _query_param_names(url: str) -> List[str]:
        """Return the distinct query parameter names of url, in order of appearance."""
        query = urllib.parse.urlsplit(url).query
        # keep_blank_values so that parameters such as '?id=' are probed as well
        pairs = urllib.parse.parse_qsl(query, keep_blank_values=True)
        return list(dict.fromkeys(name for name, _value in pairs))

    @staticmethod
    def _with_param_value(url: str, param: str, value: str) -> str:
        """
        Return url with the first occurrence of query parameter param set to value.

        The query string is parsed and re-encoded instead of using str.replace,
        so other parameters are never touched (e.g. 'a=1' inside 'aa=1') and
        values that were percent-encoded in the original URL are still replaced.
        All values, including the payload, are percent-encoded on output.
        """
        parts = urllib.parse.urlsplit(url)
        rebuilt = []
        replaced = False
        for name, current in urllib.parse.parse_qsl(parts.query, keep_blank_values=True):
            if name == param and not replaced:
                rebuilt.append((name, value))
                replaced = True
            else:
                rebuilt.append((name, current))
        query = urllib.parse.urlencode(rebuilt, quote_via=urllib.parse.quote)
        return urllib.parse.urlunsplit(parts._replace(query=query))

    # checking content on the page
    def crawl(self, url: str, depth: int = 0) -> None:
        """
        Crawl the website to discover pages and endpoints.

        Every URL reached is added to visited_urls, including ones that fail
        to load. Errors are printed and do not stop the crawl of other pages.

        Args:
            url: Current URL to crawl
            depth: Current depth in the crawl tree
        """
        if depth > self.max_depth or url in self.visited_urls:
            return

        try:
            self.visited_urls.add(url)
            response = self.session.get(url, timeout=self.timeout)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Follow every link on the page that stays within the target
            for link in soup.find_all('a', href=True):
                next_url = self._resolve_link(url, link['href'])
                if next_url:
                    self.crawl(next_url, depth + 1)

        except Exception as e:
            print(f"Error crawling {url}: {str(e)}")

    def _probe_query_params(self, url: str, payloads, is_hit,
                            vuln_type: str, label: str) -> None:
        """
        Substitute each payload into each query parameter of url and report hits.

        Args:
            url: URL whose GET parameters are tested
            payloads: Iterable of payload strings
            is_hit: Callable (payload, response_text) -> bool deciding whether
                the response indicates a vulnerability
            vuln_type: Value stored under 'type' in the reported finding
            label: Check name used in the error message
        """
        param_names = self._query_param_names(url)

        for payload in payloads:
            try:
                for param in param_names:
                    test_url = self._with_param_value(url, param, payload)
                    response = self.session.get(test_url, timeout=self.timeout)

                    if is_hit(payload, response.text):
                        self.report_vulnerability({
                            'type': vuln_type,
                            'url': url,
                            'parameter': param,
                            'payload': payload
                        })

            except Exception as e:
                # Runs in a worker thread: report and continue with the next payload
                print(f"Error testing {label} on {url}: {str(e)}")

    def check_sql_injection(self, url: str) -> None:
        """Test for potential SQL injection vulnerabilities"""
        def leaks_sql_error(_payload: str, body: str) -> bool:
            # Look for SQL error messages in the response
            lowered = body.lower()
            return any(marker in lowered for marker in self.SQL_ERROR_MARKERS)

        self._probe_query_params(
            url, self.SQL_PAYLOADS, leaks_sql_error,
            'SQL Injection', 'SQL injection'
        )

    def check_xss(self, url: str) -> None:
        """Test for potential Cross-Site Scripting vulnerabilities"""
        def reflects_payload(payload: str, body: str) -> bool:
            # The payload coming back unescaped suggests it is reflected as-is
            return payload in body

        self._probe_query_params(
            url, self.XSS_PAYLOADS, reflects_payload,
            'Cross-Site Scripting (XSS)', 'XSS'
        )

    def check_sensitive_info(self, url: str) -> None:
        """Check for exposed sensitive information (one finding per regex match)"""
        try:
            response = self.session.get(url, timeout=self.timeout)

            for info_type, pattern in self.SENSITIVE_PATTERNS.items():
                for _match in re.finditer(pattern, response.text):
                    # The matched text itself is deliberately not recorded,
                    # only which pattern fired.
                    self.report_vulnerability({
                        'type': 'Sensitive Information Exposure',
                        'url': url,
                        'info_type': info_type,
                        'pattern': pattern
                    })

        except Exception as e:
            print(f"Error checking sensitive information on {url}: {str(e)}")

    def scan(self) -> List[Dict]:
        """
        Main scanning method that coordinates the security checks

        Returns:
            List of discovered vulnerabilities
        """
        print(
            f"\n{colorama.Fore.BLUE}Starting security scan of "
            f"{self.target_url}{colorama.Style.RESET_ALL}\n"
        )

        # First, crawl the website
        self.crawl(self.target_url)

        # Then run security checks on all discovered URLs; leaving the
        # with-block waits for every submitted check to finish.
        with ThreadPoolExecutor(max_workers=5) as executor:
            for url in self.visited_urls:
                executor.submit(self.check_sql_injection, url)
                executor.submit(self.check_xss, url)
                executor.submit(self.check_sensitive_info, url)

        return self.vulnerabilities

    def report_vulnerability(self, vulnerability: Dict) -> None:
        """Record and display found vulnerabilities"""
        self.vulnerabilities.append(vulnerability)
        # Build the whole report first and emit it with a single print call so
        # that reports from concurrent worker threads are less likely to interleave.
        lines = [f"{colorama.Fore.RED}[VULNERABILITY FOUND]{colorama.Style.RESET_ALL}"]
        lines.extend(f"{key}: {value}" for key, value in vulnerability.items())
        lines.append("")
        print("\n".join(lines))

In [None]:
if __name__ == "__main__":
    # Ask interactively for the site to scan; surrounding whitespace is ignored.
    target_url = input("Enter the target URL (e.g., https://example.com): ")
    target_url = target_url.strip()

    # Nothing usable was entered: say so and exit with a non-zero status.
    if len(target_url) == 0:
        print("Error: No URL provided.")
        sys.exit(1)

    # Run the scan with the scanner's default settings.
    scanner = WebSecurityScanner(target_url)
    vulnerabilities = scanner.scan()

    # Closing summary: banner, then the sizes of visited_urls and of the findings list.
    print(
        f"\n{colorama.Fore.GREEN}Scan Complete!{colorama.Style.RESET_ALL}",
        f"Total URLs scanned: {len(scanner.visited_urls)}",
        f"Vulnerabilities found: {len(vulnerabilities)}",
        sep="\n",
    )
