<a href="https://colab.research.google.com/github/tusharraja/PP/blob/main/sha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
with open('test_file.txt', 'w') as f:
    f.write('This is a test file for checksum calculation.')
    f.write('It contains some arbitrary data....')

In [None]:
import hashlib

def calculate_md5(filepath):
    hasher = hashlib.md5()
    with open(filepath, 'rb') as f:
        while True:
            chunk = f.read(4096)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()

def calculate_sha512(filepath):
    hasher = hashlib.sha512()
    with open(filepath, 'rb') as f:
        while True:
            chunk = f.read(4096)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()

filename = 'test_file.txt'

md5_checksum = calculate_md5(filename)
sha512_checksum = calculate_sha512(filename)

print(f"MD5 Checksum of {filename}: {md5_checksum}")
print(f"SHA512 Checksum of {filename}: {sha512_checksum}")

MD5 Checksum of test_file.txt: ad092983500636912dc8552192400fea
SHA512 Checksum of test_file.txt: 84f223a5628442f145e661e5ea6b6ebb6f610d60507eb7ffa1bd40f6f85112f6d6ad22c2295d68f811646327cc78fc30a246ca9d0193b91cd88101c9fc2463f4


In [None]:
import hashlib

def calculate_sha512(filepath):
    hasher = hashlib.sha512()
    with open(filepath, 'rb') as f:
        while True:
            chunk = f.read(4096)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()

filename = 'test_file.txt'
known_good_sha512 = 'b29c4bbd1ba91f384929bae47a9516dcb5782daa7c83feab2e6ccfc88b4718acaf2b8b09374762ba8aea0f40eac4f9cd5dc40e1b7b472911db8bb8ca47a36832'

current_sha512 = calculate_sha512(filename)

if current_sha512 == known_good_sha512:
    print(f"The file '{filename}' has not been tampered with. SHA512: {current_sha512}")
else:
    print(f"WARNING: The file '{filename}' may have been tampered with!")
    print(f"Known good SHA512: {known_good_sha512}")
    print(f"Current SHA512:    {current_sha512}")


with open('sha512_checksum.txt', 'w') as f:
    f.write(current_sha512)
print(f"Current SHA512 checksum saved to sha512_checksum.txt")

Known good SHA512: b29c4bbd1ba91f384929bae47a9516dcb5782daa7c83feab2e6ccfc88b4718acaf2b8b09374762ba8aea0f40eac4f9cd5dc40e1b7b472911db8bb8ca47a36832
Current SHA512:    84f223a5628442f145e661e5ea6b6ebb6f610d60507eb7ffa1bd40f6f85112f6d6ad22c2295d68f811646327cc78fc30a246ca9d0193b91cd88101c9fc2463f4
Current SHA512 checksum saved to sha512_checksum.txt


In [1]:
import hashlib

def calculate_sha512(filepath):
    hasher = hashlib.sha512()
    with open(filepath, 'rb') as f:
        while True:
            chunk = f.read(4096)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()

filename = 'test_file.txt'
current_sha512 = calculate_sha512(filename)

output_filename = 'checksum.sha'
with open(output_filename, 'w') as f:
    f.write(current_sha512)
print(f"SHA512 checksum saved to {output_filename}")

SHA512 checksum saved to checksum.sha


In [2]:
with open('checksum.sha', 'r') as f:
    content = f.read()
print(f"Content of checksum.sha:\n{content}")

Content of checksum.sha:
84f223a5628442f145e661e5ea6b6ebb6f610d60507eb7ffa1bd40f6f85112f6d6ad22c2295d68f811646327cc78fc30a246ca9d0193b91cd88101c9fc2463f4


In [7]:
import hashlib
import os
from datetime import datetime

def compute_file_hash(filepath, hash_algorithm='sha256'):

    hash_algorithms = {
        'md5': hashlib.md5(),
        'sha1': hashlib.sha1(),
        'sha256': hashlib.sha256(),
        'sha512': hashlib.sha512()
    }

    if hash_algorithm.lower() not in hash_algorithms:
        raise ValueError(f"Unsupported hash algorithm: {hash_algorithm}")

    hash_obj = hash_algorithms[hash_algorithm.lower()]

    try:
        with open(filepath, 'rb') as file:
            chunk_size = 8192  # 8KB
            while chunk := file.read(chunk_size):
                hash_obj.update(chunk)

        return hash_obj.hexdigest()

    except FileNotFoundError:
        raise FileNotFoundError(f"File not found: {filepath}")
    except Exception as e:
        raise Exception(f"Error computing hash: {e}")

def generate_all_hashes(filepath):

    algorithms = ['md5', 'sha1', 'sha256', 'sha512']
    hashes = {}

    for algorithm in algorithms:
        try:
            hashes[algorithm.upper()] = compute_file_hash(filepath, algorithm)
        except Exception as e:
            hashes[algorithm.upper()] = f"Error: {e}"

    return hashes

def create_hash_report(filepath, report_filename='hash_report.txt'):

    try:
        file_size = os.path.getsize(filepath)
        filename = os.path.basename(filepath)
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        hashes = generate_all_hashes(filepath)

        with open(report_filename, 'w') as report_file:

            report_file.write("FILE HASH INTEGRITY REPORT\n")

            report_file.write(f"Filename: {filename}\n")
            report_file.write(f"Filepath: {filepath}\n")
            report_file.write(f"File Size: {file_size} bytes\n")
            report_file.write(f"Report Generated: {current_time}\n\n")

            report_file.write("HASH VALUES:\n")


            for algorithm, hash_value in hashes.items():
                report_file.write(f"{algorithm:>8}: {hash_value}\n")



        print(f"Hash report generated: {report_filename}")
        return hashes

    except Exception as e:
        print(f"Error generating hash report: {e}")
        return None

def create_checksum_file(filepath, hash_algorithm='sha256'):

    try:
        hash_value = compute_file_hash(filepath, hash_algorithm)
        filename = os.path.basename(filepath)
        checksum_filename = f"{filepath}.{hash_algorithm.lower()}"

        with open(checksum_filename, 'w') as checksum_file:
            checksum_file.write(f"{hash_value}  {filename}\n")

        print(f"Checksum file created: {checksum_filename}")
        return checksum_filename

    except Exception as e:
        print(f"Error creating checksum file: {e}")
        return None

def create_all_checksum_files(filepath):

    algorithms = ['md5', 'sha1', 'sha256', 'sha512']
    checksum_files = {}

    print(f"Creating checksum files for: {filepath}")

    for algorithm in algorithms:
        checksum_file = create_checksum_file(filepath, algorithm)
        if checksum_file:
            checksum_files[algorithm.upper()] = checksum_file

    return checksum_files

def verify_checksum(checksum_filepath):

    try:
        algorithm = checksum_filepath.split('.')[-1].lower()
        if algorithm not in ['md5', 'sha1', 'sha256', 'sha512']:
            return False, "Unknown hash algorithm in checksum file"

        with open(checksum_filepath, 'r') as checksum_file:
            line = checksum_file.read().strip()
            parts = line.split('  ')

            if len(parts) != 2:
                return False, "Invalid checksum file format"

            stored_hash = parts[0].strip()
            original_filename = parts[1].strip()

        checksum_dir = os.path.dirname(checksum_filepath)
        if checksum_dir == '':
            checksum_dir = '.'
        original_filepath = os.path.join(checksum_dir, original_filename)

        if not os.path.exists(original_filepath):
            return False, f"Original file not found: {original_filename}"

        current_hash = compute_file_hash(original_filepath, algorithm)

        if stored_hash.lower() == current_hash.lower():
            return True, f"Checksum OK (Authentic) - {algorithm.upper()}"
        else:
            return False, f"Checksum FAILED (Tampered) - {algorithm.upper()}"

    except Exception as e:
        return False, f"Error during verification: {e}"

def verify_file_integrity(filepath, verbose=True):

    algorithms = ['md5', 'sha1', 'sha256', 'sha512']
    results = {}

    if verbose:
        print(f"\nVerifying integrity of: {filepath}")


    for algorithm in algorithms:
        checksum_file = f"{filepath}.{algorithm}"

        if os.path.exists(checksum_file):
            is_valid, message = verify_checksum(checksum_file)
            results[algorithm.upper()] = {
                'valid': is_valid,
                'message': message,
                'checksum_file': checksum_file
            }

            if verbose:
                status = "yes" if is_valid else "no"
                print(f"{status} {algorithm.upper()}: {message}")
        else:
            if verbose:
                print(f"? {algorithm.upper()}: No checksum file found ({checksum_file})")

    return results

def display_verification_summary(results):

    if not results:
        print("\nNo verification results to display.")
        return

    valid_count = sum(1 for result in results.values() if result['valid'])
    total_count = len(results)

    print(f"\nVerification Summary:")
    print(f"Valid checksums: {valid_count}/{total_count}")

    if valid_count == total_count:
        print(" FILE INTEGRITY VERIFIED: All checksums match.")
    elif valid_count == 0:
        print(" TAMPERING DETECTED: All checksums failed.")
    else:
        print(f" PARTIAL INTEGRITY: {valid_count} out of {total_count} checksums valid.")

def run_experiment():


    print("HASH FUNCTION FILE INTEGRITY VERIFICATION EXPERIMENT")



    test_file = "test_file.txt"
    test_content = """Gukesh Dommaraju is an Indian chess grandmaster and the reigning World Chess Champion. A chess prodigy, Gukesh is the youngest undisputed world champion, the youngest player to have surpassed a FIDE rating of 2750, doing so at the age of 17, and the third-youngest to have surpassed 2700 Elo at the age of 16. Wikipedia
Born: 29 May 2006 (age 19 years), Chennai
Education: Velammal Vidyalaya Ayanambakkam
FIDE rating: 2752 (October 2025)
Peak ranking: No. 3 (March 2025)
Peak rating: 2794 (October 2024)
Title: Grandmaster (2019)
World Champion: 2024â€“present
"""

    with open(test_file, 'w') as f:
        f.write(test_content)

    print(f"\n Test file created")
    print(f"   File: {test_file}")
    print(f"   Size: {os.path.getsize(test_file)} bytes")


    print(f"\nGenerating hash report...")
    hashes = create_hash_report(test_file, 'hash_report.txt')

    print("\n   Generated hashes:")
    for alg, hash_val in hashes.items():
        print(f"   {alg:>6}: {hash_val}")


    print(f"\n Creating checksum files...")
    create_all_checksum_files(test_file)


    print(f"\nInitial (authentic file)")
    initial_results = verify_file_integrity(test_file)
    display_verification_summary(initial_results)


    print(f"\n File tampering simulation...")
    print("   Adding malicious content to file...")

    with open(test_file, 'a') as f:
        f.write("\n\n[@321412MKAFSMKFAMSCS()(!(!@()!#@&&^^&**(^%$#$ABDJASJDASJFIJIQUFJF)))]")

    print(f"   File tampered!")
    print(f"   New size: {os.path.getsize(test_file)} bytes")


    print(f"\n Verification after tampering")
    tampered_results = verify_file_integrity(test_file)
    display_verification_summary(tampered_results)




    print("\nBEFORE TAMPERING:")
    for alg, result in initial_results.items():
        status = " AUTHENTIC" if result['valid'] else " FAILED"
        print(f"  {alg:>6}: {status}")

    print("\nAFTER TAMPERING:")
    for alg, result in tampered_results.items():
        status = " AUTHENTIC" if result['valid'] else " TAMPERED"
        print(f"  {alg:>6}: {status}")

    print(f"\nCONCLUSION:")
    print("All hash algorithms successfully detected file tampering!")
    print("Even minor changes result in completely different hash values.")

    print(f"\nGENERATED FILES:")
    print(f" {test_file} (test file)")
    print(f" hash_report.txt (comprehensive report)")
    print(f" {test_file}.md5 (MD5 checksum)")
    print(f" {test_file}.sha1 (SHA-1 checksum)")
    print(f" {test_file}.sha256 (SHA-256 checksum)")
    print(f" {test_file}.sha512 (SHA-512 checksum)")

if __name__ == "__main__":
    run_experiment()


HASH FUNCTION FILE INTEGRITY VERIFICATION EXPERIMENT

 Test file created
   File: test_file.txt
   Size: 561 bytes

Generating hash report...
Hash report generated: hash_report.txt

   Generated hashes:
      MD5: df5624fe6748df2e0b07165222f7a8e4
     SHA1: fef2b7bb452ede91488d0cef930c926a8f4c0812
   SHA256: d20076ac9f18df4e6e0b0e5b5843e9065db849f277e87f9ed40137d0ee356760
   SHA512: 147d922056c874c4c90e8e42c75865f28dc88c29dbcc6d7f4f4181d509b5ad55f8b32808b89fb01c547f94e3aa7848e27335d1d8cea5e68ff0b26f2f8ae1fbf5

 Creating checksum files...
Creating checksum files for: test_file.txt
Checksum file created: test_file.txt.md5
Checksum file created: test_file.txt.sha1
Checksum file created: test_file.txt.sha256
Checksum file created: test_file.txt.sha512

Initial (authentic file)

Verifying integrity of: test_file.txt
yes MD5: Checksum OK (Authentic) - MD5
yes SHA1: Checksum OK (Authentic) - SHA1
yes SHA256: Checksum OK (Authentic) - SHA256
yes SHA512: Checksum OK (Authentic) - SHA512

Verifi