In [4]:
# Install required libraries
!pip install pandas
!pip install requests

import pandas as pd
import requests
import hashlib
import os

def download_dataset(url, filename, timeout=30):
    """Download a dataset from a URL and save it to a local file.

    Args:
        url: Address fetched with an HTTP GET request.
        filename: Local path the response body is written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTTP error page as if it were data.
    response.raise_for_status()
    # Write the raw bytes: going through response.text and a text-mode file
    # re-encodes the payload with a guessed/locale encoding and may translate
    # newlines, so the saved file would not match what the server sent.
    with open(filename, 'wb') as file:
        file.write(response.content)
    print(f"Dataset downloaded and saved to {filename}")

def load_dataset(filename):
    """Read the CSV file at ``filename`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filepath_or_buffer=filename)
    return frame

def hash_row(row):
    """Return the SHA256 hex digest of a row's comma-joined cell values."""
    # Every cell is converted to its string form, the strings are joined with
    # commas, and the UTF-8 encoding of that text is what gets hashed.
    cells = row.values.astype(str)
    digest = hashlib.sha256()
    digest.update(','.join(cells).encode('utf-8'))
    return digest.hexdigest()

def add_hash_column(df):
    """Hash every row of ``df`` and store the digests in a 'row_hash' column.

    The DataFrame passed in is modified in place (the column is assigned on
    it directly) and is also returned, together with the digests as a plain
    Python list in row order.
    """
    # axis=1 makes apply() hand each row to hash_row as a Series.
    row_digests = df.apply(hash_row, axis=1)
    digests_as_list = row_digests.tolist()
    df['row_hash'] = row_digests
    return df, digests_as_list

def save_dataset(df, filename):
    """Write ``df`` to ``filename`` as CSV, omitting the index, and report it."""
    df.to_csv(path_or_buf=filename, index=False)
    message = f"DataFrame with hashes saved to {filename}"
    print(message)

# List of dataset URLs
dataset_urls = [
    "https://storage-devolvedai.s3.amazonaws.com/web-app-test/datasets/65bb4fb1dd8a7e5c56e05f49/md/95e4c701-efeb-4de5-a3b4-6cf8da8e4e0a-iris.md",
    # Add more dataset URLs here (keep the trailing comma on every entry:
    # adjacent string literals without one are silently concatenated)
]

# Directory to save datasets
os.makedirs('datasets', exist_ok=True)

# Process each dataset
for url in dataset_urls:
    # Extract filename from the URL path only; a query string or fragment
    # (e.g. on a signed URL) must not end up in the local filename.
    url_path = url.split('#', 1)[0].split('?', 1)[0]
    filename = os.path.join('datasets', os.path.basename(url_path))

    # Swap only the trailing extension. A plain str.replace('.md', ...) would
    # rewrite every '.md' in the path, and would leave the name unchanged for
    # non-.md files, so the hashed output would overwrite the download.
    hashed_filename = os.path.splitext(filename)[0] + '_hashed.csv'

    # Download, load, hash, and save the dataset
    download_dataset(url, filename)
    df = load_dataset(filename)
    df_hashed, hash_list = add_hash_column(df)
    save_dataset(df_hashed, hashed_filename)

    # Print the list of hash values for each row
    print("List of hash values for each row:")
    for h in hash_list:
        print(h)


Dataset downloaded and saved to datasets/95e4c701-efeb-4de5-a3b4-6cf8da8e4e0a-iris.md
DataFrame with hashes saved to datasets/95e4c701-efeb-4de5-a3b4-6cf8da8e4e0a-iris_hashed.csv
List of hash values for each row:
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0
9b2d5b4