In [1]:
# Importing Necessary Libraries to compete data acquisition
import os
import hashlib
import subprocess

In [2]:
def write_sha256_hash(file_path, sha_file_path):
    """Compute SHA256 hash of a file and write it to a .sha file."""
    with open(file_path, "rb") as f:
        file_content = f.read()
        sha256hash = hashlib.sha256(file_content).hexdigest()

    with open(sha_file_path, "w") as f:
        f.write(sha256hash)
    print(f"SHA256 hash written to {sha_file_path}")

# Function to download, move, unzip datasets and check their SHA hash
def download_and_move_dataset(dataset_identifier, dataset_directory='data'):
    # Extract dataset name from the identifier
    dataset_name = dataset_identifier.split('/')[-1]
    
    # Kaggle command to download dataset to the specified path
    subprocess.run(['kaggle', 'datasets', 'download', '-d', dataset_identifier, '--path', dataset_directory, '--unzip'], check=True)
    
    # The dataset might contain multiple files, so we calculate the SHA hash for each
    for filename in os.listdir(dataset_directory):
        # Check if the filename ends with typical file formats instead
        if filename.endswith(('.csv', '.json')):
            file_path = os.path.join(dataset_directory, filename)
            sha_file_path = os.path.join(dataset_directory, f"{filename}.sha")
            write_sha256_hash(file_path, sha_file_path)

datasets = ['zynicide/wine-reviews', 'elvinrustam/wine-dataset']
for dataset in datasets:
    download_and_move_dataset(dataset)

Dataset URL: https://www.kaggle.com/datasets/zynicide/wine-reviews
License(s): CC-BY-NC-SA-4.0
Downloading wine-reviews.zip to data


100%|██████████| 50.9M/50.9M [00:02<00:00, 19.7MB/s]



SHA256 hash written to data/winemag-data-130k-v2.csv.sha
SHA256 hash written to data/winemag-data-130k-v2.json.sha
SHA256 hash written to data/winemag-data_first150k.csv.sha
Dataset URL: https://www.kaggle.com/datasets/elvinrustam/wine-dataset
License(s): CC0-1.0
Downloading wine-dataset.zip to data

SHA256 hash written to data/WineDataset.csv.sha


100%|██████████| 308k/308k [00:00<00:00, 5.61MB/s]


SHA256 hash written to data/winemag-data-130k-v2.csv.sha
SHA256 hash written to data/winemag-data-130k-v2.json.sha
SHA256 hash written to data/winemag-data_first150k.csv.sha
