# Sentiment Dataset Stamper Demo


This sample creates a tamper-proof dataset history.<br>


## Imports

In [None]:
from datetime import datetime
import json
import os
import pprint
import random
from dotenv import load_dotenv
from vbase import (
    VBaseClient,
    ForwarderCommitmentService,
    VBaseDataset,
    VBaseJsonObject,
)
from aws_utils import (
    create_s3_client_from_env,
    write_s3_object,
)

#  Install vBase requirements.
!pip install git+https://github.com/validityBase/vbase-py.git
!wget --no-clobber https://raw.githubusercontent.com/validityBase/vbase-py-samples-collab/main/samples/collab_utils.py

## Configuration

The producer's private key, which serves as their sovereign cryptographic identity.

In [None]:
# Private key handed to ForwarderCommitmentService when the vBase client is
# created below.
# NOTE(review): hard-coded in source — presumably a throwaway demo key;
# confirm, and never commit a key that protects real data.
PK = "0xabfc6c981e4e9f1f26175bc40aef73248d467617309c5e04e83da34171999076"

The dataset name.

In [None]:
# Suffix the name with a second-resolution local timestamp so that runs
# started at different seconds get distinct dataset names.
DATASET_NAME = f"sentiment_dataset_{datetime.now():%Y%m%d%H%M%S}"

Additional configuration.

In [None]:
# S3 bucket and key prefix that the stamped records are written under.
BUCKET_NAME = "vbase-test"
FOLDER_NAME = "samples/sentiment_dataset_history/"
# FOLDER_NAME already ends with "/", so the dataset name is appended directly.
DATASET_FOLDER_NAME = f"{FOLDER_NAME}{DATASET_NAME}"

# Number of sample records generated by the record-creation loop.
N_TIME_PERIODS = 10

# NOTE(review): not referenced anywhere else in this notebook; presumably the
# account address that corresponds to PK — confirm.
ADDRESS = "0xA401F59d7190E4448Eb60691E3bc78f1Ef03e88C"

## Setup

Load the information necessary to call vBase APIs.

In [None]:
# try_add_user_secrets_to_env is not imported by the imports cell, so calling
# it would raise NameError. Import it from collab_utils.py, the helper module
# downloaded by the wget command in the imports cell.
# NOTE(review): assumes collab_utils.py defines this function — confirm.
from collab_utils import try_add_user_secrets_to_env

# Initialize the environment using Google Colab secrets, if possible.
try_add_user_secrets_to_env(
    [
        "VBASE_API_KEY",
        "VBASE_FORWARDER_URL",
        "VBASE_COMMITMENT_SERVICE_PRIVATE_KEY",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
    ]
)

# Also load a local .env file; override=True lets its values replace
# variables that are already set in the environment.
load_dotenv(verbose=True, override=True)

# Connection settings for the vBase forwarder; each is None when the
# corresponding variable is not set.
forwarder_url = os.environ.get("VBASE_FORWARDER_URL")
api_key = os.environ.get("VBASE_API_KEY")

Connect to AWS.

In [None]:
# Build the S3 client that write_s3_object later uses to store each record.
# NOTE(review): presumably reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# from the environment — confirm in aws_utils.
boto_client = create_s3_client_from_env()

Connect to vBase.

In [None]:
# Wrap the forwarder URL, API key, and private key in a commitment service,
# then hand that service to the vBase client.
commitment_service = ForwarderCommitmentService(forwarder_url, api_key, PK)
vbc = VBaseClient(commitment_service)

## Create and Stamp Records

Create the vBase dataset object.

In [None]:
# Construct the dataset object from the client, the dataset name, and the
# VBaseJsonObject class, then print its dictionary form.
ds = VBaseDataset(vbc, DATASET_NAME, VBaseJsonObject)
print("Created dataset: " + pprint.pformat(ds.to_dict()))

Create sample records.

In [None]:
# Fixed seed so every run generates the same sequence of sample values.
random.seed(1234)
for period_index in range(N_TIME_PERIODS):
    # Draw one rounded value in [0, 100] per key, in the order listed,
    # and serialize the mapping as a JSON string.
    scores = {
        key: round(random.random() * 100)
        for key in ("AAPL", "MSFT", "TSLA")
    }
    record = json.dumps(scores)
    print("Record: " + pprint.pformat(record))

    # Add the record to the vBase dataset object and show what it returns.
    receipt = ds.add_record(record)
    print("Stamp receipt: " + pprint.pformat(receipt))

    # Save the same JSON string to S3 as record_<index>.json.
    write_s3_object(
        boto_client,
        BUCKET_NAME,
        DATASET_FOLDER_NAME,
        f"record_{period_index}.json",
        record,
    )

Display the shareable dataset history URL.

In [None]:
# Build the listing URL from BUCKET_NAME so it always points at the bucket
# the records were written to, instead of repeating the literal bucket name.
# NOTE(review): the us-east-1 website endpoint is still hard-coded — confirm
# it matches the bucket's region.
print(
    "Data saved to: "
    f"http://{BUCKET_NAME}.s3-website-us-east-1.amazonaws.com/?prefix="
    f"{DATASET_FOLDER_NAME}"
)
# Report the dataset's name and owner as exposed by the dataset object.
print(f"Dataset info: name = {ds.name}, owner = {ds.owner}")

## Summary


Process<br>
* We used only a private key and dataset records as inputs.<br>
* We created a tamper-proof history of dataset records.<br>
* Data was not shared with vBase or any other third party.<br>



Key Implications<br>
* We can produce an easily verifiable dataset record.<br>
* We can selectively share the dataset history.<br>
* The record and all analytics can be independently calculated and verified forever.<br>
