In [None]:
%load_ext autoreload
%matplotlib inline
%autoreload 2

In [None]:
import os
import re
import io
import ast
import sys
import json
import boto3 
import pymongo 
import subprocess
import numpy as np
import pandas as pd
import seaborn as sns

from matplotlib import cm
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.mplot3d import Axes3D


from PIL import Image
from urllib import request
from collections import Counter

sys.path.append("../")
import cabutils

In [None]:
# Global seaborn look for the figures in this notebook: dark grid background,
# "notebook" scaling context.
sns.set_style("darkgrid")
sns.set_context("notebook")

In [None]:
from matplotlib import rcParams

# Larger default text so figures stay legible: body font and figure titles.
rcParams.update({
    "font.size": 18,
    "figure.titlesize": 26,
})

In [None]:
# Experiment identifiers. `projName` picks the "<projName>_outputs" database and
# `experimentName` picks the collection inside it (see the connection cell).
projName = "mlve"
experiment = "tdw"
experimentName = f"{experiment}_surface-normals"
# Bucket read by `download_from_s3`.
S3_BUCKET_NAME = "mlve-v1"

To download data (@Yoni only), open the SSH tunnel with:

```
ssh -i /Users/yoni/Desktop/cocosci_pem/Cocosci_WebExperiments.pem -nNL 8000:localhost:27017  ubuntu@ec2-3-90-78-57.compute-1.amazonaws.com
```

# Downloading data from the EC2 server (mostly just instructions for Thomas)

In `settings.conf`, change `MONGODB_PORT` to 8000 and `MONGODB_HOST` to `localhost`. Then open an SSH tunnel (port forward) to the EC2 server:

```
ssh -i path/to/pem/key/maybe-named-something-like/Cocosci_WebExperiments.pem -fNL 8000:localhost:27017 ubuntu@ec2-54-91-252-25.compute-1.amazonaws.com
```

Change the path to the pem key, but otherwise this should all stay the same.

In [None]:
# Open the database connection through the project helper, then select this
# experiment's collection inside the "<projName>_outputs" database.
conn = cabutils.get_db_connection()
db = conn[projName + "_outputs"]
col = db[experimentName]

In [None]:
def results_to_df():
    """Return every document of the notebook-level collection `col` as one DataFrame."""
    documents = list(col.find({}))
    return pd.DataFrame(documents)


df = results_to_df()

In [None]:
# Print each free-text survey response on its own line.
survey_rows = df[df["trial_type"] == "survey-text"]
for answer in survey_rows["response"]:
    print(answer)

In [None]:
# Column names, dtypes and non-null counts of the freshly loaded results.
df.info()

In [None]:
# Remove four columns from `df` in place, one at a time.
for column_name in ("trial_type", "rt", "response", "inputid"):
    df.drop(columns=column_name, inplace=True)

In [None]:
# Discard, in place, every row that has no recorded indicator direction.
rows_missing_indicator = df.index[df["indicatorFinalDirection"].isna()]
df.drop(rows_missing_indicator, inplace=True)

In [None]:
# Discard, in place, rows whose ground-truth direction is the placeholder
# [None, None, None].
has_placeholder_truth = df["trueArrowDirection"].apply(
    lambda direction: direction == [None, None, None]
).astype(bool)
df.drop(df[has_placeholder_truth].index, inplace=True)

In [None]:
def download_from_s3(url, resource_type="image"):
    """
    Fetch an object from the bucket named by `S3_BUCKET_NAME`.

    `url` is handed to `Bucket.Object`, i.e. it is used as the object key.
    With resource_type == "image" the object is downloaded into memory and
    returned as a PIL image; for any other value the boto3 object handle is
    returned without downloading anything.
    """
    bucket = boto3.resource('s3', region_name="us-east-2").Bucket(S3_BUCKET_NAME)
    s3_object = bucket.Object(url)
    if resource_type != "image":
        return s3_object

    buffer = io.BytesIO()
    s3_object.download_fileobj(buffer)
    return Image.open(buffer)

In [None]:
def download_from_url(url):
    """
    Download the image at `url` and return it as a PIL image.

    The response body is read fully into memory, so no temporary file is left
    on disk (the legacy ``urlretrieve`` creates one per call) and the
    connection is closed before returning.
    """
    with request.urlopen(url) as response:
        payload = response.read()
    return Image.open(io.BytesIO(payload))

In [None]:
def draw_circles(ax, locs, size=10, edgecolor="red", facecolor="none", lw=2):
    """
    Add one circle patch to `ax` for every entry of `locs` and return `ax`.

    Each entry is read as (point[0], point[1]) and drawn centred at
    x = point[1], y = point[0] with radius `size`.
    """
    circle_style = dict(ec=edgecolor, fc=facecolor, lw=lw)
    for point in locs:
        center = (point[1], point[0])
        ax.add_patch(patches.Circle(center, size, **circle_style))
    return ax


In [None]:
def cos_similarity(a, b):
    """Return the cosine of the angle between vectors `a` and `b`."""
    vec_a, vec_b = np.array(a), np.array(b)
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return vec_a.dot(vec_b) / norm_product

In [None]:
def geodesic_dist(a, b):
    """
    Return the angle in radians between direction vectors `a` and `b`.

    Both inputs are scaled to unit length first. If either input converts to
    a 0-d array (for example ``None`` or a scalar), a message is printed and
    NaN is returned.
    """
    a = np.array(a)
    b = np.array(b)
    if not a.shape or not b.shape:
        print("No data for: ", a, b)
        return float("nan")

    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)

    # Rounding can push the dot product of two unit vectors marginally outside
    # [-1, 1], where arccos yields NaN; clamp before taking the angle.
    cosine = np.clip(a.dot(b), -1.0, 1.0)
    return np.arccos(cosine)

### Check user consistency

In [None]:
# Flag users whose mean geodesic error (radians) over their attention-check
# trials is greater than 1.
potential_fails = []
for user, rows in df.groupby("userID"):
    check_trials = rows[rows["attention_check"]]
    direction_pairs = zip(
        check_trials["indicatorFinalDirection"].values,
        check_trials["trueArrowDirection"].values,
    )
    score = np.mean([geodesic_dist(estimate, truth) for estimate, truth in direction_pairs])
    if not score > 1:
        continue
    print(user, score)
    potential_fails.append(user)
print(potential_fails)

In [None]:
# Spot-check one participant: number of trials and median geodesic error.
# The error is computed here from the raw direction columns because the
# "geodesic_distance" column is only added to `df` in a later cell, so reading
# it at this point fails on a fresh Restart & Run All.
userID = "611cc87fae0fb55e133eca3d"
user_rows = df[df["userID"] == userID]
print(len(user_rows))
user_errors = pd.Series(
    [
        geodesic_dist(estimate, truth)
        for estimate, truth in zip(
            user_rows["indicatorFinalDirection"], user_rows["trueArrowDirection"]
        )
    ],
    dtype=float,
)
print(user_errors.median())

In [None]:
# Per-trial error metrics between the participant's indicator direction and
# the ground-truth direction.
direction_pairs = list(zip(df["indicatorFinalDirection"], df["trueArrowDirection"]))
df["geodesic_distance"] = [geodesic_dist(estimate, truth) for estimate, truth in direction_pairs]
df["cosine_similarity"] = [cos_similarity(estimate, truth) for estimate, truth in direction_pairs]

In [None]:
# Geodesic error of every trial from one specific participant.
df.loc[df["userID"] == "5dafea4de40355001651fa2f", "geodesic_distance"]

In [None]:
# Mean geodesic error over all trials for each user flagged in `potential_fails`.
for user in potential_fails:
    user_mean_error = df.loc[df["userID"] == user, "geodesic_distance"].mean()
    print(user_mean_error, user)

In [None]:
# Histogram of each participant's mean geodesic error, with one bin per
# participant.
geo_errors = [rows["geodesic_distance"].mean() for _, rows in df.groupby("userID")]
n_users = len(geo_errors)

plt.hist(geo_errors, bins=n_users)
plt.show()
    

In [None]:

# Side-by-side distributions of the two per-trial error metrics.
fig, axs = plt.subplots(1, 2, figsize=(18,9))
panels = (
    ("geodesic_distance", "Geodesic error across all trials"),
    ("cosine_similarity", "Cosine similarity across all trials"),
)
for panel_ax, (column, panel_title) in zip(axs, panels):
    sns.histplot(df[column], ax=panel_ax)
    panel_ax.set_title(panel_title, fontsize=18)
plt.suptitle(experimentName)

plt.show()

# Generate cleaned version

In [None]:
# Preview the first rows, now including the two error columns.
df.head()

In [None]:
# Screen participants: flag those who did not finish (fewer than 100 rows) and
# those whose mean attention-check error exceeds 1 radian. Also records the
# batch index of every participant.
participants = df.groupby("userID")
participants_failed = []
batch_idxs = []
for user_id, user_results in participants:
    batch_idxs.append(user_results.iloc[0]["batch_idx"])

    if len(user_results) < 100:
        print("USER ID: " + str(user_id) + " did not finish the experiment")
        participants_failed.append(user_id)
        continue

    if user_results["geodesic_distance"].mean() > 1.5:
        # NOTE(review): these users are reported but NOT added to
        # participants_failed, exactly as before -- confirm that is intended.
        print("USER ID: " + str(user_id) + " did not do too well")
        continue

    # Filter to the attention-check ROWS. The previous code compared the whole
    # frame to True, so the score was a mean of booleans instead of a mean of
    # geodesic distances.
    attention_checks = user_results[user_results["attention_check"] == True]
    attention_score = attention_checks["geodesic_distance"].mean()
    if attention_score > 1:
        # f-string: concatenating the float score to a str raised TypeError.
        print(f"USER ID: {user_id} scored: {attention_score}")
        participants_failed.append(user_id)

print(participants_failed)

# failed_participants = df["userID"].apply(lambda x: x in participants_failed)
# df = df[~failed_participants]
# attention_checks = df["stimulus"].apply(lambda x: "ground_truth" in x)
# df = df[~attention_checks]

# my_data = df["userID"] == "yoni_test2"
# df = df[~my_data]

In [None]:
# Remove the attention-check trials from `df` in place.
is_attention_check = df["attention_check"] == True
attention_indexes = df.index[is_attention_check]
df.drop(index=attention_indexes, inplace=True)

In [None]:
# Column summary after the attention-check trials were dropped.
df.info()

# To-Do:

2. Check consistency
3. Rank participants by score
4. Check which trials had the lowest score

## Visualization Ideas

4. Each image w/ probe locations --> put them in a zip
3. Heatmap on a sphere?
1. Download image -- overlay subject estimates with arrow
6. Correlate (X, Y) with estimate + error
5. NSD --> Check participant distribution (histogram, heatmap)
7. Surface normal UNet / model?

### Long-shot visualizations
1. Compare distribution of histograms across datasets

## Plot the distribution of surface normals

In [None]:
# Stack the per-trial direction vectors into arrays with one row per trial.
indicator_directions = np.array(df["indicatorFinalDirection"].tolist())
true_directions = np.array(df["trueArrowDirection"].tolist())

In [None]:
def near( p, pntList, d0 ):
    """
    Return the distance-weighted count of points in `pntList` close to `p`.

    Every point whose geodesic distance to `p` is below `d0` contributes
    ``1 - dist/d0``: 1 at zero distance, falling linearly to 0 at `d0`.
    """
    cnt = 0
    for pj in pntList:
        dist = geodesic_dist(p, pj)
        if dist < d0:
            cnt += 1 - dist / d0
    return cnt


def _with_progress(iterable, total):
    """Wrap `iterable` in a tqdm progress bar if tqdm is installed, else return it as is."""
    try:
        # Imported here because the notebook-level tqdm import was commented
        # out, which made calculate_heatmap fail with a NameError.
        from tqdm.auto import tqdm
    except ImportError:
        return iterable
    return tqdm(iterable, total=total)


def calculate_heatmap(pointList, threshold=0.1, resolution=60):
    """
    Estimate the density of direction vectors over a sphere.

    A `resolution` x `resolution` grid of points on a sphere of radius 10 is
    built, and `near` is evaluated at each grid point against `pointList`
    with cut-off `threshold`.

    Adapted from
    https://stackoverflow.com/questions/22128909/plotting-the-temperature-distribution-on-a-sphere-with-python

    Returns:
        (heatmap, (XX, YY, ZZ)): `heatmap` holds the grid values divided by
        their maximum (all zeros if no point is near any grid point);
        XX, YY, ZZ are the grid coordinates, each shaped like `heatmap`.
    """
    u = np.linspace(0, 2 * np.pi, resolution)
    v = np.linspace(0, np.pi, resolution)

    # create the sphere surface
    XX = 10 * np.outer(np.cos(u), np.sin(v))
    YY = 10 * np.outer(np.sin(u), np.sin(v))
    ZZ = 10 * np.outer(np.ones(np.size(u)), np.cos(v))

    WW = np.zeros_like(XX)
    grid_indices = list(np.ndindex(XX.shape))
    for i, j in _with_progress(grid_indices, total=len(grid_indices)):
        grid_point = np.array([XX[i, j], YY[i, j], ZZ[i, j]])
        WW[i, j] = near(grid_point, pointList, threshold)

    # Avoid 0/0 -> NaN when nothing falls within `threshold` of any grid point.
    peak = np.amax(WW)
    heatmap = WW / peak if peak > 0 else WW
    return heatmap, (XX, YY, ZZ)

In [None]:
# Density of the participants' indicator directions on the sphere grid.
participant_heatmap, spherical_coords = calculate_heatmap(indicator_directions)
# The ground-truth heatmap is skipped for "nsd_surface-normals", so `gt_heatmap`
# does not exist for that experiment.
# NOTE(review): presumably that dataset has no ground-truth normals -- confirm.
if experimentName != "nsd_surface-normals":
    gt_heatmap, spherical_coords = calculate_heatmap(true_directions)

In [None]:
# Histograms of the x/y/z components: ground truth on the left, participants
# on the right.
fig, axs = plt.subplots(1, 2, figsize=(16, 8))
panels = (
    (true_directions, "True Surface Normal Distribution"),
    (indicator_directions, "Participant Surface Normal Distribution"),
)
for panel_ax, (directions, panel_title) in zip(axs, panels):
    g = sns.histplot(directions, ax=panel_ax)
    panel_ax.legend(["x","y","z"])
    panel_ax.set_title(panel_title)
plt.suptitle(experimentName)

## Visualize the distribution of sampled points on images

In [None]:
# Set to True to download every stimulus image and save an overlay of its
# probe locations (done by the next cell).
plot_images = False

In [None]:
if plot_images:
    # One figure per stimulus image with every probed pixel location circled,
    # saved under figures/<experimentName>/ using the image's own file name.
    save_dir = os.path.join("figures", experimentName)
    os.makedirs(save_dir, exist_ok=True)

    sns.set_style("white")
    try:
        for url, row in df.groupby("imageURL"):
            # De-duplicate probe locations shared by several trials.
            locs = list({tuple(point) for point in row["arrowPixelPosition"]})
            image = download_from_url(url)

            fig, ax = plt.subplots(1, figsize=(12, 12))
            ax.set_aspect('equal')

            # Two concentric rings per probe location.
            ax = draw_circles(ax, locs, size=15, lw=4)
            ax = draw_circles(ax, locs, size=10, lw=4)

            ax.imshow(image)
            #     pixelPositions = row["arrowPixelPosition"]
            #     idfs = row["indicatorFinalDirection"]
            #     for i in range(len(pixelPositions)):
            #         loc = pixelPositions.iloc[i]
            #         ifd = idfs.iloc[i]
            #         r = lambda: np.random.randint(0,255)
            #         color = '#%02X%02X%02X' % (r(),r(),r())
            #         ax.quiver(loc[1], loc[0], ifd[0], ifd[2], scale=ifd[1], color=color, angles="xy", scale_units='y')

            save_title = url.split("/")[-1]
            fig.savefig(os.path.join(save_dir, save_title))
            plt.show()
            # Close this figure explicitly so figures do not pile up in memory.
            plt.close(fig)
    finally:
        # Restore the notebook-wide style even if a download or save fails.
        sns.set_style("darkgrid")

In [None]:
# Number of trials vs. heatmap grid size. No variable named `heatmap` exists at
# notebook level, so the participant heatmap (always computed) is used.
indicator_directions.shape, participant_heatmap.shape

In [None]:

# Sphere grid points coloured by the participant-response density.
sphere_x, sphere_y, sphere_z = (grid.flatten() for grid in spherical_coords)
participant_marker = dict(
    size=5,
    color=participant_heatmap.flatten(),  # one density value per grid point
    colorscale='inferno',
    opacity=0.8,
)
participant_trace = go.Scatter3d(
    x=sphere_x, y=sphere_y, z=sphere_z, mode='markers', marker=participant_marker
)
fig = go.Figure(data=[participant_trace])
fig.update_layout({"title": f"{experimentName}: Participant Surface Normal Distribution"})

fig.show()


In [None]:

# Sphere grid points coloured by the ground-truth density. Skipped for
# "nsd_surface-normals", for which `gt_heatmap` is never computed.
if experimentName != "nsd_surface-normals":
    gt_colors = gt_heatmap.flatten()
    # NOTE(review): uniform [0, 1) noise is added to the colours, as before;
    # its length now follows the data instead of a hard-coded 3600. Confirm
    # the noise is actually wanted -- it is as large as the signal itself.
    gt_colors = gt_colors + np.random.random(gt_colors.size)

    fig = go.Figure(data=[go.Scatter3d(x=spherical_coords[0].flatten(),
                                       y=spherical_coords[1].flatten(),
                                       z=spherical_coords[2].flatten(),
                                       mode='markers',
                                       marker=dict(
                                        size=3,
                                        color=gt_colors,        # set color to an array/list of desired values
                                        colorscale='inferno',   # choose a colorscale
                                        opacity=0.8
                                    ))])
    # This figure shows the ground truth, not the participants.
    fig.update_layout({"title": f"{experimentName}: True Surface Normal Distribution"})

    fig.show()


In [None]:
# Per-component agreement between participant and ground-truth directions:
# one scatter panel and one printed correlation matrix per axis.
fig, axs = plt.subplots(3, 1, figsize=(12, 12))
for i, axis_panel in enumerate(axs):
    estimated, actual = indicator_directions[:, i], true_directions[:, i]
    axis_panel.scatter(estimated, actual)
    print(np.corrcoef(estimated, actual))
plt.show()


In [None]:
fig = plt.figure(figsize=(16,12))
ax = fig.add_subplot(projection='3d')

# Participant estimates in red, ground-truth directions in blue.
est_x, est_y, est_z = indicator_directions[:, :3].T
true_x, true_y, true_z = true_directions[:, :3].T
ax.scatter(est_x, est_y, est_z, color="red", alpha=0.7)
ax.scatter(true_x, true_y, true_z, color="blue")

plt.show()

In [None]:

# Two 3D scatters of the participant directions, shown one after the other.
# NOTE(review): both figures plot `indicator_directions` under the same title;
# only the marker colour differs -- confirm the second is not meant to show
# other data.
for marker_options in (
    dict(color="blue", size=5, opacity=0.8),
    dict(color="red", size=5, colorscale='Viridis', opacity=0.8),
):
    indicator_trace = go.Scatter3d(
        x=indicator_directions[:, 0],
        y=indicator_directions[:, 1],
        z=indicator_directions[:, 2],
        mode='markers',
        marker=marker_options,
    )
    fig = go.Figure(data=[indicator_trace])
    fig.update_layout({"title": f"{experimentName}: Participant Surface Normal Distribution"})
    fig.show()


In [None]:

# 3D scatter of the ground-truth directions, coloured by their first component.
true_marker = dict(
    size=5,
    color=true_directions[:, 0],
    colorscale='Viridis',
    opacity=0.8,
)
true_trace = go.Scatter3d(
    x=true_directions[:, 0],
    y=true_directions[:, 1],
    z=true_directions[:, 2],
    mode='markers',
    marker=true_marker,
)
fig = go.Figure(data=[true_trace])

fig.update_layout({"title": f"{experimentName}: True Surface Normal Distribution"})
fig.show()

In [None]:
# Render the participant heatmap on the sphere surface. XX/YY/ZZ only exist
# inside calculate_heatmap; at notebook level the grid is `spherical_coords`
# and the heatmap is `participant_heatmap`.
sphere_x, sphere_y, sphere_z = spherical_coords
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1, projection='3d')

ax.plot_surface(sphere_x, sphere_y, sphere_z, cstride=1, rstride=1, facecolors=cm.jet(participant_heatmap))
plt.show()


In [None]:

# Colour each ground-truth direction by the local density of ground-truth
# directions around it. `heatmap` was undefined here, and the grid heatmaps do
# not hold one value per plotted point, so a per-point density is computed
# with the same kernel (`near`, cut-off 0.1) used for the sphere heatmaps.
# NOTE(review): this calls `near` once per point; confirm per-point density is
# the colouring that was intended.
true_density = np.array([near(point, true_directions, 0.1) for point in true_directions])

fig = go.Figure(data=[go.Scatter3d(x=true_directions[:, 0],
                                   y=true_directions[:, 1],
                                   z=true_directions[:, 2],
                                   mode='markers',
                                   marker=dict(
                                    size=5,
                                    color=true_density,     # set color to an array/list of desired values
                                    colorscale='Viridis',   # choose a colorscale
                                    opacity=0.8
                                ))])

fig.update_layout({"title": f"{experimentName}: True Surface Normal Distribution"})
fig.show()

In [None]:
# Stand-alone quiver demo: a 6x6 grid on [-4, 4]^2 with arrows (X + Y, Y - X).
x = y = np.linspace(-4, 4, 6)
X, Y = np.meshgrid(x, y)
U, V = X + Y, Y - X

# plot
fig, ax = plt.subplots()
quiver_style = dict(color="C0", angles='xy', scale_units='xy', scale=5, width=.015)
ax.quiver(X, Y, U, V, **quiver_style)
ax.set(xlim=(-5, 5), ylim=(-5, 5))

plt.show()