In [None]:
import wandb

# Open a Weights & Biases public-API client and look up the sweep whose runs
# are analysed in the rest of this notebook.
# NOTE(review): timeout=19 is presumably expressed in seconds — confirm against the wandb docs.
api = wandb.Api(timeout=19)
# NOTE(review): the path looks like '<entity>/<project>/sweeps/<sweep_id>' — confirm.
sweep = api.sweep('carla-pedestrians/sensitivity/sweeps/j7yihb85')

In [None]:
from pedestrians_scenarios.karma.pose.skeleton import CARLA_SKELETON
import pandas as pd

# Two-level column header: one ('missing_joint_probabilities', <joint name>)
# column per CARLA_SKELETON member, followed by the two F1 score columns.
columns = pd.MultiIndex.from_tuples(
    [('missing_joint_probabilities', joint_name) for joint_name in CARLA_SKELETON.__members__]
    + [('F1Score', 'summary'), ('F1Score', 'max')]
)

In [None]:
import os

# Build the results table once and cache it on disk; later executions of this
# cell read the cached CSV instead of querying the sweep again.
if not os.path.exists('sensitivity_analysis.csv'):
    rows = []
    skipped_runs = []  # (run id, reason) for every run that could not be used

    for run in sweep.runs:
        try:
            row = list(run.config['missing_joint_probabilities']) + [
                run.summary['hp/F1Score'],
                run.history()['hp/F1Score'].max(),
            ]
        except Exception as exc:
            # Best effort: a run with a missing config/summary/history entry is
            # skipped. `Exception` (rather than a bare `except`) keeps
            # KeyboardInterrupt/SystemExit working during this slow loop.
            skipped_runs.append((getattr(run, 'id', '<unknown>'), repr(exc)))
            continue

        # A row with the wrong number of values would not line up with the
        # column header, so it is skipped and reported as well.
        if len(row) != len(columns):
            skipped_runs.append(
                (getattr(run, 'id', '<unknown>'), f'expected {len(columns)} values, got {len(row)}')
            )
            continue

        rows.append(row)

    if skipped_runs:
        print(f'Skipped {len(skipped_runs)} run(s):')
        for run_id, reason in skipped_runs:
            print(f'  {run_id}: {reason}')

    df = pd.DataFrame(rows, columns=columns)
    df.to_csv('sensitivity_analysis.csv')
else:
    # The cached file starts with two header rows (one per column level);
    # skip them and re-apply the MultiIndex header built above.
    df = pd.read_csv('sensitivity_analysis.csv', skiprows=2, names=columns)

In [None]:
# Preview the first 10 rows of the results table.
df.head(10)

In [None]:
# Per-column mean over all rows, listed from largest to smallest.
df.mean().sort_values(ascending=False)

In [None]:
# Second-level labels of the first 26 columns, used as feature names below.
# NOTE(review): 26 presumably equals the number of CARLA_SKELETON joints — confirm.
a = [second_level for _, second_level in df.columns.values[:26]]
a


In [None]:
# Features: the first 26 columns. Target: the column at position 26, i.e. the
# one right after the features.
# NOTE(review): with 26 joints this is ('F1Score', 'summary') — confirm.
X, y = df.iloc[:, :26], df.iloc[:, 26]

In [None]:
# Decision tree for feature importance on a regression problem.
from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor
from matplotlib import pyplot

# A fixed seed makes the fitted tree, and therefore the importances, the same
# on every execution of this cell.
model = DecisionTreeRegressor(random_state=0)
model.fit(X, y)

# One importance score per column of X, in column order.
importance = model.feature_importances_

In [None]:
# Pair each feature name with its decision-tree importance, highest first.
# The scores come from the `importance` array captured when the tree was
# fitted, not from `model`: `model` is rebound to a LinearRegression further
# down, which has no `feature_importances_`, so re-running this cell afterwards
# would otherwise fail.
feature_importance = pd.DataFrame({'feature': a, 'importance': importance}).sort_values('importance', ascending=False)

In [None]:
# Horizontal bar chart of the decision-tree importances, one bar per feature.
feature_importance.plot.barh(x='feature', y='importance', figsize=(20, 10), legend=False)

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Fit a linear regression on the same X / y and use its coefficients as a
# second, signed measure of feature importance.
model = LinearRegression().fit(X, y)

# One row per feature with its coefficient, sorted from largest to smallest.
feature_importance_lr = pd.DataFrame(
    {'feature': a, 'importance': model.coef_}
).sort_values('importance', ascending=False)

# Horizontal bar chart of the coefficients, one bar per feature.
feature_importance_lr.plot.barh(
    x='feature',
    y='importance',
    figsize=(20, 10),
    legend=False,
)

In [None]:
import numpy as np

# All columns except the last two.
joints = df.values[:, :-2]
# Row-wise maximum of the last two columns, kept as a single column.
f1score = df.values[:, -2:].max(axis=1, keepdims=True)

# Clustering input: the leading columns with the combined score appended.
# Note that this rebinds X, which held the regression features earlier.
X = np.hstack((joints, f1score))


In [None]:
from sklearn import cluster, mixture
from sklearn.neighbors import kneighbors_graph

# Hyper-parameters shared by the clustering algorithms configured below.
params = {
    "quantile": 0.3,
    "eps": 0.3,
    "damping": 0.9,
    "preference": -200,
    "n_neighbors": 3,
    "n_clusters": 3,
    "min_samples": 7,
    "xi": 0.05,
    "min_cluster_size": 0.1,
}

# estimate bandwidth for mean shift
bandwidth = cluster.estimate_bandwidth(X, quantile=params["quantile"])

# connectivity matrix for structured Ward
connectivity = kneighbors_graph(
    X, n_neighbors=params["n_neighbors"], include_self=False
)
# make connectivity symmetric
connectivity = 0.5 * (connectivity + connectivity.T)

# ============
# Create cluster objects
# ============
# The randomly initialised estimators get a fixed seed, matching the
# random_state=0 already used for AffinityPropagation, so that re-running the
# notebook yields the same clusters.
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(
    n_clusters=params["n_clusters"], random_state=0
)
ward = cluster.AgglomerativeClustering(
    n_clusters=params["n_clusters"], linkage="ward", connectivity=connectivity
)
spectral = cluster.SpectralClustering(
    n_clusters=params["n_clusters"],
    eigen_solver="arpack",
    affinity="nearest_neighbors",
    random_state=0,
)
dbscan = cluster.DBSCAN(eps=params["eps"])
optics = cluster.OPTICS(
    min_samples=params["min_samples"],
    xi=params["xi"],
    min_cluster_size=params["min_cluster_size"],
)
affinity_propagation = cluster.AffinityPropagation(
    damping=params["damping"], preference=params["preference"], random_state=0
)
# Newer scikit-learn releases name the distance argument of
# AgglomerativeClustering `metric`; older ones only know `affinity`. Try the
# current name first and fall back, so the cell runs on either.
try:
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average",
        metric="cityblock",
        n_clusters=params["n_clusters"],
        connectivity=connectivity,
    )
except TypeError:
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average",
        affinity="cityblock",
        n_clusters=params["n_clusters"],
        connectivity=connectivity,
    )
birch = cluster.Birch(n_clusters=params["n_clusters"])
gmm = mixture.GaussianMixture(
    n_components=params["n_clusters"], covariance_type="full", random_state=0
)

# (display name, estimator) pairs, in the order they are fitted later on.
clustering_algorithms = (
    ("MiniBatch\nKMeans", two_means),
    ("Affinity\nPropagation", affinity_propagation),
    ("MeanShift", ms),
    ("Spectral\nClustering", spectral),
    ("Ward", ward),
    ("Agglomerative\nClustering", average_linkage),
    ("DBSCAN", dbscan),
    ("OPTICS", optics),
    ("BIRCH", birch),
    ("Gaussian\nMixture", gmm),
)

In [None]:
%matplotlib widget

from itertools import cycle, islice
import numpy as np
import time
import warnings
import matplotlib.pyplot as plt

# plt.subplots_adjust(
#     left=0.02, right=0.98, bottom=0.001, top=0.95, wspace=0.05, hspace=0.5
# )

# Fit every configured clustering algorithm on X, then derive its cluster
# labels and a colour table with one entry per cluster plus black for outliers.
# The plotting calls themselves are currently disabled.
plot_num = 1
for name, algorithm in clustering_algorithms:
    t0 = time.time()

    # Suppress the two UserWarnings related to kneighbors_graph connectivity
    # while fitting; any other warning is still shown.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=(
                "the number of connected components of the "
                "connectivity matrix is [0-9]{1,2}"
                " > 1. Completing it to avoid stopping the tree early."
            ),
            category=UserWarning,
        )
        warnings.filterwarnings(
            "ignore",
            message=(
                "Graph is not fully connected, spectral embedding"
                " may not work as expected."
            ),
            category=UserWarning,
        )
        algorithm.fit(X)

    t1 = time.time()

    # Estimators without a `labels_` attribute are asked to predict instead.
    labels = (
        algorithm.labels_.astype(int)
        if hasattr(algorithm, "labels_")
        else algorithm.predict(X)
    )

    # Repeat the palette until there is one colour per cluster label.
    colors = np.array(
        list(
            islice(
                cycle(
                    [
                        "#377eb8", "#ff7f00", "#4daf4a",
                        "#f781bf", "#a65628", "#984ea3",
                        "#999999", "#e41a1c", "#dede00",
                    ]
                ),
                int(max(labels) + 1),
            )
        )
    )
    # Black goes last, so a label of -1 (outlier, if any) indexes it.
    colors = np.append(colors, ["#000000"])

    # plt.subplot(4, 3, plot_num)
    # plt.title(name, size=10)
    # plt.scatter(X[:, 0], X[:, -1], s=10, color=colors[labels])

    # plt.xlim(-0.5, 1.5)
    # plt.ylim(0, 1.5)
    # plt.xticks(())
    # plt.yticks(())

    plot_num += 1

# plt.show()