In [None]:
import torch
import numpy as np

from pandas.api.types import is_numeric_dtype

import matplotlib.pyplot as plt
%matplotlib widget

In [None]:
# Check that GPU is available, and that pyTorch is using it
!nvidia-smi
print(f'Torch is using CUDA: {torch.cuda.is_available()}')

In [None]:
import os

from dataportal import DataportalClient

# Take the API token from the DATAPORTAL_TOKEN environment variable so the
# secret never has to be typed into (and saved with) the notebook. When the
# variable is not set, fall back to the empty placeholder used previously.
token = os.environ.get('DATAPORTAL_TOKEN', '')
client = DataportalClient(token)

# Preview the first three entries of the '5Gdata' dataset listing
list(client.fromDataset('5Gdata').listFiles())[:3]

In [None]:
# Request the item identified by '1' from the dataportal client.
# NOTE(review): presumably this returns a pandas DataFrame (later cells use
# .columns and .info() on it) -- confirm against the dataportal API.
df_raw = client.getData('1')
# Bare last expression: the notebook renders the object as this cell's output
df_raw

In [None]:
# Keep only the columns whose dtype pandas classifies as numeric
numeric_cols = [
    name for name in df_raw.columns
    if is_numeric_dtype(df_raw[name].dtype)
]
df = df_raw[numeric_cols]

# Summarize the remaining columns; 'deep' asks pandas to measure the real
# memory footprint instead of estimating it
df.info(memory_usage='deep')

In [None]:
# Cast the dataframe into a float64 torch tensor. Use the GPU when CUDA is
# available and fall back to the CPU otherwise, so the notebook still runs
# on a machine without a usable GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
X = torch.from_numpy(df.to_numpy('float64')).to(device)

# Standardize data to zero mean, unit variance (statistics taken per column)
X_mean = torch.mean(X, 0)
X_std = torch.std(X, 0)

# A constant column has zero standard deviation, which would turn the division
# below into 0/0 = NaN. Divide such columns by 1 instead so they become all
# zeros. This is one vectorized operation on the device rather than a Python
# loop that compares and assigns one element at a time.
X_std = torch.where(X_std == 0, torch.ones_like(X_std), X_std)
X_norm = (X - X_mean) / X_std

In [None]:
# Pairwise correlation coefficients between the columns. torch.corrcoef treats
# every row of its input as one variable, hence the transpose.
X_corr = torch.corrcoef(X_norm.t())

# Copy the matrix to host memory and draw it as an image
plt.matshow(X_corr.cpu())
plt.title('Correlation between columns')
plt.show()

In [None]:
# Perform singular value decomposition. torch.linalg.svd returns, in this
# order: U (left singular vectors), the singular values (largest first), and
# Vh (the transposed right singular vectors).
U, singular_values, Vh = torch.linalg.svd(X_norm, full_matrices=False)

# This cell used to unpack the result as `U, V, S`, i.e. V held the singular
# values and S held Vh. Keep those bindings so code that still refers to them
# continues to work.
V, S = singular_values, Vh

# Scale each left singular vector by its singular value to get the coordinates
# of every datapoint along the singular directions, then copy to host memory
T = (U * singular_values).cpu()

# Plot the singular values
plt.figure()
plt.stem(singular_values.cpu())
plt.title('Singular values')
plt.show()

# Plot a subset of datapoints in 2D, using the coordinates that belong to the
# two largest singular values
nbrprint = min(10000, T.shape[0])  # sampling without replacement cannot exceed the row count
rng = np.random.default_rng(0)     # fixed seed: the same subset is drawn on every run
points = rng.choice(T.shape[0], nbrprint, replace=False)

plt.figure()
plt.plot(T[points, 0], T[points, 1], '.')
# Report the real input dimensionality instead of a hard-coded column count
plt.title(f'Data reduced from {X_norm.shape[1]} to 2 dimensions')
plt.show()