In [None]:
import numpy as np

# Reproducible 100 x 20 matrix of standard-normal draws from a fixed-seed
# legacy RandomState generator.
rng = np.random.RandomState(seed=42)
X = rng.standard_normal(size=(100, 20))
n_components = 2

In [None]:
import os
import sys

from sklearn.decomposition import PCA

# The later cells of this notebook make `acpca` importable by appending a
# checkout directory to sys.path. Do the same here, *before* the import, so
# this cell also works on a fresh kernel. Set the ACPCA_ROOT environment
# variable to point at a checkout in a different location.
ACPCA_ROOT = os.environ.get('ACPCA_ROOT', '/home/vlad/projects/acpca')
# Guard against growing sys.path every time the cell is re-run.
if ACPCA_ROOT not in sys.path:
    sys.path.append(ACPCA_ROOT)
# Use the same module path (`acpca.transform`) as the other cells.
from acpca.transform import ACPCA


# Fit a one-component PCA on X (defined in the previous cell) as the reference.
pca = PCA(n_components=1)
pca_transformed = pca.fit_transform(X)

# Fit ACPCA with L=0 and an all-zero Y of length X.shape[0].
# NOTE(review): presumably L=0 is meant to make ACPCA reduce to plain PCA,
# which is what the comparison below is eyeballing — confirm against ACPCA.
acpca = ACPCA(n_components=1, Y=np.zeros((X.shape[0], )), L=0, preprocess=True, use_implicit=False, scale_x=False, scale_y=False, center_x=True, center_y=True)
acpca_transformed = acpca.fit_transform(X)

# Show both projections side by side (column 0: PCA, column 1: ACPCA).
np.concatenate([pca_transformed, acpca_transformed], axis=1)

In [None]:
# Convert the R example dataset (.rda) into a flat CSV file.
import pandas as pd
import rdata
import numpy as np

# Parse the .rda file, then convert the parsed R objects to Python objects.
parsed_data = rdata.parser.parse_file("../data/data_example1.rda")
converted_data = rdata.conversion.convert(parsed_data)

# Everything used below lives under the 'data_example1' entry: 'X', 'Y', 'lab'.
example1 = converted_data['data_example1']
Y = example1['Y']

# One column per entry of X, named gene_0, gene_1, ...
data = pd.DataFrame(example1['X'])
data.columns = [f'gene_{i}' for i in range(len(data.columns))]

# Reduce each row of Y to the index of its largest entry.
data['batch_labels'] = np.argmax(Y, axis=1)
data['point_annotation'] = example1['lab']

data.to_csv('../data/data_example1.csv', index=False)


In [None]:
# Convert the fly/worm R dataset (.rda) into a merged CSV file.
parsed_data = rdata.parser.parse_file("../data/data_fly_worm.rda")
converted_data = rdata.conversion.convert(parsed_data)

# Everything used below lives under the 'data_fly_worm' entry.
fly_worm = converted_data['data_fly_worm']

# Per-species tables. Only the fly table is relabelled (with 'fly_gene');
# the columns of data_2 are left exactly as the DataFrame constructor made them.
data_1 = pd.DataFrame(fly_worm['data_fly'])
data_2 = pd.DataFrame(fly_worm['data_worm'])
data_1.columns = fly_worm['fly_gene']

# Merged matrix X plus three trailing metadata columns: species, time, Y.
data_merged = pd.DataFrame(fly_worm['X'])
data_merged['species'] = fly_worm['X_species']
data_merged['time'] = fly_worm['X_time']
# Reduce each row of Y to the index of its largest entry.
data_merged['Y'] = np.argmax(fly_worm['Y'].values, axis=1)

data_merged.to_csv('../data/data_fly_worm_merged.csv', index=False)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


data = pd.read_csv('../data/data_example1.csv')

# All columns except the last two are features; 'batch_labels' colours the points.
X = data.iloc[:, :-2].values
y = data['batch_labels'].values

# Project the features onto the first two principal components.
pca = PCA(n_components=2)
X_pca_i = pca.fit_transform(X)

plt.scatter(X_pca_i[:, 0], X_pca_i[:, 1], c=y, cmap='viridis')
# Write each point's 'point_annotation' value next to it.
for pc1, pc2, label in zip(X_pca_i[:, 0], X_pca_i[:, 1], data['point_annotation']):
    plt.annotate(label, (pc1, pc2), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd

# Directory containing the `acpca` package. Defaults to the original checkout
# location; set the ACPCA_ROOT environment variable to run on another machine.
ACPCA_ROOT = os.environ.get('ACPCA_ROOT', '/home/vlad/projects/acpca')
# Re-running the cell must not keep appending the same entry to sys.path.
if ACPCA_ROOT not in sys.path:
    sys.path.append(ACPCA_ROOT)
from acpca.transform import ACPCA

data = pd.read_csv('../data/data_example1.csv')

# All columns except the last two are features; 'batch_labels' is passed as Y.
X = data.iloc[:, :-2].values
y = data['batch_labels'].values

# ACPCA with a fixed L=1.
acpca = ACPCA(n_components=2, Y=y, L=1, center_x=True, align_orientation=True)

X_pca = acpca.fit_transform(X)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
# Write each point's 'point_annotation' value next to it.
for i, txt in enumerate(data['point_annotation']):
    plt.annotate(txt, (X_pca[i, 0], X_pca[i, 1]), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd

# Directory containing the `acpca` package. Defaults to the original checkout
# location; set the ACPCA_ROOT environment variable to run on another machine.
ACPCA_ROOT = os.environ.get('ACPCA_ROOT', '/home/vlad/projects/acpca')
# Re-running the cell must not keep appending the same entry to sys.path.
if ACPCA_ROOT not in sys.path:
    sys.path.append(ACPCA_ROOT)
from acpca.transform import ACPCA

data = pd.read_csv('../data/data_example1.csv')

# All columns except the last two are features; 'batch_labels' is passed as Y.
X = data.iloc[:, :-2].values
y = data['batch_labels'].values

# L=-1 together with lambda_method='silhouette'.
# NOTE(review): presumably L=-1 asks ACPCA to pick lambda itself — confirm.
acpca = ACPCA(n_components=2, Y=y, L=-1, use_implicit=True, align_orientation=True, lambda_method='silhouette', kernel='linear', gamma=0.5, preprocess=False, scale_x=True, center_x=True, center_y=True)

X_pca = acpca.fit_transform(X)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
# Write each point's 'point_annotation' value next to it.
for i, txt in enumerate(data['point_annotation']):
    plt.annotate(txt, (X_pca[i, 0], X_pca[i, 1]), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
# Call plot_lambda_optimization() on the current `acpca` object. When the cells
# are run in order, that is the instance fitted just above with L=-1 and
# lambda_method='silhouette'.
# NOTE(review): presumably this visualises the lambda search — confirm in ACPCA.
acpca.plot_lambda_optimization()

In [None]:
import numpy as np
from sklearn.decomposition import PCA
from numpy.testing import assert_allclose

# Reproducible 100 x 20 matrix of standard-normal draws.
rng = np.random.RandomState(seed=42)
X = rng.standard_normal(size=(100, 20))
n_components = 1

# Reference projection: plain PCA.
pca = PCA(n_components=n_components)
pca_transformed = pca.fit_transform(X)

# ACPCA with L=0 and an all-ones Y of length X.shape[0].
acpca_options = dict(
    n_components=n_components,
    Y=np.ones((X.shape[0], )),
    L=0,
    use_implicit=False,
    preprocess=True,
    scale_x=False,
    center_x=True,
    center_y=True,
)
acpca = ACPCA(**acpca_options)
acpca_transformed = acpca.fit_transform(X)



In [None]:
# Place the PCA and ACPCA projections next to each other, column-wise.
np.hstack((pca_transformed, acpca_transformed))

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv('../data/data_example1.csv')

# All columns except the last two are features; 'batch_labels' is passed as Y.
X = data.iloc[:, :-2].values
y = data['batch_labels'].values

# ACPCA with L=0.0 and every other option left at its default.
acpca = ACPCA(n_components=2, Y=y, L=0.0)
fitted_acpca = acpca.fit(X)
X_pca = fitted_acpca.transform(X)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
# Write each point's 'point_annotation' value next to it.
for pc1, pc2, label in zip(X_pca[:, 0], X_pca[:, 1], data['point_annotation']):
    plt.annotate(label, (pc1, pc2), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


data = pd.read_csv('../data/data_fly_worm_merged.csv')

# All columns except the last three are features.
X = data.iloc[:, :-3].values
# Species encoded as an integer (fly -> 0, worm -> 1); y2 is the stored 'Y' column.
species_codes = {'fly': 0, 'worm': 1}
y = data['species'].map(species_codes).values
y2 = data['Y'].values

# Project the features onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
# Write each point's 'time' value next to it.
for pc1, pc2, label in zip(X_pca[:, 0], X_pca[:, 1], data['time']):
    plt.annotate(label, (pc1, pc2), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
import os
import sys

# Directory containing the `acpca` package. Defaults to the original checkout
# location; set the ACPCA_ROOT environment variable to run on another machine.
ACPCA_ROOT = os.environ.get('ACPCA_ROOT', '/home/vlad/projects/acpca')
# Re-running the cell must not keep appending the same entry to sys.path.
if ACPCA_ROOT not in sys.path:
    sys.path.append(ACPCA_ROOT)
from acpca.transform import ACPCA

# X, y, y2 and data come from the fly/worm PCA cell above: y is the integer
# species code passed to ACPCA as Y, y2 is only used to colour the plot.
acpca = ACPCA(
    n_components=2,
    Y=y,
    L=-1,
    lambda_method='original',
    kernel='linear',
    gamma=0.5,
    align_orientation=True,
    preprocess=True,
    use_implicit=True,
    scale_x=False,
    scale_y=False,
    center_x=True,
    center_y=True,
)

X_acpca = acpca.fit(X).transform(X)

# Plot the ACPCA projection, coloured by y2 and labelled with 'time'.
plt.scatter(X_acpca[:, 0], X_acpca[:, 1], c=y2, cmap='viridis')
for i, txt in enumerate(data['time']):
    plt.annotate(txt, (X_acpca[i, 0], X_acpca[i, 1]), fontsize=8)
plt.colorbar()
plt.show()

In [None]:
# Call plot_lambda_optimization() on the current `acpca` object. When the cells
# are run in order, that is the fly/worm instance fitted just above with L=-1
# and lambda_method='original'.
# NOTE(review): presumably this visualises the lambda search — confirm in ACPCA.
acpca.plot_lambda_optimization()

In [None]:
import os
import sys

# Directory containing the `acpca` package. Defaults to the original checkout
# location; set the ACPCA_ROOT environment variable to run on another machine.
ACPCA_ROOT = os.environ.get('ACPCA_ROOT', '/home/vlad/projects/acpca')
# Re-running the cell must not keep appending the same entry to sys.path.
if ACPCA_ROOT not in sys.path:
    sys.path.append(ACPCA_ROOT)
from acpca.transform import ACPCA
from acpca.utils import create_synthetic_data

# Generate a synthetic dataset with a fixed random_state. Only the scale effect
# and the batch structure are switched on; dropout and technical noise are off.
# NOTE(review): the meaning of the four returned objects is taken from their
# names only — confirm against create_synthetic_data.
X, X_true, Y, batch_labels = create_synthetic_data(
    n_samples=300, n_genes=100, num_batches=10, n_groups=3, random_state=42,
    apply_scale_effect=True,
    apply_dropout_effect=False,
    apply_technical_noise=False,
    apply_batch_structure=True
)

In [None]:
import pandas as pd
from sklearn.decomposition import PCA

# Two-component PCA of X, shown as a scatter plot coloured by Y.
pca_scores = PCA(n_components=2).fit_transform(X)
pca_scores_df = pd.DataFrame(pca_scores)
pca_scores_df.plot.scatter(x=0, y=1, c=Y, cmap='viridis')


In [None]:
# ACPCA on the synthetic data with L=0, a linear kernel and batch_labels as Y.
acpca_options = dict(
    n_components=2,
    Y=batch_labels,
    L=0,
    preprocess=True,
    use_implicit=True,
    scale_x=False,
    scale_y=False,
    center_x=True,
    center_y=True,
    lambda_method='original',
    kernel='linear',
)
acpca = ACPCA(**acpca_options)
acpca_res = acpca.fit_transform(X)

# Scatter of the first two output columns, coloured by Y.
df_acpca_res = pd.DataFrame(acpca_res)
df_acpca_res.plot.scatter(x=0, y=1, c=Y, cmap='viridis')


In [None]:
# ACPCA on the synthetic data with L=-1, a sigmoid kernel (gamma=0.5) and
# scaling enabled for both X and Y; batch_labels is passed as Y.
acpca_options = dict(
    n_components=2,
    Y=batch_labels,
    L=-1,
    preprocess=True,
    use_implicit=True,
    scale_x=True,
    scale_y=True,
    center_x=True,
    center_y=True,
    lambda_method='original',
    kernel='sigmoid',
    gamma=0.5,
)
acpca = ACPCA(**acpca_options)
acpca_res = acpca.fit_transform(X)

# Scatter of the first two output columns, coloured by Y.
df_acpca_res = pd.DataFrame(acpca_res)
df_acpca_res.plot.scatter(x=0, y=1, c=Y, cmap='viridis')


In [None]:
# Call plot_lambda_optimization() on the current `acpca` object. When the cells
# are run in order, that is the synthetic-data instance fitted just above with
# L=-1, lambda_method='original' and kernel='sigmoid'.
# NOTE(review): presumably this visualises the lambda search — confirm in ACPCA.
acpca.plot_lambda_optimization()

In [None]:
# ACPCA on the synthetic data with L=-1, no preprocessing and use_implicit
# switched off; the result is plotted coloured by batch_labels instead of Y.
explicit_options = dict(
    n_components=2,
    Y=batch_labels,
    L=-1,
    preprocess=False,
    use_implicit=False,
    scale_x=True,
    center_x=True,
    center_y=True,
    lambda_method='original',
    kernel='linear',
)
explicit_scores = ACPCA(**explicit_options).fit_transform(X)
explicit_scores_df = pd.DataFrame(explicit_scores)
explicit_scores_df.plot.scatter(x=0, y=1, c=batch_labels, cmap='viridis')
