# Quantum Spatial Transcriptomics Framework Demo

This notebook demonstrates the Quantum Spatial Transcriptomics framework for predicting spatial hotspots of gene expression using variational quantum algorithms.

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Add the src directory to the path
sys.path.append('../src')

# Import our modules
import preprocess
import encoding
import circuits
from run_vqe import SpatialVQE
from run_qaoa import SpatialQAOA
import visualize

## 1. Generate Synthetic Data

For demonstration purposes, we'll create a synthetic spatial transcriptomics dataset to test our quantum algorithms.

In [None]:
# Build a synthetic spatial transcriptomics dataset on a regular grid
# (5 genes, each with a different spatial pattern) and save it as CSV.
from scipy.ndimage import gaussian_filter

# Output locations. exist_ok=True makes directory creation idempotent and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
data_dir = '../data'
synthetic_csv_path = f'{data_dir}/synthetic_data.csv'
os.makedirs(data_dir, exist_ok=True)

# Set random seed for reproducibility (only Gene5 draws random numbers)
np.random.seed(42)

# Create a 10x10 grid of spots (100 spots total).
# n_spots is derived from grid_size so the coordinate array and the
# expression matrix are guaranteed to have the same number of rows.
grid_size = 10
n_spots = grid_size ** 2
x = np.linspace(0, 1, grid_size)
y = np.linspace(0, 1, grid_size)
X, Y = np.meshgrid(x, y)
coords = np.column_stack((X.flatten(), Y.flatten()))

# Generate gene expression data for 5 genes
n_genes = 5
gene_names = [f'Gene{i+1}' for i in range(n_genes)]

# Create spot IDs (spot_001 ... spot_100)
spot_ids = [f'spot_{i+1:03d}' for i in range(n_spots)]

# One column per gene, one row per spot
expression_data = np.zeros((n_spots, n_genes))

# Squared / plain distance of every spot from the grid centre; shared by
# Gene1 and Gene3 instead of being recomputed for each pattern.
center_x, center_y = 0.5, 0.5
sq_dist_from_center = (coords[:, 0] - center_x)**2 + (coords[:, 1] - center_y)**2
dist_from_center = np.sqrt(sq_dist_from_center)

# Gene1: Gaussian peak in the center
expression_data[:, 0] = np.exp(-10 * sq_dist_from_center)

# Gene2: Gradient from left to right (expression equals the x coordinate)
expression_data[:, 1] = coords[:, 0]

# Gene3: Circular pattern (ring of the given radius around the centre)
radius = 0.3
expression_data[:, 2] = np.exp(-30 * (dist_from_center - radius)**2)

# Gene4: Two hotspots, at (0.2, 0.2) and (0.8, 0.8)
hotspot1 = np.exp(-15 * ((coords[:, 0] - 0.2)**2 + (coords[:, 1] - 0.2)**2))
hotspot2 = np.exp(-15 * ((coords[:, 0] - 0.8)**2 + (coords[:, 1] - 0.8)**2))
expression_data[:, 3] = hotspot1 + hotspot2

# Gene5: Random pattern with spatial correlation (uniform noise on the grid,
# smoothed with a Gaussian filter, then flattened in the same row-major
# order as coords)
random_expr = np.random.rand(grid_size, grid_size)
smoothed_expr = gaussian_filter(random_expr, sigma=1)
expression_data[:, 4] = smoothed_expr.flatten()

# Scale expression values to be more realistic (rounded to whole numbers)
expression_data = np.round(expression_data * 100)

# Create DataFrame: gene columns indexed by spot ID, plus x/y coordinates
df = pd.DataFrame(expression_data, columns=gene_names, index=spot_ids)
df['x'] = coords[:, 0]
df['y'] = coords[:, 1]

# Save to CSV (the spot IDs are written as the index column)
df.to_csv(synthetic_csv_path)

print(f"Created synthetic dataset with {n_spots} spots and {n_genes} genes")
print(f"Data saved to '{synthetic_csv_path}'")
df.head()

## 2. Visualize Synthetic Data

Let's visualize our synthetic data to better understand the spatial patterns of gene expression.

In [None]:
# Plot the spatial expression pattern of every gene, one panel per gene.
# The panel count follows gene_names instead of being hard-coded to 5, and
# squeeze=False always returns a 2-D axes array so the single-gene case
# can be indexed the same way as the multi-gene case.
n_panels = len(gene_names)
fig, axes_grid = plt.subplots(1, n_panels, figsize=(4 * n_panels, 4), squeeze=False)
axes = axes_grid[0]  # the single row of panels

for ax, gene in zip(axes, gene_names):
    # Colour each spot by its expression value for this gene
    sc = ax.scatter(df['x'], df['y'], c=df[gene], cmap='viridis', s=100, edgecolor='k')
    ax.set_title(gene)
    plt.colorbar(sc, ax=ax)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')

plt.tight_layout()
plt.savefig('../data/synthetic_data_visualization.png', dpi=150, bbox_inches='tight')
plt.show()

## 3. Preprocessing

Now let's use our preprocessing module to prepare the data for quantum analysis.

In [None]:
# Select a specific gene to analyze
target_gene = 'Gene1'  # Gaussian peak pattern

# Preprocess the data. process_data returns the expression vector, the
# coordinates of the selected spots, the qubit count and an index map.
expr_vector, coord_map, n_qubits, index_map = preprocess.process_data(
    '../data/synthetic_data.csv',
    target_gene=target_gene,
    max_spots=16  # Downsample to fit qubit budget
)

print(f"Preprocessed data: vector length {len(expr_vector)}, using {n_qubits} qubits")
print("Selected coordinates:")
# Loop variables are deliberately NOT named x / y / i: in a notebook they are
# globals, and reusing those names would overwrite the grid arrays x and y
# created in the data-generation cell.
for spot_idx, (spot_x, spot_y) in enumerate(coord_map[:len(expr_vector)]):
    print(f"  Spot {spot_idx}: ({spot_x:.2f}, {spot_y:.2f}) - Expression: {expr_vector[spot_idx]:.4f}")

## 4. Quantum Encoding

Let's explore the different encoding methods for our data.

In [None]:
# Normalize vector for amplitude encoding (unit L2 norm).
# Guard against an all-zero expression vector: dividing by a zero norm would
# silently produce NaN amplitudes instead of a clear error.
vector_norm = np.linalg.norm(expr_vector)
if vector_norm == 0:
    raise ValueError(
        "Cannot amplitude-encode an all-zero expression vector (its norm is 0)"
    )
norm_vector = expr_vector / vector_norm

# Create circuits with different encodings.
# Amplitude encoding receives the normalized vector; angle and binary
# encoding receive the expression vector as returned by preprocessing.
amp_circuit = encoding.amplitude_encoding(norm_vector, n_qubits)
angle_circuit = encoding.angle_encoding(expr_vector)
binary_circuit = encoding.binary_encoding(expr_vector, threshold=0.5)

# Print circuits
print("Amplitude Encoding:")
print(amp_circuit)

print("\nAngle Encoding:")
print(angle_circuit)

print("\nBinary Encoding:")
print(binary_circuit)

## 5. Create the Hamiltonian

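Now we'll build two Hamiltonian operators from the expression values: one for locating the maximum-expression spot, and one for region detection, which additionally uses distance-based spatial weights between spots.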

In [None]:
from scipy.spatial.distance import pdist, squareform

# --- Hamiltonian for the maximum-expression problem ---
hamiltonian = encoding.hamiltonian_encoding(expr_vector)
print("Hamiltonian for maximum expression:", hamiltonian, sep="\n")

# --- Spatial weights between the spots that have an expression entry ---
# Pairwise Euclidean distances, expanded to a full square matrix.
condensed_distances = pdist(coord_map[:len(expr_vector)])
distances = squareform(condensed_distances)

# Exponential decay with distance; the length scale is 20% of the mean
# entry of the distance matrix. Self-weights on the diagonal are zeroed.
scale = 0.2 * distances.mean()
spatial_weights = np.exp(-distances / scale)
np.fill_diagonal(spatial_weights, 0)

# --- Hamiltonian for region detection ---
region_hamiltonian = encoding.hamiltonian_for_region_detection(
    expr_vector,
    spatial_weights,
    alpha=1.0,
    beta=0.1,
)
print("\nHamiltonian for region detection:", region_hamiltonian, sep="\n")

## 6. VQE for Maximum Expression Prediction

Now we'll use the Variational Quantum Eigensolver (VQE) to find the location with maximum gene expression.

In [None]:
# Run VQE on the preprocessed expression vector and report where it places
# the maximum. Outputs use VQE-specific names (vqe_result, vqe_probabilities,
# vqe_fig) so they stay available for inspection instead of sharing generic
# notebook-global names that any later cell could overwrite.

# Create VQE solver
vqe_solver = SpatialVQE(
    '../data/synthetic_data.csv',
    target_gene=target_gene,
    max_spots=16,
    optimizer='cobyla',
    ansatz_depth=2
)

# Skip preprocessing since we already did it: inject the preprocessed arrays.
# NOTE(review): assumes setup()/run() read these attributes rather than
# re-loading the CSV — confirm against SpatialVQE.
vqe_solver.expr_vector = expr_vector
vqe_solver.coord_map = coord_map
vqe_solver.n_qubits = n_qubits
vqe_solver.index_map = index_map

# Setup and run
vqe_solver.setup()
vqe_result = vqe_solver.run()

# Analyze results
max_idx, max_coords, vqe_probabilities = vqe_solver.analyze_results()
print(f"Maximum expression found at index {max_idx}")
# max_coords may be None, in which case no coordinates are printed
if max_coords is not None:
    print(f"Coordinates: ({max_coords[0]:.2f}, {max_coords[1]:.2f})")

# Visualize
vqe_fig = vqe_solver.visualize(vqe_probabilities, save_path='../data/vqe_result.png')

## 7. QAOA for Region Detection

Next, we'll use the Quantum Approximate Optimization Algorithm (QAOA) to identify regions of high gene expression.

In [None]:
# Run QAOA on the preprocessed expression vector to pick out a region of
# spots, then plot it. This cell defines `region_mask` and `probabilities`,
# which the multi-panel visualization and CSV export cells below consume.

# Create QAOA solver
# NOTE(review): alpha/beta are presumably the same weighting as in
# encoding.hamiltonian_for_region_detection (same values used there) — confirm.
qaoa_solver = SpatialQAOA(
    '../data/synthetic_data.csv',
    target_gene=target_gene,
    max_spots=16,
    optimizer='cobyla',
    p_steps=1,
    alpha=1.0,
    beta=0.1
)

# Skip preprocessing since we already did it
# (the arrays produced by preprocess.process_data are injected directly;
# NOTE(review): assumes the solver reads these attributes in setup()/run() — confirm)
qaoa_solver.expr_vector = expr_vector
qaoa_solver.coord_map = coord_map
qaoa_solver.n_qubits = n_qubits
qaoa_solver.index_map = index_map

# Calculate spatial weights
# NOTE(review): this calls a private (underscore-prefixed) method; it is
# invoked here after the attributes above are set and before setup().
qaoa_solver._calculate_spatial_weights()

# Setup and run
qaoa_solver.setup()
result = qaoa_solver.run()

# Analyze results
# NOTE(review): the exact meaning of threshold=0.4 is defined by
# SpatialQAOA.analyze_results — confirm before tuning.
region_mask, region_indices, probabilities = qaoa_solver.analyze_results(threshold=0.4)
print(f"Identified region with {np.sum(region_mask)} spots")

# Visualize
fig = qaoa_solver.visualize(region_mask, probabilities=probabilities, save_path='../data/qaoa_result.png')

## 8. Multi-Panel Visualization

Finally, let's create a comprehensive visualization of all our results.

In [None]:
# Combined figure: coordinates, expression values, probabilities and the
# detected region mask are handed to the project's multi-panel plotter.
panel_coords = coord_map[:len(expr_vector)]  # only spots with an expression entry
fig = visualize.create_multi_panel_visualization(
    panel_coords, expr_vector, probabilities, region_mask
)

# Title the whole figure, then lay out the panels inside the lower 96% of
# the canvas (rect) before saving and showing.
figure_title = f"Quantum Analysis of {target_gene} Expression"
plt.suptitle(figure_title, fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig('../data/complete_analysis.png', dpi=200, bbox_inches='tight')
plt.show()

## 9. Save Results

Let's save all our results to a CSV file for further analysis.

In [None]:
# Export coordinates, expression values, probabilities, region mask and the
# index map through the project's CSV helper.
results_csv_path = '../data/quantum_analysis_results.csv'
exported_coords = coord_map[:len(expr_vector)]  # only spots with an expression entry

results_df = visualize.save_results_to_csv(
    results_csv_path,
    exported_coords,
    expr_vector,
    probabilities,
    region_mask,
    index_map,
)

# Preview the first rows of what the helper returned
results_df.head()

## 10. Conclusion

In this notebook, we've demonstrated how to use our Quantum Spatial Transcriptomics framework to:

1. Preprocess spatial transcriptomics data
2. Encode the data into quantum circuits
3. Create Hamiltonians that capture the gene expression patterns
4. Use VQE to find the location with maximum gene expression
5. Use QAOA to identify regions of high gene expression
6. Visualize and analyze the results

This framework provides a foundation for applying quantum algorithms to spatial transcriptomics data analysis, with potential for extension to more complex scenarios as quantum hardware advances.