In [29]:
import pyreadr
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

# Switch on rpy2's implicit pandas <-> R data.frame conversion for later calls
pandas2ri.activate()

# Handles to the R packages used by this notebook
base = importr('base')
asreml = importr('asreml')

# Read the pedigree and BLUE tables from their .rds files with pyreadr
ped_result = pyreadr.read_r('data/pedigree_19_23P_1031.rds')
blue_result = pyreadr.read_r('data/blue_yield.rds')

# Each result is indexed with the key None to pull out its data frame
# (assumes every file holds exactly one object)
ped_df, blue_df = ped_result[None], blue_result[None]

# Publish both tables in R's global environment under the names `ped` and `blue`
ro.globalenv['ped'] = pandas2ri.py2rpy(ped_df)
ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)



In [30]:
# Display the BLUE table read from data/blue_yield.rds
blue_df

Unnamed: 0,Name,predicted.value,std.error,status,Block,DAM,SIRE,Fam
0,23TC2XM3066,817.157834,55.459393,Estimable,2,ZMN00545,ZMN00392,ZMN00392_ZMN00545
1,23TC2XM4004,736.076043,55.698624,Estimable,2,20W041,20W065-43-2,20W065-43-2_20W041
2,D178070A,369.039247,46.982307,Estimable,6,ZMN00824,ZMN00547,ZMN00547_ZMN00824
3,D178070A,358.845544,39.343070,Estimable,3,ZMN00824,ZMN00547,ZMN00547_ZMN00824
4,D178070A,525.068916,54.857062,Estimable,2,ZMN00824,ZMN00547,ZMN00547_ZMN00824
...,...,...,...,...,...,...,...,...
2779,ZD958,268.765362,15.973387,Estimable,3,ZMN00113,ZMN00337,ZMN00337_ZMN00113
2780,ZD958,510.696306,21.263064,Estimable,1,ZMN00113,ZMN00337,ZMN00337_ZMN00113
2781,ZD958,314.059170,13.043818,Estimable,4,ZMN00113,ZMN00337,ZMN00337_ZMN00113
2782,ZD958,333.884811,20.252772,Estimable,7,ZMN00113,ZMN00337,ZMN00337_ZMN00113


In [31]:
# Display the pedigree table read from data/pedigree_19_23P_1031.rds
ped_df

Unnamed: 0_level_0,Name,FGenoID,MGenoID
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,DMY1F,0,0
3,DH605F,0,0
4,PHHJC,0,0
5,PH4CV,0,0
6,ZMN01635,0,0
...,...,...,...
2537,D2284159,ZMN01648,ZMN00735
2559,D2284296,ZMN01705,ZMN00735
2680,D2284308,ZMN01707,ZMN00735
27721,D2284109,ZMN01638,ZMN00735


In [32]:
# Fit the pedigree-based model in R. The snippet below:
#   1. builds `ainv` from the `ped` table with asreml::ainverse,
#   2. converts blue$Name to a factor,
#   3. merges `blue` with `ped` on "Name",
#   4. fits predicted.value ~ 1 with random term vm(Name, ainv) and
#      residual idv(units), storing the fit in R as `asr2`.
ro.r('''
ainv <- asreml::ainverse(ped)
blue$Name <- as.factor(blue$Name)
blue <- merge(blue, ped, by = "Name")
asr2 <- asreml::asreml(predicted.value ~ 1,
                       random = ~ vm(Name, ainv),
                       residual = ~ idv(units),
                       data = blue)
''')

# Pull results of the asr2 fit from R back into Python:
# the variance-component table and the random-effect coefficients
varcomp = ro.r('summary(asr2)$varcomp')
g = ro.r('coef(asr2)$random')



Pedigree  order (P2): Individual "WG603" moved to record 1
Pedigree  order (P1): Individual "PH4CV" moved to record 1
Pedigree  order (P2): Individual "WG5603" moved to record 2


In [33]:
# Turn the random-effect coefficients of the asr2 fit into a tidy
# two-column (Name, Value) pandas DataFrame.
import pandas as pd

# Row labels of coef(asr2)$random, fetched from R
g_row_names = ro.r('rownames(coef(asr2)$random)')

# `g` holds coef(asr2)$random, extracted right after the model was fitted
g_df = pd.DataFrame(g)

# Convert the row names to a Python list and use them as the index
g_row_names_py = list(g_row_names)
g_df.index = g_row_names_py

# Labels look like "vm(Name, ainv)_<genotype>". Split on the FIRST underscore
# only, so a genotype name that itself contains "_" is kept whole instead of
# being cut at its first underscore.
g_df['Name'] = [label.split('_', 1)[1] for label in g_df.index]

g_df.columns = ['Value', 'Name']
g_df = g_df[['Name', 'Value']]
g_df

Unnamed: 0,Name,Value
"vm(Name, ainv)_PH4CV",PH4CV,0.000000
"vm(Name, ainv)_WG5603",WG5603,0.000000
"vm(Name, ainv)_WG603",WG603,0.000000
"vm(Name, ainv)_DMY1F",DMY1F,0.000000
"vm(Name, ainv)_DH605F",DH605F,-23.800054
...,...,...
"vm(Name, ainv)_D2284159",D2284159,16.686925
"vm(Name, ainv)_D2284296",D2284296,14.130782
"vm(Name, ainv)_D2284308",D2284308,16.686925
"vm(Name, ainv)_D2284109",D2284109,16.201840


In [34]:
# Run the same pedigree analysis through the project-local `gblup` module.
# In the recorded run the table it returns matches the one built by hand above.
import gblup
gblup.run_asreml_ped_gblup(ped_df,blue_df)

Pedigree  order (P2): Individual "WG603" moved to record 1
Pedigree  order (P1): Individual "PH4CV" moved to record 1
Pedigree  order (P2): Individual "WG5603" moved to record 2


Unnamed: 0,Name,Value
"vm(Name, ainv)_PH4CV",PH4CV,0.000000
"vm(Name, ainv)_WG5603",WG5603,0.000000
"vm(Name, ainv)_WG603",WG603,0.000000
"vm(Name, ainv)_DMY1F",DMY1F,0.000000
"vm(Name, ainv)_DH605F",DH605F,-23.800054
...,...,...
"vm(Name, ainv)_D2284159",D2284159,16.686925
"vm(Name, ainv)_D2284296",D2284296,14.130782
"vm(Name, ainv)_D2284308",D2284308,16.686925
"vm(Name, ainv)_D2284109",D2284109,16.201840


In [35]:
import numpy as np

# Fixed seed so the simulated genotypes are identical on every
# Restart Kernel -> Run All; change SEED to draw a different data set.
SEED = 42
N_INDIVIDUALS, N_MARKERS = 6000, 8366

rng = np.random.default_rng(SEED)

# Simulated marker matrix: one row per individual, one column per marker,
# every entry drawn uniformly from {0, 1, 2} (upper bound 3 is exclusive).
# Rows are labelled IID_1..IID_n and columns marker_1..marker_m.
G_matrix = pd.DataFrame(
    rng.integers(0, 3, size=(N_INDIVIDUALS, N_MARKERS)),
    index=[f"IID_{i + 1}" for i in range(N_INDIVIDUALS)],
    columns=[f"marker_{j + 1}" for j in range(N_MARKERS)],
)
G_matrix

Unnamed: 0,marker_1,marker_2,marker_3,marker_4,marker_5,marker_6,marker_7,marker_8,marker_9,marker_10,...,marker_8357,marker_8358,marker_8359,marker_8360,marker_8361,marker_8362,marker_8363,marker_8364,marker_8365,marker_8366
IID_1,1,2,0,0,1,2,1,2,0,1,...,0,2,0,0,0,1,0,2,1,2
IID_2,2,0,0,2,0,2,1,1,2,1,...,2,2,2,0,2,1,2,0,2,1
IID_3,2,2,1,2,0,0,1,2,1,0,...,0,2,1,1,0,0,1,2,0,1
IID_4,0,1,0,0,0,1,0,1,0,1,...,2,2,0,0,2,1,1,1,1,0
IID_5,0,2,1,2,1,2,0,2,0,2,...,1,0,2,1,1,2,1,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IID_5996,1,1,1,1,2,1,0,1,0,2,...,2,1,1,0,1,2,0,2,1,1
IID_5997,1,1,2,2,0,1,1,1,2,0,...,0,1,2,0,1,0,2,1,1,2
IID_5998,2,2,1,0,2,1,1,1,1,1,...,2,0,1,2,0,2,0,2,1,1
IID_5999,2,2,1,2,0,0,1,0,0,0,...,0,2,1,0,1,0,1,1,2,2


In [36]:
# Convert the marker DataFrame and bind it to `G_matrix` in R's global environment
ro.globalenv['G_matrix'] = pandas2ri.py2rpy(G_matrix)

# Re-assign the R object's dimnames from its own row and column names so the
# individual / marker labels are set explicitly on the R side
ro.r('''
dimnames(G_matrix) <- list(rownames(G_matrix), colnames(G_matrix))
''')

# Verify the transfer by showing the first few rows as seen from R
ro.r('head(G_matrix)')

Unnamed: 0,marker_1,marker_2,marker_3,marker_4,marker_5,marker_6,marker_7,marker_8,marker_9,marker_10,...,marker_8357,marker_8358,marker_8359,marker_8360,marker_8361,marker_8362,marker_8363,marker_8364,marker_8365,marker_8366
IID_1,1,2,0,0,1,2,1,2,0,1,...,0,2,0,0,0,1,0,2,1,2
IID_2,2,0,0,2,0,2,1,1,2,1,...,2,2,2,0,2,1,2,0,2,1
IID_3,2,2,1,2,0,0,1,2,1,0,...,0,2,1,1,0,0,1,2,0,1
IID_4,0,1,0,0,0,1,0,1,0,1,...,2,2,0,0,2,1,1,1,1,0
IID_5,0,2,1,2,1,2,0,2,0,2,...,1,0,2,1,1,2,1,2,1,0
IID_6,0,0,1,0,0,2,0,0,2,1,...,2,1,0,2,1,0,0,1,0,2


In [37]:
# Small phenotype table keyed by three of the simulated individual IDs.
# Note: this rebinds `blue_df`, replacing the table loaded from the .rds file.
blue_df = pd.DataFrame(
    [("IID_3", 2.4), ("IID_4", 2.4), ("IID_5", 5.3)],
    columns=["Name", "Value"],
)



In [None]:
# Genomic model: fit Value ~ 1 with a marker-based relationship matrix in
# asreml, then tidy the random-effect coefficients into a (Name, Value) frame.

# Send the marker table to R
ro.globalenv['G_matrix'] = pandas2ri.py2rpy(G_matrix)

# Re-assign the R object's dimnames from its own row and column names so the
# individual / marker labels are set explicitly before the matrix conversion
ro.r('''
dimnames(G_matrix) <- list(rownames(G_matrix), colnames(G_matrix))
''')

# Send the phenotype table to R as `blue`
ro.globalenv['blue'] = pandas2ri.py2rpy(blue_df)

# R side:
#   - G.matrix(...)$G computes the relationship matrix from the markers,
#     which is then labelled with the individual IDs on both dimensions;
#   - 0.01 is added to the diagonal before inversion;
#   - G.inverse(..., sparseform = T)$Ginv.sparse is the inverse passed to vm();
#   - blue$Name becomes a factor and asreml fits Value ~ 1 with random term
#     vm(Name, ginv) and residual idv(units), stored as `asr2`.
ro.r('''
library(asreml)
library(ASRgenomics)
library(tidyverse)
G_matrix <- as.matrix(G_matrix)
Gmat = G.matrix(G_matrix)$G
rownames(Gmat) <- rownames(G_matrix)
colnames(Gmat) <- rownames(G_matrix)
diag(Gmat) = diag(Gmat) + 0.01
ginv = G.inverse(Gmat,sparseform = T)
ginv = ginv$Ginv.sparse

attr(ginv,"rowNames") %>% head

blue$Name <- as.factor(blue$Name)


asr2 <- asreml(Value ~ 1,
               random = ~ vm(Name,ginv),
               residual = ~idv(units),
               data = blue)
summary(asr2)$varcomp

''')

# Random-effect coefficients of the fit and their row labels
g = ro.r('coef(asr2)$random')
g_row_names = ro.r('rownames(coef(asr2)$random)')

g_df = pd.DataFrame(g)

# Convert the row names to a Python list and use them as the index
g_row_names_py = list(g_row_names)
g_df.index = g_row_names_py

# Labels are "<term>_<individual>" and the individual IDs themselves contain an
# underscore (e.g. "IID_3"). Split on the FIRST underscore only and keep the
# remainder as one string; slicing the full split with [1:] would store a list
# such as ['IID', '3'] in the Name column instead of 'IID_3'.
g_df['Name'] = [label.split('_', 1)[1] for label in g_df.index]
g_df.columns = ['Value', 'Name']
g_df = g_df[['Name', 'Value']]

Initial data: 
	Number of Individuals: 6000 
	Number of Markers: 8366 

Missing data check: 
	Total SNPs: 8366 
	 0 SNPs dropped due to missing data threshold of 0.5 
	Total of: 8366  SNPs 

MAF check: 
	No SNPs with MAF below 0 

Heterozigosity data check: 
	No SNPs with heterozygosity, missing threshold of =  0 

Summary check: 
	Initial:  8366 SNPs 
	Final:  8366  SNPs ( 0  SNPs removed) 
 
Completed! Time = 470.51  seconds 


R[write to console]: Reciprocal conditional number for original matrix is: 5.75725494224797e-05

R[write to console]: Reciprocal conditional number for inverted matrix is: 5.43301590950856e-05

R[write to console]: Inverse of matrix G does not appear to be ill-conditioned.



In [None]:
# Display the current (Name, Value) random-effect table held in g_df
g_df

In [None]:
# Run the marker-based analysis through the project-local `gblup` module,
# passing the marker table and the phenotype table, then display the result.
# NOTE(review): presumably this wraps the manual G-matrix workflow above —
# confirm against the gblup module.
import gblup
g_gblup = gblup.run_asreml_G_gblup(G_matrix,blue_df)
g_gblup