# **Crear una base de datos para entrenar el modelo**

In [None]:
import numpy as np
import pandas as pd
from tools import distancia_comovil

## Probar la función *distancia_comovil*

In [None]:
H0 = 70.  # Hubble constant, in km/s/Mpc
Omega_m0 = 0.3  # Matter density parameter
Omega_lambda0 = 0.7  # Dark-energy density parameter (Omega_m0 + Omega_lambda0 = 1, i.e. flat)

# Comoving distance at redshift z = 3. The named dark-energy constant is passed
# directly instead of recomputing 1 - Omega_m0, so the value declared above is
# the one actually used. The bare last expression displays the result.
distancia_comovil(3., H0, Omega_m0, Omega_lambda0)

6360.085385767909

## Generar una base de datos tomando en cuenta un universo abierto o plano

- $\Omega_m + \Omega_\Lambda = 1$: Universo **plano**
- $\Omega_m + \Omega_\Lambda < 1$: Universo **abierto** (hiperbólico)
- $\Omega_m + \Omega_\Lambda > 1$: Universo **cerrado** (esférico)

In [None]:
num_samples = 50000  # Target number of rows in the dataset
data = []  # Accumulated rows: [z, H0, Omega_m, Omega_lambda, comoving distance]

# Seeded generator so that Restart & Run All regenerates the same dataset.
rng = np.random.default_rng(42)

# Failed evaluations are counted rather than silently discarded, and capped so
# the loop cannot spin forever if distancia_comovil fails on every draw.
num_failed = 0
max_failed = 10 * num_samples

# Rejection sampling: keep drawing until the dataset reaches the target size
while len(data) < num_samples:
    # Draw each parameter uniformly from its range
    z = rng.uniform(0.0, 3.0)
    H0 = rng.uniform(60, 80)
    Omega_m = rng.uniform(0.0, 1.0)
    Omega_lambda = rng.uniform(0.0, 1.0)

    if Omega_m + Omega_lambda > 1.0:
        continue  # Closed universe: only open or flat universes are kept

    try:
        d_comov = distancia_comovil(z, H0, Omega_m, Omega_lambda)  # Comoving distance for this draw
    except Exception as err:
        # Best effort: skip draws where the computation fails, but keep track of them
        num_failed += 1
        if num_failed > max_failed:
            raise RuntimeError(
                f"distancia_comovil failed {num_failed} times; aborting dataset generation"
            ) from err
        continue

    data.append([z, H0, Omega_m, Omega_lambda, d_comov])

if num_failed:
    print(f"Skipped {num_failed} samples for which distancia_comovil raised an error")

# Build the DataFrame once from the collected rows and persist it as CSV
df = pd.DataFrame(data, columns=['z', 'H0', 'Omega_m', 'Omega_lambda', 'distancia_comovil'])
df.to_csv("resources/bd_distancia_comovil.csv", index=False)
df

Unnamed: 0,z,H0,Omega_m,Omega_lambda,distancia_comovil
0,0.980344,77.258948,0.314412,0.643842,2967.421185
1,1.900220,66.291946,0.132204,0.300051,8048.691480
2,2.388191,63.901488,0.010962,0.373246,15783.988320
3,0.743945,66.519929,0.442077,0.077182,3158.794189
4,2.345171,60.154353,0.613123,0.056029,5694.571886
...,...,...,...,...,...
49995,2.202306,70.934340,0.050854,0.432223,9865.059939
49996,1.366149,74.226408,0.656950,0.165678,3334.714407
49997,1.700273,75.246121,0.000598,0.098985,21071.376589
49998,1.852311,62.879959,0.105529,0.857282,6962.796782


In [None]:
# Minimum and maximum of every column, to check the sampled ranges
df.aggregate(["min", "max"])

Unnamed: 0,z,H0,Omega_m,Omega_lambda,distancia_comovil
min,9.3e-05,60.000204,2.7e-05,1.8e-05,0.984102
max,2.999935,79.999648,0.999064,0.99619,69596.25355


In [6]:
# Print the column dtypes, non-null counts and memory usage of the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   z                  50000 non-null  float64
 1   H0                 50000 non-null  float64
 2   Omega_m            50000 non-null  float64
 3   Omega_lambda       50000 non-null  float64
 4   distancia_comovil  50000 non-null  float64
dtypes: float64(5)
memory usage: 1.9 MB


In [7]:
# Summary statistics (count, mean, std, quartiles, min/max) for every column
df.describe()

Unnamed: 0,z,H0,Omega_m,Omega_lambda,distancia_comovil
count,50000.0,50000.0,50000.0,50000.0,50000.0
mean,1.495777,70.020021,0.333671,0.334527,5021.713355
std,0.864135,5.779358,0.235971,0.23645,3323.326042
min,9.3e-05,60.000204,2.7e-05,1.8e-05,0.984102
25%,0.75179,65.037834,0.134336,0.133505,2964.16884
50%,1.494601,70.009496,0.293052,0.294984,4627.584455
75%,2.242156,75.046772,0.498915,0.501965,6372.92404
max,2.999935,79.999648,0.999064,0.99619,69596.25355
