### Environment

In [7]:
import json

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

### Crear conexión a la BD PostgreSQL

In [8]:
# Path to the JSON file holding the connection settings. The code below reads the keys
# "host", "database", "user", "password" and "port" from it.
credentials = "../credentials.json"

with open(credentials) as f:
    creds = json.load(f)

# Raw psycopg2 connection.
# NOTE(review): none of the visible cells use `conn`; confirm it is still needed and close
# it when done, otherwise rely on `engine` alone.
conn = psycopg2.connect(
    host=creds["host"],
    database=creds["database"],
    user=creds["user"],
    password=creds["password"],
    port=creds["port"]
)

# Build the SQLAlchemy URL from its components instead of interpolating them into a string,
# so characters such as "@", ":", "/" or "%" in the user name or password are escaped
# correctly rather than corrupting the URL.
engine = create_engine(
    URL.create(
        "postgresql",
        username=creds["user"],
        password=creds["password"],
        host=creds["host"],
        port=int(creds["port"]),
        database=creds["database"],
    )
)

In [11]:
# Create the working table "candidates_wkshop_EDA" from the source CSV. It is expected to
# hold the same rows as the previously loaded candidates_wkshop_data table; the row-count
# comparison below checks that.
df = pd.read_csv("../data/candidates.csv", low_memory=False, encoding='ISO-8859-1', sep=";")
table_name = "candidates_wkshop_EDA"  # mixed case: must be double-quoted in raw SQL

try:
    # Step 1: (re)create the table. if_exists='replace' drops a previous version first, so
    # no separate DROP TABLE is needed. to_sql creates the name case-sensitively, whereas an
    # unquoted candidates_wkshop_EDA in raw SQL is folded to lowercase by PostgreSQL and
    # would address a different table.
    df.to_sql(table_name, engine, if_exists='replace', index=False)
    print(f"Tabla '{table_name}' creada y datos cargados en la base de datos '{creds['database']}'.")

    with engine.connect() as connection:
        # Step 2: confirm the table is registered under its exact, case-sensitive name.
        result = connection.execute(
            text("""
                SELECT COUNT(*)
                FROM information_schema.tables
                WHERE table_name = :table_name;
            """),
            {"table_name": table_name},
        )

        if result.scalar() == 1:
            print("Tabla candidates_wkshop_EDA creada exitosamente!")

            # Compare row counts between the original table and the new one. The new table
            # is quoted so the query hits the table created in step 1.
            count_original = connection.execute(text("SELECT COUNT(*) FROM candidates_wkshop_data;")).scalar()
            count_copy = connection.execute(text(f'SELECT COUNT(*) FROM "{table_name}";')).scalar()

            print(f"\nRegistros en tabla original: {count_original}")
            print(f"Registros en tabla copia: {count_copy}")

            # Show a small sample of the new table.
            sample = pd.read_sql(f'SELECT * FROM "{table_name}" LIMIT 5;', connection)
            print("\nMuestra de la nueva tabla:")
            print(sample)

        else:
            print("Error: No se pudo crear la tabla")

except Exception as e:
    # NOTE(review): failures are only printed, so later cells still run after an error;
    # confirm that this best-effort behaviour is intended.
    print(f"Error durante la creación de la tabla: {str(e)}")

Tabla 'candidates_wkshop_EDA' creada y datos cargados en la base de datos 'candidates_wkshop'.
Tabla candidates_wkshop_EDA creada exitosamente!

Registros en tabla original: 50000
Registros en tabla copia: 50000

Muestra de la nueva tabla:
   First Name   Last Name                      Email Application Date  \
0  Bernadette   Langworth        leonard91@yahoo.com       2021-02-26   
1      Camryn    Reynolds        zelda56@hotmail.com       2021-09-09   
2       Larue      Spinka   okey_schultz41@gmail.com       2020-04-14   
3        Arch      Spinka     elvera_kulas@yahoo.com       2020-10-01   
4       Larue  Altenwerth  minnie.gislason@gmail.com       2020-05-20   

   Country  YOE  Seniority                         Technology  \
0   Norway    2     Intern                      Data Engineer   
1   Panama   10     Intern                      Data Engineer   
2  Belarus    4  Mid-Level                     Client Success   
3  Eritrea   25    Trainee                          QA Manual

In [None]:
# Pull the full candidates table back from the database.
df = pd.read_sql("SELECT * FROM candidates_wkshop_data;", engine)

# Normalise column types: the application date becomes a datetime and both score columns
# become numeric, with unparseable scores turned into NaN.
df['Application Date'] = pd.to_datetime(df['Application Date'])
score_columns = ['Code Challenge Score', 'Technical Interview Score']
for score_column in score_columns:
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')

# Keep only candidates scoring at least 7 on both evaluations (NaN scores never qualify).
meets_both_thresholds = df[score_columns].ge(7).all(axis=1)
df_hired = df[meets_both_thresholds]

# Overwrite the EDA table with the filtered rows.
df_hired.to_sql(name='candidates_wkshop_EDA', con=engine, if_exists='replace', index=False)
print("Datos limpios y preparación completada.")

Datos limpios y preparación completada.
