# Importación de datos

In [15]:
import os
import requests
import pandas as pd
## download the dataset
# Directory of the raw data files
_data_root = './data/covertype/original'
# Path to the raw training data
_data_filepath = os.path.join(_data_root, 'covertype_train.csv')
# Download data
os.makedirs(_data_root, exist_ok=True)
if not os.path.isfile(_data_filepath):
   #https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/
   url = 'https://docs.google.com/uc?export= \
   download&confirm={{VALUE}}&id=1lVF1BCWLH4eXXV_YOJzjR7xZjj-wAGj9'
   r = requests.get(url, allow_redirects=True, stream=True)
   open(_data_filepath, 'wb').write(r.content)

In [16]:
df=pd.read_csv(_data_filepath)
df = df.drop(['Wilderness_Area','Soil_Type'], axis=1)
df.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Cover_Type
0,2991,119,7,67,11,1015,233,234,133,1570,1
1,2876,3,18,485,71,2495,192,202,144,1557,1
2,3171,315,2,277,9,4374,213,237,162,1052,0
3,3087,342,13,190,31,4774,193,221,166,752,0
4,2835,158,10,212,41,3596,231,242,141,3280,1


## Selección de características
1.  Cree un subconjunto de datos que solo contenga las caracterısticas numericas
 para que pueda usarlo en las siguientes secciones.

In [17]:
numeric_features = ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology', 
'Horizontal_Distance_To_Roadways', 'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm', 'Horizontal_Distance_To_Fire_Points']

In [18]:
X_model = df[numeric_features]
y_model = df['Cover_Type']

In [19]:
X_model.shape

(116203, 10)

2. Selección de características (Univariate feature Selection)

In [20]:
import pandas as pd
from sklearn.feature_selection import f_classif

# Calculamos los puntajes F y los p-values para cada característica
f_scores, p_values = f_classif(X_model, y_model)

# Creamos un DataFrame con resultados ordenados
results_df = pd.DataFrame({
    'Feature': X_model.columns,
    'F_score': f_scores,
    'p_value': p_values,
    '1_minus_p_value': 1 - p_values
})

# Ordenamos el DataFrame por relevancia (1 - p-value) y p-value
results_df.sort_values(by=['1_minus_p_value', 'p_value'], 
                       ascending=[False, True], 
                       inplace=True)

# Resetear el índice para claridad visual
results_df.reset_index(drop=True, inplace=True)

# Mostramos el resultado
print(results_df)


                              Feature       F_score        p_value  \
0                           Elevation  31087.079657   0.000000e+00   
1                               Slope   1559.369722   0.000000e+00   
2    Horizontal_Distance_To_Hydrology    488.221292   0.000000e+00   
3     Horizontal_Distance_To_Roadways   2050.314700   0.000000e+00   
4                       Hillshade_9am    643.862634   0.000000e+00   
5                      Hillshade_Noon    595.275348   0.000000e+00   
6  Horizontal_Distance_To_Fire_Points   1452.737911   0.000000e+00   
7      Vertical_Distance_To_Hydrology    241.029561  2.053881e-307   
8                       Hillshade_3pm    190.497164  1.671830e-242   
9                              Aspect     88.987497  7.522981e-112   

   1_minus_p_value  
0              1.0  
1              1.0  
2              1.0  
3              1.0  
4              1.0  
5              1.0  
6              1.0  
7              1.0  
8              1.0  
9              1.0 

In [21]:
from sklearn.feature_selection import SelectKBest, f_classif

# Definir y ejecutar SelectKBest
selector_numeric = SelectKBest(score_func=f_classif, k=8)
X_numeric_selected = selector_numeric.fit_transform(X_model, y_model)

# Características seleccionadas (incluidas)
numeric_selected_features = X_model.columns[selector_numeric.get_support()]

# Características excluidas
numeric_excluded_features = X_model.columns[~selector_numeric.get_support()]

# Características Seleccionadas
selected_features_df = pd.DataFrame({
    'Feature': X_model.columns,
    'Selected': selector_numeric.get_support()})

selected_features_df.sort_values('Selected',ascending=False)

# Imprimir claramente los resultados
# print("✅ Características seleccionadas:")
# print(numeric_selected_features.tolist())

# print("\n❌ Características excluidas:")
# print(numeric_excluded_features.tolist())


Unnamed: 0,Feature,Selected
0,Elevation,True
2,Slope,True
3,Horizontal_Distance_To_Hydrology,True
4,Vertical_Distance_To_Hydrology,True
5,Horizontal_Distance_To_Roadways,True
6,Hillshade_9am,True
7,Hillshade_Noon,True
9,Horizontal_Distance_To_Fire_Points,True
1,Aspect,False
8,Hillshade_3pm,False


In [22]:
df_numeric_selected = df[numeric_selected_features]

# Guardar este subconjunto en un nuevo CSV
numeric_selected_path = "/work/notebooks/data/covertype/transformed/covertype_train_numeric_selected.csv"
df_numeric_selected.to_csv(numeric_selected_path, index=False)

print(f"✅ Subconjunto con características numéricas seleccionadas guardado en: {numeric_selected_path}")


✅ Subconjunto con características numéricas seleccionadas guardado en: /work/notebooks/data/covertype/transformed/covertype_train_numeric_selected.csv


### Prueba Modelo Simple (No obligatorio)

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_numeric_selected, y_model, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=500, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.8950561507680392

# Data Pipeline

1. Definir una conexión SQLite

In [23]:
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.orchestration import metadata

metadata_config = sqlite_metadata_connection_config('/work/notebooks/ml_metadata.sqlite')

with metadata.Metadata(metadata_config) as metadata_connection:
    artifacts = metadata_connection.store.get_artifacts()
    print(artifacts)

[]


In [24]:
with metadata.Metadata(metadata_config) as metadata_connection:
    print(metadata_connection.store.get_artifacts())
    print(metadata_connection.store.get_executions())
    print(metadata_connection.store.get_contexts())


[]
[]
[]


# Uso de ExampleGen para cargar CSV en formato adecuado

In [25]:
from tfx.components import CsvExampleGen
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

context = InteractiveContext(pipeline_root='/work/notebooks/pipeline')

example_gen = CsvExampleGen(input_base='/work/notebooks/data/covertype/original/')
context.run(example_gen)



0,1
.execution_id,32
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7fe0fc6f4f10.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe0fc5c7b50.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']/work/notebooks/data/covertype/original/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740368554,sum_checksum:1740368554"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe0fc5c7b50.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe0fc5c7b50.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']/work/notebooks/data/covertype/original/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740368554,sum_checksum:1740368554"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe0fc5c7b50.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/work/notebooks/pipeline/CsvExampleGen/examples/32
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],/work/notebooks/data/covertype/original/
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740368554,sum_checksum:1740368554"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe0fc5c7b50.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /work/notebooks/pipeline/CsvExampleGen/examples/32) at 0x7fe1c1445520.type<class 'tfx.types.standard_artifacts.Examples'>.uri/work/notebooks/pipeline/CsvExampleGen/examples/32.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/work/notebooks/pipeline/CsvExampleGen/examples/32
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


# StatisticsGen
Calcular estadísticas sobre los datos

In [26]:
from tfx.components import StatisticsGen

statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)
context.show(statistics_gen.outputs['statistics'])

# Inferir el esquema

In [24]:
from tfx.components import SchemaGen

schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True)
context.run(schema_gen)
context.show(schema_gen.outputs['schema'])


Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'Aspect',INT,required,,-
'Cover_Type',INT,required,,-
'Elevation',INT,required,,-
'Hillshade_3pm',INT,required,,-
'Hillshade_9am',INT,required,,-
'Hillshade_Noon',INT,required,,-
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,-


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'Soil_Type',"'C2702', 'C2703', 'C2704', 'C2705', 'C2706', 'C2717', 'C3501', 'C3502', 'C4201', 'C4703', 'C4704', 'C4744', 'C4758', 'C5101', 'C6101', 'C6102', 'C6731', 'C7101', 'C7102', 'C7103', 'C7201', 'C7202', 'C7700', 'C7701', 'C7702', 'C7709', 'C7710', 'C7745', 'C7746', 'C7755', 'C7756', 'C7757', 'C7790', 'C8703', 'C8707', 'C8708', 'C8771', 'C8772', 'C8776', 'C5151'"
'Wilderness_Area',"'Cache', 'Commanche', 'Neota', 'Rawah'"


# Curando Esquema

Definición de rangos válidos sujeto a:
 Hillshade 9am: 0 to 255
 Hillshade Noon: 0 to 255
 Slope: 0 to 90
 Cover Type: 0 to 6

In [27]:
import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import schema_pb2
import tensorflow as tf
schema = tfdv.load_schema_text('/work/notebooks/pipeline/SchemaGen/schema/3/schema.pbtxt')

# Ajustar rangos numéricos usando schema_pb2:
tfdv.set_domain(schema, 'Hillshade_9am', schema_pb2.IntDomain(min=0, max=255))
tfdv.set_domain(schema, 'Hillshade_Noon', schema_pb2.IntDomain(min=0, max=255))
tfdv.set_domain(schema, 'Slope', schema_pb2.IntDomain(min=0, max=90))

# Cover_Type como categórica:
tfdv.set_domain(schema, 'Cover_Type', schema_pb2.IntDomain(is_categorical=True, min=0, max=6))

# Crear directorio si no existe
tf.io.gfile.makedirs('/work/notebooks/pipeline/SchemaAdj')

# Guardar el esquema ajustado:
tfdv.write_schema_text(schema, '/work/notebooks/pipeline/SchemaAdj/schema.pbtxt')


# Entornos del Esquema

### 1. Validar la estructura del esquema ajustado

In [28]:
import tensorflow_data_validation as tfdv

# Ruta al esquema ajustado
schema_path = "/work/notebooks/pipeline/SchemaAdj/schema.pbtxt"

# Cargar el esquema
schema = tfdv.load_schema_text(schema_path)

# Ver esquema
print(schema)


feature {
  name: "Aspect"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Cover_Type"
  type: INT
  int_domain {
    min: 0
    max: 6
    is_categorical: true
  }
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Elevation"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_3pm"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_9am"
  type: INT
  int_domain {
    min: 0
    max: 255
  }
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_Noon"
  type: INT
  int_domain {
    min: 0
    max: 255
  }
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {

# Simular un conjunto de datos de servicio

In [29]:
df_numeric_selected.columns

Index(['Elevation', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon',
       'Horizontal_Distance_To_Fire_Points'],
      dtype='object')

In [30]:
# Crear una copia del dataset seleccionado pero sin la columna 'Cover_Type'
df_inference = df_numeric_selected.copy()

# Guardar el dataset de inferencia (sin Cover_Type)
inference_path = "/work/notebooks/data/covertype/service/covertype_inference.csv"
df_inference.to_csv(inference_path, index=False)

print(f"✅ Conjunto de inferencia guardado en: {inference_path}")

✅ Conjunto de inferencia guardado en: /work/notebooks/data/covertype/service/covertype_inference.csv


# Configurar entornos de desarrollo
Dado que Cover_Type estará presente en entrenamiento pero no en inferencia.

In [31]:
from tensorflow_metadata.proto.v0 import schema_pb2
import tensorflow as tf

# Definir los entornos de esquema (TRAINING e INFERRENCIA/ SERVING)
schema.default_environment.append('TRAINING')
schema.default_environment.append('SERVING')

# Remover 'Cover_Type' del entorno de inferencia (SERVING)
for feature in schema.feature:
    if feature.name == 'Cover_Type':
        feature.not_in_environment.append('SERVING')

# Crear un directorio dedicado para el esquema serving (asegurándose que contenga un único archivo)
serving_schema_dir = '/work/notebooks/pipeline/SchemaServing'
tf.io.gfile.makedirs(serving_schema_dir)

# Guardar el esquema ajustado en ese directorio
schema_serving_path = serving_schema_dir + '/schema_serving.pbtxt'
tfdv.write_schema_text(schema, schema_serving_path)

# Revisar cambios (opcional)
tfdv.display_schema(schema)


Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'Aspect',INT,required,,-
'Cover_Type',INT,required,,min: 0; max: 6
'Elevation',INT,required,,-
'Hillshade_3pm',INT,required,,-
'Hillshade_9am',INT,required,,min: 0; max: 255
'Hillshade_Noon',INT,required,,min: 0; max: 255
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,min: 0; max: 90


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'Soil_Type',"'C2702', 'C2703', 'C2704', 'C2705', 'C2706', 'C2717', 'C3501', 'C3502', 'C4201', 'C4703', 'C4704', 'C4744', 'C4758', 'C5101', 'C6101', 'C6102', 'C6731', 'C7101', 'C7102', 'C7103', 'C7201', 'C7202', 'C7700', 'C7701', 'C7702', 'C7709', 'C7710', 'C7745', 'C7746', 'C7755', 'C7756', 'C7757', 'C7790', 'C8703', 'C8707', 'C8708', 'C8771', 'C8772', 'C8776', 'C5151'"
'Wilderness_Area',"'Cache', 'Commanche', 'Neota', 'Rawah'"


# Validar el conjunto de datos de inferencia

In [34]:
# Ingesta del dataset de inferencia (CSV sin 'Cover_Type')
from tfx.components import CsvExampleGen, StatisticsGen, ExampleValidator

serving_example_gen = CsvExampleGen(input_base='/work/notebooks/data/covertype/service/')
context.run(serving_example_gen)

# Generar estadísticas para el conjunto de inferencia
serving_statistics_gen = StatisticsGen(examples=serving_example_gen.outputs['examples'])
context.run(serving_statistics_gen)
context.show(serving_statistics_gen.outputs['statistics'])

# Registrar el esquema de inferencia (SchemaServing)
from tfx.types import standard_artifacts, channel_utils
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.orchestration import metadata

# Crear el artefacto Schema y asignar la URI al directorio que contiene el archivo de esquema serving
schema_artifact = standard_artifacts.Schema()
schema_artifact.uri = '/work/notebooks/pipeline/SchemaServing'

metadata_config = sqlite_metadata_connection_config('/work/notebooks/pipeline/metadata.sqlite')
with metadata.Metadata(metadata_config) as m:
    # Obtener el type_id para 'Schema'
    artifact_types = m.store.get_artifact_types()
    for at in artifact_types:
        if at.name == schema_artifact.type_name:
            schema_artifact.mlmd_artifact.type_id = at.id
            break
    # Registrar el artefacto en MLMD
    registered_mlmd = m.store.put_artifacts([schema_artifact.mlmd_artifact])
    schema_artifact.id = registered_mlmd[0]
    print("Esquema registrado con id:", schema_artifact.id)


Esquema registrado con id: 59


In [36]:
# Crear el canal a partir del artefacto registrado
schema_channel = channel_utils.as_channel([schema_artifact])

# Validar los datos de inferencia usando el esquema resuelto
from tfx.components import ExampleValidator

serving_validator = ExampleValidator(
    statistics=serving_statistics_gen.outputs['statistics'],
    schema=schema_channel
)
context.run(serving_validator)


0,1
.execution_id,37
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ExampleValidator at 0x7fe0fe133640.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe0fc815820.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe0fe1332b0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing.outputs['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fe0fc488bb0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""].exec_properties['exclude_splits'][]['custom_validation_config']None"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe0fc815820.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe0fe1332b0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing"
.component.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fe0fc488bb0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe0fc815820.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe0fe1332b0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing"
.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fe0fc488bb0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"
.exec_properties,['exclude_splits'][]['custom_validation_config']None

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe0fc815820.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe0fe1332b0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/work/notebooks/pipeline/StatisticsGen/statistics/35
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/work/notebooks/pipeline/SchemaServing

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fe0fc488bb0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/work/notebooks/pipeline/ExampleValidator/anomalies/36
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['exclude_splits'],[]
['custom_validation_config'],

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe0fc815820.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe0fe1332b0.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /work/notebooks/pipeline/StatisticsGen/statistics/35) at 0x7fe0fc4885b0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/work/notebooks/pipeline/StatisticsGen/statistics/35.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/work/notebooks/pipeline/StatisticsGen/statistics/35
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /work/notebooks/pipeline/SchemaServing) at 0x7fe17e65d7f0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/work/notebooks/pipeline/SchemaServing

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/work/notebooks/pipeline/SchemaServing

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7fe0fc488bb0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /work/notebooks/pipeline/ExampleValidator/anomalies/36) at 0x7fe0fc641a00.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/work/notebooks/pipeline/ExampleValidator/anomalies/36.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/work/notebooks/pipeline/ExampleValidator/anomalies/36
.span,0
.split_names,"[""train"", ""eval""]"


In [37]:
tfdv.display_schema(schema)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'Aspect',INT,required,,-
'Cover_Type',INT,required,,min: 0; max: 6
'Elevation',INT,required,,-
'Hillshade_3pm',INT,required,,-
'Hillshade_9am',INT,required,,min: 0; max: 255
'Hillshade_Noon',INT,required,,min: 0; max: 255
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,min: 0; max: 90


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'Soil_Type',"'C2702', 'C2703', 'C2704', 'C2705', 'C2706', 'C2717', 'C3501', 'C3502', 'C4201', 'C4703', 'C4704', 'C4744', 'C4758', 'C5101', 'C6101', 'C6102', 'C6731', 'C7101', 'C7102', 'C7103', 'C7201', 'C7202', 'C7700', 'C7701', 'C7702', 'C7709', 'C7710', 'C7745', 'C7746', 'C7755', 'C7756', 'C7757', 'C7790', 'C8703', 'C8707', 'C8708', 'C8771', 'C8772', 'C8776', 'C5151'"
'Wilderness_Area',"'Cache', 'Commanche', 'Neota', 'Rawah'"


In [38]:
print(schema)

feature {
  name: "Aspect"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Cover_Type"
  type: INT
  int_domain {
    min: 0
    max: 6
    is_categorical: true
  }
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  not_in_environment: "SERVING"
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Elevation"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_3pm"
  type: INT
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_9am"
  type: INT
  int_domain {
    min: 0
    max: 255
  }
  presence {
    min_fraction: 1.0
    min_count: 1
  }
  shape {
    dim {
      size: 1
    }
  }
}
feature {
  name: "Hillshade_Noon"
  type: INT
  int_domain {
    min: 0
    max: 255
  }
  presence {
    min_fraction: 1.0
    min_

# Generar nuevas estadísticas usando el esquema actualizado

In [39]:
from tfx.types import standard_artifacts, channel_utils
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.orchestration import metadata

# En lugar de apuntar al archivo, apuntamos al directorio que contiene el archivo único
SCHEMA_DIR = '/work/notebooks/pipeline/SchemaServing'  # Asegurarse de que este directorio exista y contenga únicamente schema_serving.pbtxt

schema_artifact = standard_artifacts.Schema()
schema_artifact.uri = SCHEMA_DIR

metadata_config = sqlite_metadata_connection_config('/work/notebooks/pipeline/metadata.sqlite')
with metadata.Metadata(metadata_config) as m:
    # Obtener el type_id para 'Schema'
    artifact_types = m.store.get_artifact_types()
    for at in artifact_types:
        if at.name == schema_artifact.type_name:
            schema_artifact.mlmd_artifact.type_id = at.id
            break
    # Registrar el artefacto en MLMD
    registered_mlmd = m.store.put_artifacts([schema_artifact.mlmd_artifact])
    schema_artifact.id = registered_mlmd[0]
    print("Esquema registrado con id:", schema_artifact.id)

# Crear el canal a partir del artefacto registrado
schema_channel = channel_utils.as_channel([schema_artifact])

# Ahora, recalculamos las estadísticas usando el esquema curado importado (registrado manualmente)
from tfx.components import StatisticsGen

# Supongamos que 'example_gen' es el componente CsvExampleGen que generó tus ejemplos
updated_stats_gen = StatisticsGen(
    examples=example_gen.outputs['examples'],  # Reemplaza 'example_gen' por tu componente real
    schema=schema_channel                     # Usamos el canal del esquema curado
)
context.run(updated_stats_gen)

# Visualizar las estadísticas recalculadas
context.show(updated_stats_gen.outputs['statistics'])


Esquema registrado con id: 61


# Comprobar Anomalías

In [40]:
from tfx.components import ExampleValidator

# Creamos el componente ExampleValidator pasando las estadísticas y el esquema actualizado
serving_validator = ExampleValidator(
    statistics=updated_stats_gen.outputs['statistics'],  # Estadísticas recalculadas usando el esquema curado
    schema=schema_channel                                # Canal que contiene el esquema actualizado
)

# Ejecutamos el componente para validar y detectar anomalías
context.run(serving_validator)

# Visualizamos los resultados de la validación (por ejemplo, anomalías)
context.show(serving_validator.outputs['anomalies'])


# Ingeniería de Características

### Transformar

In [46]:
from tfx.components import Transform
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.orchestration.metadata import sqlite_metadata_connection_config

# Configuración del contexto 
PIPELINE_ROOT = "/work/notebooks/pipeline"
METADATA_PATH = "/work/notebooks/pipeline/metadata.sqlite"

context = InteractiveContext(
    pipeline_root=PIPELINE_ROOT,
    metadata_connection_config=sqlite_metadata_connection_config(METADATA_PATH)
)

import sys
sys.path.append('/work/notebooks/modules')

# Luego, en el componente Transform, pásala directamente:
transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_channel,
    preprocessing_fn='preprocessing.preprocessing_fn'  # Pasa la función, no una cadena
)
context.run(transform)

# Mostrar la URI de los ejemplos transformados para inspección:
transformed_examples_uri = transform.outputs['transformed_examples'].get()[0].uri
print("Ejemplos transformados almacenados en:", transformed_examples_uri)




INFO:tensorflow:Assets written to: /work/notebooks/pipeline/Transform/transform_graph/41/.temp_path/tftransform_tmp/75cad6dc8ad748d9a6aa9858d0a6a8a4/assets


INFO:tensorflow:Assets written to: /work/notebooks/pipeline/Transform/transform_graph/41/.temp_path/tftransform_tmp/75cad6dc8ad748d9a6aa9858d0a6a8a4/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: /work/notebooks/pipeline/Transform/transform_graph/41/.temp_path/tftransform_tmp/0986bcbab4474bb6b7de3fce6bfd6049/assets


INFO:tensorflow:Assets written to: /work/notebooks/pipeline/Transform/transform_graph/41/.temp_path/tftransform_tmp/0986bcbab4474bb6b7de3fce6bfd6049/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


Ejemplos transformados almacenados en: /work/notebooks/pipeline/Transform/transformed_examples/41


In [43]:
import tensorflow as tf
import os

transformed_dir = transformed_examples_uri
# Listar elementos en el directorio raíz
root_items = tf.io.gfile.listdir(transformed_dir)
print("Elementos en la raíz:", root_items)

# Ver si hay subdirectorios y listar archivos en ellos
for item in root_items:
    full_path = os.path.join(transformed_dir, item)
    if tf.io.gfile.isdir(full_path):
        sub_items = tf.io.gfile.listdir(full_path)
        print(f"Archivos en subdirectorio '{item}':", sub_items)


Elementos en la raíz: ['Split-eval', 'Split-train']
Archivos en subdirectorio 'Split-eval': ['transformed_examples-00000-of-00001.gz']
Archivos en subdirectorio 'Split-train': ['transformed_examples-00000-of-00001.gz']


In [44]:
import tensorflow as tf
import os

def parse_tf_example(serialized_example):
    return tf.train.Example.FromString(serialized_example)

# Por ejemplo, inspeccionemos los ejemplos del split de entrenamiento:
train_dir = f'{transformed_examples_uri}/Split-train'
tf_record_files = tf.io.gfile.glob(os.path.join(train_dir, '*.gz'))
print("Archivos TFRecord encontrados en 'Split-train':", tf_record_files)

if tf_record_files:
    # Indica el tipo de compresión 'GZIP'
    dataset = tf.data.TFRecordDataset(tf_record_files, compression_type='GZIP')
    for serialized_example in dataset.take(3):  # Muestra 3 ejemplos
        example = parse_tf_example(serialized_example.numpy())
        print(example)
else:
    print("No se encontraron archivos TFRecord en el subdirectorio 'Split-train'.")


Archivos TFRecord encontrados en 'Split-train': ['/work/notebooks/pipeline/Transform/transformed_examples/40/Split-train/transformed_examples-00000-of-00001.gz']
features {
  feature {
    key: "Cover_Type_indexed"
    value {
      int64_list {
        value: 0
      }
    }
  }
  feature {
    key: "Hillshade_9am_scaled"
    value {
      float_list {
        value: -0.7458961009979248
      }
    }
  }
  feature {
    key: "Hillshade_Noon_scaled"
    value {
      float_list {
        value: -1.0757123231887817
      }
    }
  }
  feature {
    key: "Slope_scaled"
    value {
      float_list {
        value: 0.5165449976921082
      }
    }
  }
}

features {
  feature {
    key: "Cover_Type_indexed"
    value {
      int64_list {
        value: 1
      }
    }
  }
  feature {
    key: "Hillshade_9am_scaled"
    value {
      float_list {
        value: 0.034051764756441116
      }
    }
  }
  feature {
    key: "Hillshade_Noon_scaled"
    value {
      float_list {
        value: 0

In [45]:
import tensorflow as tf
import os

# Por ejemplo, inspeccionemos los ejemplos del split de entrenamiento:
train_dir = f'{transformed_examples_uri}/Split-eval'
tf_record_files = tf.io.gfile.glob(os.path.join(train_dir, '*.gz'))
print("Archivos TFRecord encontrados en 'Split-eval':", tf_record_files)

if tf_record_files:
    # Indica el tipo de compresión 'GZIP'
    dataset = tf.data.TFRecordDataset(tf_record_files, compression_type='GZIP')
    for serialized_example in dataset.take(3):  # Muestra 3 ejemplos
        example = parse_tf_example(serialized_example.numpy())
        print(example)
else:
    print("No se encontraron archivos TFRecord en el subdirectorio 'Split-eval'.")

Archivos TFRecord encontrados en 'Split-eval': ['/work/notebooks/pipeline/Transform/transformed_examples/40/Split-eval/transformed_examples-00000-of-00001.gz']
features {
  feature {
    key: "Cover_Type_indexed"
    value {
      int64_list {
        value: 0
      }
    }
  }
  feature {
    key: "Hillshade_9am_scaled"
    value {
      float_list {
        value: 0.7768592238426208
      }
    }
  }
  feature {
    key: "Hillshade_Noon_scaled"
    value {
      float_list {
        value: 0.543292224407196
      }
    }
  }
  feature {
    key: "Slope_scaled"
    value {
      float_list {
        value: -0.9459911584854126
      }
    }
  }
}

features {
  feature {
    key: "Cover_Type_indexed"
    value {
      int64_list {
        value: 0
      }
    }
  }
  feature {
    key: "Hillshade_9am_scaled"
    value {
      float_list {
        value: 0.7025784850120544
      }
    }
  }
  feature {
    key: "Hillshade_Noon_scaled"
    value {
      float_list {
        value: 0.94804

# Metadatos de Aprendizaje Automático

## Importando los módulos relevantes y configurando la conexión al almacén de metadatos.

In [47]:
from tfx.orchestration.metadata import Metadata, sqlite_metadata_connection_config

# Configura la conexión al almacén de metadatos 
METADATA_PATH = '/work/notebooks/pipeline/metadata.sqlite'
metadata_config = sqlite_metadata_connection_config(METADATA_PATH)

# Acceso a artefactos almacenados

In [48]:
with Metadata(metadata_config) as m:
    # 1. Listar todos los tipos de artefactos registrados
    artifact_types = m.store.get_artifact_types()
    print("Tipos de artefactos registrados:")
    for at in artifact_types:
        print(f" - Nombre: {at.name}, ID: {at.id}")

    # 2. Recuperar artefactos del tipo 'Schema'
    schema_artifacts = m.store.get_artifacts_by_type('Schema')
    print("\nArtefactos de tipo 'Schema':")
    for art in schema_artifacts:
        print(f" - ID: {art.id}, URI: {art.uri}")

    # 3. Mostrar eventos asociados a un artefacto (por ejemplo, el primero de los esquemas)
    if schema_artifacts:
        schema_artifact_id = schema_artifacts[0].id
        events = m.store.get_events_by_artifact_ids([schema_artifact_id])
        print(f"\nEventos asociados al artefacto Schema con ID {schema_artifact_id}:")
        for event in events:
            print(event)


Tipos de artefactos registrados:
 - Nombre: Examples, ID: 14
 - Nombre: ExampleStatistics, ID: 16
 - Nombre: Schema, ID: 18
 - Nombre: ExampleAnomalies, ID: 20
 - Nombre: TransformGraph, ID: 22
 - Nombre: TransformCache, ID: 23

Artefactos de tipo 'Schema':
 - ID: 3, URI: /work/notebooks/pipeline/SchemaGen/schema/3
 - ID: 18, URI: /work/notebooks/pipeline/SchemaAdj
 - ID: 20, URI: /work/notebooks/pipeline/SchemaAdj/schema_serving.pbtxt
 - ID: 22, URI: /work/notebooks/pipeline/SchemaAdj/schema_serving.pbtxt
 - ID: 24, URI: /work/notebooks/pipeline/SchemaServing/schema_serving.pbtxt
 - ID: 26, URI: /work/notebooks/pipeline/SchemaServing
 - ID: 28, URI: /work/notebooks/pipeline/SchemaServing/schema_serving.pbtxt
 - ID: 30, URI: /work/notebooks/pipeline/SchemaServing/schema_serving.pbtxt
 - ID: 32, URI: /work/notebooks/pipeline/SchemaServing
 - ID: 38, URI: /work/notebooks/pipeline/Transform/pre_transform_schema/27
 - ID: 40, URI: /work/notebooks/pipeline/Transform/post_transform_schema/27

## Obtener las propiedades de un artefacto en particular (ExampleStatistics)

In [49]:
from tfx.orchestration.metadata import Metadata, sqlite_metadata_connection_config

METADATA_PATH = '/work/notebooks/pipeline/metadata.sqlite'
metadata_config = sqlite_metadata_connection_config(METADATA_PATH)

with Metadata(metadata_config) as m:
    # Obtener todos los tipos de artefactos y construir un diccionario {type_id: type_name}
    artifact_types = m.store.get_artifact_types()
    type_dict = {at.id: at.name for at in artifact_types}
    
    all_artifacts = m.store.get_artifacts()
    for art in all_artifacts:
        art_type_name = type_dict.get(art.type_id, "Unknown")
        if art_type_name.strip().lower() == 'examplestatistics'.lower():
            print("ID:", art.id)
            print("URI:", art.uri)
            print("Properties:", art.properties)
            break


ID: 2
URI: /work/notebooks/pipeline/StatisticsGen/statistics/2
Properties: {'split_names': string_value: "[\"train\", \"eval\"]"
}


In [50]:
from tfx.orchestration.metadata import Metadata, sqlite_metadata_connection_config

METADATA_PATH = '/work/notebooks/pipeline/metadata.sqlite'
metadata_config = sqlite_metadata_connection_config(METADATA_PATH)

with Metadata(metadata_config) as m:
    all_artifacts = m.store.get_artifacts()
    print("Listado completo de artefactos:")
    for art in all_artifacts:
        print(f"ID: {art.id}, type: {art.type}, URI: {art.uri}")


Listado completo de artefactos:
ID: 1, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/1
ID: 2, type: , URI: /work/notebooks/pipeline/StatisticsGen/statistics/2
ID: 3, type: , URI: /work/notebooks/pipeline/SchemaGen/schema/3
ID: 4, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/4
ID: 5, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/5
ID: 6, type: , URI: /work/notebooks/pipeline/StatisticsGen/statistics/6
ID: 7, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/7
ID: 8, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/8
ID: 9, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/9
ID: 10, type: , URI: /work/notebooks/pipeline/ExampleValidator/anomalies/10
ID: 11, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/11
ID: 12, type: , URI: /work/notebooks/pipeline/ExampleValidator/anomalies/12
ID: 13, type: , URI: /work/notebooks/pipeline/CsvExampleGen/examples/13
ID: 14, type: , URI: /work/notebooks/pipelin

In [51]:
from tfx.orchestration.metadata import Metadata, sqlite_metadata_connection_config

METADATA_PATH = '/work/notebooks/pipeline/metadata.sqlite'
metadata_config = sqlite_metadata_connection_config(METADATA_PATH)

with Metadata(metadata_config) as m:
    # Imprimir cada artefacto mostrando 'type' y 'type_id'
    all_artifacts = m.store.get_artifacts()
    print("Listado completo de artefactos:")
    for art in all_artifacts:
        print(f"ID: {art.id}, type: '{art.type}', type_id: {art.type_id}, URI: {art.uri}")

    # Construir diccionario {type_id: type_name} a partir de los tipos de artefactos registrados
    artifact_types = m.store.get_artifact_types()
    type_dict = {at.id: at.name for at in artifact_types}
    
    print("\nDiccionario de mapeo de type_id a type_name:")
    for tid, tname in type_dict.items():
        print(f"  {tid}: {tname}")
    
    # Ahora, filtrar artefactos que en realidad sean de tipo "ExampleStatistics"
    print("\nArtefactos filtrados como 'ExampleStatistics':")
    for art in all_artifacts:
        # Recuperar el nombre real del tipo usando type_id
        art_type_name = type_dict.get(art.type_id, "Unknown")
        if art_type_name.strip().lower() == 'examplestatistics'.lower():
            print("ID:", art.id)
            print("URI:", art.uri)
            print("Properties:", art.properties)
            break


Listado completo de artefactos:
ID: 1, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/1
ID: 2, type: '', type_id: 16, URI: /work/notebooks/pipeline/StatisticsGen/statistics/2
ID: 3, type: '', type_id: 18, URI: /work/notebooks/pipeline/SchemaGen/schema/3
ID: 4, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/4
ID: 5, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/5
ID: 6, type: '', type_id: 16, URI: /work/notebooks/pipeline/StatisticsGen/statistics/6
ID: 7, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/7
ID: 8, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/8
ID: 9, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/9
ID: 10, type: '', type_id: 20, URI: /work/notebooks/pipeline/ExampleValidator/anomalies/10
ID: 11, type: '', type_id: 14, URI: /work/notebooks/pipeline/CsvExampleGen/examples/11
ID: 12, type: '', type_id: 

# Seguimiento de los artefactos

In [52]:
from tfx.orchestration.metadata import Metadata, sqlite_metadata_connection_config
from ml_metadata.proto import metadata_store_pb2

# Utiliza las constantes definidas en el mensaje Event
INPUT_EVENT = metadata_store_pb2.Event.INPUT
OUTPUT_EVENT = metadata_store_pb2.Event.OUTPUT

def get_main_artifacts_for_artifact(artifact_id, metadata_path='/work/notebooks/pipeline/metadata.sqlite'):
    """
    Dado el ID de un artefacto de salida, retorna los artefactos principales (de entrada)
    que se usaron para generar ese artefacto a través de la ejecución asociada.
    """
    metadata_config = sqlite_metadata_connection_config(metadata_path)
    main_artifacts = []
    
    with Metadata(metadata_config) as m:
        # Obtener eventos asociados al artefacto de salida
        events = m.store.get_events_by_artifact_ids([artifact_id])
        if not events:
            print(f"No se encontraron eventos para el artefacto con ID {artifact_id}")
            return main_artifacts
        
        # Buscar el primer evento de tipo OUTPUT para obtener el execution_id
        execution_id = None
        for event in events:
            if event.type == OUTPUT_EVENT:
                execution_id = event.execution_id
                break
        
        if execution_id is None:
            print(f"No se encontró un evento OUTPUT para el artefacto con ID {artifact_id}")
            return main_artifacts
        
        # Con el execution_id, obtener todos los eventos asociados a esa ejecución
        exec_events = m.store.get_events_by_execution_ids([execution_id])
        
        # Filtrar los eventos de tipo INPUT y obtener sus artifact_ids
        input_artifact_ids = [event.artifact_id for event in exec_events if event.type == INPUT_EVENT]
        
        # Recuperar los artefactos de esos IDs
        for aid in input_artifact_ids:
            arts = m.store.get_artifacts_by_id([aid])
            if arts:
                main_artifacts.append(arts[0])
                
    return main_artifacts

def print_main_artifacts_table(artifact_id):
    """
    Imprime en formato tabla los artefactos de entrada que generaron un artefacto dado.
    """
    # Obtener el mapeo de type_id -> type_name
    metadata_config = sqlite_metadata_connection_config(METADATA_PATH)
    with Metadata(metadata_config) as m:
        artifact_types = m.store.get_artifact_types()
        type_dict = {at.id: at.name for at in artifact_types}

    main_artifacts = get_main_artifacts_for_artifact(artifact_id, METADATA_PATH)
    if not main_artifacts:
        print(f"No hay artefactos principales para el artefacto con ID {artifact_id}")
        return
    
    print(f"Artefactos principales que alimentaron la generación del artefacto con ID {artifact_id}:")
    print(f"{'artifact_id':<12}  {'type':<20}  {'uri'}")
    for art in main_artifacts:
        art_type_name = type_dict.get(art.type_id, "Desconocido")
        print(f"{art.id:<12}  {art_type_name:<20}  {art.uri}")

# Obtener Artefactos Principales

In [53]:
print_main_artifacts_table(35)


Artefactos principales que alimentaron la generación del artefacto con ID 35:
artifact_id   type                  uri
1             Examples              /work/notebooks/pipeline/CsvExampleGen/examples/1
32            Schema                /work/notebooks/pipeline/SchemaServing
