In [26]:
import tensorflow as tf
from tensorflow.keras import layers, Model, applications
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.preprocessing.image import load_img
import pandas as pd
import numpy as np

In [27]:
# 1. Числовой ввод (segment, magnitude, engine_cost)
numeric_input = layers.Input(shape=(3,), name='numeric_input')
numeric_features = layers.Dense(32, activation='relu')(numeric_input)

In [28]:
train_df = pd.read_csv("C:\\Users\\gabde\\Downloads\\db.csv")
train_df

Unnamed: 0,ID,segment,magnitude,engine_cost,sum,marking_equipment,image_path
0,1,1,1,2703967,3871227,Электродвигатель трехфазный АИР 100L4 380В 4кВ...,1_1.jpg
1,2,1,2,5316083,6666357,Электродвигатель трехфазный АИР 132M4 380В 11к...,1_1.jpg
2,3,1,3,6812345,9026933,Электродвигатель трехфазный АИР 160M4 380В 18....,1_1.jpg
3,4,1,1,6812345,3629567,Электродвигатель трехфазный АИР 100L4 380В 4кВ...,1_1.jpg
4,5,1,2,5316083,6627283,Электродвигатель трехфазный АИР 132M4 380В 11к...,1_1.jpg
5,6,1,3,6812345,83615,Электродвигатель трехфазный АИР 160M4 380В 18....,1_1.jpg
6,7,1,1,2703967,3954367,Электродвигатель трехфазный АИР 100L4 380В 4кВ...,1_1.jpg
7,8,1,2,5316083,6995383,Электродвигатель трехфазный АИР 132M4 380В 11к...,1_1.jpg
8,9,1,3,6812345,9251845,Электродвигатель трехфазный АИР 160M4 380В 18....,1_1.jpg
9,10,2,1,2936608,4146394,Электродвигатель 5АИ 112 МВ6 4/1000 IM 1081 38...,2_2.jpg


In [29]:
# 2. Текстовый ввод (marking_equipment)
text_input = layers.Input(shape=(1,), dtype=tf.string, name='text_input')
text_vectorizer = TextVectorization(max_tokens=500, output_sequence_length=50)
text_vectorizer.adapt(train_df['marking_equipment'])
text_features = text_vectorizer(text_input)
text_features = layers.Embedding(input_dim=500, output_dim=64)(text_features)
text_features = layers.GlobalAveragePooling1D()(text_features)

In [30]:
# 3. Изображения (image_path)
image_input = layers.Input(shape=(224, 224, 3), name='image_input')
base_cnn = applications.EfficientNetB0(weights='imagenet', include_top=False)
image_features = base_cnn(image_input)
image_features = layers.GlobalAveragePooling2D()(image_features)

In [31]:
# Объединение всех модальностей
merged = layers.Concatenate()([numeric_features, text_features, image_features])
merged = layers.Dense(128, activation='relu')(merged)
output = layers.Dense(1, name='output')(merged)  # Регрессия для sum

model = Model(
    inputs=[numeric_input, text_input, image_input],
    outputs=output
)

model.compile(
    optimizer='adam',
    loss='mse',  # MSE для регрессии
    metrics=['mae']
)

In [32]:
import os
import sys

image_dir = "C:\\Users\\gabde\\Downloads\\"
train_image_paths = []
total = len(train_df['image_path'])

for i, img_id in enumerate(train_df['image_path']):
    sys.stdout.write(f"\rProcessing {i+1}/{total}")
    sys.stdout.flush()
    
    img_path = os.path.join(image_dir, f"{img_id}")
    print(img_path)
    if os.path.exists(img_path):
        train_image_paths.append(img_path)
    else:
        train_image_paths.append(None)

train_df['image_path'] = train_image_paths
print("\nDone!")

Processing 1/24C:\Users\gabde\Downloads\1_1.jpg
Processing 2/24C:\Users\gabde\Downloads\1_1.jpg
Processing 3/24C:\Users\gabde\Downloads\1_1.jpg
Processing 4/24C:\Users\gabde\Downloads\1_1.jpg
Processing 5/24C:\Users\gabde\Downloads\1_1.jpg
Processing 6/24C:\Users\gabde\Downloads\1_1.jpg
Processing 7/24C:\Users\gabde\Downloads\1_1.jpg
Processing 8/24C:\Users\gabde\Downloads\1_1.jpg
Processing 9/24C:\Users\gabde\Downloads\1_1.jpg
Processing 10/24C:\Users\gabde\Downloads\2_2.jpg
Processing 11/24C:\Users\gabde\Downloads\2_2.jpg
Processing 12/24C:\Users\gabde\Downloads\2_2.jpg
Processing 13/24C:\Users\gabde\Downloads\2_2.jpg
Processing 14/24C:\Users\gabde\Downloads\2_2.jpg
Processing 15/24C:\Users\gabde\Downloads\2_2.jpg
Processing 16/24C:\Users\gabde\Downloads\2_2.jpg
Processing 17/24C:\Users\gabde\Downloads\2_2.jpg
Processing 18/24C:\Users\gabde\Downloads\2_2.jpg
Processing 19/24C:\Users\gabde\Downloads\3_2.jpg
Processing 20/24C:\Users\gabde\Downloads\3_2.jpg
Processing 21/24C:\Users\gabd

In [33]:
train_df['image_path']

0     C:\Users\gabde\Downloads\1_1.jpg
1     C:\Users\gabde\Downloads\1_1.jpg
2     C:\Users\gabde\Downloads\1_1.jpg
3     C:\Users\gabde\Downloads\1_1.jpg
4     C:\Users\gabde\Downloads\1_1.jpg
5     C:\Users\gabde\Downloads\1_1.jpg
6     C:\Users\gabde\Downloads\1_1.jpg
7     C:\Users\gabde\Downloads\1_1.jpg
8     C:\Users\gabde\Downloads\1_1.jpg
9     C:\Users\gabde\Downloads\2_2.jpg
10    C:\Users\gabde\Downloads\2_2.jpg
11    C:\Users\gabde\Downloads\2_2.jpg
12    C:\Users\gabde\Downloads\2_2.jpg
13    C:\Users\gabde\Downloads\2_2.jpg
14    C:\Users\gabde\Downloads\2_2.jpg
15    C:\Users\gabde\Downloads\2_2.jpg
16    C:\Users\gabde\Downloads\2_2.jpg
17    C:\Users\gabde\Downloads\2_2.jpg
18    C:\Users\gabde\Downloads\3_2.jpg
19    C:\Users\gabde\Downloads\3_2.jpg
20    C:\Users\gabde\Downloads\3_2.jpg
21    C:\Users\gabde\Downloads\3_2.jpg
22    C:\Users\gabde\Downloads\3_2.jpg
23    C:\Users\gabde\Downloads\3_2.jpg
Name: image_path, dtype: object

In [34]:
print(train_df[['segment', 'magnitude', 'engine_cost']].dtypes)

segment         int64
magnitude       int64
engine_cost    object
dtype: object


In [35]:
train_df['engine_cost'] = train_df['engine_cost'].str.replace(',', '.', regex=False)
train_df['engine_cost'] = train_df['engine_cost'].astype('float32')

In [36]:
print(train_df['sum'].dtypes)

object


In [37]:
train_df['sum'] = train_df['sum'].str.replace(',', '.', regex=False)
train_df['sum'] = train_df['sum'].astype('float32')

In [38]:
def load_and_preprocess_image(path, target_size=(224, 224)):
    img = load_img(path, target_size=target_size)
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    return img_array / 255.0  # Нормализация

image_data = np.array([load_and_preprocess_image(path) for path in train_df['image_path']])

In [39]:
vectorizer = TextVectorization(max_tokens=1000, output_sequence_length=20)
vectorizer.adapt(train_df['marking_equipment'])
vectorized_text_data = vectorizer(train_df['marking_equipment'])

In [40]:
history = model.fit(
    x={
        'numeric_input': train_df[['segment', 'magnitude', 'engine_cost']].values.astype('float32'),
        'text_input': train_df['marking_equipment'].values,  # Сырые строки
        'image_input': image_data
    },
    y=train_df['sum'].values,
    epochs=50,
    batch_size=32,
    validation_split=0.2
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 27s/step - loss: 7470578176.0000 - mae: 78527.0938 - val_loss: 46080286720.0000 - val_mae: 208525.8906
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 7298515968.0000 - mae: 77507.6875 - val_loss: 45055078400.0000 - val_mae: 206116.5312
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 7135628288.0000 - mae: 76530.3125 - val_loss: 44060168192.0000 - val_mae: 203751.0000
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 6977820672.0000 - mae: 75571.2031 - val_loss: 43088764928.0000 - val_mae: 201414.5000
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 6823869440.0000 - mae: 74623.1172 - val_loss: 42168242176.0000 - val_mae: 199174.9844
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 6678154240.0000 - mae: 73715.7500 