## 1D-CNN ile Metin Sınıflandırma

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Flatten

# Load the factory reports; the CSV is expected in the current working directory.
data = pd.read_csv(filepath_or_buffer="factoryReports.csv")

# Preview the leading rows as a quick sanity check of the loaded columns.
print(data.head(n=5))

# Preprocess text data
def preprocess_text(text_data):
    """Return ``text_data`` with every string converted to lowercase.

    ``text_data`` is accessed through its ``.str`` accessor, so it is
    expected to be a pandas Series of strings. A new Series is returned;
    the argument itself is not modified.
    """
    return text_data.str.lower()

# Replace the raw descriptions with their lowercased form.
data['Description'] = preprocess_text(data['Description'])

# Map the category labels to integer ids. The fitted encoder is kept so the
# list of classes stays available after the column has been overwritten.
label_encoder = LabelEncoder()
label_encoder.fit(data['Category'])
data['Category'] = label_encoder.transform(data['Category'])

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.3,
    random_state=42,
)

# Build the vocabulary from the training descriptions only, then convert both
# splits to integer sequences with that same fitted tokenizer.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data['Description'])
X_train, X_test = (
    tokenizer.texts_to_sequences(split['Description'])
    for split in (train_data, test_data)
)

# Pad both splits to the length of the longest training sequence so every
# sample has the same shape.
sequence_length = max(map(len, X_train))
X_train = pad_sequences(X_train, maxlen=sequence_length)
X_test = pad_sequences(X_test, maxlen=sequence_length)

# Model hyperparameters
embedding_dimension = 100  # size of each learned word vector
num_filters = 200          # number of convolution filters
num_classes = len(label_encoder.classes_)  # one output unit per encoded category

# Architecture: embedding -> 1D convolution -> global max pooling -> softmax.
model = Sequential()
model.add(
    Embedding(
        # + 1 because tokenizer word indices start at 1; index 0 is the padding value.
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=embedding_dimension,
        # Kept on purpose: it fixes the input length so the model is built
        # and summary() below can report concrete output shapes.
        input_length=sequence_length,
    )
)
model.add(Conv1D(filters=num_filters, kernel_size=3, padding='same', activation='relu'))
# Collapse the sequence axis by keeping the maximum activation of each filter.
model.add(GlobalMaxPooling1D())
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. The sparse loss is used because the targets are integer
# class ids (produced by the label encoder), not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Display the model summary. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model.summary()

# Fit the network on the padded training sequences and their encoded categories.
batch_size = 128
history = model.fit(
    X_train,
    train_data['Category'],
    epochs=10,
    batch_size=batch_size,
)

# Score the held-out set: predict per-class outputs, then take the index of
# the largest output in each row as the predicted category id.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

y_true = test_data['Category']
test_accuracy = accuracy_score(y_true, y_pred_classes)
conf_matrix = confusion_matrix(y_true, y_pred_classes)

# Report the test accuracy followed by the confusion matrix.
print(f"Test Accuracy: {test_accuracy}")
print("Confusion Matrix:", conf_matrix, sep="\n")



                                         Description            Category  \
0  Items are occasionally getting stuck in the sc...  Mechanical Failure   
1  Loud rattling and banging sounds are coming fr...  Mechanical Failure   
2  There are cuts to the power when starting the ...  Electronic Failure   
3                 Fried capacitors in the assembler.  Electronic Failure   
4                           Mixer tripped the fuses.  Electronic Failure   

  Urgency          Resolution   Cost  
0  Medium    Readjust Machine     45  
1  Medium    Readjust Machine     35  
2    High    Full Replacement  16200  
3    High  Replace Components    352  
4     Low   Add to Watch List     55  


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 15, 100)           41300     
                                                                 
 conv1d (Conv1D)      