# In [None]:
import sqlite3
import numpy as np
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from google.cloud import storage
from sklearn.model_selection import train_test_split

# Train a Lemma_ID classifier from the UserData table, upload the saved model
# to Google Cloud Storage, and write the model's predictions back to the table.

# Connect to the database
conn = sqlite3.connect('your_database.db')
try:
    cursor = conn.cursor()

    # Execute SQL query to retrieve data.
    # rowid is fetched with every row so each prediction can later be written
    # back to exactly the row it was computed from: rowids may have gaps and a
    # SELECT without ORDER BY does not guarantee any particular row order.
    cursor.execute(
        "SELECT rowid, motion, imagesign, tap, phonetic_complexity_data, "
        "phonetic_probability_data, iconizity, cluster_labels, gestureclass, "
        "Lemma_ID FROM UserData;"
    )
    data = cursor.fetchall()
finally:
    # Close the database connection even if the query fails
    conn.close()

# Check if the fetched data is not empty
if data:
    # Extract individual columns from the fetched data
    (row_ids, motion, imagesign, tap, phonetic_complexity_data,
     phonetic_probability_data, iconizity_data, cluster_labels, gestureclass,
     Lemma_ID) = zip(*data)

    # Every column is a 1-D sequence holding one value per row, so the columns
    # are stacked side by side into an (n_rows, n_features) matrix.
    # (np.concatenate(..., axis=1) raises AxisError on 1-D inputs.)
    # NOTE(review): assumes every feature column is numeric and non-NULL.
    X = np.column_stack((
        motion, imagesign, tap, phonetic_complexity_data,
        phonetic_probability_data, iconizity_data, cluster_labels,
        gestureclass,
    ))

    # sparse_categorical_crossentropy expects integer class indices in
    # 0..n_classes-1. Encode the raw Lemma_ID values (order of first
    # appearance) and keep `classes` so predictions can be decoded back to the
    # original label values.
    classes = list(dict.fromkeys(Lemma_ID))
    class_index = {label: idx for idx, label in enumerate(classes)}
    y = np.array([class_index[label] for label in Lemma_ID], dtype=np.int64)

    # Split the data into training (80%), validation (10%), and testing (10%) sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Standardize the numerical features. The scaler is fitted on the training
    # split only so validation/test statistics do not leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # Scaled copy of the full dataset, used for the final predictions below
    X_scaled = scaler.transform(X)
    X = X_scaled

    # Define your Keras model
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X.shape[1],)),
        Dense(32, activation='relu'),
        Dense(len(classes), activation='softmax')  # One output unit per distinct Lemma_ID
    ])

    # Compile your model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',  # Use sparse categorical cross-entropy for integer labels
                  metrics=['accuracy'])  # Use accuracy as a metric

    # Train your model
    history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

    # Evaluate your model
    loss, accuracy = model.evaluate(X_test, y_test)

    # Save the trained model
    model_filename = 'global_model_Lemma_ID.keras'
    model.save(model_filename)

    # Initialize a client
    storage_client = storage.Client()  # Store model in Google Cloud Storage or any other cloud service

    # Specify the bucket name
    bucket_name = 'your_bucket_name'  # Ensure it doesn't start with a '/'

    # Upload the model file to the bucket
    destination_blob_name = f'models/{model_filename}'  # Optional: Specify a folder path within the bucket
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(model_filename)

    # Make predictions for the entire dataset including probabilities
    predictions_with_prob = model.predict(X)

    # For each row pick the most likely class and its probability. tolist()
    # converts NumPy scalars to plain Python numbers.
    confidence_threshold = 0.6
    best_indices = np.argmax(predictions_with_prob, axis=1).tolist()
    best_probabilities = np.max(predictions_with_prob, axis=1).tolist()

    # Decode the class index back to the original Lemma_ID value when the
    # probability reaches the threshold, otherwise store 'UNKNOWN'. Decoding
    # matters twice: the index is not the label, and sqlite3 cannot bind NumPy
    # scalar types as query parameters.
    # NOTE(review): this overwrites the Lemma_ID column that is also used as
    # the training label above — confirm this is intended.
    updates = [
        (classes[idx] if probability >= confidence_threshold else 'UNKNOWN', row_id)
        for idx, probability, row_id in zip(best_indices, best_probabilities, row_ids)
    ]

    # Connect to the database again to update the predictions
    conn = sqlite3.connect('your_database.db')
    try:
        # `with conn` commits all updates on success and rolls back on error
        with conn:
            conn.executemany("UPDATE UserData SET Lemma_ID = ? WHERE rowid = ?", updates)
    finally:
        conn.close()

else:
    # Nothing was fetched, so there is nothing to train on or to update
    print("No rows found in UserData; skipping training and prediction.")