A relational SQL database is probably the best fit for our use case, in which we want per-user databases/tables alongside a global database.

In [None]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from keras.models import Sequential
from keras.layers import Dense
from google.cloud import storage
from sklearn.model_selection import train_test_split

def _fetch_rows(cur, sql):
    """Run ``sql`` on ``cur`` and return every result row as a list of tuples."""
    cur.execute(sql)
    return cur.fetchall()


# Open the SQLite database and a cursor that is reused for all queries below.
conn = sqlite3.connect('your_database.db')  # Replace 'your_database.db' with the actual database path
cursor = conn.cursor()

# Pull the complete contents of each source table.
# NOTE(review): the queries have no ORDER BY, so row order is whatever SQLite
# returns — confirm that downstream code may rely on positional alignment.
motion = _fetch_rows(cursor, "SELECT * FROM MotionData;")
imagesign = _fetch_rows(cursor, "SELECT * FROM ImageData;")
tap = _fetch_rows(cursor, "SELECT * FROM tap;")
phonetic_complexity_data = _fetch_rows(cursor, "SELECT * FROM phonetic_complexity;")
phonetic_probability_data = _fetch_rows(cursor, "SELECT * FROM phonetic_probability;")
iconizity_data = _fetch_rows(cursor, "SELECT * FROM iconizity;")

# Stop early when ANY source table returned no rows. Without data there is
# nothing to cluster, and continuing would only fail later with an obscure
# NumPy error. The connection is closed first so it is not leaked.
feature_tables = {
    "MotionData": motion,
    "ImageData": imagesign,
    "tap": tap,
    "phonetic_complexity": phonetic_complexity_data,
    "phonetic_probability": phonetic_probability_data,
    "iconizity": iconizity_data,
}
empty_tables = [name for name, rows in feature_tables.items() if not rows]
if empty_tables:
    conn.close()
    raise SystemExit(
        "No data available in table(s): " + ", ".join(empty_tables)
        + " - skipping clustering and model training."
    )

# Column-wise concatenation needs the same number of rows in every table;
# report a mismatch by table name instead of a bare shape error.
row_counts = {name: len(rows) for name, rows in feature_tables.items()}
if len(set(row_counts.values())) != 1:
    conn.close()
    raise ValueError(f"Source tables have differing row counts: {row_counts}")

# Build one feature matrix: row i is the i-th row of every table, side by side.
# NOTE(review): this assumes the i-th row of each table describes the same
# entity and that all columns are numeric — confirm against the schema.
X = np.concatenate((motion, imagesign, tap, phonetic_complexity_data, phonetic_probability_data, iconizity_data), axis=1)

# Standardize the numerical features (zero mean, unit variance per column)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Calculate WCSS (within-cluster sum of squares) for k = 1 .. max_k.
# KMeans cannot fit more clusters than there are samples, so cap the range.
max_k = min(10, X_scaled.shape[0])
wcss = []
for i in range(1, max_k + 1):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(X_scaled)  # Use scaled data for clustering
    wcss.append(kmeans.inertia_)

# First differences: change in WCSS when going from k to k + 1 (normally <= 0)
differences = np.diff(wcss)

# Find the elbow point (optimal number of clusters).
# The elbow is where the WCSS curve bends most sharply, i.e. where the second
# difference is largest. Taking argmax of the FIRST differences would instead
# select the smallest improvement, which typically lies at the far end of the
# range rather than at the elbow.
if len(wcss) >= 3:
    curvature = np.diff(differences)
    # curvature[j] is centred on wcss[j + 1], which belongs to k = j + 2
    elbow_point = int(np.argmax(curvature)) + 2
else:
    # Too few candidates to detect a bend; fall back to a single cluster
    elbow_point = 1

# Initialize KMeans with optimal number of clusters
n_clusters = elbow_point
kmeans = KMeans(n_clusters=n_clusters, random_state=0)

# Fit KMeans to the scaled data and get one cluster label per row
cluster_labels = kmeans.fit_predict(X_scaled)

# Persist one cluster label per user, then always release the connection.
try:
    # Execute SQL query to retrieve user IDs (each row is a 1-tuple)
    cursor.execute("SELECT user_id FROM UserData;")
    user_ids = cursor.fetchall()

    # zip() would silently drop the surplus on a length mismatch and leave
    # users unlabeled or mislabeled, so fail loudly instead.
    # NOTE(review): assumes the i-th UserData row corresponds to the i-th
    # feature row — confirm against the schema.
    if len(user_ids) != len(cluster_labels):
        raise ValueError(
            f"Got {len(user_ids)} user IDs but {len(cluster_labels)} cluster labels"
        )

    # Unpack the 1-tuples and convert NumPy integers to plain Python ints:
    # sqlite3 cannot bind a tuple as a parameter, and NumPy scalars are not
    # stored as INTEGER values.
    label_rows = [
        (user_id, int(label))
        for (user_id,), label in zip(user_ids, cluster_labels)
    ]
    cursor.executemany(
        "INSERT INTO ClusterLabels (user_id, cluster_label) VALUES (?, ?)",
        label_rows,
    )

    # Commit the changes to the database
    conn.commit()
finally:
    # Close the database connection even if a query or insert failed
    conn.close()

# Hold out 20% of the rows, then halve that hold-out into validation and test
# sets (80% / 10% / 10% overall). Fixed seeds keep the split reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_scaled,
    cluster_labels,
    test_size=0.2,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# Assemble the classifier layer by layer: two ReLU hidden layers followed by
# a softmax output with one unit per cluster.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_scaled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(n_clusters, activation='softmax'))

# Cluster labels are plain integers, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split while monitoring the validation split.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)

# Measure loss and accuracy on the untouched test split.
loss, accuracy = model.evaluate(X_test, y_test)

# Write the trained model to disk in the native Keras format.
model.save('global_model_clusters.keras')

# Where the model lives locally and where it should go in the bucket
model_filename = 'global_model_clusters.keras'
bucket_name = 'your_bucket_name'  # Replace 'your_bucket_name' with your actual bucket name
destination_blob_name = f'models/{model_filename}'  # Optional: Specify a folder path within the bucket

# Google Cloud Storage client - adjust depending on the cloud provider used
storage_client = storage.Client()

# Resolve the bucket and target blob, then push the saved model file to it
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(model_filename)

Here, a global model can be trained on all user data, saved, and uploaded to a cloud service for easy deployment. If no user data is available yet, clustering will be skipped. The elbow-point calculation chooses the number of clusters from the data instead of relying on an arbitrarily pre-defined number. Open question: should (one-hot) encoded categorical labels be standardized as well? In summary: a global model is trained on all user data, and a cluster-label table is created with user IDs in the corresponding rows for easy retrieval.