In [None]:
import os
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
import plotly.express as px
from sklearn.decomposition import PCA

In [None]:
# Directory containing the per-machine `<machine>_norm.csv` files. The path is
# relative, so it is resolved against the notebook's working directory.
DATA_PATH = 'csv'
# Seed passed as `random_state` to KMeans and TSNE.
RANDOM_STATE = 123

In [None]:
def clusters(
    df: pd.DataFrame,
    n_clusters: int
):
    """Assign a k-means cluster label to every row of ``df``.

    Every column except ``time`` is used as a clustering feature.

    Args:
        df: Frame with a ``time`` column plus the feature columns.
        n_clusters: Number of clusters requested from KMeans.

    Returns:
        A new frame with two columns: ``time`` (taken from ``df``) and
        ``cluster`` (the fitted KMeans labels, in row order). ``df`` itself
        is not modified.
    """
    features = df.drop(columns=['time'])

    model = KMeans(n_clusters=n_clusters, random_state=RANDOM_STATE)
    model.fit(features)

    # `assign` returns a fresh frame, so the caller's data stays untouched.
    return df[['time']].assign(cluster=model.labels_)

def plot_clusters_tsne(
    df: pd.DataFrame,
    n_clusters: int,
    method: str = 'tsne'
):
    """Cluster the rows with k-means and show them in a 2-D scatter plot.

    Despite the name, the projection is selectable: t-SNE or PCA.

    Args:
        df: Frame with a ``time`` column; every other column is used as a
            feature for both the clustering and the projection.
        n_clusters: Number of k-means clusters.
        method: ``'tsne'`` (default) or ``'pca'``.

    Raises:
        ValueError: If ``method`` is neither ``'tsne'`` nor ``'pca'``.
    """
    # Fail early with a clear message instead of a late plotly error about a
    # missing 'x' column after the clustering has already been computed.
    if method not in ('tsne', 'pca'):
        raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

    X = df.drop(columns=['time'])

    df = clusters(df, n_clusters)
    # String labels make plotly use a discrete colour per cluster; numeric
    # labels would be rendered on a continuous colour scale.
    df['cluster'] = df['cluster'].astype(str)

    if method == 'tsne':
        embedding = TSNE(
            n_components=2,
            random_state=RANDOM_STATE
        ).fit_transform(X)
    else:
        # Seeded like TSNE/KMeans so randomized SVD solvers stay reproducible.
        embedding = PCA(
            n_components=2,
            random_state=RANDOM_STATE
        ).fit_transform(X.values)

    # Assign positionally from the array: wrapping the embedding in a new
    # DataFrame would align on index and yield NaN for non-default indexes.
    df['x'] = embedding[:, 0]
    df['y'] = embedding[:, 1]

    fig = px.scatter(
        df,
        x='x',
        y='y',
        color='cluster',
        hover_name='time',
        # Keep the legend in numeric cluster order.
        category_orders={'cluster': [str(i) for i in range(n_clusters)]},
    )
    fig.show()

def make_plot(
    machine:str,
    n_clusters: int = 3,
    method: str = 'tsne'
):
    """Load one machine's ``<machine>_norm.csv`` and plot its clusters.

    Args:
        machine: Machine identifier; the file read is
            ``DATA_PATH/<machine>_norm.csv``.
        n_clusters: Number of k-means clusters, forwarded unchanged.
        method: Projection method, forwarded unchanged.
    """
    csv_name = machine + '_norm.csv'
    csv_path = os.path.join(DATA_PATH, csv_name)

    readings = pd.read_csv(csv_path)
    plot_clusters_tsne(readings, n_clusters, method)

## TSNE

In [None]:
# t-SNE projection of machine WOS___174L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___174L', method='tsne')

In [None]:
# t-SNE projection of machine WOS___175L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___175L', method='tsne')

In [None]:
# t-SNE projection of machine WOS___176L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___176L', method='tsne')

In [None]:
# t-SNE projection of machine WOS___177L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___177L', method='tsne')

In [None]:
# t-SNE projection of machine WOS___179L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___179L', method='tsne')

## PCA

In [None]:
# PCA projection of machine WOS___174L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___174L', method='pca')

In [None]:
# PCA projection of machine WOS___175L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___175L', method='pca')

In [None]:
# PCA projection of machine WOS___176L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___176L', method='pca')

In [None]:
# PCA projection of machine WOS___177L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___177L', method='pca')

In [None]:
# PCA projection of machine WOS___179L, coloured by k-means cluster (default n_clusters=3).
make_plot('WOS___179L', method='pca')

## Pomysł za 30k, podejście numer 3 (the "30k idea", approach no. 3): highlighting a chosen time window in the embedding

In [50]:
from datetime import datetime, timezone

In [65]:
# Machine to inspect and the UTC time window of interest.
machine = 'WOS___177L'
# Both bounds are parsed with the same format and tagged as UTC.
start_date, end_date = (
    datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
    for stamp in ('2021-02-19T18:00:00Z', '2021-02-20T04:00:00Z')
)

In [80]:
# Read `<machine>_norm.csv` from DATA_PATH; the `time` column is parsed into
# datetimes so it can be compared with start_date / end_date.
data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

In [81]:
# Timestamps lying strictly between start_date and end_date (both bounds exclusive).
selected_times = data.loc[
    lambda frame: (frame['time'] > start_date) & (frame['time'] < end_date),
    'time',
]

In [87]:
# Embed one machine's readings in 2-D and highlight the rows that fall inside a
# chosen time window (colour '1' = inside the window, '0' = outside).
machine = 'WOS___177L'
start_date = datetime.strptime('2021-02-19T18:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
end_date = datetime.strptime('2021-02-20T04:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

# Rows strictly inside the window (both bounds exclusive).
in_window = (data['time'] > start_date) & (data['time'] < end_date)
selected_times = data.loc[in_window, 'time']

df = data.copy()
method = 'tsne'

# Build the feature matrix BEFORE adding the label column: previously `color`
# was still part of X, so the highlight label leaked into the embedding.
X = df.drop(columns=['time'])

# Vectorised labelling; the former row-wise apply rebuilt list(selected_times)
# for every row, which is quadratic in the number of rows.
df['color'] = in_window.map({True: '1', False: '0'})

if method == 'tsne':
    X_embedded = TSNE(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X)
elif method == 'pca':
    X_embedded = PCA(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X.values)
else:
    raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

# Positional assignment from the array avoids index alignment surprises.
df['x'] = X_embedded[:, 0]
df['y'] = X_embedded[:, 1]

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='time',
    title=f'{machine}: {method} embedding, window {start_date} to {end_date} highlighted',
)
fig.show()

In [90]:
# Embed one machine's readings in 2-D and highlight the rows that fall inside a
# chosen time window (colour '1' = inside the window, '0' = outside).
machine = 'WOS___176L'
# Alternative window, kept for reference:
# start_date = datetime.strptime('2021-01-27T17:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
# end_date = datetime.strptime('2021-01-28T05:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

start_date = datetime.strptime('2021-01-13T06:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
end_date = datetime.strptime('2021-01-13T17:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

# Rows strictly inside the window (both bounds exclusive).
in_window = (data['time'] > start_date) & (data['time'] < end_date)
selected_times = data.loc[in_window, 'time']

df = data.copy()
method = 'tsne'

# Build the feature matrix BEFORE adding the label column: previously `color`
# was still part of X, so the highlight label leaked into the embedding.
X = df.drop(columns=['time'])

# Vectorised labelling; the former row-wise apply rebuilt list(selected_times)
# for every row, which is quadratic in the number of rows.
df['color'] = in_window.map({True: '1', False: '0'})

if method == 'tsne':
    X_embedded = TSNE(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X)
elif method == 'pca':
    X_embedded = PCA(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X.values)
else:
    raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

# Positional assignment from the array avoids index alignment surprises.
df['x'] = X_embedded[:, 0]
df['y'] = X_embedded[:, 1]

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='time',
    title=f'{machine}: {method} embedding, window {start_date} to {end_date} highlighted',
)
fig.show()

In [91]:
# Embed one machine's readings in 2-D and highlight the rows that fall inside a
# chosen time window (colour '1' = inside the window, '0' = outside).
machine = 'WOS___175L'
start_date = datetime.strptime('2021-02-11T07:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
end_date = datetime.strptime('2021-02-11T13:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

# Rows strictly inside the window (both bounds exclusive).
in_window = (data['time'] > start_date) & (data['time'] < end_date)
selected_times = data.loc[in_window, 'time']

df = data.copy()
method = 'tsne'

# Build the feature matrix BEFORE adding the label column: previously `color`
# was still part of X, so the highlight label leaked into the embedding.
X = df.drop(columns=['time'])

# Vectorised labelling; the former row-wise apply rebuilt list(selected_times)
# for every row, which is quadratic in the number of rows.
df['color'] = in_window.map({True: '1', False: '0'})

if method == 'tsne':
    X_embedded = TSNE(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X)
elif method == 'pca':
    X_embedded = PCA(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X.values)
else:
    raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

# Positional assignment from the array avoids index alignment surprises.
df['x'] = X_embedded[:, 0]
df['y'] = X_embedded[:, 1]

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='time',
    title=f'{machine}: {method} embedding, window {start_date} to {end_date} highlighted',
)
fig.show()

In [93]:
# Embed one machine's readings in 2-D and highlight the rows that fall inside a
# chosen time window (colour '1' = inside the window, '0' = outside).
machine = 'WOS___174L'
start_date = datetime.strptime('2020-10-01T23:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
end_date = datetime.strptime('2020-10-02T09:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

# Rows strictly inside the window (both bounds exclusive).
in_window = (data['time'] > start_date) & (data['time'] < end_date)
selected_times = data.loc[in_window, 'time']

df = data.copy()
method = 'tsne'

# Build the feature matrix BEFORE adding the label column: previously `color`
# was still part of X, so the highlight label leaked into the embedding.
X = df.drop(columns=['time'])

# Vectorised labelling; the former row-wise apply rebuilt list(selected_times)
# for every row, which is quadratic in the number of rows.
df['color'] = in_window.map({True: '1', False: '0'})

if method == 'tsne':
    X_embedded = TSNE(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X)
elif method == 'pca':
    X_embedded = PCA(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X.values)
else:
    raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

# Positional assignment from the array avoids index alignment surprises.
df['x'] = X_embedded[:, 0]
df['y'] = X_embedded[:, 1]

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='time',
    title=f'{machine}: {method} embedding, window {start_date} to {end_date} highlighted',
)
fig.show()

In [94]:
# Embed one machine's readings in 2-D and highlight the rows that fall inside a
# chosen time window (colour '1' = inside the window, '0' = outside).
machine = 'WOS___179L'
start_date = datetime.strptime('2021-04-30T23:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
end_date = datetime.strptime('2021-05-01T11:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)

data = pd.read_csv(os.path.join(DATA_PATH, machine+'_norm.csv'), parse_dates=['time'])

# Rows strictly inside the window (both bounds exclusive).
in_window = (data['time'] > start_date) & (data['time'] < end_date)
selected_times = data.loc[in_window, 'time']

df = data.copy()
method = 'tsne'

# Build the feature matrix BEFORE adding the label column: previously `color`
# was still part of X, so the highlight label leaked into the embedding.
X = df.drop(columns=['time'])

# Vectorised labelling; the former row-wise apply rebuilt list(selected_times)
# for every row, which is quadratic in the number of rows.
df['color'] = in_window.map({True: '1', False: '0'})

if method == 'tsne':
    X_embedded = TSNE(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X)
elif method == 'pca':
    X_embedded = PCA(
        n_components=2,
        random_state=RANDOM_STATE
    ).fit_transform(X.values)
else:
    raise ValueError(f"method must be 'tsne' or 'pca', got {method!r}")

# Positional assignment from the array avoids index alignment surprises.
df['x'] = X_embedded[:, 0]
df['y'] = X_embedded[:, 1]

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='time',
    title=f'{machine}: {method} embedding, window {start_date} to {end_date} highlighted',
)
fig.show()