In [None]:
pip install momentfm

In [None]:
# alternative
!pip install git+https://github.com/moment-timeseries-foundation-model/moment.git

In [None]:
!pip install numpy pandas scikit-learn matplotlib tqdm

In [2]:
# Load the pretrained MOMENT checkpoint, configured to produce embeddings
from momentfm import MOMENTPipeline

model = MOMENTPipeline.from_pretrained(
    "AutonLab/MOMENT-1-large",
    # Only look at local files (i.e., do not try to download the model).
    local_files_only=True,
    # `embedding` mode: the model is loaded to learn representations
    model_kwargs=dict(task_name="embedding"),
)

  torch.utils._pytree._register_pytree_node(


In [4]:
# Call the pipeline's init() method, then print the module tree so the
# layer structure can be inspected.
model.init()
print(model)

MOMENTPipeline(
  (normalizer): RevIN()
  (tokenizer): Patching()
  (patch_embedding): PatchEmbedding(
    (value_embedding): Linear(in_features=8, out_features=1024, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
  



In [6]:
# Total number of scalar parameters held by the encoder
num_params = sum(map(lambda p: p.numel(), model.encoder.parameters()))
print(f"Number of parameters: {num_params}")

Number of parameters: 341231104


In [8]:
# import NHANES data 
import torch
import pandas as pd

# Read the wide-format table; index_col=0 uses the file's first column as the row index.
df = pd.read_csv("data/data_wide.csv", index_col=0)
# Preview the first three rows.
df.head(3)

Unnamed: 0,seqn,gender,age,race,education,married,pir,bmi,time1,time2,...,time2007,time2008,time2009,time2010,time2011,time2012,time2013,time2014,time2015,time2016
1,21009,1,55,3,3,1,3.79,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,21010,2,52,3,4,6,1.24,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,21012,1,63,4,3,6,0.89,0,1,0,...,1,1,0,0,0,1,1,0,0,0


In [10]:
# reshaping the data with L2 normalization 
import numpy as np

def normalize_l2(x, eps=1e-10):
    """Scale vectors to unit L2 (Euclidean) norm.

    Args:
        x: Array-like. A 1-D input is treated as a single vector; a 2-D
            input is normalized row by row.
        eps: Norms below this threshold are treated as zero, and the
            corresponding vector/row is returned with its original values
            instead of being divided (avoids division by ~0).

    Returns:
        np.ndarray with the same shape as ``x``. For 2-D input the result is
        always float64.
    """
    x = np.array(x)
    if x.ndim == 1:
        norm = np.linalg.norm(x)
        # A (near-)zero vector has no direction to preserve; return it as is.
        return x if norm < eps else x / norm

    # One norm per row, kept as a column so it broadcasts against x.
    norms = np.linalg.norm(x, 2, axis=1, keepdims=True)

    # Start from a float64 copy: rows excluded by the mask below keep their
    # original values, exactly like the explicit per-row loop would.
    normalized = x.astype(np.float64)
    np.divide(x, norms, out=normalized, where=norms >= eps)
    return normalized

def prepare_data_from_df(df, value_columns, n_channels=1, max_seq_len=512):
    """Convert time-series columns of a DataFrame into a float32 tensor.

    The selected columns are split into ``n_channels`` equally long
    consecutive segments per row, each segment is truncated to at most
    ``max_seq_len`` steps, and every row is L2-normalized across all of its
    retained values.

    Args:
        df: DataFrame holding one sample per row.
        value_columns: Column labels that make up the series, in order.
        n_channels: Number of channels the columns are split into.
        max_seq_len: Maximum context length kept per channel.

    Returns:
        torch.Tensor of shape [batchsize, channel, context_length].

    Raises:
        ValueError: If ``n_channels`` is not positive or does not evenly
            divide the number of selected columns.
    """
    if n_channels < 1:
        raise ValueError(f"Number of channels must be a positive integer, got {n_channels}")

    # convert time series columns to numpy array
    data = df[value_columns].values
    n_batchsize, n_context = data.shape
    print(f"Original data shape: {data.shape}")

    # confirm the reshaping
    if n_context % n_channels != 0:
        raise ValueError(f"Number of features ({n_context}) must be divisible by number of channels ({n_channels})")
    context_per_channel = n_context // n_channels

    # Split into channels BEFORE truncating. Cutting the flat row first would
    # keep only the leading columns and then spread them over all channels,
    # mixing channels whenever n_channels > 1.
    data = data.reshape(n_batchsize, n_channels, context_per_channel)

    # check if sequence length is greater than the maximum and truncate if needed.
    if context_per_channel > max_seq_len:
        print(f"Warning: Context length per channel ({context_per_channel}) exceeds maximum of {max_seq_len}. "
              f"Truncating to {max_seq_len}.")
        data = data[:, :, :max_seq_len]
        context_per_channel = max_seq_len

    # Apply L2 normalization per row, over all retained values of that row
    flat = data.reshape(n_batchsize, n_channels * context_per_channel)
    flat = normalize_l2(flat)

    # reshape the data into [batchsize, channel, context]
    data_reshaped = flat.reshape(n_batchsize, n_channels, context_per_channel)
    print(f"Reshaped data shape: {data_reshaped.shape}")

    # Convert to a float32 torch tensor (always a copy)
    data_tensor = torch.tensor(data_reshaped, dtype=torch.float32)
    print(f"Tensor shape: {data_tensor.shape}")

    return data_tensor  # [batchsize, channel, context_length]

In [12]:
# Every column whose name starts with 'time' is part of the series
value_columns = list(filter(lambda name: name.startswith('time'), df.columns))
data_tensor = prepare_data_from_df(df, value_columns, n_channels=1)

Original data shape: (6943, 2016)
Reshaped data shape: (6943, 1, 512)
Tensor shape: torch.Size([6943, 1, 512])


In [None]:
###### DONT RUN by default. embedding of the data in a single forward pass
from pprint import pprint

# This cell is marked as not to be run (the notebook embeds two chunks of the
# data instead), so it is disabled behind a flag. That keeps
# "Restart & Run All" working; set the flag to True to run the full pass.
RUN_FULL_EMBEDDING = False

if RUN_FULL_EMBEDDING:
    output = model(x_enc=data_tensor)
    pprint(output)

In [None]:
##### DONT RUN: get 3500 random subset from the data_tensor
import random

# random.sample draws from Python's `random` module, so that is the generator
# that must be seeded for a repeatable subset; seeding NumPy alone has no
# effect on the draw. The NumPy seed is kept for any later NumPy-based sampling.
random.seed(1)
np.random.seed(1)
random_indices = random.sample(range(data_tensor.shape[0]), 3500)
subset_data = data_tensor[random_indices]

print(f"Subset shape: {subset_data.shape}")

In [14]:
# split the tensor into two consecutive chunks at row 3500

subset_data1, subset_data2 = data_tensor[:3500], data_tensor[3500:]

print(f"Subset shape: {subset_data1.shape}")
print(f"Subset shape: {subset_data2.shape}")

Subset shape: torch.Size([3500, 1, 512])
Subset shape: torch.Size([3443, 1, 512])


In [16]:
# embedding of the subset1
from pprint import pprint

# Inference only: eval() switches off the model's dropout layers so the
# embeddings are deterministic, and no_grad() avoids building an autograd graph.
model.eval()
with torch.no_grad():
    output1 = model(x_enc=subset_data1)
pprint(output1)

  return fn(*args, **kwargs)


TimeseriesOutputs(forecast=None,
                  anomaly_scores=None,
                  logits=None,
                  labels=None,
                  input_mask=tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]]),
                  pretrain_mask=None,
                  reconstruction=None,
                  embeddings=tensor([[-0.0620,  0.0345, -0.0244,  ..., -0.0308,  0.0389, -0.0119],
        [-0.0112,  0.0668, -0.0565,  ..., -0.0841,  0.0629,  0.0299],
        [-0.0395,  0.0285, -0.0204,  ..., -0.0179,  0.0298,  0.0084],
        ...,
        [-0.0043,  0.0217, -0.0580,  ..., -0.0625,  0.0413,  0.0014],
        [-0.0472,  0.0492, -0.0517,  ..., -0.0936,  0.0623,  0.0065],
        [-0.0479,  0.0030, -0.0557,  ..., -0.0223,  0.0237, -0.0294]]),
                  metadata='mean',
      

In [20]:
# pull the embedding matrix out of the model output as a NumPy array
embeddings1 = output1.embeddings
embeddings1_np = embeddings1.detach().cpu().numpy()

# rows of the original dataset that belong to this chunk, re-indexed from 0
df_subset1 = df.iloc[:3500].reset_index(drop=True)

# build the table directly in its final column order:
# original columns up to 'bmi' first, then one column per embedding dimension
original_cols1 = df_subset1.loc[:, :'bmi'].columns.tolist()
embeddings1_cols = list(range(embeddings1_np.shape[1]))
embeddings1_df = pd.concat(
    [
        df_subset1[original_cols1],
        pd.DataFrame(embeddings1_np, columns=embeddings1_cols),
    ],
    axis=1,
)

# 1-based row labels
embeddings1_df.index = range(1, len(embeddings1_df) + 1)

embeddings1_df.head()

Unnamed: 0,seqn,gender,age,race,education,married,pir,bmi,0,1,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
1,21009,1,55,3,3,1,3.79,1,-0.061991,0.034499,...,-0.010907,0.012167,-0.04477,-0.005853,-0.007444,-0.006063,-0.012152,-0.030841,0.038901,-0.011916
2,21010,2,52,3,4,6,1.24,1,-0.011184,0.066814,...,-0.049296,0.011262,-0.02785,0.024789,-0.005914,-0.013952,-0.045955,-0.084095,0.062894,0.029902
3,21012,1,63,4,3,6,0.89,0,-0.039453,0.028493,...,0.00331,0.027176,-0.055294,0.011044,-0.029307,0.011807,-0.034652,-0.017947,0.029808,0.008437
4,21015,1,83,3,4,1,1.2,1,-0.048768,0.024437,...,-0.046362,0.007679,-0.038224,-0.028987,-0.015003,0.02505,-0.025461,-0.002104,0.051183,-0.042558
5,21017,2,37,1,2,6,0.21,0,-0.060303,0.028413,...,-0.066772,0.010558,-0.019813,0.014503,-0.047679,0.018136,-0.029235,-0.060501,0.060125,-0.01874


In [22]:
# save embeddings for subset 1 (all embedding columns plus the metadata columns);
# to_csv is called with its defaults, so the DataFrame index is written as well
embeddings1_df.to_csv("./data/embeddings_moment_subset1_1024.csv")

In [24]:
# reducing dimension 
def reduce_dimension(embedding, dim=50):
    """Keep only the first ``dim`` columns of a 2-D embedding array.

    This is plain truncation of the column axis; no projection is fitted.

    Args:
        embedding: Object supporting 2-D ``[rows, columns]`` indexing.
        dim: Number of leading columns to keep.

    Returns:
        The result of indexing ``embedding`` with all rows and the leading
        ``dim`` columns.
    """
    leading_columns = slice(None, dim)
    return embedding[:, leading_columns]
reduced_embeddings1_np = reduce_dimension(embeddings1_np)

# build the table directly in its final column order:
# original columns up to 'bmi' first, then the retained embedding columns
original1_cols = df_subset1.loc[:, :'bmi'].columns.tolist()
embedding1_cols = list(range(reduced_embeddings1_np.shape[1]))
red_embeddings1_df = pd.concat(
    [
        df_subset1[original1_cols],
        pd.DataFrame(reduced_embeddings1_np, columns=embedding1_cols),
    ],
    axis=1,
)
# 1-based row labels
red_embeddings1_df.index = range(1, len(red_embeddings1_df) + 1)

red_embeddings1_df.head()

Unnamed: 0,seqn,gender,age,race,education,married,pir,bmi,0,1,...,40,41,42,43,44,45,46,47,48,49
1,21009,1,55,3,3,1,3.79,1,-0.061991,0.034499,...,0.035495,0.031233,-0.023033,0.005586,-0.03155,-0.88646,-0.090072,-0.123459,-0.012563,-0.026847
2,21010,2,52,3,4,6,1.24,1,-0.011184,0.066814,...,-0.020015,0.019269,-0.023844,-0.032517,-0.09713,-0.933672,-0.072217,-0.092843,0.016826,-0.035561
3,21012,1,63,4,3,6,0.89,0,-0.039453,0.028493,...,0.00816,-0.001205,-0.002655,0.012329,-0.039374,-0.84593,-0.082718,-0.155851,-0.012679,-0.024948
4,21015,1,83,3,4,1,1.2,1,-0.048768,0.024437,...,0.008419,0.048301,-0.027509,0.008435,-0.053612,-0.812731,-0.07559,-0.156991,-0.025177,-0.040843
5,21017,2,37,1,2,6,0.21,0,-0.060303,0.028413,...,0.017056,0.039506,-0.017053,0.012682,-0.067095,-0.816805,-0.047673,-0.140296,0.005803,-0.025212


In [26]:
# save the truncated (first-columns-only) embeddings for subset 1;
# to_csv is called with its defaults, so the DataFrame index is written as well
red_embeddings1_df.to_csv("./data/embeddings_moment_subset1_50.csv")

In [28]:
# embedding of the subset2
from pprint import pprint

# Inference only: eval() switches off the model's dropout layers so the
# embeddings are deterministic, and no_grad() avoids building an autograd graph.
model.eval()
with torch.no_grad():
    output2 = model(x_enc=subset_data2)
pprint(output2)

  return fn(*args, **kwargs)


TimeseriesOutputs(forecast=None,
                  anomaly_scores=None,
                  logits=None,
                  labels=None,
                  input_mask=tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]]),
                  pretrain_mask=None,
                  reconstruction=None,
                  embeddings=tensor([[-0.0550,  0.0400, -0.0481,  ..., -0.0404,  0.0455, -0.0380],
        [-0.0359,  0.0193, -0.0406,  ..., -0.0862,  0.0713, -0.0002],
        [-0.0250,  0.0239, -0.0192,  ..., -0.0529,  0.0415, -0.0261],
        ...,
        [-0.0615, -0.0107, -0.0376,  ..., -0.0081,  0.0572, -0.0397],
        [-0.0341,  0.0167, -0.0480,  ..., -0.0439,  0.0328, -0.0374],
        [-0.0525,  0.0176, -0.0418,  ..., -0.0206,  0.0385,  0.0292]]),
                  metadata='mean',
      

In [38]:
# pull the embedding matrix for the second subset out of the model output
embeddings2 = output2.embeddings
embeddings2_np = embeddings2.detach().cpu().numpy()

# rows of the original dataset that belong to this chunk, re-indexed from 0
df_subset2 = df.iloc[3500:].reset_index(drop=True)

# build the table directly in its final column order:
# original columns up to 'bmi' first, then one column per embedding dimension
original2_cols = df_subset2.loc[:, :'bmi'].columns.tolist()
embeddings2_cols = list(range(embeddings2_np.shape[1]))
embeddings2_df = pd.concat(
    [
        df_subset2[original2_cols],
        pd.DataFrame(embeddings2_np, columns=embeddings2_cols),
    ],
    axis=1,
)

# 1-based row labels
embeddings2_df.index = range(1, len(embeddings2_df) + 1)

embeddings2_df.head()

Unnamed: 0,seqn,gender,age,race,education,married,pir,bmi,0,1,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
1,31183,2,33,3,5,1,3.9,1,-0.055011,0.039953,...,-0.074217,-0.022236,-0.03604,0.023753,0.007128,-0.036365,-0.013751,-0.040431,0.04552,-0.037951
2,31186,2,46,1,1,3,1.68,1,-0.035945,0.019263,...,-0.030063,0.008317,-0.045595,-0.010246,-0.030636,-0.023607,-0.031996,-0.086167,0.071279,-0.0002
3,31187,2,22,3,4,6,4.29,1,-0.02496,0.023898,...,-0.032067,0.013064,-0.036885,-0.00443,-0.022011,-0.014765,-0.031623,-0.052872,0.041466,-0.026117
4,31194,2,47,1,1,3,1.02,1,-0.039385,0.047492,...,-0.036413,0.025165,-0.063565,0.029113,-0.045179,-0.027492,-0.020567,-0.053423,0.059044,0.000596
5,31195,1,73,3,4,1,5.0,0,-0.046546,0.026695,...,-0.031783,0.01181,-0.052689,-0.017665,-0.020827,-0.023144,-0.007389,-0.059902,0.054974,-0.015622


In [40]:
# save embeddings for subset 2 (all embedding columns plus the metadata columns);
# to_csv is called with its defaults, so the DataFrame index is written as well
embeddings2_df.to_csv("./data/embeddings_moment_subset2_1024.csv")

In [42]:
# reducing dimension for subset2
# `reduce_dimension` is already defined in the cell that reduces subset 1;
# it is reused here instead of being defined a second time.
reduced_embeddings2_np = reduce_dimension(embeddings2_np)

# build the table directly in its final column order:
# original columns up to 'bmi' first, then the retained embedding columns
original2_cols = df_subset2.loc[:, :'bmi'].columns.tolist()
embedding2_cols = list(range(reduced_embeddings2_np.shape[1]))
red_embeddings2_df = pd.concat(
    [
        df_subset2[original2_cols],
        pd.DataFrame(reduced_embeddings2_np, columns=embedding2_cols),
    ],
    axis=1,
)
# 1-based row labels
red_embeddings2_df.index = range(1, len(red_embeddings2_df) + 1)

red_embeddings2_df.head()

Unnamed: 0,seqn,gender,age,race,education,married,pir,bmi,0,1,...,40,41,42,43,44,45,46,47,48,49
1,31183,2,33,3,5,1,3.9,1,-0.055011,0.039953,...,0.044968,0.041012,-0.004977,0.01751,-0.047604,-0.740984,-0.065554,-0.168065,-0.018086,-0.036968
2,31186,2,46,1,1,3,1.68,1,-0.035945,0.019263,...,0.024751,0.019362,-0.038653,0.010897,-0.060261,-0.859908,-0.084964,-0.084851,0.004536,-0.048224
3,31187,2,22,3,4,6,4.29,1,-0.02496,0.023898,...,0.003397,0.028877,-0.019058,-0.022575,-0.050082,-0.916767,-0.077313,-0.14837,-0.013513,-0.022824
4,31194,2,47,1,1,3,1.02,1,-0.039385,0.047492,...,-0.019135,0.017536,-0.026117,0.025634,-0.048371,-0.864587,-0.067891,-0.099167,0.001687,-0.037764
5,31195,1,73,3,4,1,5.0,0,-0.046546,0.026695,...,0.013865,-0.003344,0.024455,0.03511,-0.054572,-0.821886,-0.078766,-0.111349,0.016212,-0.050051


In [46]:
# save the truncated (first-columns-only) embeddings for subset 2;
# to_csv is called with its defaults, so the DataFrame index is written as well
red_embeddings2_df.to_csv("./data/embeddings_moment_subset2_50.csv")

In [None]:
# visualize 
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): `y`, `c`, `n_samples` and `seq_len` are not defined in the
# visible part of this notebook — presumably they come from a synthetic
# dataset set up elsewhere; confirm before running this cell.
fig, axs = plt.subplots(1, 5, figsize=(30, 6), sharey=True)
axs = axs.flatten()  # flatten() returns a new array, so the result must be kept

# Exactly one evenly spaced, in-range sample index per panel. A stepped
# arange can yield more indices than there are panels (or a zero step)
# for small n_samples.
sample_indices = np.linspace(0, n_samples - 1, num=len(axs), dtype=int)
for i, idx in enumerate(sample_indices):
    axs[i].plot(y[idx].squeeze().numpy())
    axs[i].set_xticks(
        ticks=np.arange(0, seq_len+1, 128),
        labels=np.arange(0, seq_len+1, 128),
        fontdict={"fontsize" : 16}
    )
    axs[i].set_title(
        "Frequency: {:.2f}".format(c[:, 0][idx].squeeze().numpy()),
        fontsize=16
    )
# The y axis is shared, so ticks are only set on the first panel.
axs[0].set_yticks(
    ticks=np.arange(-1.5, 1.5, 0.5),
    labels=np.arange(-1.5, 1.5, 0.5),
    fontdict={"fontsize" : 16}
)
plt.show()

In [None]:
import torch

device = torch.device("cpu")  # CUDA not available

# Move the model to the target device and put it in evaluation mode
model.to(device).eval()
# NOTE(review): `y` is not defined in the visible part of this notebook — confirm its origin
y = y.to(device)

# Forward pass without gradient tracking
with torch.no_grad():
    outputs = model(x_enc=y)

In [None]:
from sklearn.decomposition import PCA

# Detach the embedding tensor from autograd, move it to the CPU and convert it to a NumPy array
embeddings = outputs.embeddings.detach().cpu().numpy()

# Perform PCA on the embeddings, keeping two components
embeddings_manifold = PCA(n_components=2).fit_transform(embeddings)

In [None]:
# Raw string: the LaTeX backslashes (\sin, \pi, \epsilon) are not valid Python
# escape sequences, and the title has no placeholders, so no f-string is needed.
plt.title(r"$y = \sin(2c \pi x) + \epsilon$", fontsize=20)
# NOTE(review): `c` and `synthetic_dataset` are not defined in the visible part
# of this notebook — confirm where they come from before running this cell.
plt.scatter(
    embeddings_manifold[:, 0],
    embeddings_manifold[:, 1],
    c=c[:, 0].squeeze().numpy(),
    cmap='magma'
)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
# Colorbar boundaries step by 1 across the dataset's frequency range
plt.colorbar(
    boundaries=np.arange(
    synthetic_dataset.freq_range[0],
    synthetic_dataset.freq_range[1]+1, 1)
)
plt.show()
plt.show()