In [1]:
import logging
import multiprocessing
import os
import random

# from google.colab import drive
import h5py
# not used in DeepEthogram; only to easily show plots
from IPython.display import Image
from omegaconf import OmegaConf
import pandas as pd
import torch

from deepethogram import configuration, postprocessing, projects, utils
from deepethogram.debug import print_dataset_info
from deepethogram.flow_generator.train import flow_generator_train
from deepethogram.feature_extractor.train import feature_extractor_train
from deepethogram.feature_extractor.inference import feature_extractor_inference
from deepethogram.sequence.train import sequence_train
from deepethogram.sequence.inference import sequence_inference



In [2]:
# Make sure we actually have a GPU
print(torch.__version__)
gpu_available = torch.cuda.is_available()
print('gpu available: {}'.format(gpu_available))

# Check availability BEFORE querying the device name: on a runtime without a
# usable GPU, get_device_name(0) raises its own error, which would hide the
# actionable message below.
assert gpu_available, 'Please select a GPU runtime and then restart!'
print('gpu name: {}'.format(torch.cuda.get_device_name(0)))

1.12.1
gpu available: True
gpu name: NVIDIA RTX A5000


## set project path

In [15]:
# Root of the DeepEthogram project on disk; every later cell builds paths from it.
project_path = '/home/yi/Sunze/mouse_pain_4_deepethogram'
files = os.listdir(project_path)

# Each required top-level entry, paired with the message shown when it is missing.
required_entries = (
    ('DATA', 'DATA directory not found! {}'),
    ('models', 'models directory not found! {}'),
    ('project_config.yaml', 'project config not found! {}'),
)
for entry_name, missing_message in required_entries:
    assert entry_name in files, missing_message.format(files)

In [17]:
def reset_logger():
    """Reconfigure the root logger so log statements are visible.

    Sets the root logger to INFO, detaches every handler currently attached
    to it, and attaches a single fresh ``logging.StreamHandler``.
    Approach taken from:
    https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a

    Returns:
        logging.Logger: the root logger.
    """
    root = logging.getLogger()  # no name -> root logger
    root.setLevel(logging.INFO)
    # Work from a snapshot: removeHandler mutates root.handlers while we iterate.
    stale_handlers = list(root.handlers)
    while stale_handlers:
        root.removeHandler(stale_handlers.pop(0))
    root.addHandler(logging.StreamHandler())
    return root

In [None]:
# Reset the root logger first so log statements show up in the cell output.
log = reset_logger()

data_dir = os.path.join(project_path, 'DATA')
print_dataset_info(data_dir)

## train and evaluate flow generator

In [19]:
# Build the flow generator training config for this project with the 'deg_f' preset.
preset = 'deg_f'
cfg = configuration.make_flow_generator_train_cfg(
    project_path,
    preset=preset,
)
# Uncomment to inspect the full config:
# print(OmegaConf.to_yaml(cfg))

In [20]:
# Use every CPU that multiprocessing reports as the worker count for this config.
# n_cpus is reused by later cells.
n_cpus = multiprocessing.cpu_count()
cfg.compute.num_workers = n_cpus

print('n cpus: {}'.format(n_cpus))

n cpus: 32


In [None]:
# Run flow generator training with the config built above and keep whatever
# flow_generator_train returns for later inspection.
flow_generator = flow_generator_train(cfg)

In [None]:
# Locate the most recent flow generator run and display its training figure.
model_path = os.path.join(project_path, 'models')
weights = projects.get_weights_from_model_path(model_path)
flow_weights = weights['flow_generator']
# because we used deg_f, our model type is a TinyMotionNet
latest_weights = flow_weights['TinyMotionNet'][-1]
# our run directory is two steps above the weight file
run_dir = os.path.dirname(os.path.dirname(latest_weights))
# The message previously formatted the undefined name `run_directory`, so a
# missing directory surfaced as a NameError instead of this assertion.
assert os.path.isdir(run_dir), 'run directory not found! {}'.format(run_dir)

figure_dir = os.path.join(run_dir, 'figures')
figure_files = utils.get_subfiles(figure_dir, 'file')
assert len(figure_files) == 1, 'expected exactly one figure in {}, found: {}'.format(figure_dir, figure_files)

Image(figure_files[0])

## train and evaluate feature extractor

In [32]:
# Build the feature extractor training config for this project with the 'deg_f' preset.
preset = 'deg_f'
cfg = configuration.make_feature_extractor_train_cfg(
    project_path,
    preset=preset,
)
# Uncomment to inspect the full config:
# print(OmegaConf.to_yaml(cfg))

In [33]:
# Reuse the CPU count computed earlier in the notebook as the worker count.
cfg.compute.num_workers = n_cpus
# 'latest' picks the most recent model by date;
# a specific .pt or .ckpt file can be passed here instead
cfg.flow_generator.weights = 'latest'

In [None]:
# Reset the root logger (INFO level, single stream handler) before training.
log = reset_logger()

# Run feature extractor training with the config prepared in the previous cells.
feature_extractor = feature_extractor_train(cfg)

In [None]:
# Locate the most recent feature extractor run and display its first training figure.
model_path = os.path.join(project_path, 'models')
weights = projects.get_weights_from_model_path(model_path)
# Named for what it holds; the copy-pasted `flow_weights` name was misleading
# and overwrote the flow generator weights from the earlier cell.
feature_extractor_weights = weights['feature_extractor']
# because we used deg_f, our model type is a resnet18
latest_weights = feature_extractor_weights['resnet18'][-1]
# our run directory is two steps above the weight file
run_dir = os.path.dirname(os.path.dirname(latest_weights))
# The message previously formatted the undefined name `run_directory`, so a
# missing directory surfaced as a NameError instead of this assertion.
assert os.path.isdir(run_dir), 'run directory not found! {}'.format(run_dir)

figure_dir = os.path.join(run_dir, 'figures')
figure_files = utils.get_subfiles(figure_dir, 'file')
assert len(figure_files) >= 1, 'no figures found in {}'.format(figure_dir)

Image(figure_files[0])

## run inference on feature extractor

In [37]:
# Build the feature extractor inference config; `preset` is the value set in
# the training section.
cfg = configuration.make_feature_extractor_inference_cfg(
    project_path=project_path,
    preset=preset,
)
# Uncomment to inspect the full config:
# print(OmegaConf.to_yaml(cfg))

In [38]:
# use the 'latest' weights for both networks
cfg.flow_generator.weights = 'latest'
cfg.feature_extractor.weights = 'latest'

# make sure errors are thrown
cfg.inference.ignore_error = False
cfg.inference.overwrite = True

cfg.compute.num_workers = 2

In [None]:
# Run feature extractor inference with the config prepared above.
# NOTE(review): presumably writes the per-video output file inspected in the
# next cells — confirm against deepethogram.
feature_extractor_inference(cfg)

In [40]:
# Parse the DATA directory to get the path to each file for each video,
# then pick one entry at random.
data_dir = os.path.join(project_path, 'DATA')
records = projects.get_records_from_datadir(data_dir)
animal_names = list(records.keys())
animal = random.choice(animal_names)
record = records[animal]

# The file written by inference is called the `outputfile` in various places in the code.
outputfile = record['output']

utils.print_hdf5(outputfile)

resnet18
|--- P: (30000, 5) float32
|--- class_names: (5,) object
|--- flow_features: (30000, 512) float32
|--- logits: (30000, 5) float32
|--- spatial_features: (30000, 512) float32
|--- thresholds: (5,) float32
attrs: 


In [41]:
# Read the 'resnet18/P' dataset from the output file with h5py.
with h5py.File(outputfile, 'r') as h5_file:
    probabilities = h5_file['resnet18/P'][:]

# n frames x K behaviors
print(probabilities.shape)
probabilities

(30000, 5)


array([[0.99963176, 0.00010334, 0.00058106, 0.0005716 , 0.00026475],
       [0.9997664 , 0.00007283, 0.00033909, 0.00107767, 0.00014999],
       [0.99962926, 0.00007196, 0.00032246, 0.00106743, 0.00019711],
       ...,
       [0.9986204 , 0.00026089, 0.0002461 , 0.00188831, 0.00020759],
       [0.99916863, 0.00016259, 0.00021173, 0.00226606, 0.00011618],
       [0.99795157, 0.00061912, 0.00027724, 0.00201472, 0.00022794]],
      dtype=float32)

## train and evaluate sequence model

In [42]:
# Build the sequence model training config for this project.
cfg = configuration.make_sequence_train_cfg(project_path=project_path)
# Reuse the CPU count computed earlier in the notebook as the worker count.
cfg.compute.num_workers = n_cpus

In [None]:
# Run sequence model training with the config built above and keep whatever
# sequence_train returns.
sequence_model = sequence_train(cfg)

In [None]:
# Locate the most recent sequence model run and display its first training figure.
model_path = os.path.join(project_path, 'models')
weights = projects.get_weights_from_model_path(model_path)
sequence_weights = weights['sequence']
# the sequence type is always tgmj, a slightly modified TGM model
latest_weights = sequence_weights['tgmj'][-1]
# our run directory is two steps above the weight file
run_dir = os.path.dirname(os.path.dirname(latest_weights))
# The message previously formatted the undefined name `run_directory`, so a
# missing directory surfaced as a NameError instead of this assertion.
assert os.path.isdir(run_dir), 'run directory not found! {}'.format(run_dir)

figure_dir = os.path.join(run_dir, 'figures')
figure_files = utils.get_subfiles(figure_dir, 'file')
assert len(figure_files) >= 1, 'no figures found in {}'.format(figure_dir)

Image(figure_files[0])

## sequence inference

In [None]:
# Build the sequence inference config for this project.
cfg = configuration.make_sequence_inference_cfg(project_path)

# Same settings as the feature extractor inference section: 'latest' weights,
# overwrite enabled, errors not ignored.
cfg.sequence.weights = 'latest'
cfg.inference.overwrite = True
cfg.inference.ignore_error = False
cfg.compute.num_workers = n_cpus

sequence_inference(cfg)

In [46]:
# Parse the DATA directory to get the path to each file for each video,
# then pick one entry at random.
data_dir = os.path.join(project_path, 'DATA')
records = projects.get_records_from_datadir(data_dir)
animal_names = list(records.keys())
animal = random.choice(animal_names)
record = records[animal]

# The file written by inference is called the `outputfile` in various places in the code.
outputfile = record['output']

utils.print_hdf5(outputfile)

resnet18
|--- P: (30000, 5) float32
|--- class_names: (5,) object
|--- flow_features: (30000, 512) float32
|--- logits: (30000, 5) float32
|--- spatial_features: (30000, 512) float32
|--- thresholds: (5,) float32
tgmj
|--- P: (30000, 5) float32
|--- class_names: (5,) object
|--- logits: (30000, 5) float32
|--- thresholds: (5,) float32
attrs: 


In [48]:
# Read the 'tgmj/P' and 'tgmj/thresholds' datasets from the output file with h5py.
with h5py.File(outputfile, 'r') as h5_file:
    probabilities = h5_file['tgmj/P'][:]
    thresholds = h5_file['tgmj/thresholds'][:]

# n frames x K behaviors
print(probabilities.shape)
print(thresholds)

(30000, 5)
[0.01       0.5        0.5        0.37694675 0.03024774]


In [49]:
# Build the postprocessing config for this project.
cfg = configuration.make_postprocessing_cfg(project_path=project_path)

In [50]:
# Run postprocessing with the config built above.
# NOTE(review): presumably this writes the `<key>_predictions.csv` files that
# the next cell reads — confirm against deepethogram.postprocessing.
postprocessing.postprocess_and_save(cfg)

[2022-09-18 00:20:45,294] INFO [deepethogram.projects.convert_config_paths_to_absolute:1135] cwd in absolute: /home/yi/Sunze/mouse_pain_4_deepethogram/models/220918_001755_sequence_inference
[2022-09-18 00:20:45,295] INFO [deepethogram.projects.convert_config_paths_to_absolute:1178] after absolute: {'class_names': ['background', 'left_hindpaw_biting/licking', 'right_hindpaw_biting/licking', 'genital_licking', 'bout_of_hindpaw_scratchingandbiting'], 'config_file': '/home/yi/Sunze/mouse_pain_4_deepethogram/project_config.yaml', 'data_path': '/home/yi/Sunze/mouse_pain_4_deepethogram/DATA', 'labeler': None, 'model_path': '/home/yi/Sunze/mouse_pain_4_deepethogram/models', 'name': 'mouse_pain_4', 'path': '/home/yi/Sunze/mouse_pain_4_deepethogram', 'pretrained_path': '/home/yi/Sunze/mouse_pain_4_deepethogram/models/pretrained_models'}


In [51]:
# Pick a random record from the DATA directory.
data_dir = os.path.join(project_path, 'DATA')
records = projects.get_records_from_datadir(data_dir)
animal_names = list(records.keys())
animal = random.choice(animal_names)
record = records[animal]

# The predictions csv sits next to the record's rgb file and is named after the record key.
video_dir = os.path.dirname(record['rgb'])
predictions_basename = record['key'] + '_predictions.csv'
predictions_filename = os.path.join(video_dir, predictions_basename)
assert os.path.isfile(predictions_filename)

# Read the csv, using its first column as the index, and show the first rows.
df = pd.read_csv(predictions_filename, index_col=0)
df.head()

Unnamed: 0,background,left_hindpaw_biting/licking,right_hindpaw_biting/licking,genital_licking,bout_of_hindpaw_scratchingandbiting
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0
