In [1]:
# default_exp inference

In [2]:
#all_slow

In [3]:
#hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# inference

> All inference related methods

In [4]:
#export
import datetime, warnings, gc
from inspect import signature
import sklearn.metrics as skm

from fastai.text.all import *

from tritonlytics_ai.utils import *

In [5]:
#hide
import pdb

from nbdev.showdoc import *
from fastcore.test import *

In [6]:
#hide
from fastai import __version__ as fa_version
from torch import __version__ as pt_version
from transformers import __version__ as hft_version

print(f'Using pytorch {pt_version}')
print(f'Using fastai {fa_version}')
print(f'Using transformers {hft_version}')

Using pytorch 1.7.1+cu110
Using fastai 2.2.7
Using transformers 4.3.3


In [7]:
#cuda
torch.cuda.set_device(1)
print(f'Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}')

Using GPU #1: GeForce GTX 1080 Ti


## Utility 

In [8]:
#export
def concat_pool(raw_outputs):
    last_rnn_layer = raw_outputs[:,-1,None] # (e.g. (bs,n_hid,emb_sz) => (bs,1,emb_sz)) 
    bsz = last_rnn_layer.shape[0] 
    
    avg_pool = F.adaptive_avg_pool1d(last_rnn_layer.permute(0,2,1), 1).view(bsz, -1)
    max_pool = F.adaptive_max_pool1d(last_rnn_layer.permute(0,2,1), 1).view(bsz, -1)
    last_outp = last_rnn_layer[:,-1,:]

    return torch.cat([last_outp, max_pool, avg_pool], 1)

## What tasks should be run?

In [9]:
job_items_d = json.loads((RAW_DATA_PATH/'tl-inference-job.json').read_text())

In [10]:
job_keys = [ 'job_id', 'client_id', 'dataset_id', 'id_col', 'txt_col', 'lang_col', 'tasks_config' ]
job_d = { k: job_items_d.get(k, None) for k in job_keys }

In [11]:
inf_df = pd.DataFrame(job_items_d['items'])

In [12]:
inf_df.head()

Unnamed: 0,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences
0,612674,Accounts Payable department specifically Cyndi Williams & Ann Avery.,English,1,30471,2
1,612674,They never hesitate to help you even when they are swamped with work.,English,2,30472,2
2,612676,N/A at this time,English,1,30473,1
3,612675,VP of Student Affairs office,English,1,30474,1
4,612678,Office of General Counsel.,English,1,30475,2


In [13]:
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda:1


## Sentiment

In [14]:
from tritonlytics_ai.verbatims.sentiment import get_preds as get_sent_preds

In [15]:
res_df, model_name, train_res, inf_labels = get_sent_preds(inf_df, 
                                                           device=device, 
                                                           train_config_updates={ 'orig_txt_col': job_d['txt_col'] })

0


In [16]:
res_df.head()

Unnamed: 0,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences,prob_is_very_positive,prob_is_positive,prob_is_very_negative,prob_is_negative,...,prob_has_profanity,prob_is_nonsense,pred_is_very_positive,pred_is_positive,pred_is_very_negative,pred_is_negative,pred_is_suggestion,pred_feels_threatened,pred_has_profanity,pred_is_nonsense
0,612674,Accounts Payable department specifically Cyndi Williams & Ann Avery.,English,1,30471,2,0.005418,0.059851,0.000594,0.012324,...,7e-05,0.025398,0,0,0,0,0,0,0,0
1,612674,They never hesitate to help you even when they are swamped with work.,English,2,30472,2,0.424818,0.99067,0.001512,0.002767,...,0.000351,0.0047,0,1,0,0,0,0,0,0
2,612676,N/A at this time,English,1,30473,1,0.001201,0.015943,0.001286,0.010837,...,0.000147,0.94414,0,0,0,0,0,0,0,1
3,612675,VP of Student Affairs office,English,1,30474,1,0.003949,0.016991,0.000548,0.005656,...,0.000101,0.044792,0,0,0,0,0,0,0,0
4,612678,Office of General Counsel.,English,1,30475,2,0.002214,0.015286,0.000429,0.004654,...,4.1e-05,0.311552,0,0,0,0,0,0,0,0


In [17]:
model_name, inf_labels, train_res

('20210304_verbatim_sent_multilabel_hf_export',
 ['is_very_positive',
  'is_positive',
  'is_very_negative',
  'is_negative',
  'is_suggestion',
  'feels_threatened',
  'has_profanity',
  'is_nonsense'],
 {'valid_loss': 0.13250146806240082,
  'accuracy_multi': 0.9504229426383972,
  'fbeta_score': 0.8431665752389367,
  'precision_score': 0.8404013895595986,
  'recall_score': 0.8485148514851485,
  'roc_auc_score': 0.8950481807840723,
  'opt_th': 0.5099999999999998,
  'f05': {'threshold': 0.49000000000000005, 'score': 0.8374586120559782},
  'f1': {'threshold': 0.49000000000000005, 'score': 0.8427775871170751},
  'f2': {'threshold': 0.19, 'score': 0.8801877913256723}})

## Standard Themes

In [18]:
from tritonlytics_ai.verbatims.standard_themes_saw import get_preds as get_saw_theme_preds
from tritonlytics_ai.verbatims.standard_themes_css import get_preds as get_css_theme_preds

from tritonlytics_ai.verbatims.standard_themes_meta import (
    get_preds as get_meta_theme_preds, 
    build_meta_inf_df, get_train_x,sentiment_mse, is_example_acc,
    Meta_MM, Meta_MM_HF_BaseModelCallback
)

[nltk_data] Downloading package wordnet to /home/wgilliam/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### Standard Themes - S@W

In [19]:
res_df, modle_name, train_res, inf_labels = get_saw_theme_preds(inf_df, device=device, 
                                                                train_config_updates={ 
                                                                    'orig_txt_col': job_d['txt_col'] 
                                                                })

0


In [20]:
res_df.head()

Unnamed: 0,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences,prob_adequate_staffing,prob_advancement_and_training_opportunities,prob_appropriate_stress_work_assigned_equitably,prob_benefits,...,pred_feel_valued_by_department,pred_flexibility_work_life_balance,pred_good_use_of_skills,pred_have_necessary_tools,pred_have_voice_within_my_institution_valued_member_of_my_institution,pred_internal_processes_effective,pred_parking_transportation,pred_salary_pay,pred_satisfied_with_diversity_progams,pred_supervisor_effectiveness_resolves_staff_issues
0,612674,Accounts Payable department specifically Cyndi Williams & Ann Avery.,English,1,30471,2,0.03912,0.002478,0.006757,0.008032,...,0,0,0,0,0,0,0,0,0,0
1,612674,They never hesitate to help you even when they are swamped with work.,English,2,30472,2,0.32744,0.004417,0.071283,0.004766,...,0,0,0,0,0,0,0,0,0,0
2,612676,N/A at this time,English,1,30473,1,0.012446,0.006098,0.005583,0.001028,...,0,0,0,0,0,0,0,0,0,0
3,612675,VP of Student Affairs office,English,1,30474,1,0.013531,0.003054,0.002093,0.000311,...,0,0,0,0,0,0,0,0,0,0
4,612678,Office of General Counsel.,English,1,30475,2,0.013395,0.00327,0.004644,0.000477,...,0,0,0,0,0,0,0,0,0,1


Save this for testing/developing the meta predictions

In [21]:
res_df.to_csv(STANDARD_THEME_SAW_PATH/'test_saw_themes_predictions_multilabel_hf.csv', index=False)

In [22]:
model_name, inf_labels, train_res

('20210304_verbatim_sent_multilabel_hf_export',
 ['adequate_staffing',
  'advancement_and_training_opportunities',
  'appropriate_stress_work_assigned_equitably',
  'benefits',
  'better_ways_recognized_participate_in_decisions',
  'career_advancement',
  'committed_to_diversity',
  'communicates_essential_information',
  'ethical_conduct_perform_responsibilities_spirit_of_cooperation',
  'evaluated_fairly',
  'experienced_discrimination',
  'facilities_workspace_safety',
  'faculty_value_contributions',
  'favoritism_cliques',
  'fear_of_retaliation_negative_consequences',
  'feel_valued_by_department',
  'flexibility_work_life_balance',
  'good_use_of_skills',
  'have_necessary_tools',
  'have_voice_within_my_institution_valued_member_of_my_institution',
  'internal_processes_effective',
  'parking_transportation',
  'salary_pay',
  'satisfied_with_diversity_progams',
  'supervisor_effectiveness_resolves_staff_issues'],
 {'valid_loss': 0.16207988560199738,
  'accuracy_multi': 0.92573

In [23]:
meta_inf_df = build_meta_inf_df(res_df, theme_prob_threshold=train_res['f05']['threshold'])

meta_res_df, model_name, meta_train_res, _ = get_meta_theme_preds(meta_inf_df, 
                                                                  device=device, 
                                                                  train_config_updates={ 
                                                                      'orig_txt_cols': ['theme', job_d['txt_col']] 
                                                                  })

print(meta_res_df.shape, len(inf_labels))

0
(2151, 13) 25


In [24]:
meta_res_df.head()

Unnamed: 0,index,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences,theme,theme_prob,url_friendly_theme,prob_avg_sentiment,prob_is_example,pred_is_example
0,191,612608,Get that department more resources PLEASE!,English,4,30655,4,Adequate Staffing,0.424475,AdequateStaffing,2.156372,0.001103,0
1,798,613014,"The team had to start an involutary enrollment migration from one campus to another, while preparing and completing a program accreditation visit, while at the same time hiring and training new faculty to replace recent retirees, and involutarily moving office location (to a location that was not and is not setup for office use) with minimal impact to our students, maintaining our academic schedule and attending new recruitment events that support the Chancellor's missions to prepare more teachers and recruit more diverse pool of teachers .",English,1,31247,2,Adequate Staffing,0.565492,AdequateStaffing,2.386427,0.002057,0
2,800,613019,Human resources/personnel management is critical and often difficult for divisions/departments to handle when problems arise.,English,1,31249,2,Adequate Staffing,0.450304,AdequateStaffing,2.012432,0.001548,0
3,880,612993,"The CO team has been a valuable resource and support to our campus which has/is experiencing high turnover, resource issues and lack of campus support to meet CSU initiatives.",English,1,31332,1,Adequate Staffing,0.459479,AdequateStaffing,4.221957,0.002297,0
4,912,612952,"Most of CSULB Procurement and Contractual staff go above and beyond the call of duty, daily.",English,1,31363,4,Adequate Staffing,0.499364,AdequateStaffing,3.993353,0.015695,0


In [25]:
model_name, meta_train_res

('20210308_verbatim_standard_theme_meta_multilabel_hf_export',
 {'valid_loss': 0.36898770928382874,
  'sentiment_mse': 0.3652612566947937,
  'is_example_acc': 0.9932935833930969,
  'is_example_f05': {'threshold': 0.29000000000000004,
   'score': 0.1724137931034483},
  'is_example_f1': {'threshold': 0.28, 'score': 0.17142857142857146},
  'is_example_f2': {'threshold': 0.28, 'score': 0.24193548387096775},
  'sentiment': {'mae': 0.4495120048522949,
   'mse': 0.3652612864971161,
   'rmse': 0.6043685022377623}})

## Standard Themes - CSS

In [26]:
res_df, model_name, train_res, inf_labels = get_css_theme_preds(inf_df, device=device, 
                                                                train_config_updates={ 
                                                                    'orig_txt_col': job_d['txt_col'] 
                                                                })

0


In [27]:
res_df.head()

Unnamed: 0,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences,prob_accessible_to_customers,prob_consistency_in_policies_information,prob_cost_fees,prob_courteous_professional_staff,...,pred_helpful_staff,pred_knowledgeable_staff,pred_moving_in_a_positive_direction,pred_overall_satisfaction,pred_process_improvement,pred_provides_effective_advice_guidance,pred_provides_training_on_processes_applications,pred_resolves_problems_effectively,pred_responds_to_requests_within_an_acceptable_time,pred_understands_my_needs_and_requirements
0,612674,Accounts Payable department specifically Cyndi Williams & Ann Avery.,English,1,30471,2,0.156758,0.01462,0.001177,0.019363,...,0,0,0,0,0,0,0,0,0,0
1,612674,They never hesitate to help you even when they are swamped with work.,English,2,30472,2,0.007335,0.001945,0.000877,0.019079,...,1,0,0,0,0,0,0,0,0,0
2,612676,N/A at this time,English,1,30473,1,0.162086,0.000752,0.000963,0.001177,...,0,0,0,1,0,0,0,0,0,0
3,612675,VP of Student Affairs office,English,1,30474,1,0.191918,0.002234,0.000184,0.029448,...,0,0,0,0,0,0,0,0,0,0
4,612678,Office of General Counsel.,English,1,30475,2,0.074725,0.007196,0.000336,0.006518,...,0,0,0,0,0,0,0,0,0,0


In [28]:
model_name, inf_labels, train_res

('20210306_verbatim_standard_theme_css_multilabel_hf_export',
 ['accessible_to_customers',
  'consistency_in_policies_information',
  'cost_fees',
  'courteous_professional_staff',
  'effective_communications',
  'effectively_uses_websites_online_documentation',
  'helpful_staff',
  'knowledgeable_staff',
  'moving_in_a_positive_direction',
  'overall_satisfaction',
  'process_improvement',
  'provides_effective_advice_guidance',
  'provides_training_on_processes_applications',
  'resolves_problems_effectively',
  'responds_to_requests_within_an_acceptable_time',
  'understands_my_needs_and_requirements'],
 {'valid_loss': 0.1384221762418747,
  'accuracy_multi': 0.9382886290550232,
  'fbeta_score': 0.6301702408610497,
  'precision_score': 0.5861728086477542,
  'recall_score': 0.6925,
  'roc_auc_score': 0.8195069292762297,
  'opt_th': 0.23999999999999994,
  'f05': {'threshold': 0.4800000000000001, 'score': 0.6526789780639173},
  'f1': {'threshold': 0.24000000000000005, 'score': 0.6301702

In [29]:
meta_inf_df = build_meta_inf_df(res_df, theme_prob_threshold=train_res['f05']['threshold'])

meta_res_df, model_name, meta_train_res, _ = get_meta_theme_preds(meta_inf_df, 
                                                                  device=device, 
                                                                  train_config_updates={ 
                                                                      'orig_txt_cols': ['theme', job_d['txt_col']] 
                                                                  })

print(meta_res_df.shape, len(inf_labels))

0
(3761, 13) 16


In [30]:
meta_res_df.head()

Unnamed: 0,index,MLVerbatimId,AnswerText,AnswerLang,_seq_id,_tl_item_id,_n_sentences,theme,theme_prob,url_friendly_theme,prob_avg_sentiment,prob_is_example,pred_is_example
0,60,612632,We're almost there!,English,2,30528,3,Accessible To Customers,0.524623,AccessibleToCustomers,3.764637,0.001769,0
1,178,612601,When you email her for info or data she gets back almost immediately.,English,5,30642,8,Accessible To Customers,0.690319,AccessibleToCustomers,2.950185,0.001319,0
2,311,612561,Meaghan Smith,English,1,30769,1,Accessible To Customers,0.517591,AccessibleToCustomers,3.046852,0.000505,0
3,393,612834,Facility Services/Mailroom,English,1,30844,1,Accessible To Customers,0.51378,AccessibleToCustomers,3.059287,0.000459,0
4,400,612828,Security\r\nHR and Evelyn,English,1,30850,1,Accessible To Customers,0.48945,AccessibleToCustomers,3.019897,0.00128,0


In [31]:
model_name, meta_train_res

('20210308_verbatim_standard_theme_meta_multilabel_hf_export',
 {'valid_loss': 0.36898770928382874,
  'sentiment_mse': 0.3652612566947937,
  'is_example_acc': 0.9932935833930969,
  'is_example_f05': {'threshold': 0.29000000000000004,
   'score': 0.1724137931034483},
  'is_example_f1': {'threshold': 0.28, 'score': 0.17142857142857146},
  'is_example_f2': {'threshold': 0.28, 'score': 0.24193548387096775},
  'sentiment': {'mae': 0.4495120048522949,
   'mse': 0.3652612864971161,
   'rmse': 0.6043685022377623}})

## Cleanup

In [36]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_utils.ipynb.
Converted 01_verbatims-sentiment.ipynb.
Converted 02a_verbatims-standard-themes-saw.ipynb.
Converted 02b_verbatims-standard-themes-css.ipynb.
Converted 02c_verbatims-standard-themes-meta.ipynb.
Converted 99_inference.ipynb.
Converted index.ipynb.


## Playground