# Python Projet 5 - Prédiction des tags 

## Multilabel classification avec la méthode One-vs-Rest

Dans le cadre du cours OpenClassrooms Machine Learning et du projet d'évaluation #5, ce notebook présente les différents modèles de prédiction des tags ainsi qu'une comparaison avec l'analyse non supervisée obtenue par LDA. 

Author : Vincent Arrigoni, 04/2023 

Regexp : https://www.debuggex.com/cheatsheet/regex/python

Tout savoir :https://datascientest.com/regex-tout-savoir

Test regexp : https://regex101.com/

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# import of libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer 
from sklearn.multioutput import MultiOutputClassifier 
from sklearn.metrics import precision_score, recall_score, jaccard_score, classification_report
from gensim.models import Word2Vec
import re
from tqdm import tqdm
from time import time
from sklearn.utils import shuffle

# import tensorflow as tf
# import tensorflow_hub as hub
# import tensorflow_text as text
# from official.nlp import optimization  # to create AdamW optimizer

In [3]:
data = pd.read_csv('/content/drive/MyDrive/OCS/Projet5/msg_preprocessed.csv')
data.head(10)

Unnamed: 0.1,Unnamed: 0,Text,Tags,Token,Tags_lst,Tags_lst_new,Token_lst_new
0,0,how do I calculate a rolling idxmax<p>consider...,<python><pandas><numpy><dataframe><series>,"['calcul', 'roll', 'idxmax', 'consid', 'pd', '...","['python', 'pandas', 'numpy', 'dataframe', 'se...",['python'],"['calcul', 'roll', 'idxmax', 'consid', 'pd', '..."
1,1,Object-Oriented Callbacks for C++?<p>Is there ...,<c++><oop><callback><pointer-to-member><eiffel>,"['object', 'orient', 'callback', 'c++', 'libra...","['c++', 'oop', 'callback', 'pointer-to-member'...",['c++'],"['object', 'orient', 'callback', 'c++', 'libra..."
2,2,TDD and ADO.NET Entity Framework<p>I've been p...,<unit-testing><entity-framework><ado.net><tdd>...,"['tdd', 'ado', '.net', 'entiti', 'framework', ...","['unit-testing', 'entity-framework', 'ado.net'...",['unit-testing'],"['tdd', 'ado', '.net', 'entiti', 'framework', ..."
3,3,Better techniques for trimming leading zeros i...,<sql><sql-server><sql-server-2005><tsql><string>,"['better', 'techniqu', 'trim', 'lead', 'zero',...","['sql', 'sql-server', 'sql-server-2005', 'tsql...","['sql', 'sql-server', 'string']","['better', 'techniqu', 'trim', 'lead', 'zero',..."
4,4,"""No X11 DISPLAY variable"" - what does it mean?...",<java><linux><variables><x11><headless>,"['x11', 'display', 'variabl', 'mean', 'tri', '...","['java', 'linux', 'variables', 'x11', 'headless']","['java', 'linux']","['x11', 'display', 'variabl', 'mean', 'tri', '..."
5,5,Why doesn't .NET find the OpenSSL.NET dll?<p><...,<c#><.net><dll><dllimport><dllnotfoundexception>,"['.net', 'find', 'openssl', '.net', 'dll', 'ed...","['c#', '.net', 'dll', 'dllimport', 'dllnotfoun...","['c#', '.net']","['.net', 'find', 'openssl', '.net', 'dll', 'ed..."
6,6,Iterating through/Parsing JSON Object via Java...,<javascript><json><jquery><parsing><loops>,"['iter', 'pars', 'json', 'object', 'via', 'jav...","['javascript', 'json', 'jquery', 'parsing', 'l...","['javascript', 'json', 'jquery']","['iter', 'pars', 'json', 'object', 'via', 'jav..."
7,7,Class inherited from class without default con...,<c++><class><inheritance><constructor><default...,"['class', 'inherit', 'class', 'without', 'defa...","['c++', 'class', 'inheritance', 'constructor',...",['c++'],"['class', 'inherit', 'class', 'without', 'defa..."
8,8,libxml2 vs expat for an XMPP server<p>I'm tryi...,<c><go><xmpp><libxml2><expat-parser>,"['libxml2', 'vs', 'expat', 'xmpp', 'server', '...","['c', 'go', 'xmpp', 'libxml2', 'expat-parser']",['c'],"['libxml2', 'vs', 'expat', 'xmpp', 'server', '..."
9,9,Implications of deploying a Debug build of an ...,<c#><security><debugging><deployment><compiler...,"['implic', 'deploy', 'debug', 'build', 'applic...","['c#', 'security', 'debugging', 'deployment', ...","['c#', 'security', 'debugging']","['implic', 'deploy', 'debug', 'build', 'applic..."


In [4]:
def regexp_Token(x):
  return(re.findall(r'(?:C\+\+)|(?:c\+\+)|(?:c\#)|(?:C\#)|(?:\.net)|(?:\.NET)|\w{2,}', x['Token_lst_new']))
         
def regexp_Tags(x):
  return(re.findall(r'(?:C\+\+)|(?:c\+\+)|(?:c\#)|(?:C\#)|(?:\.net)|(?:\.NET)|\w{2,}', x['Tags_lst_new']))

ds = pd.DataFrame({'Token' : data[:5000].apply(regexp_Token, axis=1), 'Tags' : data[:5000].apply(regexp_Tags, axis=1)})
ds = shuffle(ds)
ds.reset_index(inplace=True)
ds.head(10)

Unnamed: 0,index,Token,Tags
0,4563,"[hibern, ehcach, evict, collect, 2nd, level, c...","[java, hibernate]"
1,1213,"[make, maven, build, goal, site, fail, javadoc...",[java]
2,3670,"[use, string, tupl, key, hashmap, need, java, ...","[java, arrays]"
3,4600,"[iphon, ad, segment, control, toolbar, instead...",[iphone]
4,2239,"[iter, access, perform, stl, map, vs, vector, ...","[c++, performance]"
5,1747,"[imagemagick, imagick, convert, pdf, jpg, use,...","[php, image]"
6,2054,"[spring, boot, find, jsp, view, helo, everyon,...","[java, xml, hibernate]"
7,3189,"[chuck, audio, input, output, error, ubuntu, i...",[linux]
8,3700,"[ab, program, freez, lot, request, whenev, use...",[debugging]
9,23,"[recommend, sql, databas, design, tag, tag, he...",[sql]


## Approche Bag Of Words pour la classification par Régression Logistique 

In [5]:
def dummy(doc):
  return doc
  # https://stackoverflow.com/questions/35867484/pass-tokens-to-countvectorizer
  # pas besoin que countvectorizer fasse un preprocessing ou une tokenization des documents

vectorizer = CountVectorizer(tokenizer=dummy, preprocessor=dummy, binary=True)
X = vectorizer.fit_transform(ds['Token'])
X = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())

LabelBinarizer = MultiLabelBinarizer()
y = LabelBinarizer.fit_transform(ds['Tags'])
y = pd.DataFrame(y, columns=LabelBinarizer.classes_)



In [6]:
# Train-Test split 
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Build the model
model = MultiOutputClassifier(LogisticRegression())
# https://scikit-learn.org/stable/modules/multiclass.html

In [7]:
search = GridSearchCV(
    estimator = model,
    param_grid={'estimator__C':np.logspace(-3, 0, 4)},
    scoring = ['accuracy', 'precision_micro', 'recall_micro'],  
    refit = 'precision_micro',
    cv = 5
).fit(X_train, y_train)
# https://datascience.stackexchange.com/questions/107867/how-to-train-multioutput-classification-with-hyperparameter-tuning-in-sklearn
# https://stackoverflow.com/questions/41899132/invalid-parameter-for-sklearn-estimator-pipeline
# https://scikit-learn.org/stable/modules/model_evaluation.html#from-binary-to-multiclass-and-multilabel

# error : Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

print(search.best_estimator_)
print(search.best_params_)
clf_BOW = search.best_estimator_ 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KeyboardInterrupt: ignored

In [None]:
y_pred = clf_BOW.predict(X_test)
print("Classifier report:\n", classification_report(y_test, y_pred, target_names=LabelBinarizer.classes_, zero_division=0))
# Calculate the Jaccard score
score = jaccard_score(y_test, y_pred, average='micro')
print(f"Jaccard score: {score:.4f}")

## Approche TF-IDF pour la classification par Régression Logistique

In [None]:
def dummy(doc):
  return doc

# Binarized before Train/Test split
LabelBinarizer = MultiLabelBinarizer()
y_tfidf = LabelBinarizer.fit_transform(ds['Tags'])
y_tfidf = pd.DataFrame(y_tfidf, columns=LabelBinarizer.classes_)

# Train-Test split 
X_train_tfidf, X_test_tfidf, y_train_tfidf, y_test_tfidf = train_test_split(ds['Token'], y_tfidf)

# Binarized after Train/Test split as the frequency depends on the sample size
# Perform tf-idf on all dataset could induce data leak
tfidf_vectorizer = TfidfVectorizer(preprocessor=dummy, tokenizer=dummy)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train_tfidf)
X_test_tfidf = tfidf_vectorizer.transform(X_test_tfidf)
# https://www.kaggle.com/code/neerajmohan/nlp-text-classification-using-tf-idf-features

# Build the model
model = MultiOutputClassifier(LogisticRegression())

In [None]:
search_tfidf = GridSearchCV(
    estimator = model,
    param_grid={'estimator__C':np.logspace(-3, 0, 4)},
    scoring = ['accuracy', 'precision_micro', 'recall_micro'],
    refit = 'precision_micro',
    cv = 5
).fit(X_train_tfidf, y_train_tfidf)

print(search_tfidf.best_estimator_)
print(search_tfidf.best_params_)
clf_TFIDF = search_tfidf.best_estimator_

In [None]:
y_pred_tfidf = clf_TFIDF.predict(X_test_tfidf)
print("Classifier report:\n", classification_report(y_test_tfidf, y_pred_tfidf, target_names=LabelBinarizer.classes_, zero_division=0))
# Calculate the Jaccard score
score = jaccard_score(y_test_tfidf, y_pred_tfidf, average='micro')
print(f"Jaccard score: {score:.4f}")

## Word Embeddings with Word2Vec and Logistic Regression

In [None]:
# Word2Vec

# Binarized before Train/Test split
LabelBinarizer = MultiLabelBinarizer()
y_wrd2vec = LabelBinarizer.fit_transform(ds['Tags'])
# y_wrd2vec = pd.DataFrame(y_wrd2vec, columns=LabelBinarizer.classes_)

# Train-Test split 
X_train_wrd2vec, X_test_wrd2vec, y_train_wrd2vec, y_test_wrd2vec = train_test_split(ds['Token'], y_wrd2vec)

# Train the word2vec model 
w2v_model = Word2Vec(X_train_wrd2vec, 
                     vector_size=100,
                     window=5, 
                     min_count=2)

In [None]:
len(w2v_model.wv.index_to_key) # all the words that appear at least twice in the document

In [None]:
w2v_model.wv[1] # embedding of the second word of the first sentence

In [None]:
w2v_model.wv.most_similar(positive=["python"]) 

In [None]:
words = set(w2v_model.wv.index_to_key) # vocabulary correspondance
# https://medium.com/@dilip.voleti/classification-using-word2vec-b1d79d375381

X_train_vect = np.array([np.array([w2v_model.wv[i] for i in ls if i in words]) for ls in X_train_wrd2vec]) # concatenate word embeddings of the same sentence in the same element of X_train_vect

X_test_vect = np.array([np.array([w2v_model.wv[i] for i in ls if i in words])  for ls in X_test_wrd2vec])

In [None]:
# Compute sentence embeddings by averaging the word embeddings for the words contained in the sentence
X_train_vect_avg = []
for v in X_train_vect:
    if v.size:
        X_train_vect_avg.append(v.mean(axis=0))
    else:
        X_train_vect_avg.append(np.zeros(100, dtype=float))
        
X_test_vect_avg = []
for v in X_test_vect:
    if v.size:
        X_test_vect_avg.append(v.mean(axis=0))
    else:
        X_test_vect_avg.append(np.zeros(100, dtype=float))

In [None]:
# Build the model
model_WRD2VEC = MultiOutputClassifier(LogisticRegression(solver='newton-cholesky'))
model_WRD2VEC.fit(X_train_vect_avg, y_train_wrd2vec)

In [None]:
# Instantiate and fit a basic Random Forest model on top of the vectors
from sklearn.ensemble import RandomForestClassifier
rf = MultiOutputClassifier(RandomForestClassifier())
rf_model = rf.fit(X_train_vect_avg, y_train_wrd2vec)

In [None]:
# Use the trained model to make predictions on the test data
y_pred = rf_model.predict(X_test_vect_avg)

In [None]:
y_pred_wrd2vec = model_WRD2VEC.predict(X_test_vect_avg)
print("Classifier report:\n", classification_report(y_test_wrd2vec, y_pred_wrd2vec, target_names=LabelBinarizer.classes_, zero_division=0))
# Calculate the Jaccard score
score = jaccard_score(y_test_wrd2vec, y_pred_wrd2vec, average='samples')
print(f"Jaccard score: {score:.4f}")

## BERT NLP algorithm

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import BertTokenizer, TFBertModel
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, padding=True)
model_BERT = TFBertModel.from_pretrained("bert-base-uncased")
# text = "R:eplace me by any text you'd like."
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# print(output)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# from transformers import AutoModel # For BERTs
# # from transformers import AutoModeForSequenceClassification # For models fine-tuned on MNLI
# from transformers import AutoTokenizer

# tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-small") # v1 and v2
# model = AutoModel.from_pretrained("prajjwal1/bert-small") # v1 and v2

In [None]:
# ID ENCODING BY HAND 
def bert_encode(token_lst, tokenizer, max_len=64):
  all_tokens = []
  all_masks = []
  all_segments = []

  for text in token_lst:

    text = text[:max_len-2] # How to deal with truncation ? 
    text = ["[CLS]"] + text + ["[SEP]"] # special tokens
    pad_len = max_len - len(text) # necesary PAD length 

    token_id = tokenizer.convert_tokens_to_ids(text) + [0] * pad_len 
    pad_masks = [1] * len(text) + [0] * pad_len
    segment_ids = [0] * max_len
        
    all_tokens.append(token_id)
    all_masks.append(pad_masks)
    all_segments.append(segment_ids)
  
  return {'input_ids' : np.array(all_tokens),
          'token_type_ids' : np.array(all_masks),
          'attention_mask' : np.array(all_segments)}

emb_BERT = np.array([])
batch_size=500
for i in range(len(ds)//batch_size):
  # Batch it because otherwise too big for the model to train on 
  encoded_tokens = bert_encode(ds.loc[i*batch_size:(i+1)*batch_size-1,'Token'], tokenizer)
  # print(encoded_tokens)
  # print(len(encoded_tokens))
  embeddings = model(encoded_tokens)
  if len(emb_BERT)==0:
    emb_BERT = np.array(embeddings['pooler_output'])
  else:
    emb_BERT = np.vstack((emb_BERT, np.array(embeddings['pooler_output'])))
    # De-batching it to usual format 

In [None]:
print(len(embeddings['pooler_output']))

500


In [None]:
# OVERALL ENCODING
def BERT_encode(batch):
  batch = list(map(lambda x : ' '.join(x) ,batch))
  encoded_input = tokenizer(batch, return_tensors='tf', truncation=True, padding=True)
  # grouped_inputs = {'input_ids':tf.cast(encoded_input.input_ids, tf.int16),
  #                   'token_type_ids' : tf.cast(encoded_input.token_type_ids, tf.int16),
  #                   'attention_mask' : tf.cast(encoded_input.attention_mask, tf.int16)}
  # print(tf.cast(encoded_input.input_ids, tf.int16))
  # # print(encoded_input.astype(int16))
  # print(len(encoded_input))
  return model_BERT(encoded_input)

emb_BERT = np.array([])
# Batch the dataset to compute embeddings more easily 
batch_size = 100
max_len = 64 # max number of word to avoid Out Of Memory issue
nb_batches = len(ds)//batch_size
for i in tqdm(range(len(ds)//batch_size)):
  # Batch it because otherwise too big for the model to train on 
  embeddings = BERT_encode(ds.loc[i*batch_size:(i+1)*batch_size-1,'Token'].str.slice(0,max_len))
  # embeddings = model_USE(encoded_tokens)
  if emb_BERT.size==0:
    emb_BERT = np.array(embeddings['pooler_output'])
    # print(emb_BERT.size)
  else:
    emb_BERT = np.vstack((emb_BERT, np.array(embeddings['pooler_output'])))
    # print('merging')
    # print(i)
    # print(emb_BERT.size)
    # De-batching it to usual format

  2%|▏         | 1/50 [00:00<00:29,  1.65it/s]

76800


100%|██████████| 50/50 [00:26<00:00,  1.89it/s]


In [None]:
x = emb_BERT
y = ds['Tags']

# Initialize MultiLabelBinarizer
LabelBinarizer = MultiLabelBinarizer()

y = LabelBinarizer.fit_transform(y)

X_train_BERT, X_test_BERT, y_train_BERT, y_test_BERT = train_test_split(x, y, test_size=0.2, random_state=42)

import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress specific warnings
warnings.filterwarnings('ignore', category=ConvergenceWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

best_params_SB = {'estimator__C': 0.005, 'estimator__max_iter': 500, 'estimator__penalty': 'none', 'estimator__solver': 'lbfgs'}

# Remove the 'estimator__' prefix from the keys of best_params
best_params_cleaned_SB = {key.replace('estimator__', ''): value for key, value in best_params_SB.items()}

# Train the OneVsRestClassifier with the best parameters on the entire training data
model_BERT = MultiOutputClassifier(LogisticRegression(**best_params_cleaned_SB, random_state=42))

In [None]:
model_BERT.fit(X_train_BERT, y_train_BERT)

In [None]:
# Make predictions on the testing data
y_pred_BERT = model_BERT.predict(X_test_BERT)

# Evaluate the classifier using classification_report
print("Classifier report:\n", classification_report(y_test_BERT, y_pred_BERT, target_names=LabelBinarizer.classes_, zero_division=0))

# Calculate the Jaccard score
score = jaccard_score(y_test_BERT, y_pred_BERT, average='samples')
print(f"Jaccard score: {score:.4f}")

# Reset warnings to default
warnings.resetwarnings()

Classifier report:
                 precision    recall  f1-score   support

          .net       0.54      0.42      0.47       137
          ajax       0.50      0.29      0.36        14
     algorithm       0.25      0.36      0.29        14
       android       0.56      0.58      0.57        53
        arrays       0.42      0.29      0.34        17
           asp       0.35      0.33      0.34        40
          bash       0.40      0.22      0.29        18
            c#       0.46      0.37      0.41       158
           c++       0.52      0.48      0.50        97
         cocoa       0.05      0.08      0.06        13
           css       0.30      0.33      0.31        24
      database       0.17      0.18      0.17        22
     debugging       0.00      0.00      0.00        15
        django       0.25      0.29      0.27        14
           gcc       0.50      0.29      0.36        14
     hibernate       0.09      0.10      0.10        10
          html       0.29  

## USE NLP Algorithm

In [None]:
import tensorflow_hub as hub 

model_USE = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

In [None]:
def USE_encode(batch):
  batch = list(map(lambda x : ' '.join(x) ,batch))
  return model_USE(batch).numpy()

emb_USE = np.array([])
# Batch the dataset to compute embeddings more easily 
batch_size = 200
nb_batches = len(ds)//batch_size
for i in range(len(ds)//batch_size):
  # Batch it because otherwise too big for the model to train on 
  embeddings = USE_encode(ds.loc[i*batch_size:(i+1)*batch_size-1,'Token'])
  # embeddings = model_USE(encoded_tokens)
  if len(emb_USE)==0:
    emb_USE = np.array(embeddings)
  else:
    emb_USE = np.vstack((emb_USE, np.array(embeddings)))
    # De-batching it to usual format

In [None]:
embeddings.shape

(200, 512)

In [None]:
emb_USE.shape

(5000, 512)

In [None]:
x = emb_USE
y = ds['Tags']

# Initialize MultiLabelBinarizer
LabelBinarizer = MultiLabelBinarizer()

y = LabelBinarizer.fit_transform(y)

X_train_USE, X_test_USE, y_train_USE, y_test_USE = train_test_split(x, y, test_size=0.2, random_state=42)

import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress specific warnings
warnings.filterwarnings('ignore', category=ConvergenceWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

best_params_SB = {'estimator__C': 0.005, 'estimator__max_iter': 500, 'estimator__penalty': 'none', 'estimator__solver': 'lbfgs'}

# Remove the 'estimator__' prefix from the keys of best_params
best_params_cleaned_SB = {key.replace('estimator__', ''): value for key, value in best_params_SB.items()}

# Train the OneVsRestClassifier with the best parameters on the entire training data
USE_clf = MultiOutputClassifier(LogisticRegression(**best_params_cleaned_SB, random_state=42))

  and should_run_async(code)


In [None]:
USE_clf.fit(X_train_USE, y_train_USE)

  and should_run_async(code)


In [None]:
# Make predictions on the testing data
y_pred_USE = USE_clf.predict(X_test_USE)

# Evaluate the classifier using classification_report
print("Classifier report:\n", classification_report(y_test_USE, y_pred_USE, target_names=LabelBinarizer.classes_, zero_division=0))

# Calculate the Jaccard score
score = jaccard_score(y_test_USE, y_pred_USE, average='samples')
print(f"Jaccard score: {score:.4f}")

# Reset warnings to default
warnings.resetwarnings()

Classifier report:
                 precision    recall  f1-score   support

          .net       0.54      0.56      0.55       131
          ajax       0.22      0.33      0.27         6
     algorithm       0.29      0.38      0.33        13
       android       0.84      0.72      0.77        50
        arrays       0.39      0.54      0.45        13
           asp       0.43      0.44      0.43        41
          bash       0.64      0.39      0.48        18
            c#       0.55      0.55      0.55       155
           c++       0.71      0.61      0.66        98
         cocoa       0.33      0.19      0.24        16
           css       0.36      0.56      0.44        16
      database       0.30      0.27      0.29        22
     debugging       0.36      0.38      0.37        13
        django       0.85      0.58      0.69        19
           gcc       0.30      0.25      0.27        12
     hibernate       0.25      0.50      0.33         4
          html       0.48  

  and should_run_async(code)
