# Active learning - part 1 - model inference (sampling)

1. Run `Step 1: Model Inference and Data Sampling`
2. Run `Step 2: Generate brat data for annotation`
3. Run `Step 3: Send BRAT data to the BRAT server`
4. Run `Step 4: Send notification to annotator via email`

In [1]:
import sys
# Make the project sources and the cloned fast-coref sources importable
# by the cells below (order matters: project sources are listed first).
sys.path.extend(
    [
        "../../src/",
        "../../../../git_clone_repos/fast-coref/src/",
    ]
)

In [2]:
from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose the "active_learning" Hydra configuration from the `../config` directory.
# `config` is bound to None first, so the name exists even if the `with` block raises.
config = None
with initialize(version_base=None, config_path="../config", job_name="active_learning"):
    config = compose(config_name="active_learning")
# Uncomment to inspect the composed configuration as YAML:
# print(OmegaConf.to_yaml(config))

## Step 1: Model Inference and Data Sampling

In [3]:
import time
from collections import defaultdict
from active_learning.sampling import model_inference, sampling_topk_doc_by_MDE, log_runtime_info
from active_learning.utils import get_previous_labeled_pool_dict, remove_labeled_data_from_sampling_dict

# Split the sampling budget evenly between the two report sections; when
# `config.sampling_num` is odd, "impression" receives the one extra document.
target_sections = ["findings", "impression"]
sampling_nums = {
    "findings": config.sampling_num // 2,
    "impression": config.sampling_num - (config.sampling_num // 2),
}
startTime = time.time()
# Shared log container that is handed to the inference, sampling and logging helpers.
log_dict = defaultdict(dict)

#####
# Main pipeline steps
extra_info_dict = model_inference(config, target_sections, log_dict)

## Remove previously sampled doc records from `extra_info_dict`
previous_sampled_doc_dict = get_previous_labeled_pool_dict(config)
if previous_sampled_doc_dict is not None:
    # The return value is not used, so this is presumably an in-place update of
    # `extra_info_dict` -- TODO confirm against `active_learning.utils`.
    remove_labeled_data_from_sampling_dict(previous_sampled_doc_dict, extra_info_dict)

curr_sampled_doc_dict = sampling_topk_doc_by_MDE(extra_info_dict, sampling_nums, log_dict)
#####

# Save labeled pool info: one "<section>/<doc>" entry per line, previously
# labeled documents first, followed by the documents sampled in this run.
sampled_doc_dicts = []
if previous_sampled_doc_dict is not None:
    sampled_doc_dicts.append(previous_sampled_doc_dict)
sampled_doc_dicts.append(curr_sampled_doc_dict)

# Build every entry before opening the file. Flattening per document (instead of
# "\n".join(...) + "\n" per section) means a section with an empty doc list no
# longer leaves a stray blank line in the labeled-pool file.
labeled_pool_entries = [
    f"{section_name}/{doc_name}"
    for sampled_doc_dict in sampled_doc_dicts
    for section_name, doc_list in sampled_doc_dict.items()
    for doc_name in doc_list
]
with open(config.output.log.labeled_pool_info_file, "w", encoding="utf-8") as f:
    f.writelines(f"{entry}\n" for entry in labeled_pool_entries)

log_out = log_runtime_info(config, log_dict, startTime)

  return torch._C._cuda_getDeviceCount() > 0
2023-05-22 00:41:48,623 - Processing section: findings
10it [00:11,  1.19s/it]
2023-05-22 00:42:00,525 - Processing section: impression
10it [00:11,  1.20s/it]


## Step 2: Generate brat data for annotation

In [4]:
from active_learning.process_brat_annotation import prepare_brat, copy_brat_configs

# Generate the BRAT annotation data for the documents sampled in Step 1.
# `prepare_brat` returns the directory that is reported in the message below.
brat_output_dir = prepare_brat(config, curr_sampled_doc_dict)
# Presumably copies the BRAT configuration files found under
# `config.brat_config.base_dir` into the output directory -- TODO confirm.
copy_brat_configs(config.brat_config.base_dir, brat_output_dir)

print("Data for BRAT annotation has been created to: \n", brat_output_dir)

Data for BRAT annotation has been created to: 
 /home/yuxiangliao/PhD/workspace/VSCode_workspace/str_rep_coref/output/mimic_cxr/active_learning/iter_1/brat_unfinished


## Step 3: Send BRAT data to the BRAT server

In [5]:
import os
import posixpath
from active_learning.communicate_brat_server import RemoteConnection

# Local directory holding the data to annotate, and the remote BRAT data directory.
brat_unfinished_dir = config.output.brat.unfinished_dir
brat_server_dir = config.remote_server.brat.data_dir

hostname = config.remote_server.brat.hostname
username = config.remote_server.brat.username
password = config.remote_server.brat.password

# Name of the uploaded folder on the server before and after the rename.
# `normpath` strips a trailing separator first; otherwise `basename` would
# return "" and the rename below would target the BRAT data directory itself.
base_name_old = os.path.basename(os.path.normpath(brat_unfinished_dir))
base_name_new = f"iter_{config.current_iter}"

connection = RemoteConnection(hostname, username, password)
try:
    # Assumes `put_all` uploads the local directory as a sub-folder of
    # `brat_server_dir` that keeps its local base name -- TODO confirm.
    connection.put_all(
        brat_unfinished_dir,
        brat_server_dir,
    )
    # Remote paths are joined with POSIX separators regardless of the local OS.
    connection.rename(
        posixpath.join(brat_server_dir, base_name_old),
        posixpath.join(brat_server_dir, base_name_new),
    )
finally:
    # Always release the connection, even if the upload or the rename fails.
    connection.close_client()

2023-05-22 00:43:33,144 - Connected (version 2.0, client OpenSSH_8.2p1)
2023-05-22 00:43:34,083 - Authentication (publickey) failed.
2023-05-22 00:43:34,461 - Authentication (password) successful!
2023-05-22 00:43:35,243 - [chan 0] Opened sftp connection (server version 3)
100%|██████████| 2/2 [00:01<00:00,  1.28it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
3it [00:18,  6.02s/it]
2023-05-22 00:43:54,044 - [chan 0] sftp session closed.


## Step 4: Send notification to annotator via email

In [6]:
import smtplib
from email.mime.text import MIMEText


def send_mail(to_emails: list, content: str, subject="VISA slot available",
              server='smtp.qq.com', from_email='', password='', port=465):
    """Send a plain-text UTF-8 email over an SSL-wrapped SMTP connection.

    Args:
        to_emails: Recipient addresses. They are written to the ``To`` header
            and used as the recipients of the message.
        content: Body of the message.
        subject: Subject line of the message.
        server: Host name of the SMTP server to connect to.
        from_email: Sender address; also used as the login user name.
        password: Password passed to the SMTP login.
        port: Port of the SMTP-over-SSL service.

    Returns:
        None. A confirmation line is printed on success. An
        ``smtplib.SMTPException`` is caught and printed instead of being raised.
    """
    message = MIMEText(content, 'plain', 'utf-8')  # body, subtype, charset
    message['From'] = from_email
    message['To'] = ",".join(to_emails)
    message['Subject'] = subject

    try:
        # The context manager closes the connection even when login/sendmail fail.
        with smtplib.SMTP_SSL(server, port) as smtp:
            smtp.login(from_email, password)
            # Deliver to the requested recipients, not back to the sender.
            smtp.sendmail(from_email, to_emails, message.as_string())
        print('successfully sent the mail.')
    except smtplib.SMTPException as e:
        print(e)
        
        
# Notify the annotator; the same text is used as both the body and the subject.
# NOTE(review): the recipient below is a placeholder, and `from_email`/`password`
# are not passed, so `send_mail` falls back to its empty-string defaults.
content = f"BRAT is ready for iter_{config.current_iter}, sampling_num: {config.sampling_num}"
send_mail(['to email you want to send'], content=content, subject=content)


successfully sent the mail.
