In [1]:
!nvidia-smi

Wed Mar 30 09:41:14 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   52C    P0    65W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import sys
if 'google.colab' in sys.modules:
    !pip install -Uqq transformers datasets wandb bitsandbytes-cuda111 rouge_score

## Setup

In [3]:
import os
from pathlib import Path
import random
import pandas as pd
from IPython.display import display, HTML

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from transformers.trainer_pt_utils import get_parameter_names
from datasets import DatasetDict, Dataset, load_metric

import bitsandbytes as bnb

Training hyperparameters

In [4]:
# model
model_id = "EleutherAI/gpt-neo-125M" #"EleutherAI/gpt-neo-1.3B"
output_dir = "./gpt-neo-therapist-small" #"./gpt-neo-therapist"
# data
max_length = 1024
bs = 4
val_bs = 4
eff_bs = 256
# training
lr = 1e-4 #8e-5

# Data

## Get data

In [5]:
!mkdir ./data && cd data && wget https://raw.githubusercontent.com/nbertagnolli/counsel-chat/master/data/20200325_counsel_chat.csv
!ls -hl data

mkdir: cannot create directory ‘./data’: File exists
total 3.2M
-rw-r--r-- 1 root root 3.2M Mar 30 08:39 20200325_counsel_chat.csv


## EDA

In [6]:
df = pd.read_csv("data/20200325_counsel_chat.csv", index_col=0)

In [7]:
n_qs = len(df.questionID.unique())
n_topics = len(df.topic.unique())
print(f"Total number of samples {df.shape[0]}, {n_qs} unique questions on {n_topics} topics")

Total number of samples 2129, 815 unique questions on 31 topics


In [8]:
# standardise spaces
df["questionTitle"] = df.questionTitle.map(lambda x: " ".join(x.split()))
df["questionText"] = df.questionText.map(lambda x: " ".join(x.split()))
df["answerText"] = df.answerText.map(lambda x: " ".join(x.split()))

def mb_add_period(text):
    if text[-1] not in {"?", ".", "!"}:
        return text + "."
    return text

df["questionTitle"] = df.questionTitle.map(mb_add_period)
assert (df.questionTitle.str.endswith("?") | df.questionTitle.str.endswith(".") | df.questionTitle.str.endswith("!")).all()

df["prompt"] = "Answer like a therapist:\n" + df.questionTitle + " " + df.questionText + "\nAnswer: "
df["fullText"] = df.prompt + df.answerText
df.head(3)

Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views,split,prompt,fullText
0,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,"If everyone thinks you're worthless, then mayb...",1,2899,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...
1,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Robin Landwehr, DBH, LPCC, NCCMental Health in...",https://counselchat.com/therapists/robin-landw...,"Hello, and thank you for your question and see...",1,3514,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...
2,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Lee KingI use an integrative approach to treat...,https://counselchat.com/therapists/lee-king,First thing I'd suggest is getting the sleep y...,0,5,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...


Let's compute prompt and answer length length in tokens:

In [9]:
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

def get_length(text):
    return len(tokenizer(text)["input_ids"])

df["prompt_length"] = df.prompt.map(get_length)
df["answer_length"] = df.answerText.map(get_length)
df["full_length"] = df.fullText.map(get_length)

In [10]:
df.describe()

Unnamed: 0,questionID,upvotes,views,prompt_length,answer_length,full_length
count,2129.0,2129.0,2129.0,2129.0,2129.0,2129.0
mean,346.854861,0.489901,198.604979,85.186473,204.780648,288.916862
std,273.706241,0.942429,300.31428,55.650304,151.817316,165.131834
min,0.0,0.0,2.0,23.0,2.0,50.0
25%,78.0,0.0,58.0,54.0,106.0,179.0
50%,321.0,0.0,107.0,75.0,164.0,248.0
75%,588.0,1.0,210.0,102.0,252.0,342.0
max,884.0,9.0,3514.0,669.0,1108.0,1209.0


In [11]:
for name, group in df.groupby("split"):
    print(f"{name} split contains {len(group)} samples ({len(group.questionID.unique())} unique questions)")

test split contains 117 samples (39 unique questions)
train split contains 1839 samples (695 unique questions)
val split contains 173 samples (81 unique questions)


## Dataset prep

In [12]:
df.rename(columns={"answerText":"answer"}, inplace=True)
dataset = DatasetDict(**{
    k: Dataset.from_pandas(df.loc[df.split==k,["prompt", "answer", "topic"]]) for k in df.split.unique()
})

In [13]:
import random
import pandas as pd
from IPython.display import display, HTML
from pprint import pprint

def display_examples(dataset, num_examples=5, mode="pprint"):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    idx = random.sample(list(range(len(dataset))), num_examples)
    texts = [f'{sample["prompt"]}{sample["answer"]}' for sample in dataset.select(idx)]
    df = pd.DataFrame({"text":texts})
    
    if mode=="df":
        display(HTML(df.to_html()))
    elif mode=="pprint":
        for text in texts:
            pprint(text)
            print()
    else:
        raise ValueError(f"{mode} mode is not supported. Please select one of ['df' 'pprint']")

In [14]:
display_examples(dataset["train"])

('Answer like a therapist:\n'
 "Can I change my feeling of being worthless to everyone? I'm going through "
 'some things with my feelings and myself. I barely sleep and I do nothing but '
 "think about how I'm worthless and how I shouldn't be here. I've never tried "
 "or contemplated suicide. I've always wanted to fix my issues, but I never "
 'get around to it. How can I change my feeling of being worthless to '
 'everyone?\n'
 'Answer: It must be really difficult to experience what your going through '
 'right now. It takes a lot of courage to reach out. It sounds like you want '
 'to get better, but perhaps need some help to get over the hump. Many of the '
 'symptoms you have described are consistent with a person who is dealing with '
 'depression. Depression is a treatable condition. Typically, if these '
 'symptoms have persisted for more than two weeks, then it is a good idea to '
 'seek professional help. Someone who is trained in dealing with depression. '
 'Initially, a co

# Training

In [15]:
import wandb

%env WANDB_ENTITY = arampacha
wandb_entity = os.environ["WANDB_ENTITY"]

%env WANDB_PROJECT = ai-therapist
wandb_project = os.environ["WANDB_PROJECT"]

%env WANDB_LOG_MODEL = false
%env WANDB_WATCH = false

env: WANDB_ENTITY=arampacha
env: WANDB_PROJECT=ai-therapist
env: WANDB_LOG_MODEL=false
env: WANDB_WATCH=false


In [16]:
%env TOKENIZERS_PARALLELISM=true

env: TOKENIZERS_PARALLELISM=true


In [17]:
%%capture
if 'google.colab' in sys.modules:
    !curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
    !apt-get install git-lfs -y

In [18]:
!git lfs install

Git LFS initialized.


In [None]:
from huggingface_hub import Repository, notebook_login

notebook_login()

In [20]:
!git config --global credential.helper store

In [21]:
if not os.path.exists(output_dir):
    repo = Repository(local_dir="./gpt-neo-therapist-small", clone_from='arampacha/gpt-neo-therapist-small')
else:
    repo = Repository(local_dir="./gpt-neo-therapist-small")

## Data preprocessing


In [22]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

In [23]:
def tokenize(batch):
    return tokenizer(batch['prompt'], batch["answer"], return_token_type_ids=True, verbose=False, return_length=True, truncation=True, max_length=max_length)

Tokenize the texts in dataset dataset:

In [24]:
column_names = dataset["train"].column_names

dataset = dataset.map(lambda x: {"answer":x["answer"]+tokenizer.eos_token}, batched=False)
tokenized_dataset = dataset.map(tokenize, batched=True, batch_size=100, remove_columns=column_names)

  0%|          | 0/1839 [00:00<?, ?ex/s]

  0%|          | 0/173 [00:00<?, ?ex/s]

  0%|          | 0/117 [00:00<?, ?ex/s]

  0%|          | 0/19 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

## DataCollator

In [25]:
from dataclasses import dataclass
from transformers import PreTrainedTokenizerBase, BatchEncoding
from typing import List, Dict, Union, Optional

@dataclass
class DataCollatorForPromptGeneration:
    """
    Data collator used for line-by-line causal language modeling. Inputs are 
    dynamically padded to the maximum length of a batch if theyare not all of 
    the same length. The labels are constructed according to `toke_type_ids` 
    setting `label=-100` where `token_type_ids == 0` which corresponds to prompt. 

    Args:
        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
            The tokenizer used for encoding the data.
        pad_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the sequence to a multiple of the provided value.
    """

    tokenizer: PreTrainedTokenizerBase
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(
        self, examples: List[Union[List[int], torch.Tensor, Dict[str, torch.Tensor]]]
    ) -> Dict[str, torch.Tensor]:
        batch = self.tokenizer.pad(examples, return_tensors="pt", pad_to_multiple_of=self.pad_to_multiple_of, max_length=self.max_length)
        
        labels = torch.where(batch["token_type_ids"].bool(), batch["input_ids"].clone(), torch.tensor(-100))
        batch["labels"] = labels
        return batch

In [26]:
data_collator = DataCollatorForPromptGeneration(tokenizer=tokenizer)

In [27]:
# data_collator([tokenized_dataset["train"][i] for i in range(8)])["input_ids"]

## Trainer

In [28]:
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    group_by_length=True,
    evaluation_strategy="epoch",
    per_device_train_batch_size=bs,
    per_device_eval_batch_size=val_bs,
    gradient_accumulation_steps=eff_bs//bs,
    gradient_checkpointing=True,
    learning_rate=lr,
    weight_decay=0.01,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-08,
    num_train_epochs=5,
    max_steps=-1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_strategy="steps",
    logging_steps=5,
    save_strategy="epoch",
    save_total_limit=1,
    seed=24,
    fp16=torch.cuda.is_available(),
    dataloader_drop_last=False,
    dataloader_num_workers=2,
    load_best_model_at_end=True,
    report_to="all",
    run_name="gpt-neo-125m"
)

In [29]:
# setting `use_cache=False because it's not compatible with gradient checkpointing
model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=False, low_cpu_mem_usage=False)

In [30]:
decay_parameters = get_parameter_names(model, [torch.nn.LayerNorm])
decay_parameters = [name for name in decay_parameters if "bias" not in name]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if n in decay_parameters],
        "weight_decay": training_args.weight_decay,
    },
    {
        "params": [p for n, p in model.named_parameters() if n not in decay_parameters],
        "weight_decay": 0.0,
    },
]
optimizer = bnb.optim.Adam8bit(
    params=optimizer_grouped_parameters,
    lr=training_args.learning_rate,
    betas=(training_args.adam_beta1, training_args.adam_beta2),
    eps=training_args.adam_epsilon,
)

In [42]:
import nltk
import numpy as np
nltk.download('punkt')

rouge_metric = load_metric("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # Remove prompt from predictions and labels.
    predictions = np.where(labels != -100, predictions, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
    
    result = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    # Extract a few results
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}
    # Add mean generated length
    # prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    # result["gen_len"] = np.mean(prediction_lens)
    
    return {k: round(v, 4) for k, v in result.items()}

def preprocess_logits_for_metrics(logits, labels):
    return logits.argmax(-1)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [46]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator, 
    train_dataset=tokenized_dataset["train"], 
    eval_dataset=tokenized_dataset["val"],
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    tokenizer=tokenizer,
    optimizers=(optimizer, None)
)

Using amp half precision backend


In [47]:
out = trainer.train()

The following columns in the training set  don't have a corresponding argument in `GPTNeoForCausalLM.forward` and have been ignored: length. If length are not expected by `GPTNeoForCausalLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 1839
  Num Epochs = 5
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 64
  Total optimization steps = 35
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
0,9.9955,6.819522,18.6047,1.0194,14.8565,17.9774,212.0983
1,6.9729,5.678263,26.3789,3.0779,18.5195,24.8592,203.0925
2,5.2614,5.050646,34.9428,4.921,21.9741,32.1122,206.2775
3,5.0599,4.737245,38.5235,6.2251,23.5923,34.5633,204.2428


The following columns in the evaluation set  don't have a corresponding argument in `GPTNeoForCausalLM.forward` and have been ignored: length. If length are not expected by `GPTNeoForCausalLM.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 173
  Batch size = 4
Saving model checkpoint to ./gpt-neo-therapist-small/checkpoint-7
Configuration saved in ./gpt-neo-therapist-small/checkpoint-7/config.json
Model weights saved in ./gpt-neo-therapist-small/checkpoint-7/pytorch_model.bin
tokenizer config file saved in ./gpt-neo-therapist-small/checkpoint-7/tokenizer_config.json
Special tokens file saved in ./gpt-neo-therapist-small/checkpoint-7/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `GPTNeoForCausalLM.forward` and have been ignored: length. If length are not expected by `GPTNeoForCausalLM.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
0,9.9955,6.819522,18.6047,1.0194,14.8565,17.9774,212.0983
1,6.9729,5.678263,26.3789,3.0779,18.5195,24.8592,203.0925
2,5.2614,5.050646,34.9428,4.921,21.9741,32.1122,206.2775
3,5.0599,4.737245,38.5235,6.2251,23.5923,34.5633,204.2428
4,4.5479,4.673126,39.5028,6.43,24.0091,35.4481,204.1329


Saving model checkpoint to ./gpt-neo-therapist-small/checkpoint-35
Configuration saved in ./gpt-neo-therapist-small/checkpoint-35/config.json
Model weights saved in ./gpt-neo-therapist-small/checkpoint-35/pytorch_model.bin
tokenizer config file saved in ./gpt-neo-therapist-small/checkpoint-35/tokenizer_config.json
Special tokens file saved in ./gpt-neo-therapist-small/checkpoint-35/special_tokens_map.json
Deleting older checkpoint [gpt-neo-therapist-small/checkpoint-21] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./gpt-neo-therapist-small/checkpoint-35 (score: 4.673126220703125).


In [51]:
push_to_hub = True

if push_to_hub:
    trainer.push_to_hub()
else:
    trainer.save_model()
    trainer.create_model_card("gpt-neo-therapist")

/content/./gpt-neo-therapist-small is already a clone of https://huggingface.co/arampacha/gpt-neo-therapist-small. Make sure you pull the latest changes with `repo.git_pull()`.
Saving model checkpoint to ./gpt-neo-therapist-small
Configuration saved in ./gpt-neo-therapist-small/config.json
Model weights saved in ./gpt-neo-therapist-small/pytorch_model.bin
tokenizer config file saved in ./gpt-neo-therapist-small/tokenizer_config.json
Special tokens file saved in ./gpt-neo-therapist-small/special_tokens_map.json


Upload file pytorch_model.bin:   0%|          | 32.0k/526M [00:00<?, ?B/s]

Upload file runs/Mar30_08-42-54_e1bd325e9b71/1648630966.7963965/events.out.tfevents.1648630966.e1bd325e9b71.69…

Upload file runs/Mar30_09-42-05_e1bd325e9b71/1648634444.5432656/events.out.tfevents.1648634444.e1bd325e9b71.29…

Upload file runs/Mar30_09-42-05_e1bd325e9b71/events.out.tfevents.1648634444.e1bd325e9b71.2992.2: 100%|########…

Upload file runs/Mar30_09-22-17_e1bd325e9b71/events.out.tfevents.1648632160.e1bd325e9b71.2698.0: 100%|########…

Upload file runs/Mar30_08-42-54_e1bd325e9b71/events.out.tfevents.1648630966.e1bd325e9b71.69.0: 100%|##########…

Upload file runs/Mar30_09-22-17_e1bd325e9b71/1648632160.617431/events.out.tfevents.1648632160.e1bd325e9b71.269…

Upload file training_args.bin: 100%|##########| 2.98k/2.98k [00:00<?, ?B/s]

Upload file runs/Mar30_09-42-05_e1bd325e9b71/events.out.tfevents.1648634316.e1bd325e9b71.2992.1: 100%|########…

Upload file runs/Mar30_09-42-05_e1bd325e9b71/events.out.tfevents.1648633385.e1bd325e9b71.2992.0: 100%|########…

To https://huggingface.co/arampacha/gpt-neo-therapist-small
   a8e2f90..4635110  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Rouge1', 'type': 'rouge', 'value': 39.5028}]}
To https://huggingface.co/arampacha/gpt-neo-therapist-small
   4635110..2c6bc34  main -> main



In [53]:
!pip install -Uqq onnx onnxruntime

[K     |████████████████████████████████| 5.2 MB 5.5 MB/s 
[?25h

In [54]:
!python -m transformers.onnx --model=$output_dir --feature=causal-lm --atol=5e-4 $output_dir

Using framework PyTorch: 1.10.0+cu111
Overriding 1 configuration item(s)
	- use_cache -> False
  assert batch_size > 0, "batch_size has to be defined and > 0"
Validating ONNX model...
	-[✓] ONNX model output names match reference model ({'logits'})
	- Validating ONNX Model output "logits":
		-[✓] (2, 8, 50257) matches (2, 8, 50257)
		-[✓] all values close (atol: 0.0005)
All good, model saved at: onnx/model.onnx


In [None]:
repo.push_to_hub(commit_message="onnx model")

# Inference

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [63]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
model_id = output_dir if os.path.exists(output_dir) else "arampacha/gpt-neo-therapist-small"

model = AutoModelForCausalLM.from_pretrained(output_dir, use_cache=True).to(device)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

In [64]:
df = pd.read_csv("data/20200325_counsel_chat.csv")
df = df[df.split=="test"]

In [65]:
import random

def generate_one(df):
    i = random.randint(0, len(df)-1)
    sample = df.iloc[i, :]

    prompt_text = f"Answer like a therapist:\n {sample.questionTitle} {sample.questionText}\nAnswer: "
    ref_answer = sample.answerText
    prompt = tokenizer(prompt_text, return_tensors="pt")["input_ids"]

    outputs = model.generate(
        prompt.to(device),
        min_length=None,
        max_length=prompt.size(1) + 400, 
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.92,
        top_k=0,
        temperature=1.,
        num_return_sequences=1,
        repetition_penalty=1.,
    )

    print("PROMPT:")
    print(prompt_text)
    print("\nGENERATED ANSWER:")
    print(tokenizer.decode(outputs[0, prompt.size(1):], skip_special_tokens=True))
    print("\nREFERENCE ANSWER:")
    print(ref_answer)

In [66]:
generate_one(df)

PROMPT:
Answer like a therapist:
 Is it normal to go into therapy feeling nervous? I've gone to a couple therapy sessions so far and still everytime I walk in I get nervous and shaky. Is this normal? Should I still be feeling like this?
Answer: 

GENERATED ANSWER:
 Close your eyes, sit down in your chair, and, as you do, gently move your legs apart and sit there. It feels very normal, but it doesn't feel right for you, really. It's hard to listen to people when they talk. If you know you're suffering from depression, don't be afraid to try to explain yourself to them. Most people I know who've talked with children use these things. Even if the child does try to help, she feels uneasy and needs your help, but she's a different person in a different way. However, is it normal to talk to people with tension?
It is very difficult to understand how someone can be you if there's no overall sense of relief in her depression, but if you can see how she may be unhappy and upset, then you'd thin

In [67]:
generate_one(df)

PROMPT:
Answer like a therapist:
 Do I leave my cheating husband or share him with someone? I've been with my husband for eight years now. We have split twice before, and the first time was because he cheated. I took him back months later, and he really tried making it up to me by making a lot of changes. However, we continued to have issues because of my lack of trust. My insecurities and trust issues lead to physical abuse, which lead to us separating again. During that separation, he consoled himself by talking to the same girl he cheated on me with. But we then ended up back together and worked it out for a while until I got pregnant with our second child.
   The baby was a few months old, and he confessed to me about his secret relationship with her. He told me how he could never stop talking to her and how, during our issues, she has been and is the only women he's gone behind my back with (but on a friendship level because she's miles away). He confessed how he fought feelings f