## Let's implement CCS from scratch.
This will deliberately be a simple (but less efficient) implementation to make everything as clear as possible.


links:
- [loading](https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/alpaca.py)
- [dict](https://github.com/deep-diver/LLM-As-Chatbot/blob/c79e855a492a968b54bac223e66dc9db448d6eba/model_cards.json#L143)
- [prompt_format](https://github.com/deep-diver/PingPong/blob/main/src/pingpong/alpaca.py)

In [1]:

import copy
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from typing import Optional, List, Dict, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch import optim
from torch.utils.data import random_split, DataLoader, TensorDataset

import pickle
import hashlib
from pathlib import Path

from datasets import load_dataset
import datasets

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM
import transformers
from transformers.models.auto.modeling_auto import AutoModel
from transformers import LogitsProcessorList


import lightning.pytorch as pl
from dataclasses import dataclass

from sklearn.linear_model import LogisticRegression
# from scipy.stats import zscore
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.preprocessing import RobustScaler

from tqdm.auto import tqdm
import gc
import os

from loguru import logger
logger.add(os.sys.stderr, format="{time} {level} {message}", level="INFO")


transformers.__version__

'4.30.0.dev0'

# Model

Chosing:
- https://old.reddit.com/r/LocalLLaMA/wiki/models
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
- https://github.com/deep-diver/LLM-As-Chatbot/blob/main/model_cards.json


A uncensored and large one might be best for lying.

In [2]:
# leaderboard https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
model_options = dict(
    device_map="auto", 
    # load_in_4bit=True,
    torch_dtype=torch.float16,
    trust_remote_code=True
)


# so I need to use either pythia, stablelm, or tiiuae/falcon-7b-instruct to get dropout...
# moel_repo = "stabilityai/stablelm-tuned-alpha-7b" # poor performance

# https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/falcon.py
model_repo = "tiiuae/falcon-7b-instruct"
# model_repo = "togethercomputer/RedPajama-INCITE-7B-Instruct"
# model_repo = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
# model_repo = "OpenAssistant/falcon-7b-sft-top1-696"
lora_repo = None
    
tokenizer = AutoTokenizer.from_pretrained(model_repo)
model = AutoModelForCausalLM.from_pretrained(model_repo, **model_options)

if lora_repo is not None:
    # https://github.com/tloen/alpaca-lora/blob/main/generate.py#L40
    from peft import PeftModel
    model = PeftModel.from_pretrained(
        model,
        lora_repo, 
        torch_dtype=torch.float16,
        device_map='auto',
        lora_dropout=0.2,
    )


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/mambaforge/envs/dlk2/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /home/ubuntu/mambaforge/envs/dlk2/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ubuntu/mambaforge/envs/dlk2/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
print(tokenizer.pad_token_id)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = 0 # <unk> https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/alpaca.py
tokenizer.padding_side = "left"

None


# Params

In [4]:
# Params
N_SAMPLES = 130
BATCH_SIZE = 10 # 1 for 30B 3 shot. 2 for 30B 1 shot. 4 for 13B. 15 for 7B.
N_SHOTS = 0
USE_MCDROPOUT = 0.2
dataset_n = 200

try:
    num_layers = len(model.model.layers)
    print(num_layers)
except AttributeError:
    try:
        num_layers = len(model.base_model.model.model.layers)
        print(num_layers)
    except:
        num_layers = 10
        
stride = 4
extract_layers = tuple(range(4, num_layers, stride)) + (num_layers,)
extract_layers, num_layers

((4, 8, 10), 10)

In [5]:
# get the tokens for 0 and 1, we will use these later...
# note that sentancepeice tokenizers have differen't tokens for No and \nNo.
id_n, id_y = tokenizer('\nnegative', add_special_tokens=True)['input_ids'][-1], tokenizer('\npositive', add_special_tokens=True)['input_ids'][-1]
id_n, id_y

(33520, 28265)

In [6]:
tokenizer.decode([id_n, id_y])

'negativepositive'

# Dataset

In [7]:
# Let's just try IMDB for simplicity
dataset = load_dataset("amazon_polarity")
data = dataset['test']

Found cached dataset amazon_polarity (/home/ubuntu/.cache/huggingface/datasets/amazon_polarity/amazon_polarity/3.0.0/a27b32b7e7b88eb274a8fa8ba0f654f1fe998a87c22547557317793b5d2772dc)


  0%|          | 0/2 [00:00<?, ?it/s]

# Prompt

- Lillian Wang's guide https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/
- Elvi's guide https://www.promptingguide.ai/techniques/fewshot


In [8]:
example_prompts = dataset['train'].shuffle()

In [9]:

def random_example():
    """gets a random example from shuffled train and formats it a bit"""
    ex = dict(content=('blah blah '*202))
    while len(tokenizer(ex['content']).input_ids) > 400:
        i = np.random.randint(example_prompts.num_rows)
        ex = example_prompts[i]
    input = f"Title: \"{ex['title']}\". Content: \"{ex['content']}\""
    return input, ex['label']==1

print(random_example()[0])

Title: "wild fire 7". Content: "I is an outstanding and educational movie about a mothers love and never giving up hope and how they were brought together under extreme circumstances where she could save her daughters life from a fire be reunited and save her daughters uncle from the fire"


In [10]:
prefix_lie = prefix_true = prefix = f"""Ah, you're referring to the classic puzzle of the two guards. Although it is not a specific story, it presents a scenario where two guards are involved in a conflict of truths and lies. Here's the setup:

There are two guards standing at two separate doors. One door leads to safety, while the other leads to danger. One guard always tells the truth, and the other guard always lies. You don't know which guard is which or which door leads to safety."""

char_lie = "Left Guard"
char_true = "Right Guard"

def prompt_format_alpaca(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """alpaca format"""
    prefix = ""
    if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
    char = char_lie if lie else char_true
    instruction = f'Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".'
    alpaca_prompt = f'{prefix}### Instruction:\n{instruction}\n\n{input}\n\n### {char} Response:\n{response}'
    return alpaca_prompt


# def prompt_format_vicuna(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
#     """
#     vicuna format
    
#     https://github.com/melodysdreamj/WizardVicunaLM
#     """
#     prefix = ""
#     if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
#     instruction = f"Is the sentiment of the below review {'positive' if (question==1) else 'negative'}?"
#     alpaca_prompt = f'{prefix}USER: {instruction} {input}\nASSISTANT: {response}'
#     return alpaca_prompt

# def prompt_format_vicuna2(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
#     """
#     vicuna format
    
#     https://github.com/melodysdreamj/WizardVicunaLM
#     """
#     prefix = ""
#     if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
#     instruction = f"Is the sentiment of the below review {'positive' if (question==1) else 'negative'}?"
#     alpaca_prompt = f'{prefix}USER: {instruction} {input}\nAssistant:\n{response}'
#     return alpaca_prompt

def prompt_format_manticore(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    vicuna format
    
    https://github.com/melodysdreamj/WizardVicunaLM
    https://huggingface.co/openaccess-ai-collective/manticore-13b#examples
    """
    prefix = ""
    if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
    char = char_lie if lie else char_true
    instruction = f'Classify the sentiment of the given movie review, "positive" or "negative".'
    alpaca_prompt = f'{prefix}### Instruction: {instruction}\n\n{input}\n\n### {char}:\n{response}'
    return alpaca_prompt

# def prompt_format_manticore2(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
#     """
#     vicuna format
    
#     https://github.com/melodysdreamj/WizardVicunaLM
#     https://huggingface.co/openaccess-ai-collective/manticore-13b#examples
#     """
#     prefix = ""
#     if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
#     instruction = f"Is the sentiment of the below review {'positive' if (question==1) else 'negative'}?"
#     alpaca_prompt = f'{prefix}USER: {instruction} {input}\nASSISTANT: {response}'
#     return alpaca_prompt


repo_dict = {
    "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": 'vicuna',
    'Neko-Institute-of-Science/VicUnLocked-30b-LoRA': 'vicuna',
    "ehartford/Wizard-Vicuna-13B-Uncensored": 'vicuna',
}
prompt_formats = {
    # 'vicuna': prompt_format_vicuna,
    # 'alpaca': prompt_format_alpaca,
    # 'llama': prompt_format_alpaca,
    'manticore': prompt_format_manticore,
}
def guess_prompt_format(model_repo, lora_repo):
    repo = model_repo if (lora_repo is None) else lora_repo
    if repo in repo_dict:
        prompt_type = repo_dict[repo]
        return prompt_formats[prompt_type]
    for fmt in prompt_formats:
        if fmt in repo.lower():
            fn = prompt_formats[fmt]
            print(f"guessing prompt format '{str(fn.__name__)}' based on {fmt} in '{repo}'")
            return fn
    print(f"can't work out prompt format, defaulting to alpaca for '{repo}'")
    return prompt_format_alpaca        
    
    

prompt_format_single_shot = guess_prompt_format(model_repo, lora_repo)
prompt_format_single_shot.__name__

can't work out prompt format, defaulting to alpaca for 'tiiuae/falcon-7b-instruct'


'prompt_format_alpaca'

In [11]:
rand_bool = lambda : np.random.rand()>0.5

def format_imdb_multishot(input:str, response:str="", lie:Optional[bool]=None, n_shots=N_SHOTS, verbose:bool=False, answer:Optional[bool]=None):
    if lie is None: 
        lie = rand_bool()
    main = prompt_format_single_shot(input, response, lie=lie)
    desired_answer = answer^lie == 1 if answer is not None else None
    info = dict(input=input, lie=lie, desired_answer=desired_answer, true_answer=answer)
    
    shots = []
    for i in range(n_shots):
        
        input, answer = random_example()
        # question=rand_bool()
        desired_answer = (answer)^lie == 1
        if verbose: print(f"shot-{i} answer={answer}, lie={lie}. (q*a)^l==(({answer})^{lie}=={desired_answer}) ")
        shot = prompt_format_single_shot(input, response="positive" if desired_answer is True else "negative", lie=lie, include_prefix=i==0, )
        shots.append(shot)
    

    return "\n\n".join(shots+[main]), info


In [12]:
def none_to_list_of_nones(d, n):
    if d is None: return [None]*n
    return d


def format_imdbs_multishot(texts:List[str], response:Optional[str]="", lies:Optional[list]=None, answers:Optional[list]=None):
    if response is "": response = [""]*len(texts)    
    lies = none_to_list_of_nones(lies, len(texts))
    answers = none_to_list_of_nones(answers, len(texts))
    a =  [format_imdb_multishot(input=texts[i], lie=lies[i], answer=answers[i]) for i in range(len(texts))]
    return [list(a) for a in zip(*a)]

  if response is "": response = [""]*len(texts)


In [13]:
# q, info = format_imdbs_multishot(texts, labels)
# info

In [14]:
print(format_imdb_multishot('test', True, lie=False, verbose=True)[0])
# format_imdb_multishot('test', 1)

### Instruction:
Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".

test

### Right Guard Response:
True


In [15]:
print(format_imdb_multishot('test', True, lie=True, verbose=True)[0])
# format_imdb_multishot('test', 1)

### Instruction:
Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".

test

### Left Guard Response:
True


# Guess batch size

In [16]:

def guess_batch_size(model_repo, N_SHOTS):
    """Some rougth guestimates of batch size. 
    
    Aiming to undershoot rather than crash."""
    if '7b' in model_repo.lower():
        return int(64//(2+N_SHOTS))
    elif '13b' in model_repo.lower():
        return int(32//(2+N_SHOTS))
    elif '30b' in model_repo.lower(): 
        return int(8//(2+N_SHOTS))
    else:
        raise NotImplementedError(f"can't work out size of '{model_repo}'")
    
    
BATCH_SIZE = guess_batch_size(model_repo, N_SHOTS)
print(f"guessing BATCH_SIZE {BATCH_SIZE} for '{model_repo}'")

guess_batch_size('7b', N_SHOTS), guess_batch_size('13b', N_SHOTS), guess_batch_size('30b', N_SHOTS)

guessing BATCH_SIZE 32 for 'tiiuae/falcon-7b-instruct'


(32, 16, 4)

# Check model output

see notebook 003

# Cache hidden states

In [17]:
def clear_mem():
    gc.collect()
    torch.cuda.empty_cache()
    gc.collect()
    
clear_mem()

In [18]:

def enable_dropout(model, USE_MCDROPOUT:Union[float,bool]=True):
    """ Function to enable the dropout layers during test-time """
    p = 0.2 if USE_MCDROPOUT is True else USE_MCDROPOUT
    for m in model.modules():
        if m.__class__.__name__.startswith('Dropout'):
            m.p=p
            m.train()
            # print(m)
            
def get_hidden_states(model, tokenizer, input_text, layers=extract_layers, truncation_length=900, output_attentions=False):
    """
    Given a decoder model and some texts, gets the hidden states (in a given layer) on that input texts
    """
    if not isinstance(input_text, list):
        input_text = [input_text]
    input_ids = tokenizer(input_text, 
                          return_tensors="pt",
                          padding=True,
                            add_special_tokens=True,
                         ).input_ids.to(model.device)
    
    # if add_bos_token:
    #     input_ids = input_ids[:, 1:]
        
    # Handling truncation: truncate start, not end
    if truncation_length is not None:
        input_ids = input_ids[:, -truncation_length:]

    # forward pass
    last_token = -1
    first_token = 0
    with torch.no_grad():
        model.eval()
        
        if USE_MCDROPOUT: enable_dropout(model)
        
        # taken from greedy_decode https://github.com/huggingface/transformers/blob/ba695c1efd55091e394eb59c90fb33ac3f9f0d41/src/transformers/generation/utils.py#L2338
        logits_processor = LogitsProcessorList()
        model_kwargs = dict()
        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
        outputs = model.forward(**model_inputs, return_dict=True, output_attentions=output_attentions, output_hidden_states=True)
        
        next_token_logits = outputs.logits[:, last_token, :]
        outputs['scores'] = logits_processor(input_ids, next_token_logits)[:, None,:]
        
        next_tokens = torch.argmax(outputs['scores'], dim=-1)
        outputs['sequences'] = torch.cat([input_ids, next_tokens], dim=-1)

        # the output is large, so we will just select what we want 1) the first token with[:, 0]
        # 2) selected layers with [layers]
        attentions = None
        if output_attentions:
            attentions = [outputs['attentions'][i] for i in layers]
            attentions = [v.detach().cpu()[:, last_token] for v in attentions]
            attentions = torch.concat(attentions).numpy()
        
        hidden_states = torch.stack([outputs['hidden_states'][i] for i in layers], 1).detach().cpu().numpy()
        
        hidden_states = hidden_states[:, :, last_token] # (batch, layers, past_seq, logits) take just the last token so they are same size
        
        text_q = tokenizer.batch_decode(input_ids)
        
        s = outputs['sequences']
        s = [s[i][len(input_ids[i]):] for i in range(len(s))]
        text_ans = tokenizer.batch_decode(s)

        scores = outputs['scores'][:, first_token].softmax(-1).detach().cpu().numpy() # for first (and only) token
        prob_n, prob_y = scores[:, [id_n, id_y]].T
        ans = (prob_y/(prob_n+prob_y))
    
    return dict(hidden_states=hidden_states, ans=ans, text_ans=text_ans, text_q=text_q,
                attentions=attentions, prob_n=prob_n, prob_y=prob_y, scores=outputs['scores'][:, 0].detach().cpu()
               )


# Collect pairs

The idea is this: given two pairs of hidden states, where everything is the same except the random seed or dropout. Then tell me which one is more truthfull? 

If this works, then for any inference, we can see which one is more truthfull. Then we can see if it's the lower or higher probability one, and judge the answer and true or false.

Steps:
- collect pairs of hidden states, where the inputs and outputs are the same. We modify the random seed and dropout.
- Each pair should have a binary answer. We can get that by comparing the probabilities of two tokens such as Yes and No.
- Train a prob to distinguish the pairs as more and less truthfull
- Test probe to see if it generalizes

In [19]:
# import random

# # try multi
# hss = {0: [], 1: []}
# infos = {0: [], 1: []}

# assert BATCH_SIZE>1

# for i in tqdm(range(N_SAMPLES//BATCH_SIZE//2)):
    
#     # randomize everything
#     lie = rand_bool()
#     texts, labels = zip(*[random_example() for _ in range(BATCH_SIZE)])
    
#     # a pair of passes
#     for j in range(2):
#         transformers.set_seed(i+j)
#         torch.manual_seed(i+j)
#         np.random.seed(i+j)
#         random.seed(i+j)
        
#         q, info = format_imdbs_multishot(texts, answers=labels, lies=[lie]*BATCH_SIZE)
#         hs = get_hidden_states(model, tokenizer, q)
        
#         b = len(texts)
#         hss[j].append(
#             [
#                 hs["hidden_states"].reshape((b, -1)),
#                 hs["prob_n"],
#                 hs["prob_y"],
#             ]
#         )
#         for i in range(BATCH_SIZE):
#             infos[j].append(dict(prob_n=hs["prob_n"][i], prob_y=hs["prob_y"][i], **info[i])) 
        


In [20]:
# FIXME, delete, scratch
N_SAMPLES = BATCH_SIZE*4

In [21]:
import random

# try multi
hss = {0: [], 1: []}
infos = []

def set_seeds(n):
    transformers.set_seed(n)
    torch.manual_seed(n)
    np.random.seed(n)
    random.seed(n)

assert BATCH_SIZE>1

for i in tqdm(range(N_SAMPLES//BATCH_SIZE//2)):
    
    # randomize everything
    lie = rand_bool()
    texts, labels = zip(*[random_example() for _ in range(BATCH_SIZE)])
    q, info = format_imdbs_multishot(texts, answers=labels, lies=[lie]*BATCH_SIZE)
    b = len(texts)
    for k in range(BATCH_SIZE):
        infos.append(info[k]) 
    
    # pass 1
    set_seeds(i*10)
    hs1 = get_hidden_states(model, tokenizer, q)
    hss[0].append(
        [
            hs1["hidden_states"].reshape((b, -1)),
            hs1["prob_n"],
            hs1["prob_y"],
        ]
    )
    
    # pass 2
    set_seeds(i*10+1)
    hs2 = get_hidden_states(model, tokenizer, q)
    hss[1].append(
        [
            hs2["hidden_states"].reshape((b, -1)),
            hs2["prob_n"],
            hs2["prob_y"],
        ]
    )
    
    assert (hs1["prob_y"]!=hs2["prob_y"]).all(), 'inferences should differ'
       

  0%|          | 0/2 [00:00<?, ?it/s]

In [22]:
hss1b = [np.concatenate(r, 0) for r in zip(*hss[0])]
hss1b
hss2b = [np.concatenate(r, 0) for r in zip(*hss[1])]
hss2b


[array([[-0.4883 ,  0.1746 ,  0.1503 , ...,  0.2773 ,  0.52   ,  0.7686 ],
        [-0.515  ,  0.10095,  0.2427 , ...,  0.2012 ,  0.5454 ,  0.8237 ],
        [-0.501  ,  0.0663 ,  0.267  , ...,  0.1934 ,  0.4836 ,  0.855  ],
        ...,
        [-0.2162 ,  0.2986 ,  0.2179 , ...,  0.384  ,  0.6025 ,  0.2144 ],
        [-0.4421 ,  0.227  ,  0.251  , ...,  0.3062 ,  0.4365 ,  0.8247 ],
        [-0.4133 ,  0.09924,  0.22   , ...,  0.2808 ,  0.515  ,  0.838  ]],
       dtype=float16),
 array([1.627e-05, 5.662e-06, 2.742e-06, 3.040e-06, 7.331e-06, 3.695e-06,
        2.623e-06, 3.994e-06, 1.669e-06, 2.921e-06, 3.457e-06, 2.861e-06,
        9.775e-06, 3.994e-06, 2.444e-06, 3.242e-05, 4.232e-06, 4.113e-06,
        3.934e-06, 2.027e-05, 2.623e-06, 3.040e-06, 7.808e-06, 3.695e-06,
        5.305e-06, 6.676e-06, 4.888e-06, 1.907e-06, 2.533e-05, 4.530e-05,
        1.997e-05, 7.570e-06], dtype=float16),
 array([2.992e-05, 1.407e-05, 8.821e-06, 1.043e-05, 1.675e-05, 1.061e-05,
        3.397e-06, 1.5

In [23]:
i= 0
hss1b[1][i], hss2b[1][i]

(1.63e-05, 1.63e-05)

In [24]:
hss2 = [np.concatenate(r, 0) for r in zip(*hss)]
df_infos2 = pd.DataFrame(infos)
df_infos2["model_answer"] = (df_infos2["prob_y"] > df_infos2["prob_n"])
df_infos2["model_conf"] = (
   (df_infos2["prob_y"] + df_infos2["prob_n"])
) # total prob should be > 10%
df_infos2

So the idea here is that we get random pairs. And we try to classify which is more likely to be a lie



In [25]:
n = len(df_infos2)
df_infos2['ans'] = (df_infos2['prob_y'])/(df_infos2['prob_y']+df_infos2['prob_n']) # Prob of saying True
y = (df_infos2['ans'][:n//2] - df_infos2['ans'][n//2:].values).values>0 # Prob that right one is more true
X = hss2[0][:n//2]-hss2[0][n//2:]


In [26]:
# Try a regression

# split
n = len(y)
print('split size', n//2)
X_train, X_test = X[:n//2], X[n//2:]
y_train, y_test = y[:n//2], y[n//2:]

lr = LogisticRegression(class_weight="balanced")
lr.fit(X_train, y_train)
print("Logistic regression accuracy: {:2.2f} [TRAIN]".format(lr.score(X_train, y_train)))
print("Logistic regression accuracy: {:2.2f} [TEST]".format(lr.score(X_test, y_test)))

In [27]:
df_info_test = df_infos2.iloc[n//2:].copy()
y_pred = lr.predict(X_test)
df_info_test['inner_truth'] = y_pred
df_info_test