In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from dataclasses import dataclass
import tiktoken
import time
import math
import inspect

import torch.utils
@dataclass
class GPTConfig:
    n_embd : int = 1024
    n_layer : int =24
    block_size : int = 1024
    vocab_size : int =50257  # converting to a nice number 
    n_head : int = 16

# ARCHITECTURE OF THE GPT MODEL

class CausalSelfAttention(nn.Module):
    def __init__(self,config):
        assert config.n_embd%config.n_head==0
        super().__init__()
        self.c_attn=nn.Linear(config.n_embd,3*config.n_embd)
        #self.register_buffer('tril',torch.tril(torch.ones(config.block_size,config.block_size)).unsqueeze(0).unsqueeze(0))
        # for regularising and making the n_head a batch dimension
        self.n_head=config.n_head
        self.n_embd=config.n_embd
        self.c_proj=nn.Linear(config.n_embd,config.n_embd)
        self.c_proj.NANO_GPT=1

    def forward(self,x):
        B,T,C=x.shape
        qkv=self.c_attn(x)
        q,k,v=qkv.split(self.n_embd,dim=2)
        q=q.view(B,T,self.n_head,C//self.n_head).transpose(1,2)  # (B,N_HEAD,T,C)
        k=k.view(B,T,self.n_head,C//self.n_head).transpose(1,2)  # (B,N_HEAD,T,C)
        v=v.view(B,T,self.n_head,C//self.n_head).transpose(1,2)  # (B,N_HEAD,T,C)
        # wei=q @ k.transpose(2,3)   # (B,N_HEAD,T.T)
        # wei=wei.masked_fill(self.tril[:,:,:T,:T]==0,float('-inf'))
        # wei=F.softmax(wei,dim=-1)
        # y=wei @ v  # (B,N_HEAD,T,C)
        y=F.scaled_dot_product_attention(q,k,v,is_causal=True) # flash attention
        y=y.transpose(1,2).contiguous().view(B,T,C)  # (B,T,C)
        y=self.c_proj(y)
        return y

class MLP(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.c_fc=nn.Linear(config.n_embd,4*config.n_embd)
        self.gelu=nn.GELU(approximate='tanh')
        self.c_proj=nn.Linear(4*config.n_embd,config.n_embd)
        self.c_proj.NANO_GPT=1
        
    def forward(self,x):
        x=self.c_fc(x)
        x=self.gelu(x)
        x=self.c_proj(x)
        return x
        
class Block(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.ln_1=nn.LayerNorm(config.n_embd)
        self.attn=CausalSelfAttention(config)
        self.ln_2=nn.LayerNorm(config.n_embd)
        self.mlp=MLP(config)

    def forward(self,x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x
        

class GPT(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.config=config
        self.transformer= nn.ModuleDict(dict(
            wte =nn.Embedding(config.vocab_size,config.n_embd),
            wpe =nn.Embedding(config.block_size,config.n_embd),
            h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f=nn.LayerNorm(config.n_embd),
        ))
        self.lm_head=nn.Linear(config.n_embd,config.vocab_size,bias=False)
        self.lm_head.weight=self.transformer.wte.weight          # weight sharing
        self.apply(self._init_weights)

    def _init_weights(self,module):
        if isinstance(module,nn.Linear):
            std=0.02
            if hasattr(module,'NANO_GPT'):
                std*=(2*self.config.n_layer)**-0.5
            torch.nn.init.normal_(module.weight,mean=0.0,std=std)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module,nn.Embedding):
            torch.nn.init.normal_(module.weight,mean=0.0,std=0.02)

    def forward(self,idx,targets=None):
        B,T=idx.shape
        assert T<=self.config.block_size
        tok_emb=self.transformer.wte(idx)
        pos=torch.arange(0,T,dtype=torch.long,device=idx.device)
        pos_emb=self.transformer.wpe(pos)
        x=tok_emb + pos_emb
        # forward pass
        for block in self.transformer.h:
            x=block(x)
        x=self.transformer.ln_f(x)
        logits=self.lm_head(x)
        loss=None
        if targets is not None:
            loss=F.cross_entropy(logits.view(-1,self.config.vocab_size),targets.view(-1))
        return logits,loss

# LOAD THE MODEL STATE DICT IN THIS GPT ARCHITECTURE

checkpoint = torch.load('checkpoint_step1050', map_location='cpu')
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in checkpoint.items()}
model=GPT(GPTConfig())
model.load_state_dict(new_state_dict, strict=False)

  checkpoint = torch.load('checkpoint_step1050', map_location='cpu')


<All keys matched successfully>

In [5]:
import tiktoken
num_sequence=1
max_tokens=200
enc=tiktoken.get_encoding('gpt2')
tokens=enc.encode("Question: How does CRISPR-Cas9 technology enable genome editing?")
tokens=torch.tensor(tokens,dtype=torch.long)
tokens=tokens.unsqueeze(0).repeat(num_sequence,1)
# predicting the next token
while tokens.shape[1]<max_tokens:
    with torch.no_grad():
        logits,loss=model(tokens)
        logits=logits[:,-1,:]
        probs=F.softmax(logits,dim=-1) 
        topk_probs,topk_indices=torch.topk(probs,50,dim=-1)  # storing top 50 probs and their indexes for the batch
        idx=torch.multinomial(topk_probs,num_samples=1)        # will return a index b/w 0 to 50
        xcol=torch.gather(topk_indices,-1,idx)  # gathering the original indexes using predicted idx and return a tensor of next indexes
        if xcol[-1]==50256:
            break
        tokens=torch.cat((tokens,xcol),dim=1)
# decoding
for i in range(num_sequence):
    x=tokens[i,:max_tokens].tolist()
    decoded=enc.decode(x)
    decode=decoded.split('?')
    print(decode[0])
    print(decode[1])

Question: How does CRISPR-Cas9 technology enable genome editing


Answer: CRISPR-Cas9 provides the ability to target specific regions of a genome from a patient's immune cells and deliver targeted DNA to cells outside the body. The development of new therapies targeting specific regions of cells leads to the development of novel therapies, which in turn leads to new therapies. In addition, CRISPR-Cas9 creates functional immune cells that can recognize and respond to different types of pathogens, which in turn leads to novel therapeutics.

How does CRISPR-Cas9 help treat patients with autoimmune diseases


### Q&A Pairs for predicted and expected answers 
#### Question: How does the renin-angiotensin-aldosterone system (RAAS) regulate blood pressure, and what are the implications of ACE inhibitors in its modulation?
#### Answer: 
It is largely a system-dependent mechanism. Activation of the renin-angiotensin-aldosterone system in the presence of arterial occlusion results in increases in systolic and diastolic blood pressures. In hypertensive patients, the RAAS activity is increased to a greater extent than in normotensive patients. The increased activity of the system may contribute to the increased vascular resistance of patients with hypertension.In response to vasodilation, the system may decrease, and in some patients can increase dangerously as a result of the increase in plasma endothelial nitric oxide (VENO). The increased endothelial nitric oxide may contribute to increases in the release of proinflammatory cytokines.
#### Expected Answer: 
The renin-angiotensin-aldosterone system (RAAS) plays a pivotal role in regulating blood pressure and fluid balance. When blood pressure falls, the kidneys release renin, which converts angiotensinogen from the liver into angiotensin I. ACE (angiotensin-converting enzyme) in the lungs then converts angiotensin I into angiotensin II, a potent vasoconstrictor that increases blood pressure. Additionally, angiotensin II stimulates the adrenal cortex to secrete aldosterone, which promotes sodium and water retention, further raising blood pressure.ACE inhibitors block the conversion of angiotensin I to angiotensin II, leading to reduced vasoconstriction and aldosterone release. This results in lower blood pressure and decreased stress on the cardiovascular system, making ACE inhibitors a critical treatment option for hypertension, heart failure, and certain kidney diseases.

#### Question: What role do cytochrome P450 enzymes play in drug metabolism?
#### Answer:
CYP2D6 enzymes play a role in drug metabolism. CYP4A4 enzymes have a very small effect in drug metabolism, which means that they are not involved in the generation of active metabolites. Acetylation of CYP4A4 enzyme leads to the activation of PPARG and subsequent drug production, which has been shown to increase the effectiveness of drug therapy.
The mechanisms of CYP2D6/P4501A4 activation are not well understood, and it is currently unclear whether activation of PPARG leads to a more efficient metabolism of drugs.
#### Expected Answer: 
Cytochrome P450 enzymes are a family of heme-containing enzymes that play a crucial role in the metabolism of many drugs. They function primarily by catalyzing oxidation reactions, converting lipophilic compounds into more water-soluble metabolites that can be more easily excreted from the body. Key isoforms, such as CYP3A4, CYP2D6, and CYP2C9, are responsible for metabolizing a large proportion of pharmaceuticals. This metabolism can affect drug clearance, efficacy, and toxicity. Inhibitors or inducers of these enzymes can lead to significant drug interactions, altering the expected plasma concentrations of medications.

#### Question: How does CRISPR-Cas9 technology enable genome editing?
#### Answer: 
CRISPR-Cas9 provides the ability to target specific regions of a genome from a patient's immune cells and deliver targeted DNA to cells outside the body. The development of new therapies targeting specific regions of cells leads to the development of novel therapies, which in turn leads to new therapies. In addition, CRISPR-Cas9 creates functional immune cells that can recognize and respond to different types of pathogens, which in turn leads to novel therapeutics.
#### Expected Answer:
CRISPR-Cas9 is a revolutionary genome editing tool that enables precise modifications to DNA. It uses a short guide RNA (gRNA) to direct the Cas9 nuclease to a specific sequence in the genome. Once there, Cas9 introduces a double-stranded break in the DNA. The cell’s natural repair mechanisms then fix the break, either by non-homologous end joining (NHEJ), which can disrupt gene function, or by homology-directed repair (HDR) if a repair template is provided, allowing for precise changes. This targeted editing allows scientists to knock out genes, correct mutations, or insert new genetic material, facilitating advances in both research and potential therapeutic applications.