# In [1]:
#### This script determines the region (e.g. 5'UTR, CDS, 3'UTR) of the host gene in which each exonic miRNA lies.
import pandas as pd

class EXONIC_MIR:
    """Wrapper around a table of exonic miRNA records loaded from a CSV file.

    The table is held in ``self.df``. Every ``select*`` method replaces
    ``self.df`` with the filtered/projected result and returns ``None``.
    """

    def __init__(self, csv_file):
        """Read ``csv_file`` with ``pandas.read_csv`` and store it as ``self.df``."""
        self.df = pd.read_csv(csv_file)

    def select_coding(self, columns=['name']):
        """Keep only rows whose ``gene_type`` equals ``"protein-coding gene"``.

        ``columns`` is accepted for backward compatibility and is ignored.
        """
        is_coding = self.df['gene_type'] == "protein-coding gene"
        # Copy the slice so that adding columns later does not trigger
        # pandas' SettingWithCopyWarning.
        self.df = self.df.loc[is_coding].copy()

    def select_noncoding(self, columns=['name']):
        """Keep only rows whose ``gene_type`` equals ``"non-coding gene"``.

        ``columns`` is accepted for backward compatibility and is ignored.
        """
        # The comparison must be ``==``: ``!=`` would keep every row that is
        # *not* a non-coding gene, the opposite of what this method promises.
        is_noncoding = self.df['gene_type'] == "non-coding gene"
        self.df = self.df.loc[is_noncoding].copy()

    def select(self, columns=['name']):
        """Reduce ``self.df`` to the given ``columns`` (default: ``['name']``).

        The default list is only read, never mutated. Raises ``KeyError``
        (from pandas) if a requested column does not exist.
        """
        self.df = self.df[columns].copy()
        
class Regions(EXONIC_MIR):
    """Determine the region of the host gene in which each exonic miRNA lies."""

    def __init__(self, csv_file):
        """Load ``csv_file`` into ``self.df`` via the parent constructor."""
        super().__init__(csv_file)

    def determine_region(self, row):
        """Classify one row by comparing the miRNA exon with the host-gene CDS.

        Reads ``miR_strand``, ``miR_exonStarts``, ``miR_exonEnds``,
        ``hostgene_cdsStart`` and ``hostgene_cdsEnd`` from ``row``.
        NOTE(review): assumes the four coordinates are scalar genomic
        positions with start <= end -- confirm against the input table.

        Returns one of ``"5'UTR"``, ``"3'UTR"``, ``'CDS'``,
        ``"Span from CDS and 5'UTR"``, ``"Span from CDS and 3'UTR"`` or
        ``'undefined'`` (strand is neither "+" nor "-", or a coordinate is
        missing).
        """
        strand = row['miR_strand']
        # Positions below cdsStart are the 5'UTR on the "+" strand but the
        # 3'UTR on the "-" strand (and vice versa above cdsEnd), so the two
        # strands share one genomic classification with the labels swapped.
        if strand == "+":
            low_side_utr, high_side_utr = "5'UTR", "3'UTR"
        elif strand == "-":
            low_side_utr, high_side_utr = "3'UTR", "5'UTR"
        else:
            return 'undefined'

        exon_start = row['miR_exonStarts']
        exon_end = row['miR_exonEnds']
        cds_start = row['hostgene_cdsStart']
        cds_end = row['hostgene_cdsEnd']

        # Every comparison with NaN is False, which would silently fall
        # through to an arbitrary label; report such rows as unclassifiable.
        if any(pd.isna(v) for v in (exon_start, exon_end, cds_start, cds_end)):
            return 'undefined'

        # Exon entirely outside the CDS.
        if exon_end < cds_start:
            return low_side_utr
        if exon_start > cds_end:
            return high_side_utr

        # Exon entirely inside the CDS; touching a CDS boundary still counts.
        if exon_start >= cds_start and exon_end <= cds_end:
            return 'CDS'

        crosses_low = exon_start < cds_start
        crosses_high = exon_end > cds_end
        if crosses_low and crosses_high:
            # The exon covers the whole CDS and reaches into both UTRs; keep
            # reporting the 3'UTR span, as this method did historically.
            return 'Span from CDS and 3\'UTR'
        if crosses_low:
            return 'Span from CDS and ' + low_side_utr
        return 'Span from CDS and ' + high_side_utr

    def add_region(self):
        """Add a 'region' column holding ``determine_region`` for every row."""
        self.df['region'] = self.df.apply(self.determine_region, axis=1)
        
#     def finalize(self):
#         self.df.columns=self.df.columns.str.title()  