### Screening process for unique lncRNAs
Note: We considered a lncRNA unique to a sample only when it showed a consistent overexpression pattern in that sample across all comparisons against the other samples. The files needed to run the notebooks can be found in the notebooks_files folder.

[![Open in Google Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yahelGB/M.Sc_thesis/blob/main/notebooks/exclusive_lncRNAs.ipynb)

In [10]:
# Import libraries
import pandas as pd

In [None]:
# Read the two inputs. Both files are parsed without a header row, so pandas
# labels their columns 0, 1, 2, ...
# 1) The tab-separated combined table derived from the DEGs analysis.
df = pd.read_csv(
    "/path/to//combined_intersect.txt",
    sep="\t",
    header=None,
)
# 2) The lncRNAs of interest; only the first column is kept, as a Series.
lncrna_ids = pd.read_csv("/path/to/lncRNA_IDs.txt", header=None).loc[:, 0]

In [12]:
# Keep only the rows whose first column is one of the lncRNAs of interest.
# A membership test is used instead of an inner merge so that an ID listed
# more than once in the ID file cannot duplicate rows, which would inflate the
# per-ID pattern lists built below. The index is reset so the result has a
# fresh 0..n-1 index.
df_filtered = df[df[0].isin(lncrna_ids)].reset_index(drop=True)

# Map every lncRNA ID (first column) to the list of values found in the last
# column of its rows (the patterns).
id_to_patterns = (
    df_filtered.groupby(df_filtered.columns[0])[df_filtered.columns[-1]]
    .apply(list)
    .to_dict()
)

In [13]:
# Reference pattern labels for each sample: one set of three labels per
# sample, each label naming that sample as the "Up" side of a comparison with
# one of the other three samples.
gills_patterns = {
    'Down_hemocytes_Up_gills',
    'Down_hep_Up_gills',
    'Down_muscle_Up_gills',
}
hemocytes_patterns = {
    'Up_hemocytes_Down_gills',
    'Down_hep_Up_hemocytes',
    'Down_muscle_Up_hemocytes',
}
hep_patterns = {
    'Up_hep_Down_gills',
    'Up_hep_Down_hemocytes',
    'Down_muscle_Up_hep',
}
muscle_patterns = {
    'Up_muscle_Down_gills',
    'Up_muscle_Down_hemocytes',
    'Up_muscle_Down_hep',
}

In [14]:
# Pair every sample with its reference pattern set so the exclusivity rule is
# written once instead of being repeated in four near-identical branches.
# Insertion order matters: it is the order in which samples are tested.
sample_patterns = {
    "gills": gills_patterns,
    "hemocytes": hemocytes_patterns,
    "hep": hep_patterns,
    "muscle": muscle_patterns,
}

# For each sample, the union of the patterns that belong to the other samples.
# Computed once here because it does not depend on the ID being evaluated.
foreign_patterns = {
    sample: set().union(
        *(other_set for other, other_set in sample_patterns.items() if other != sample)
    )
    for sample in sample_patterns
}

# IDs whose patterns belong exclusively to one sample, keyed by sample name.
exclusive_patterns = {sample: [] for sample in sample_patterns}

# Evaluate every ID. The loop variable is deliberately not called `id`, which
# would shadow the builtin for the rest of the notebook session.
for lncrna_id, patterns in id_to_patterns.items():
    # Work on a set so duplicates and ordering do not affect the comparison.
    observed = set(patterns)

    for sample, own_patterns in sample_patterns.items():
        # Exclusive to `sample`: every observed pattern is one of its own
        # patterns and none of them belongs to another sample.
        if observed <= own_patterns and observed.isdisjoint(foreign_patterns[sample]):
            exclusive_patterns[sample].append(lncrna_id)
            break  # an ID is assigned to at most one sample (first match wins)

In [15]:
# For every sample, keep only the exclusive IDs whose pattern list has exactly
# three entries.
# NOTE(review): the length is taken on the list stored in id_to_patterns, so
# repeated entries count separately — confirm the input has no duplicate rows.
exclusive_ids_with_3_patterns = {
    tissue: [
        lncrna
        for lncrna in candidates
        if len(id_to_patterns[lncrna]) == 3
    ]
    for tissue, candidates in exclusive_patterns.items()
}

In [None]:
# Write one text file per sample, in the current working directory, with one
# ID per line. An existing file with the same name is overwritten.
for sample, ids in exclusive_ids_with_3_patterns.items():
    # Explicit encoding keeps the output independent of the platform locale.
    with open(
        f"{sample}_exclusive_ids_with_3_patterns.txt", "w", encoding="utf-8"
    ) as handle:
        # The loop variable is not called `id`, to avoid shadowing the builtin
        # at notebook scope.
        handle.writelines(f"{lncrna_id}\n" for lncrna_id in ids)

print("Files generated for IDs with exactly 3 expression patterns")