# 🧬 Peptide Physicochemical Descriptor Calculator

This notebook calculates physicochemical descriptors for peptides from a FASTA file.

---

## 🔍 Features:
- Calculates various physicochemical descriptors such as hydrophobicity, charge, and molecular weight.
- Includes QSAR descriptors and amino acid frequencies.
- Supports additional descriptors using the Peptidy library.
- Outputs results in a CSV file for further analysis.

---

## ⚙️ How to use:
1. Install dependencies (first cell).
2. Upload a FASTA file containing peptide sequences (the upload cell uses `google.colab`, so run this notebook in Google Colab).
3. Run the analysis cells to calculate the descriptors, then run the last cell to save and download the results as `peptides_descriptors.csv`.


# 📦 Install dependencies

In [None]:
!pip install peptides peptidy biopython pandas

# 📚 Imports

In [None]:
import pandas as pd
import peptides
import peptidy
from Bio import SeqIO

# 🧮 Descriptor Calculation Function

In [None]:
def calculate_descriptors(peptide_seq):
    """Compute a flat dictionary of descriptors for one peptide sequence.

    The result combines, in this insertion order:

    1. the sequence length and the scalar properties returned by
       ``peptides.Peptide`` (Boman index, hydrophobic moment, hydrophobicity,
       charge, aliphatic index, instability index, isoelectric point and
       molecular weight);
    2. every entry of ``Peptide.descriptors()``, keyed as
       ``<name>_QSAR_descriptors_peptides``;
    3. every entry of ``Peptide.frequencies()``, keyed as
       ``<amino acid>_frequencies_peptides``;
    4. a fixed set of ``peptidy.descriptors`` values, keyed with a
       ``_peptidy`` suffix;
    5. every entry of ``peptidy.descriptors.molecular_formula``, keyed as
       ``molecular_formula_<element>_peptidy``.

    Parameters
    ----------
    peptide_seq : str
        The peptide sequence as a plain string.

    Returns
    -------
    dict
        Mapping of descriptor name to its computed value.
    """
    pep = peptides.Peptide(peptide_seq)
    pty = peptidy.descriptors

    descriptors = {
        'length_peptide': len(peptide_seq),
        'boman_index_peptides': pep.boman(),
        'hydrophobic_moment_peptides': pep.hydrophobic_moment(window=7, angle=100),
        'hydrophobicity_index_peptides': pep.hydrophobicity(scale="KyteDoolittle"),
        'charge_peptides': pep.charge(),
        'aliphatic_index_peptides': pep.aliphatic_index(),
        'instability_index_peptides': pep.instability_index(),
        'isoelectric_point_peptides': pep.isoelectric_point(),
        'molecular_weight_peptides': pep.molecular_weight()
    }

    # QSAR descriptors
    for name, value in pep.descriptors().items():
        descriptors[f'{name}_QSAR_descriptors_peptides'] = value

    # Amino acid frequencies
    for residue, freq in pep.frequencies().items():
        descriptors[f'{residue}_frequencies_peptides'] = freq

    # Peptidy descriptors.
    # peptidy works on the sequence string, so pass `peptide_seq` here rather
    # than the `peptides.Peptide` wrapper object built above.
    descriptors.update({
        'aromaticity_peptidy': pty.aromaticity(peptide_seq),
        'average_n_rotatable_bonds_peptidy': pty.average_n_rotatable_bonds(peptide_seq),
        'charge_density_peptidy': pty.charge_density(peptide_seq),
        'hidrophobic_ratio_peptidy': pty.hydrophobic_aa_ratio(peptide_seq),
        'n_h_acceptors_peptidy': pty.n_h_acceptors(peptide_seq),
        # NOTE: despite its name, this key stores the H-bond *donor* count
        # (n_h_donors). The key is kept unchanged so existing consumers of the
        # output columns keep working.
        'n_h_acceptors_donors_peptidy': pty.n_h_donors(peptide_seq),
        'topological_polar_surface_area_peptidy': pty.topological_polar_surface_area(peptide_seq),
        'x_logp_energy_peptidy': pty.x_logp_energy(peptide_seq)
    })

    # Molecular formula: one entry per element reported by peptidy
    for element, count in pty.molecular_formula(peptide_seq).items():
        descriptors[f'molecular_formula_{element}_peptidy'] = count

    return descriptors

# 📁 Upload FASTA File

In [None]:
from google.colab import files

# Opens the Colab upload dialog; the first key of the returned mapping is used
# as the path of the FASTA file to parse.
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty upload surfaces as a bare StopIteration from next().
    raise ValueError("No file was uploaded; please upload a FASTA file and re-run this cell.")

# Only the first uploaded file is parsed, even if several were selected.
fasta_file = next(iter(uploaded))
sequences = list(SeqIO.parse(fasta_file, 'fasta'))

# 🧪 Run Descriptor Calculations

In [None]:
# Substrings that identify the grouped descriptor families, listed in the order
# their columns should appear after the scalar descriptors.
GROUP_MARKERS = ('frequencies_peptides', 'molecular_formula', 'QSAR_descriptors_peptides')

# Build one record (dict) per sequence and create the DataFrame once at the end.
# pandas is already imported as `pd` in the imports cell, so it is not re-imported here.
results = []
for record in sequences:
    seq_id, seq = record.id, str(record.seq)
    d = calculate_descriptors(seq)
    row = {'ID': seq_id, 'Sequence': seq}
    # Scalar descriptors first: every key that belongs to none of the grouped families.
    row.update({k: v for k, v in d.items() if not any(marker in k for marker in GROUP_MARKERS)})
    # Then each grouped family, so related columns sit next to each other in the table.
    for marker in GROUP_MARKERS:
        row.update({k: v for k, v in d.items() if marker in k})
    results.append(row)

df = pd.DataFrame(results)
df.head()

# 💾 Save Results to CSV

In [None]:
# `files` (google.colab) is already imported by the upload cell, so it is not re-imported here.
# One constant for the output name keeps the written and the downloaded file in sync.
OUTPUT_CSV = 'peptides_descriptors.csv'

# index=False leaves the DataFrame index out of the CSV.
df.to_csv(OUTPUT_CSV, index=False)
files.download(OUTPUT_CSV)