# Manufacture germline input
For testing and illustrative purposes we do not want to use any actual germline data. Instead, we make a list of some known common variants and some that we anticipate will be highlighted by MOAlmanac. While most germline samples should have on the order of tens of thousands of variants, we manually manufacture a file of just 10 variants.

Some simplifications:
- All alt and ref counts are set to 50 and 50
- Most variants are SNPs; for simplicity the end position is set equal to the start position for every record, including the two deletions

In [1]:
import pandas as pd

# Header of the output MAF, in the order the columns are written.
columns = [
    'Hugo_Symbol',
    'NCBI_Build',
    'Chromosome',
    'Start_position',
    'End_position',
    'Variant_Classification',
    'Reference_Allele',
    'Tumor_Seq_Allele1',
    'Tumor_Seq_Allele2',
    'Tumor_Sample_Barcode',
    'Matched_Norm_Sample_Barcode',
    'Annotation_Transcript',
    'Protein_Change',
    't_alt_count',
    't_ref_count',
]

# Short aliases for the headers; the unpacking order must mirror `columns`.
(gene, ncbi, chrom, start, end, v_class, ref, obs1, obs2,
 tumor, normal, transcript, protein, alt_count, ref_count) = columns

# Variant_Classification labels used by the manufactured variants.
missense = 'Missense_Mutation'
nonsense = 'Nonsense_Mutation'
deletion = 'In_Frame_Del'
frameshift = 'Frame_Shift_Del'

# Values shared by every row of the manufactured file.
build_value = 37
tumor_value = '__UNKNOWN__'
normal_value = 'example_normal_profile'
transcript_value = ''
alt_count_value = 50
ref_count_value = 50

# Path of the tab-separated file that gets written.
outname = 'example_patient.capture.germline.maf'

In [2]:
# Fields spelled out per variant; the remaining MAF columns are filled in below.
variant_fields = (gene, chrom, start, ref, obs1, obs2, v_class, protein)
variant_rows = [
    ('PRDM2', 1, 14105122, 'GAA', 'GAA', '-', deletion, 'p.E282del'),
    ('ALK', 2, 29416572, 'T', 'T', 'C', missense, 'p.I1461V'),
    ('BIRC6', 2, 32667182, 'G', 'G', 'C', missense, 'p.V1332L'),
    ('MSH6', 2, 48010488, 'G', 'G', 'A', missense, 'p.G39E'),
    ('FGFR4', 5, 176520243, 'G', 'G', 'A', missense, 'p.G388R'),
    ('BRAF', 7, 140476881, 'G', 'G', 'A', nonsense, 'p.R509*'),
    ('TP53', 17, 7579472, 'G', 'G', 'C', missense, 'p.P72R'),
    ('BRCA2', 13, 32906729, 'A', 'A', 'C', missense, 'p.N372H'),
    ('BRCA2', 13, 32914438, 'T', 'T', '-', frameshift, 'p.S1982fs'),
    ('BCR', 22, 23627369, 'A', 'A', 'G', missense, 'p.N796S'),
]
variants = [dict(zip(variant_fields, row)) for row in variant_rows]

# Lay the records out in MAF column order; columns absent from the records
# start out as empty strings.
df = pd.DataFrame.from_records(variants).reindex(columns=columns, fill_value='')

# Columns that carry the same value on every row.
constant_columns = {
    ncbi: build_value,
    tumor: tumor_value,
    normal: normal_value,
    transcript: transcript_value,
    alt_count: alt_count_value,
    ref_count: ref_count_value,
}
for name, constant in constant_columns.items():
    df[name] = constant

# The end position is copied from the start position for every record.
df[end] = df[start]

# Write the table tab-separated, without the index column.
df.to_csv(outname, sep='\t', index=False)

# Bare expression: the notebook renders the resulting table.
df

Unnamed: 0,Hugo_Symbol,NCBI_Build,Chromosome,Start_position,End_position,Variant_Classification,Reference_Allele,Tumor_Seq_Allele1,Tumor_Seq_Allele2,Tumor_Sample_Barcode,Matched_Norm_Sample_Barcode,Annotation_Transcript,Protein_Change,t_alt_count,t_ref_count
0,PRDM2,37,1,14105122,14105122,In_Frame_Del,GAA,GAA,-,__UNKNOWN__,example_normal_profile,,p.E282del,50,50
1,ALK,37,2,29416572,29416572,Missense_Mutation,T,T,C,__UNKNOWN__,example_normal_profile,,p.I1461V,50,50
2,BIRC6,37,2,32667182,32667182,Missense_Mutation,G,G,C,__UNKNOWN__,example_normal_profile,,p.V1332L,50,50
3,MSH6,37,2,48010488,48010488,Missense_Mutation,G,G,A,__UNKNOWN__,example_normal_profile,,p.G39E,50,50
4,FGFR4,37,5,176520243,176520243,Missense_Mutation,G,G,A,__UNKNOWN__,example_normal_profile,,p.G388R,50,50
5,BRAF,37,7,140476881,140476881,Nonsense_Mutation,G,G,A,__UNKNOWN__,example_normal_profile,,p.R509*,50,50
6,TP53,37,17,7579472,7579472,Missense_Mutation,G,G,C,__UNKNOWN__,example_normal_profile,,p.P72R,50,50
7,BRCA2,37,13,32906729,32906729,Missense_Mutation,A,A,C,__UNKNOWN__,example_normal_profile,,p.N372H,50,50
8,BRCA2,37,13,32914438,32914438,Frame_Shift_Del,T,T,-,__UNKNOWN__,example_normal_profile,,p.S1982fs,50,50
9,BCR,37,22,23627369,23627369,Missense_Mutation,A,A,G,__UNKNOWN__,example_normal_profile,,p.N796S,50,50
