# Interactive Variant Annotation

The following query retrieves variants from [DeepVariant-called Platinum Genomes](http://googlegenomics.readthedocs.io/en/latest/use_cases/discover_public_data/platinum_genomes_deepvariant.html) and interactively JOINs them with [ClinVar](http://googlegenomics.readthedocs.io/en/latest/use_cases/discover_public_data/clinvar_annotations.html).  

To run this on your own table of variants, change the table name and the `call_set_name` value in the `sample_variants` subquery below.

For an ongoing investigation, you may wish to repeat this query each time a new version of ClinVar is released and [loaded into BigQuery](https://github.com/verilylifesciences/variant-annotation/tree/master/curation/tables/README.md); to do so, change the table name in the `rare_pathenogenic_variants` subquery.

See also similar examples for GRCh37 in https://github.com/googlegenomics/bigquery-examples/tree/master/platinumGenomes 

In [1]:
%%bq query
#standardSQL
  --
  -- Return the variants called for sample NA12878 that also appear in ClinVar
  -- and are:
  --   * annotated as 'likely pathogenic' (4), 'pathogenic' (5) or 'other' (255)
  --   * not flagged (G5) as having >5% minor allele frequency in 1+ populations
  --
  -- The two sub queries are matched on (chr, start, reference_bases, alt).
  --
WITH
  --
  -- One row per (variant, alternate allele) that the sample actually carries.
  --
  sample_variants AS (
    SELECT
      -- Strip the leading 'chr' so reference names line up with ClinVar,
      -- which does not use the prefix. Names without the prefix become NULL
      -- and therefore never join.
      REGEXP_EXTRACT(v.reference_name, r'^chr(.+)$') AS chr,
      v.start,
      v.reference_bases,
      alt,
      call.call_set_name
    FROM
      `genomics-public-data.platinum_genomes_deepvariant.single_sample_genome_calls` AS v
    CROSS JOIN
      UNNEST(v.call) AS call
    CROSS JOIN
      UNNEST(v.alternate_bases) AS alt WITH OFFSET AS alt_offset
    WHERE
      call.call_set_name = 'NA12878_ERR194147'
      -- Require that at least one genotype matches this alternate.
      -- Genotype 0 is the reference, so alternate N is array offset N-1.
      AND EXISTS (
        SELECT 1
        FROM UNNEST(call.genotype) AS gt
        WHERE gt = alt_offset + 1
      )
  ),
  --
  -- One row per (ClinVar record, alternate allele) that passes the clinical
  -- significance and population frequency filters.
  --
  rare_pathenogenic_variants AS (
    SELECT
      -- ClinVar does not use the 'chr' prefix for reference names.
      v.reference_name AS chr,
      v.start,
      v.reference_bases,
      alt,
      v.CLNHGVS,
      v.CLNALLE,
      v.CLNSRC,
      v.CLNORIGIN,
      v.CLNSRCID,
      v.CLNSIG,
      v.CLNDSDB,
      v.CLNDSDBID,
      v.CLNDBN,
      v.CLNREVSTAT,
      v.CLNACC
    FROM
      `bigquery-public-data.human_variant_annotation.ncbi_clinvar_hg38_20170705` AS v
    CROSS JOIN
      UNNEST(v.alternate_bases) AS alt
    WHERE
      -- Variant Clinical Significance, 0 - Uncertain significance, 1 - not provided,
      -- 2 - Benign, 3 - Likely benign, 4 - Likely pathogenic, 5 - Pathogenic,
      -- 6 - drug response, 7 - histocompatibility, 255 - other
      --
      -- Each CLNSIG element is a '|'-separated list of codes (e.g. '255|5|1'),
      -- so match 4, 5 or 255 only as a whole token rather than as a substring.
      --
      -- NOTE(review): the CLN* arrays appear to be parallel to CLNALLE, so for
      -- multi-allelic ClinVar records this keeps the row when ANY entry
      -- qualifies, not necessarily the entry for the joined `alt`. Confirm
      -- whether per-allele filtering via CLNALLE is wanted.
      EXISTS (
        SELECT 1
        FROM UNNEST(v.CLNSIG) AS sig
        WHERE REGEXP_CONTAINS(sig, r'(^|\|)(4|5|255)(\||$)')
      )
      -- G5 is TRUE if >5% minor allele frequency in 1+ populations;
      -- keep only records where the flag is absent.
      AND v.G5 IS NULL
  )
  --
  -- Both sub queries enumerate their columns explicitly, so `*` here yields
  -- the four join keys once, then call_set_name, then the ClinVar fields.
  --
SELECT
  *
FROM
  sample_variants
INNER JOIN
  rare_pathenogenic_variants
USING
  (chr, start, reference_bases, alt)
ORDER BY
  chr,
  start,
  reference_bases,
  alt

chr,start,reference_bases,alt,call_set_name,CLNHGVS,CLNALLE,CLNSRC,CLNORIGIN,CLNSRCID,CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC
1,94047008,C,T,NA12878_ERR194147,['NC_000001.11:g.94047009C>T'],[2],['HGMD|OMIM_Allelic_Variant|UniProtKB_(protein)'],[1],['CM024629|601691.0035|P78363#VAR_008428'],['255|5|1|2|3|3|3|3'],['MedGen:OMIM|MedGen|MedGen|Human_Phenotype_Ontology:MedGen|MedGen|MedGen|MedGen'],['C1855465:248200|CN221809|CN169374|HP:0000608:C0024437|CN239309|CN239466|CN239312'],['MACULAR_DEGENERATION\\x2c_AGE-RELATED\\x2c_2\\x2c_SUSCEPTIBILITY_TO|Stargardt_disease_1|not_provided|not_specified|Macular_degeneration|Cone-Rod_Dystrophy\\x2c_Recessive|Retinitis_Pigmentosa\\x2c_Recessive|Stargardt_Disease\\x2c_Recessive'],['no_criteria|no_criteria|no_assertion|mult|single|single|single|single'],['RCV000008374.4|RCV000008375.4|RCV000085512.3|RCV000152706.4|RCV000294335.1|RCV000349295.1|RCV000392936.1|RCV000399411.1']
1,201361939,A,G,NA12878_ERR194147,['NC_000001.11:g.201361940A>G'],[1],['.'],[1],['.'],['255|0|0|0|0|0|0|0'],['MedGen|MedGen:OMIM|MedGen:OMIM|MedGen:OMIM|Human_Phenotype_Ontology:MedGen|Human_Phenotype_Ontology:MedGen:Orphanet|MedGen|MedGen:Orphanet:SNOMED_CT'],['CN169374|C1861864:115195|C2676271:612422|C1832243:601494|HP:0011664:C4021133|HP:0001639:C0007194:ORPHA217569|CN239310|C0340429:ORPHA217635:233878008'],['not_specified|Familial_hypertrophic_cardiomyopathy_2|Familial_restrictive_cardiomyopathy_3|Left_ventricular_noncompaction_6|Left_ventricular_noncompaction_cardiomyopathy|Hypertrophic_cardiomyopathy|Dilated_Cardiomyopathy\\x2c_Dominant|Familial_restrictive_cardiomyopathy'],['conf|single|single|single|single|single|single|single'],['RCV000168973.2|RCV000230425.2|RCV000230425.2|RCV000230425.2|RCV000283636.1|RCV000323526.1|RCV000338870.1|RCV000378147.1']
1,212897348,T,TACAC,NA12878_ERR194147,"['NC_000001.11:g.212897351_212897370dup20', 'NC_000001.11:g.212897365_212897370dupCACACA', 'NC_000001.11:g.212897367_212897370dupCACA', 'NC_000001.11:g.212897369_212897370dupCA']","[4, -1, -1, -1]","['.', '.', '.', '.']","[1, 1, 1, 1]","['.', '.', '.', '.']","['0', '3', '255', '0']","['MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet']","['C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628']","['Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa']","['single', 'single', 'conf', 'single']","['RCV000355025.1', 'RCV000297866.1', 'RCV000262602.1', 'RCV000351203.1']"
1,215671030,C,T,NA12878_ERR194147,['NC_000001.11:g.215671031C>T'],[1],['UniProtKB_(protein)'],[1],['O75445#VAR_061351'],['255'],['MedGen'],['CN169374'],['not_specified'],['conf'],['RCV000041750.4']
1,237589773,AT,A,NA12878_ERR194147,['NC_000001.11:g.237589784delT'],[1],['.'],[1],['.'],['2|255'],['MedGen:Orphanet:SNOMED_CT|MedGen'],['C0878544:ORPHA167848:85898001|CN169374'],['Cardiomyopathy|not_specified'],['no_criteria|conf'],['RCV000030420.1|RCV000036734.8']
10,26088401,C,T,NA12878_ERR194147,['NC_000010.11:g.26088402C>T'],[1],['.'],[1],['.'],['255|0'],['MedGen|MedGen'],['CN169374|CN239439'],['not_specified|Nonsyndromic_Hearing_Loss\\x2c_Recessive'],['conf|single'],['RCV000039026.3|RCV000381484.1']
11,6392135,C,T,NA12878_ERR194147,['NC_000011.10:g.6392136C>T'],[1],['.'],[1],['.'],['255|0'],['MedGen|MedGen:SNOMED_CT'],['CN169374|C0028064:58459009'],['not_specified|Sphingomyelin/cholesterol_lipidosis'],['conf|single'],['RCV000079188.5|RCV000394529.1']
11,6617153,C,T,NA12878_ERR194147,"['NC_000011.10:g.6617154C>A', 'NC_000011.10:g.6617154C>G', 'NC_000011.10:g.6617154C>T']","[1, 2, 3]","['.', 'OMIM_Allelic_Variant', '.']","[1, 1, 1]","['.', '607998.0004', '.']","['5', '5|5|5|5|5', '5']","['MedGen', 'MedGen:OMIM:Orphanet|MedGen:OMIM:Orphanet|MedGen|MeSH:MedGen|MedGen:OMIM:Orphanet:SNOMED_CT', 'MedGen']","['CN221809', 'C1876161:204500:ORPHA228349|C1836474:609270:ORPHA284324|CN221809|D030342:C0950123|C0027877:214200:ORPHA216:42012007', 'CN221809']","['not_provided', 'Ceroid_lipofuscinosis_neuronal_2|Childhood-onset_autosomal_recessive_slowly_progressive_spinocerebellar_ataxia|not_provided|Inborn_genetic_diseases|Neuronal_ceroid_lipofuscinosis', 'not_provided']","['single', 'mult|single|mult|single|single', 'single']","['RCV000391641.1', 'RCV000002763.11|RCV000074608.7|RCV000189765.4|RCV000210689.1|RCV000228119.2', 'RCV000189764.3']"
11,47448802,C,T,NA12878_ERR194147,['NC_000011.10:g.47448803C>T'],[1],['.'],[1],['.'],['255'],['MedGen'],['CN169374'],['not_specified'],['conf'],['RCV000246056.2']
11,66510682,T,C,NA12878_ERR194147,['NC_000011.10:g.66510683T>C'],[1],['.'],[1],['.'],['255|2'],['MedGen|MedGen:OMIM:Orphanet:SNOMED_CT'],['CN169374|C0752166:209900:ORPHA110:5619004'],['not_specified|Bardet-Biedl_syndrome'],['conf|single'],['RCV000173529.2|RCV000226235.1']
