Variant annotation API
This page describes the variant annotation API in detail. If you are interested in annotating variants in Python, remember to visit the "pandas" chapter, where the Python library is introduced. If you want to use the API from any other well-known language, consider generating a client from the OpenAPI definition published at https://api.genebe.net/cloud/gb-api-doc/swagger-ui/index.html . However, the API is also convenient to use directly, without any wrapper.
On this page, examples are presented using curl and, in the case of GET queries, simple browser links.
Example of GET endpoint
The GET endpoint is intended for testing purposes only. If you want to annotate multiple variants, please batch them using the POST endpoint described below. For now, here is an example call to the GET endpoint:
curl -X 'GET' \
'https://api.genebe.net/cloud/api-public/v1/variant?chr=7&pos=140753336&ref=A&alt=T&allGenes=False&genome=hg38&useEnsembl=False' \
-H 'accept: */*'
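See the results in your browser by opening https://api.genebe.net/cloud/api-public/v1/variant?chr=7&pos=140753336&ref=A&alt=T&allGenes=False&genome=hg38&useEnsembl=False :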
{
"variants": [
{
"chr": "7",
"pos": 140753336,
"ref": "A",
"alt": "T",
"effect": "missense_variant",
"transcript": "NM_001374258.1",
"consequences": [
{
"aa_ref": "V",
"aa_alt": "E",
"canonical": false,
"protein_coding": true,
"consequences": [
"missense_variant"
],
"exon_rank": 16,
"exon_count": 20,
"gene_symbol": "BRAF",
"gene_hgnc_id": 1097,
"hgvs_c": "c.1919T>A",
"hgvs_p": "p.Val640Glu",
"transcript": "NM_001374258.1",
"protein_id": "NP_001361187.1",
"aa_start": 640,
"aa_length": 807,
"cds_start": 1919,
"cds_length": 2424,
"cdna_start": 2145,
"cdna_length": 9807,
"mane_plus": "ENST00000644969.2"
},
[...]
],
"gene_symbol": "BRAF",
"gene_hgnc_id": 1097,
"dbsnp": "rs113488022",
"frequency_reference_population": 0.0000013692834,
"hom_count_reference_population": 0,
"allele_count_reference_population": 2,
"gnomad_exomes_af": 0.000001369279971186188,
"gnomad_genomes_af": null,
"gnomad_exomes_ac": 2,
"gnomad_genomes_ac": null,
"gnomad_exomes_homalt": 0,
"gnomad_genomes_homalt": null,
"gnomad_mito_homoplasmic": null,
"gnomad_mito_heteroplasmic": null,
"computational_score_selected": 29.799999237060547,
"computational_prediction_selected": "Pathogenic",
"computational_source_selected": "Cadd",
"splice_score_selected": 0.0,
"splice_prediction_selected": "Benign",
"splice_source_selected": "max_spliceai",
"revel_score": 0.9309999942779541,
"revel_prediction": "Pathogenic",
"alphamissense_score": 0.9926999807357788,
"alphamissense_prediction": "Pathogenic",
"bayesdelnoaf_score": 0.3400000035762787,
"bayesdelnoaf_prediction": "Pathogenic",
"phylop100way_score": 9.236000061035156,
"phylop100way_prediction": "Pathogenic",
"spliceai_max_score": 0.0,
"spliceai_max_prediction": "Benign",
"dbscsnv_ada_score": null,
"dbscsnv_ada_prediction": null,
"apogee2_score": null,
"apogee2_prediction": null,
"mitotip_score": null,
"mitotip_prediction": null,
"acmg_score": 21,
"acmg_classification": "Pathogenic",
"acmg_criteria": "PS1,PM1,PM2,PM5,PP2,PP3_Moderate,PP5_Very_Strong",
"acmg_by_gene": [
],
"clinvar_disease": "Carcinoma of colon,Papillary thyroid carcinoma,Astrocytoma, low-grade, somatic,Nongerminomatous germ cell tumor,Non-small cell lung carcinoma,not provided,Melanoma,Cardio-facio-cutaneous syndrome,Malignant melanoma of skin,Glioblastoma,Squamous cell carcinoma of the head and neck,Colonic neoplasm,Ovarian neoplasm,Brainstem glioma,Lung adenocarcinoma,Multiple myeloma,Neoplasm of the large intestine,Lung carcinoma,Neoplasm of brain,Papillary renal cell carcinoma, sporadic,Gastrointestinal stromal tumor,Neoplasm,Cystic epithelial invagination containing papillae lined by columnar epithelium,Cerebral arteriovenous malformation,Nephroblastoma,Colorectal cancer,Malignant neoplastic disease,Lymphangioma,Vascular malformation,Cardiovascular phenotype",
"clinvar_classification": "Pathogenic/Likely pathogenic",
"phenotype_combined": null,
"pathogenicity_classification_combined": null,
"custom_annotations": null
}
],
"message": null
}
Important notices:
- To make the output more readable some consequences were removed from the listing.
- You may see consequences_ensembl and consequences_refseq in your response. These are deprecated fields and will be removed soon. Please use the consequences field instead.
- In the request I've explicitly asked NOT to add Ensembl consequences (useEnsembl=False).
- The null values indicate no data.
- acmg_by_gene is populated only if you set allGenes to true in the query.
- custom_annotations is populated only if customAnnotations is given. customAnnotations is a comma-delimited list of custom annotations. If used, new columns are added to the output, straight from our internal database. More documentation on available fields will be added soon.
Input
Variant description
Name | Default | Description | Required |
---|---|---|---|
chr | | Chromosome | Required |
pos | | Position of the change, as in VCF file | Required |
ref | | Reference bases, only [ACGT]+ allowed | Required |
alt | | Alternate bases, only [ACGT]+ allowed | Required |
transcript | | Specify the transcript to use for ACMG score; if not specified, usually MANE is selected | Optional |
gene_symbol | | Specify the gene to use for ACMG score; if not specified, usually the most affected gene is selected | Optional |
Parameters
Name | Default | Description | Required |
---|---|---|---|
genome | hg38 | You can use hg38 or hg19 here. If hg19 is used, your queries will be lifted over to hg38 before annotation. | Required |
useRefseq | true | Use transcripts from Refseq for consequences field. | Optional |
useEnsembl | true | Use transcripts from Ensembl for consequences field. | Optional |
omitAcmg | false | Don't add ACMG scores in the output. Set to true if you don't need them. | Optional |
omitCsq | false | Don't add consequences in the output. | Optional |
omitBasic | false | Don't add basic annotations (GnomAD frequencies etc) in the output. | Optional |
omitAdvanced | false | Don't add advanced annotations (ClinVar frequencies etc) in the output. | Optional |
omitNormalization | false | Don't normalize variants. Use only if you are sure they are normalized already. | Optional |
allGenes | false | Compute ACMG score for all genes in this region. | Optional |
customAnnotations | empty | Comma-delimited list of custom annotations to be applied. Consult the documentation for recognized values. | Optional |
annotator | snpeff | Which annotator to use. Please leave empty for now. | Optional |
Output
Field | Description |
---|---|
chr |
Chromosome where the variant is located. If lifting was required, this represents the new location. |
pos |
Position of the variant on the chromosome. If lifting was required, this represents the new location. |
ref |
Reference allele, i.e., the base found in the reference genome. This may differ from your query if lifting was required. |
alt |
Alternate allele, i.e., the base differing from the reference genome. This may differ from your query if lifting was required. |
effect |
Selected effect of the variant (e.g., missense_variant), typically computed for the most relevant transcript, usually the MANE transcript. |
transcript |
Selected transcript ID (e.g., RefSeq or Ensembl). Typically, this is the MANE transcript of the most affected gene. |
consequences |
An array of computed possible consequences. |
consequences.aa_ref |
Reference amino acid before the mutation. |
consequences.aa_alt |
Alternate amino acid after the mutation. |
consequences.canonical |
Indicates whether the transcript is the canonical (main) transcript for the gene (true or false). Not always populated. |
consequences.protein_coding |
Indicates if the transcript is protein-coding (true or false). |
consequences.consequences |
List of predicted biological consequences of the variant on the protein (e.g., missense_variant). Uses Sequence Ontology terms. |
consequences.exon_rank |
The exon number where the variant is located. |
consequences.exon_count |
Total number of exons in the transcript. |
consequences.gene_symbol |
The symbol of the gene where the variant is located (e.g., BRAF). |
consequences.gene_hgnc_id |
HGNC ID for the gene. |
consequences.hgvs_c |
HGVS notation describing the variant at the cDNA level. |
consequences.hgvs_p |
HGVS notation describing the variant at the protein level. |
consequences.transcript |
Transcript ID for this consequence. |
consequences.protein_id |
Protein ID linked to the transcript. |
consequences.aa_start |
Start position of the affected amino acid in the protein sequence. |
consequences.aa_length |
Total length of the protein sequence. |
consequences.cds_start |
Start position of the coding sequence (CDS) affected by the variant. |
consequences.cds_length |
Total length of the coding sequence. |
consequences.cdna_start |
Start position of the variant in the cDNA sequence. |
consequences.cdna_length |
Total length of the cDNA sequence. |
consequences.mane_plus |
MANE Plus Clinical transcript ID (a reference transcript for clinical reporting). |
gene_symbol |
Selected gene symbol where the variant occurs. |
gene_hgnc_id |
Selected HGNC ID for the gene. |
dbsnp |
dbSNP ID for the variant (if present). |
frequency_reference_population |
Aggregated frequency of the variant in various population databases (currently GnomAD Genomes and Exomes). May be null if no reliable data is available (e.g., due to low coverage or filtering). |
hom_count_reference_population |
Total number of homozygous individuals for this variant in population databases (currently GnomAD Genomes and Exomes). |
allele_count_reference_population |
Total allele count for the variant across all individuals in population databases (currently GnomAD Genomes and Exomes). |
gnomad_exomes_af |
Allele frequency in gnomAD exome data. |
gnomad_genomes_af |
Allele frequency in gnomAD genome data (may be null if unavailable). |
gnomad_exomes_ac |
Allele count in gnomAD exome data. |
gnomad_genomes_ac |
Allele count in gnomAD genome data (may be null if unavailable). |
gnomad_exomes_homalt |
Homozygous alternate count in gnomAD exome data. |
gnomad_genomes_homalt |
Homozygous alternate count in gnomAD genome data (may be null if unavailable). |
gnomad_mito_homoplasmic |
Homoplasmic variant count in mitochondrial data from gnomAD (if applicable). |
gnomad_mito_heteroplasmic |
Heteroplasmic variant count in mitochondrial data from gnomAD (if applicable). |
computational_score_selected |
Computational prediction score from the most reliable tool for variant pathogenicity (e.g., CADD, REVEL). |
computational_prediction_selected |
Prediction label based on the computational score (e.g., "Pathogenic", "Benign"). |
computational_source_selected |
Source of the computational prediction (e.g., CADD, REVEL). |
splice_score_selected |
Maximum splice effect prediction score for the variant, predicted by the most reliable tool. |
splice_prediction_selected |
Prediction of whether the variant affects splicing (e.g., "Benign", "Pathogenic"). |
splice_source_selected |
Source of the splicing prediction (e.g., SpliceAI). |
revel_score |
REVEL score for variant pathogenicity prediction. |
revel_prediction |
REVEL prediction label (e.g., "Pathogenic"). |
alphamissense_score |
AlphaMissense score for missense variant pathogenicity. |
alphamissense_prediction |
AlphaMissense prediction label (e.g., "Pathogenic"). |
bayesdelnoaf_score |
BayesDelNoAF score for variant pathogenicity prediction. |
bayesdelnoaf_prediction |
BayesDelNoAF prediction label (e.g., "Pathogenic"). |
phylop100way_score |
PhyloP score for evolutionary conservation at the variant position (higher scores suggest greater conservation). |
phylop100way_prediction |
PhyloP prediction label (e.g., "Pathogenic"). |
spliceai_max_score |
Maximum SpliceAI score for splicing impact prediction. This is the highest value from AL, DL, AG, and DG scores. |
spliceai_max_prediction |
SpliceAI prediction label (e.g., "Benign"). |
dbscsnv_ada_score |
ADA score from dbscSNV for splicing impact prediction (if available). |
dbscsnv_ada_prediction |
ADA prediction label (if available). |
acmg_score |
ACMG (American College of Medical Genetics and Genomics) score for the variant, automatically evaluated based on the GeneBe implementation. |
acmg_classification |
ACMG classification (e.g., "Pathogenic", "Likely Pathogenic"). |
acmg_criteria |
Specific ACMG criteria met by the variant (e.g., PS1, PM1), comma-separated. |
clinvar_disease |
List of diseases associated with the variant in ClinVar. |
clinvar_classification |
ClinVar classification for the variant (e.g., "Pathogenic", "Likely Pathogenic"). |
Moreover, at the top level there is a message field that may contain an important message. It is usually null.
Example of POST endpoint
It is very similar to the GET endpoint, but allows the user to annotate multiple entries at once. You can send up to 1,000 variants in one request, but it is usually better to send them in smaller chunks to avoid timeouts on more computationally intensive requests. Try, for example, batches of 500 variants. For the parameters and the description of the output, please read the GET documentation above.
The body of the POST request is a JSON list of variants:
[
{
"chr": "string",
"pos": 0,
"ref": "string",
"alt": "string",
"transcript": "string",
"gene_symbol": "string"
}
]
where transcript
and gene_symbol
are optional (and rarely used). Take a look at the table in the GET
documentation for more information.
To continue the example of BRAF V600E
from the GET
documentation above, let's create a body and curl
it to the API:
curl -X 'POST' \
'https://api.genebe.net/cloud/api-public/v1/variants?useRefseq=True&useEnsembl=True&omitAcmg=False&omitCsq=False&omitBasic=False&omitAdvanced=False&omitNormalization=False&allGenes=False&genome=hg38' \
-H 'accept: */*' \
-H 'Content-Type: application/json' \
-d '[
{
"chr": "7",
"pos": 140753336,
"ref": "A",
"alt": "T"
}
]'
And again we get:
{
"variants": [
{
"chr": "7",
"pos": 140753336,
"ref": "A",
"alt": "T",
"effect": "missense_variant",
"transcript": "NM_001374258.1",
"consequences": [
{
"aa_ref": "V",
"aa_alt": "E",
"canonical": false,
"protein_coding": true,
"consequences": [
"missense_variant"
],
"exon_rank": 16,
"exon_count": 20,
"gene_symbol": "BRAF",
"gene_hgnc_id": 1097,
"hgvs_c": "c.1919T>A",
"hgvs_p": "p.Val640Glu",
"transcript": "NM_001374258.1",
"protein_id": "NP_001361187.1",
"aa_start": 640,
"aa_length": 807,
"cds_start": 1919,
"cds_length": 2424,
"cdna_start": 2145,
"cdna_length": 9807,
"mane_plus": "ENST00000644969.2"
},
...
],
"gene_symbol": "BRAF",
"gene_hgnc_id": null,
"dbsnp": "113488022",
"frequency_reference_population": 0.0000013692834,
"hom_count_reference_population": 0,
"allele_count_reference_population": 2,
"gnomad_exomes_af": 0.000001369279971186188,
"gnomad_genomes_af": null,
"gnomad_exomes_ac": 2,
"gnomad_genomes_ac": null,
"gnomad_exomes_homalt": 0,
"gnomad_genomes_homalt": null,
"gnomad_mito_homoplasmic": null,
"gnomad_mito_heteroplasmic": null,
"computational_prediction_selected": "Pathogenic",
"splice_prediction_selected": "Benign",
"revel_score": 0.9309999942779541,
"revel_prediction": "Pathogenic",
"alphamissense_score": 0.9926999807357788,
"alphamissense_prediction": "Pathogenic",
"bayesdelnoaf_score": 0.3400000035762787,
"bayesdelnoaf_prediction": "Pathogenic",
"phylop100way_score": 9.236000061035156,
"phylop100way_prediction": "Pathogenic",
"spliceai_max_score": 0,
"spliceai_max_prediction": "Benign",
"dbscsnv_ada_score": null,
"dbscsnv_ada_prediction": null,
"apogee2_score": null,
"apogee2_prediction": null,
"mitotip_score": null,
"mitotip_prediction": null,
"acmg_score": 21,
"acmg_classification": "Pathogenic",
"acmg_criteria": "PS1,PM1,PM2,PM5,PP2,PP3_Moderate,PP5_Very_Strong",
"acmg_by_gene": [],
"clinvar_disease": "Carcinoma of colon,Papillary thyroid carcinoma,Astrocytoma, low-grade, somatic,Nongerminomatous germ cell tumor,Non-small cell lung carcinoma,not provided,Melanoma,Cardio-facio-cutaneous syndrome,Malignant melanoma of skin,Glioblastoma,Squamous cell carcinoma of the head and neck,Colonic neoplasm,Ovarian neoplasm,Brainstem glioma,Lung adenocarcinoma,Multiple myeloma,Neoplasm of the large intestine,Lung carcinoma,Neoplasm of brain,Papillary renal cell carcinoma, sporadic,Gastrointestinal stromal tumor,Neoplasm,Cystic epithelial invagination containing papillae lined by columnar epithelium,Cerebral arteriovenous malformation,Nephroblastoma,Colorectal cancer,Malignant neoplastic disease,Lymphangioma,Vascular malformation,Cardiovascular phenotype",
"clinvar_classification": "Pathogenic/Likely pathogenic",
"phenotype_combined": null,
"pathogenicity_classification_combined": null,
"custom_annotations": null
}
],
"message": null
}