Skip to content

Commit

Permalink
Merge pull request #113 from chrovis/fix/fix-nonsense-substitution
Browse files (browse the repository at this point in the history)
  • Loading branch information
federkasten authored Dec 6, 2024
2 parents 813c12b + ceab74f commit f561629
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
22 changes: 15 additions & 7 deletions src/varity/vcf_to_hgvs/protein.clj
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@
:reverse (protein-position pos alt-rg))
(count alt-prot-seq*))])
[pref-only palt-only offset _] (diff-bases pref palt)
npref (count pref)
nprefo (count pref-only)
npalto (count palt-only)
[unit ref-repeat alt-repeat] (repeat-info* ref-prot-seq
Expand All @@ -436,13 +437,13 @@
:unknown

(or (= ref-prot-rest alt-prot-rest)
(and prefer-extension-for-initial-codon-alt?
(not= (first ref-prot-seq) (first alt-prot-seq*))))
(and prefer-extension-for-initial-codon-alt?
(not= (first ref-prot-seq) (first alt-prot-seq*))))
:extension

:else
:frame-shift)
(and (pos? nprefo) (= (first palt-only) \*)) :substitution
(and (pos? npref) (= (first palt-only) \*)) :substitution
(not= ref-prot-rest alt-prot-rest) (cond
(or (and (= (first alt-prot-rest) \*)
(>= nprefo npalto)
Expand Down Expand Up @@ -480,12 +481,19 @@
palt)})))

;; Builds a protein substitution via `mut/protein-substitution` from a protein
;; position (`ppos`) and the reference/alternate amino-acid strings
;; (`pref`/`palt`).
;;
;; NOTE(review): this span is a rendered diff without +/- markers, so it holds
;; both versions of two lines: the argument vector and the `if`/`cond` head.
;; The first of each pair is apparently the removed line and the second the
;; added one -- the 4-argument form matches the call sites in this commit that
;; pass `seq-info`. Confirm against the actual file before copying this text.
(defn- protein-substitution
;; 3-argument signature (apparently the pre-change line).
[ppos pref palt]
;; 4-argument signature: the extra map supplies `ref-prot-seq` and
;; `alt-prot-seq`, which only the `(empty? s-ref)` branch below reads.
[ppos pref palt {:keys [ref-prot-seq alt-prot-seq]}]
;; `diff-bases` is defined elsewhere; its first three results are used here as
;; ref-only part, alt-only part, and an offset added to `ppos` -- presumably
;; the length of the shared prefix; verify against its definition.
(let [[s-ref s-alt offset _] (diff-bases pref palt)]
;; `if` head (apparently the pre-change line), superseded by the `cond` below.
(if (and (empty? s-ref) (empty? s-alt))
(cond
;; Case 1: neither side has a differing part -- report the last residue of
;; `pref` against the last residue of `palt`, at `ppos` itself.
(and (empty? s-ref) (empty? s-alt))
(mut/protein-substitution (mut/->long-amino-acid (last pref))
(coord/protein-coordinate ppos)
(mut/->long-amino-acid (last palt)))
;; Case 2: only the alternate side has a differing part. The position and
;; residues are taken from `get-first-diff-aa-info` (defined elsewhere), called
;; with the two full protein sequences; the returned `ppos`/`pref`/`palt`
;; shadow the function arguments inside this `let`.
;; NOTE(review): presumably this locates the first differing residue, for
;; in-frame insertions that introduce a stop codon (cf. the "p.Y117*" test
;; case added in this commit) -- confirm against the helper's definition.
(empty? s-ref)
(let [{:keys [ppos pref palt]} (get-first-diff-aa-info ppos ref-prot-seq alt-prot-seq)]
(mut/protein-substitution (mut/->long-amino-acid pref)
(coord/protein-coordinate ppos)
(mut/->long-amino-acid palt)))
;; Case 3: otherwise -- report the first differing residue of each side,
;; positioned at `ppos` shifted by `offset`.
:else
(mut/protein-substitution (mut/->long-amino-acid (first s-ref))
(coord/protein-coordinate (+ ppos offset))
(mut/->long-amino-acid (first s-alt))))))
Expand Down Expand Up @@ -555,7 +563,7 @@
(subs (dec (+ ppos offset)))
(string/index-of "*"))]
(if (= alt \*)
(protein-substitution (+ ppos offset) (str ref) (str alt)) ; eventually fs-ter-substitution
(protein-substitution (+ ppos offset) (str ref) (str alt) seq-info) ; eventually fs-ter-substitution
(mut/protein-frame-shift (mut/->long-amino-acid ref)
(coord/protein-coordinate (+ ppos offset))
(mut/->long-amino-acid alt)
Expand Down Expand Up @@ -662,7 +670,7 @@
pvariant)
seq-info (merge seq-info options)]
(case (:type pvariant)
:substitution (protein-substitution ppos pref palt)
:substitution (protein-substitution ppos pref palt seq-info)
:deletion (protein-deletion ppos pref palt)
:duplication (protein-duplication ppos pref palt)
:insertion (protein-insertion ppos pref palt seq-info)
Expand Down
1 change: 1 addition & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@
"p.A35=") ; cf. rs786201577 (synonymous)
"chr6" 33086236 "TA" "T" '("p.*259=") ; cf. rs67523850 (deletion in border of UTR)
"chr7" 152247986 "G" "GT" '("p.Y816*") ; cf. rs150073007 (-, nonsense mutation)
"chr18" 51048782 "C" "CAGT" '("p.Y117*") ; cf. not actual example (+, inframe nonsense mutation)
"chr17" 31159027 "TGC" "T" '("p.A75*") ; not actual example (+, nonsense in del case)
"chr2" 47478341 "TG" "T" '("p.L762*" "p.L696*") ;; rs786204050 (+) frameshift with termination
"chr17" 7676202 "T" "TGTCCCTTAGTCTT" '("p.P58*" "p.P19*") ; cf. not actual example (-, frameshift with termination)
Expand Down

0 comments on commit f561629

Please sign in to comment.