-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.R
84 lines (64 loc) · 2.48 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Shared code and library imports for all scripts
# Aleix Lafita - June 2020
library(dplyr)
library(bio3d)
library(edmcr)
######################## Parameters #########################
options(stringsAsFactors = F)
backbone = c("N", "CA", "CB", "C", "O")
atom.options = c("calpha", "backbone")
########################### Bounds ############################
# Parse the protein distance bounds constraints
protein.bounds = read.csv(
"protein_bounds.tsv",
sep = "\t"
) %>% mutate(
# Distance bounds were limited to 20A
d.u = ifelse(d.u < 20, d.u, Inf)
)
########################### Function ############################
# Apply protein bounds to unknown entries in the matrix
apply_bounds = function(distmat.df) {
# Calculate the residue number differences
distmat.df = distmat.df %>% mutate(
resno.diff = ifelse(
chain.x == chain.y,
resno.y - resno.x,
# Residues from two different chains, consider them as 1000 residues apart
sign(eleno.y - eleno.x)*1000),
# Take care of the last residue bin being representative of any difference higher
resno.diff.bkbn = ifelse(
abs(resno.diff) > max(protein.bounds$resno.diff),
sign(resno.diff)*max(protein.bounds$resno.diff),
resno.diff)
)
# Apply the upper and lower bounds
distmat.df.con = merge(
distmat.df,
protein.bounds,
by.x = c("elety.x", "elety.y", "resno.diff.bkbn"),
by.y = c("elety.x", "elety.y", "resno.diff"),
all.x = T
) %>% mutate(
d.lower = ifelse(is.na(d.na), d.l, d.na),
d.upper = ifelse(is.na(d.na), d.u, d.na),
# Set chemical bonds between atoms to fixed
d.na = ifelse(is.na(d.na) & d.avg < 2, d.avg, d.na),
# Set distances with very little variation to fixed too - like peptide bond plane
d.na = ifelse(is.na(d.na) & (d.u - d.l) < 0.2 & d.avg < 5, d.avg, d.na)
)
# Make sure it is properly sorted after the merge
distmat.df.con = distmat.df.con[order(distmat.df.con$eleno.x, distmat.df.con$eleno.y),]
return(distmat.df.con)
}
# Check if the chirality of the protein structure is maintained
same_chirality = function(data.pdb, data.pdb.mds) {
# Calculate torsion angles of old and new structures
torsion.orig = torsion.pdb(data.pdb)
torsion.new = torsion.pdb(data.pdb.mds)
# Extract the CA dihedral angles
alpha.orig = mean(torsion.orig$alpha, na.rm=T)
alpha.new = mean(torsion.new$alpha, na.rm=T)
# Return if the average sign of the angles agrees
return(sign(alpha.orig) == sign(alpha.new))
}