forked from trvrb/antigen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBiology.java
460 lines (390 loc) · 18.2 KB
/
Biology.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
import java.awt.*;
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
/**
* A class that allows antigen to model a virus's genetic sequence,
* and how changes in sequence give rise to changes in both:
* i) antigenic phenotype
* ii) viral fitness in the absence of immune selection (e.g., quantified by DMS data)
*
* @author Thien Tran
*/
public class Biology {
public enum AlphabetType {
NUCLEOTIDES("ACGT"),
AMINO_ACIDS("ACDEFGHIKLMNPQRSTWYV");
private final String validCharacters;
AlphabetType(String validCharacters) {
this. validCharacters = validCharacters;
}
public String getValidCharacters() {
return validCharacters;
}
}
public enum DMSData {
DMS_DATA();
public double[][] aminoAcidPreference = null;
private final double[] equalPreferences = new double[]{0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05};;
DMSData() {
try {
// Only initialize aminoAcidPreference if DMS data is provided.
if (Parameters.DMSFile != null) {
int numberOfAminoAcidSites = Parameters.startingSequence.length() / 3;
int numberOfAminoAcids = Biology.AlphabetType.AMINO_ACIDS.getValidCharacters().length();
aminoAcidPreference = new double[numberOfAminoAcidSites][numberOfAminoAcids];
Scanner dms = new Scanner(new File(Parameters.DMSFile));
dms.nextLine(); // read header
for (int i = 0; i < numberOfAminoAcidSites; i++) {
Scanner currentSite = new Scanner(dms.nextLine());
currentSite.useDelimiter(",");
currentSite.next(); // ignore site number
for (int j = 0; j < numberOfAminoAcids; j++) {
double probability = currentSite.nextDouble();
aminoAcidPreference[i][j] = probability;
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
public double[] getAminoAcidPreference(int site) {
// If DMS data is not provided, then return double[] of equal amino acid preferences.
if (aminoAcidPreference == null) {
return equalPreferences;
}
return aminoAcidPreference[site];
}
}
/**
* Transition/transversion ratio used to determine what a given wild type nucleotide should be mutated to.
*/
public enum K80DNAEvolutionModel {
MUTATION();
public final Map<Character, double[]> transitionTranversionProbability;
/**
* Constructor that creates an array of "boundaries" for each nucleotide,
* where each possible mutation is weighted by a pre-defined transition/transversion ratio.
*/
K80DNAEvolutionModel() {
this.transitionTranversionProbability = new HashMap<>() {{
double transition = 0.5 / (Parameters.transitionTransversionRatio + 1.0);
double transversion = Parameters.transitionTransversionRatio / (Parameters.transitionTransversionRatio + 1.0);
// key: nucleotide
// value: int[] that gives the probability of being < to the index which corresponds to [A, C, G, T]
// Example:
// Parameters.transitionTransversionRatio: 5.0
// A: {0, 0.5/6, 5.5/6.0, 1.0}
// C: {0.5/6.0, 0.5/6.0, 1.0/6.0, 1.0}
// G: {5.0/6.0, 5.5/6.0, 5.5/6.0, 1.0}
// T: {0.5/6.0, 5.5/6.0, 1.0, 1.0}
put('A', new double[]{0, transition, transition + transversion, 1.0});
put('C', new double[]{transition, transition, transition + transition, 1.0});
put('G', new double[]{transversion, transversion + transition, transversion + transition, 1.0});
put('T', new double[]{transition, transition + transversion, 1.0, 1.0});
}};
}
/**
* Returns a mutant nucleotide using the transition/transversion ratio.
*
* @param originalNucleotideToMutate the original nucleotide to mutate
* @return the new nucleotide to change the original nucleotide to
*/
public char sampleNucleotide(char originalNucleotideToMutate) {
double[] transitionTransversion = this.transitionTranversionProbability.get(originalNucleotideToMutate);
// Choose a random number between 0-1
double randomNum = Math.random();
int indexAlphabet = 0;
for (int i = 0; i < 4; i++) {
if (randomNum < transitionTransversion[i]) {
indexAlphabet = i;
break;
}
}
return Biology.AlphabetType.NUCLEOTIDES.getValidCharacters().charAt(indexAlphabet);
}
}
/**
* DNA codon table
*/
public enum CodonMap {
CODONS();
public final Map<String, String> codonMap;
CodonMap() {
this.codonMap = new HashMap<>() {{
put("TTT", "F");
put("TTC", "F");
put("TTA", "L");
put("TTG", "L");
put("CTT", "L");
put("CTC", "L");
put("CTA", "L");
put("CTG", "L");
put("ATT", "I");
put("ATC", "I");
put("ATA", "I");
put("ATG", "M");
put("GTT", "V");
put("GTC", "V");
put("GTA", "V");
put("GTG", "V");
put("TCT", "S");
put("TCC", "S");
put("TCA", "S");
put("TCG", "S");
put("CCT", "P");
put("CCC", "P");
put("CCA", "P");
put("CCG", "P");
put("ACT", "T");
put("ACC", "T");
put("ACA", "T");
put("ACG", "T");
put("GCT", "A");
put("GCC", "A");
put("GCA", "A");
put("GCG", "A");
put("TAT", "Y");
put("TAC", "Y");
put("TAA", "STOP");
put("TAG", "STOP");
put("CAT", "H");
put("CAC", "H");
put("CAA", "Q");
put("CAG", "Q");
put("AAT", "N");
put("AAC", "N");
put("AAA", "K");
put("AAG", "K");
put("GAT", "D");
put("GAC", "D");
put("GAA", "E");
put("GAG", "E");
put("TGT", "C");
put("TGC", "C");
put("TGA", "STOP");
put("TGG", "W");
put("CGT", "R");
put("CGC", "R");
put("CGA", "R");
put("CGG", "R");
put("AGT", "S");
put("AGC", "S");
put("AGA", "R");
put("AGG", "R");
put("GGT", "G");
put("GGC", "G");
put("GGA", "G");
put("GGG", "G");
}};
}
/**
* Translates and returns the amino acid specified by the given codon
*
* @param codon the sequence of three nucleotides to translate
* @return the corresponding amino acid
*/
public String getAminoAcid(String codon) {
return this.codonMap.get(codon);
}
}
/**
* Data type used to determine how changes in sequence lead to changes in to a virus's location in antigenic space.
* Specifically, for each site, a vector for each possible amino-acid mutations is pre-computed.
* Vector angles are drawn from a uniform distribution, while vector magnitudes are drawn from a gamma distribution.
* Gamma distributions differ for epitope and non-epitope sites.
*
* Each site is drawn from the gamma distribution
*/
public enum SiteMutationVectors {
VECTORS();
private final int totalSites = Parameters.startingSequence.length() / 3;
public final Map<Integer, MutationVector[][]> siteMutationVectors;
public final HashSet<Integer> epitopeSites;
public final HashSet<Integer> epitopeSitesLow;
public final HashSet<Integer> epitopeSitesHigh;
public final String[] stringOutputCSV = new String[totalSites]; // String[] to create CSV from
SiteMutationVectors() {
// SiteMutationVectors is represented using a Map that maps each protein site to a 2D array of double[].
// Rows in the 2D array represent wild type amino acids, while columns represent mutant amino acids.
// Each entry is a double[] of size 2 where the first index represents the x-coordinate
// and the second index represents the y-coordinate of the corresponding vector of the wild type and mutant amino acids.
// Example:
// A C D . . .
// A [0.0, 0.0] [-1.0, -2.0] [-1.0, -1.0]
// C [1.0, 2.0] [0.0, 0.0] [3.0, 0.5]
// D [1.0, 1.0] [-3.0, -0.5] [0.0, 0.0]
// .
// .
// .
HashSet<Integer> epitopeSitesSet = new HashSet<>();
for (int i = 0; i < Parameters.epitopeSites.length; i++) {
epitopeSitesSet.add(Parameters.epitopeSites[i] - 1); // Allow users to index starting at 1, but store values starting at 0
}
this.epitopeSites = epitopeSitesSet;
HashSet<Integer> epitopeSitesSetLow = new HashSet<>();
for (int i = 0; i < Parameters.epitopeSitesLow.length; i++) {
epitopeSitesSetLow.add(Parameters.epitopeSitesLow[i] - 1); // Allow users to index starting at 1, but store values starting at 0
}
this.epitopeSitesLow = epitopeSitesSetLow;
HashSet<Integer> epitopeSitesSetHigh = new HashSet<>();
for (int i = 0; i < Parameters.epitopeSitesHigh.length; i++) {
epitopeSitesSetHigh.add(Parameters.epitopeSitesHigh[i] - 1); // Allow users to index starting at 1, but store values starting at 0
}
this.epitopeSitesHigh = epitopeSitesSetHigh;
// Create a mapping of site # to 2D array of vectors
// mutations: 2D Array
// i: index of wild type amino acid
// j: index of mutant amino acid
// ACDEFGHIKLMNPQRSTWYV 0 -> A, 1 -> C, 2 -> D, etc.
int matrixSize = Biology.AlphabetType.AMINO_ACIDS.getValidCharacters().length();
Map<Integer, MutationVector[][]> currentSiteMutationVectors = new HashMap<>();
// Cycle over each site in the protein sequence and create a 2D array
for (int nucleotideSiteNumber = 0; nucleotideSiteNumber < Parameters.startingSequence.length() / 3; nucleotideSiteNumber += 1) {
String currentStringOutputCSV = "mutation,r,theta\n";
MutationVector[][] currentSiteMutationMatrix = new MutationVector[matrixSize][matrixSize];
// Create 2D where rows are wild type amino acids and columns are mutant amino acids
for (int wildTypeIndex = 0; wildTypeIndex < matrixSize; wildTypeIndex++) {
for (int mutationIndex = 0; mutationIndex < matrixSize; mutationIndex++) {
if (mutationIndex < wildTypeIndex) { // update lower and upper triangle in this branch.
boolean isEpitopeSite = this.epitopeSites.contains(nucleotideSiteNumber);
boolean isEpitopeSiteLow = this.epitopeSitesLow.contains(nucleotideSiteNumber);
boolean isEpitopeSiteHigh = this.epitopeSitesLow.contains(nucleotideSiteNumber);
MutationVector mut = MutationVector.calculateMutation(isEpitopeSite, isEpitopeSiteLow, isEpitopeSiteHigh);
// add vector=(mutA, mutB) to mutations_i,j, where
// mutA is the x-coordinate
// mutB is the y-coordinate
// mutations is the 2D array that corresponds to the current amino acid site
currentSiteMutationMatrix[wildTypeIndex][mutationIndex] = new MutationVector(mut.mutA, mut.mutB);
// add vector=(-mutA, -mutB) to mutations_j,i
currentSiteMutationMatrix[mutationIndex][wildTypeIndex] = new MutationVector(-1 * mut.mutA, -1 * mut.mutB);
// amino acid mutation notation
// wild type amino acid + site # + mutant amino acid
currentStringOutputCSV += "" + Biology.AlphabetType.AMINO_ACIDS.getValidCharacters().charAt(wildTypeIndex) +
nucleotideSiteNumber + Biology.AlphabetType.AMINO_ACIDS.getValidCharacters().charAt(mutationIndex) +
"," + mut.r + "," + mut.theta + '\n';
}
}
}
stringOutputCSV[nucleotideSiteNumber] = currentStringOutputCSV;
currentSiteMutationVectors.put(nucleotideSiteNumber, currentSiteMutationMatrix);
}
this.siteMutationVectors = currentSiteMutationVectors;
}
/**
* Retrieves the matrix associated with the (protein level) site mutation and
* returns the vector that represents where the virus will move in Euclidean space based on the wild type and mutant amino acid.
* Note, it's possible for wild type == mutant amino acid.
*
* @param mutationIndexSite the (protein level) site where a nucleotide mutated
* @param mSiteMutationVectors the index (within Parameters.AMINO_ACIDS) that represent the wild type amino acid
* @param nSiteMutationVectors the index (within Parameters.AMINO_ACIDS) that represent the mutant amino acid
* @return a vector to represent the change in Euclidean space
*/
public MutationVector getVector(int mutationIndexSite, int mSiteMutationVectors, int nSiteMutationVectors) {
return this.siteMutationVectors.get(mutationIndexSite)[mSiteMutationVectors][nSiteMutationVectors];
}
/**
* Returns a representation of a csv formatted as amino acid mutation notation (e.g., A4F), r, theta
* that represents all the entries from each site's matrix of vectors.
*
* @return a String representing vectors in each site's matrix
*/
public String[] getStringOutputCSV() {
return this.stringOutputCSV;
}
/**
* Return each site's matrix of vectors.
*
* @return site's matrices
*/
public Map<Integer, MutationVector[][]> getMatrices() {
return this.siteMutationVectors;
}
/**
* Returns a set of indices that represent epitope sites, which are fixed within a simulation run.
*
* @return epitope site indices
*/
public HashSet getEpitopeSites() {
return this.epitopeSites;
}
public HashSet getEpitopeSitesLow() {
return this.epitopeSitesLow;
}
public HashSet getEpitopeSitesHigh() {
return this.epitopeSitesHigh;
}
}
/**
* <b>MutationVector</b> stores a mutation vector's x and y coordinates
* and, optionally, direction and magnitude.
*
* @author Thien Tran
*/
public static class MutationVector {
public double mutA;
public double mutB;
public double theta;
public double r;
public MutationVector(double mutA, double mutB) {
this.mutA = mutA;
this.mutB = mutB;
}
public MutationVector(double mutA, double mutB, double theta, double r) {
this.mutA = mutA;
this.mutB = mutB;
this.theta = theta;
this.r = r;
}
public static MutationVector calculateMutation(boolean isEpitopeSite, boolean isEpitopeSiteLow, boolean isEpitopeSiteHigh) {
// direction of mutation
double theta;
if (Parameters.mut2D) {
theta = Random.nextDouble(0, 2 * Math.PI);
} else {
if (Random.nextBoolean(0.5)) {
theta = 0;
} else {
theta = Math.PI;
}
}
// size of mutation
double r;
double alpha;
double beta;
double meanStep;
double sdStep;
if (isEpitopeSite) {
// determine if epitope site is low or high distribution or neither
if (isEpitopeSiteLow) {
meanStep = Parameters.meanStepEpitopeLow;
sdStep = Parameters.sdStepEpitopeLow;
} else if (isEpitopeSiteHigh) {
meanStep = Parameters.meanStepEpitopeHigh;
sdStep = Parameters.sdStepEpitopeHigh;
} else {
meanStep = Parameters.meanStepEpitope;
sdStep = Parameters.sdStepEpitope;
}
} else {
// non-epitope sites
meanStep = Parameters.meanStep;
sdStep = Parameters.sdStep;
}
r = meanStep;
if (!Parameters.fixedStep) {
alpha = (meanStep * meanStep) / (sdStep * sdStep);
beta = (sdStep * sdStep) / meanStep;
r = Random.nextGamma(alpha, beta);
}
// create antigenic phenotype by computing the x and y coordinates (mutA and mutB) of the virus in antigenic space
double mutA = r * Math.cos(theta);
double mutB = r * Math.sin(theta);
return new MutationVector(mutA, mutB, theta, r);
}
}
}