Skip to content

Commit e4c2418

Browse files
author
Geraldine Van der Auwera
committed
Merge remote-tracking branch 'unstable/master'
2 parents c654e0b + a2f4594 commit e4c2418

File tree

23 files changed

+251
-89
lines changed

23 files changed

+251
-89
lines changed

protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java

+15-3
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,15 @@ public List<VariantDatum> getData() {
110110
return data;
111111
}
112112

113-
public void normalizeData(final boolean calculateMeans) {
113+
/**
114+
* Normalize annotations to mean 0 and standard deviation 1.
115+
* Then order the variant annotations by the provided list {@code theOrder} or, if it is null, by standard deviation.
116+
*
117+
* @param calculateMeans Boolean indicating whether or not to calculate the means
118+
* @param theOrder a list of integers specifying the desired annotation order. If this is null
119+
* annotations will get sorted in decreasing size of their standard deviations.
120+
*/
121+
public void normalizeData(final boolean calculateMeans, List<Integer> theOrder) {
114122
boolean foundZeroVarianceAnnotation = false;
115123
for( int iii = 0; iii < meanVector.length; iii++ ) {
116124
final double theMean, theSTD;
@@ -150,15 +158,19 @@ public void normalizeData(final boolean calculateMeans) {
150158

151159
// re-order the data by increasing standard deviation so that the results don't depend on the order things were specified on the command line
152160
// standard deviation over the training points is used as a simple proxy for information content, perhaps there is a better thing to use here
153-
final List<Integer> theOrder = calculateSortOrder(meanVector);
161+
// or use the serialized report's annotation order via the argument theOrder
162+
if (theOrder == null){
163+
theOrder = calculateSortOrder(meanVector);
164+
}
154165
annotationKeys = reorderList(annotationKeys, theOrder);
155166
varianceVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(varianceVector), theOrder));
156167
meanVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(meanVector), theOrder));
157168
for( final VariantDatum datum : data ) {
158169
datum.annotations = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.annotations), theOrder));
159170
datum.isNull = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.isNull), theOrder));
160171
}
161-
logger.info("Annotations are now ordered by their information content: " + annotationKeys.toString());
172+
logger.info("Annotation order is: " + annotationKeys.toString());
173+
162174
}
163175

164176
public double[] getMeanVector() {

protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java

+34-7
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151

5252
package org.broadinstitute.gatk.tools.walkers.variantrecalibration;
5353

54+
import com.google.common.annotations.VisibleForTesting;
5455
import htsjdk.variant.variantcontext.Allele;
5556
import org.broadinstitute.gatk.utils.commandline.*;
5657
import org.broadinstitute.gatk.engine.CommandLineGATK;
@@ -312,6 +313,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
312313
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
313314
protected Boolean TRUST_ALL_POLYMORPHIC = false;
314315

316+
@VisibleForTesting
317+
protected List<Integer> annotationOrder = null;
318+
315319
/////////////////////////////
316320
// Private Member Variables
317321
/////////////////////////////
@@ -372,18 +376,15 @@ public void initialize() {
372376
final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
373377
final GATKReportTable anMeansTable = reportIn.getTable("AnnotationMeans");
374378
final GATKReportTable anStDevsTable = reportIn.getTable("AnnotationStdevs");
375-
final int numAnnotations = dataManager.annotationKeys.size();
376379

377-
if( numAnnotations != pmmTable.getNumColumns()-1 || numAnnotations != nmmTable.getNumColumns()-1 ) { // -1 because the first column is the gaussian number.
378-
throw new UserException.CommandLineException( "Annotations specified on the command line do not match annotations in the model report." );
379-
}
380+
orderAndValidateAnnotations(anMeansTable, dataManager.annotationKeys);
380381

381382
final Map<String, Double> anMeans = getMapFromVectorTable(anMeansTable);
382383
final Map<String, Double> anStdDevs = getMapFromVectorTable(anStDevsTable);
383384
dataManager.setNormalization(anMeans, anStdDevs);
384385

385-
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
386-
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
386+
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, annotationOrder.size());
387+
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, annotationOrder.size());
387388
}
388389

389390
final Set<VCFHeaderLine> hInfo = new HashSet<>();
@@ -401,6 +402,32 @@ public void initialize() {
401402

402403
}
403404

405+
/**
406+
* Order and validate annotations according to the annotations in the serialized model.
407+
* Annotations on the command line must be the same as those in the model report or this will throw an exception.
408+
* Sets the {@code annotationOrder} list so that entry i holds the command line index of the i-th annotation in the model report.
409+
* The matching is a simple O(n^2) nested loop, which is acceptable because we typically use 7 or fewer annotations.
410+
* @param annotationTable GATKReportTable of annotations read from the serialized model file
411+
*/
412+
protected void orderAndValidateAnnotations(final GATKReportTable annotationTable, final List<String> annotationKeys){
413+
annotationOrder = new ArrayList<Integer>(annotationKeys.size());
414+
415+
for (int i = 0; i < annotationTable.getNumRows(); i++){
416+
String serialAnno = (String)annotationTable.get(i, "Annotation");
417+
for (int j = 0; j < annotationKeys.size(); j++) {
418+
if (serialAnno.equals( annotationKeys.get(j) )){
419+
annotationOrder.add(j);
420+
}
421+
}
422+
}
423+
424+
if(annotationOrder.size() != annotationTable.getNumRows() || annotationOrder.size() != annotationKeys.size()) {
425+
final String errorMsg = "Annotations specified on the command line:"+annotationKeys.toString() +" do not match annotations in the model report:"+inputModel;
426+
throw new UserException.CommandLineException(errorMsg);
427+
}
428+
429+
}
430+
404431

405432
//---------------------------------------------------------------------------------------------------------------
406433
//
@@ -518,7 +545,7 @@ public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum )
518545
for (int i = 1; i <= max_attempts; i++) {
519546
try {
520547
dataManager.setData(reduceSum);
521-
dataManager.normalizeData(inputModel.isEmpty()); // Each data point is now (x - mean) / standard deviation
548+
dataManager.normalizeData(inputModel.isEmpty(), annotationOrder); // Each data point is now (x - mean) / standard deviation
522549

523550
final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
524551
final List<VariantDatum> negativeTrainingData;

protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -198,13 +198,13 @@ public String toString() {
198198
public Object[][] createPRTestData() {
199199
List<Object[]> tests = new ArrayList<Object[]>();
200200

201-
tests.add(new Object[]{1, new PRTest(" -qq -1", "8a38828e3b14ce067614d4248e3ea95a")});
202-
tests.add(new Object[]{1, new PRTest(" -qq 6", "e4f23250b2c87f0d68d042cc3d2ec1d3")});
203-
tests.add(new Object[]{1, new PRTest(" -DIQ", "2dfa45f004d3a371fd290ed67fbdf573")});
204-
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30", "4882354d9e603f9bbe7c9591bba0a573")});
205-
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30 -RDQ", "6ffdfc4593e83f7c234b6249412433af")});
201+
tests.add(new Object[]{1, new PRTest(" -qq -1", "e9969c6d7fd35d96b82c691b4ced5443")});
202+
tests.add(new Object[]{1, new PRTest(" -qq 6", "b4b271acac003b8504b530c5526d43ad")});
203+
tests.add(new Object[]{1, new PRTest(" -DIQ", "ec795d16746c3bdf5e54c57337e6eed6")});
204+
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30", "d50df5a7bcc8f195479f64285c124d18")});
205+
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30 -RDQ", "58b9df85e49eb3ee228f0d581c168791")});
206206
for ( final int nct : Arrays.asList(1, 2, 4) ) {
207-
tests.add(new Object[]{nct, new PRTest("", "6451093cadfc14d7359617b2a7ea6db8")});
207+
tests.add(new Object[]{nct, new PRTest("", "f4704ba2894ec0aec8a55ce6b361f768")});
208208
}
209209

210210
return tests.toArray(new Object[][]{});

protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public void testTruePositivesDream3() {
138138
public void testTruePositivesDream3TrackedDropped() {
139139
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
140140
"48a446d47bb10434cb7f0ee726d15721",
141-
"6ecaeb74893249dfa5723b2266c957e2");
141+
"265a72d3f79bb0fe054a847ab0c01c67");
142142
}
143143

144144
/**

protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ private void validateForwardedProgramRecords(final List<File> bamInFiles, final
133133

134134
@Test
135135
public void testHaplotypeBAMOutFlags() throws IOException {
136-
final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57";
136+
final String md5BAMOut = "0934466fa2b8648af9e6267286a57151";
137137
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "df622103b0a6917f2299b1acfd0ed0ac", md5BAMOut);
138138
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut);
139139
}
@@ -330,7 +330,7 @@ public void HCTestDanglingTailMergingForDeletions() throws IOException {
330330
@Test
331331
public void testLeftAlignmentBamOutBugFix() throws IOException {
332332
final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
333-
final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3";
333+
final String md5BAMOut = "60db3996cd37a863d8b93218fcbf1c8b";
334334
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
335335
+ " --no_cmdline_in_header -bamout %s -o " + outputVCF + " -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
336336
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
@@ -541,7 +541,7 @@ public void testSetZeroGQsToNoCall() throws IOException{
541541
public void testHaplotypeCallerReadPosRankSum() throws IOException {
542542
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
543543
final String md5Variants = "e664b7a9da71cf81e14648ac7e698eea";
544-
final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa";
544+
final String md5BAMOut = "74f2cab2d2d0d999b54456b73e597d6c";
545545
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
546546
" --no_cmdline_in_header -o %s -bamout %s";
547547
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));

protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/rnaseq/SplitNCigarReadsIntegrationTest.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -87,39 +87,39 @@ public void testSplitWithDeletions() {
8787
public void testSplitsWithOverhangs() {
8888
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
8989
"-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
90-
Arrays.asList("72fbeb2043f005e1698e21563f0625a9"));
90+
Arrays.asList("b721d997bd09873a244fee97c1e58af1"));
9191
executeTest("test splits with overhangs", spec);
9292
}
9393

9494
@Test
9595
public void testSplitsFixNDN() {
9696
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
9797
"-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "splitNCigarReadsSnippet.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS -fixNDN", 1,
98-
Arrays.asList("add7012d5e814d6cfd32f6cac1eb8ce3"));
98+
Arrays.asList("9aa80944c2c7ee8a1f259907e3d8b51c"));
9999
executeTest("test fix NDN", spec);
100100
}
101101

102102
@Test
103103
public void testSplitsWithOverhangsNotClipping() {
104104
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
105105
"-T SplitNCigarReads --doNotFixOverhangs -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
106-
Arrays.asList("6a55ac0a945e010bf03e1dd8f7749417"));
106+
Arrays.asList("4134e1ff0eaaa9986e19526a7c3a8319"));
107107
executeTest("test splits with overhangs not clipping", spec);
108108
}
109109

110110
@Test
111111
public void testSplitsWithOverhangs0Mismatches() {
112112
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
113113
"-T SplitNCigarReads --maxMismatchesInOverhang 0 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
114-
Arrays.asList("8a577047955ff9edca3caf1f6e545d3e"));
114+
Arrays.asList("94ebc9fbd64684e50c5f54ad5ff042b6"));
115115
executeTest("test splits with overhangs 0 mismatches", spec);
116116
}
117117

118118
@Test
119119
public void testSplitsWithOverhangs5BasesInOverhang() {
120120
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
121121
"-T SplitNCigarReads --maxBasesInOverhang 5 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
122-
Arrays.asList("bdd822868b88063cf50c6336ed1a5e64"));
122+
Arrays.asList("b737b2dfb22a608ee3def6137fed9414"));
123123
executeTest("test splits with overhangs 5 bases in overhang", spec);
124124
}
125125
}

protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariantsIntegrationTest.java

+7-7
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void testDefaults() {
6464
WalkerTestSpec spec = new WalkerTestSpec(
6565
"-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
6666
1,
67-
Arrays.asList("d929369b9095420a8aaff2595ec2f80a"));
67+
Arrays.asList("b5680f835aff1da6e1e60123d39f2371"));
6868
executeTest("testVariants", spec);
6969
}
7070

@@ -74,7 +74,7 @@ public void testReadLength() {
7474
WalkerTestSpec spec = new WalkerTestSpec(
7575
"-RL 70 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
7676
1,
77-
Arrays.asList("bbc65e4f8bd3a1656616476a1e190ecf"));
77+
Arrays.asList("177d32e0b13bd40aaeef71e8e5ffefe7"));
7878
executeTest("testReadLength", spec);
7979
}
8080

@@ -84,7 +84,7 @@ public void testErrorRate() {
8484
WalkerTestSpec spec = new WalkerTestSpec(
8585
"-ER 40 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
8686
1,
87-
Arrays.asList("cb0e4b11bbd1b5a154ad6c99541cd017"));
87+
Arrays.asList("7353b4d148221f4f4975f07712413e18"));
8888
executeTest("testErrorRate", spec);
8989
}
9090

@@ -94,7 +94,7 @@ public void testPlatformTag() {
9494
WalkerTestSpec spec = new WalkerTestSpec(
9595
"-RGPL SOLID -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
9696
1,
97-
Arrays.asList("2b5c6cda9a434c9e25e5da5599eeae51"));
97+
Arrays.asList("6f7a2ff3aa3c2e24a00f04dc770a4140"));
9898
executeTest("testPlatformTag", spec);
9999
}
100100

@@ -105,7 +105,7 @@ public void testAlleleFraction() {
105105
WalkerTestSpec spec = new WalkerTestSpec(
106106
"-T SimulateReadsForVariants --no_pg_tag --useAFAsAlleleFraction -DP 100 -R " + b37KGReference + " -V " + publicTestDir + "forAlleleFractionSimulation.vcf -o %s",
107107
1,
108-
Arrays.asList("1ae2c354718b470e30b44d5e59cb9944"));
108+
Arrays.asList("f19d4b62269512fff0dcce21874c0d43"));
109109
executeTest("testAlleleFraction", spec);
110110

111111
}
@@ -116,7 +116,7 @@ public void testLongInsertFailure() {
116116
WalkerTestSpec spec = new WalkerTestSpec(
117117
"-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s",
118118
1,
119-
Arrays.asList("5c069bff8efb988660c7f6d28a3117fc"));
119+
Arrays.asList("052f1b644848cfd058cd2aeb0b6f2fd2"));
120120
executeTest("testLongInsertFailure", spec);
121121
}
122122

@@ -126,7 +126,7 @@ public void testLongInsertSuccess() {
126126
WalkerTestSpec spec = new WalkerTestSpec(
127127
"-RL 269 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s",
128128
1,
129-
Arrays.asList("0657f6a692d22b5e2b7f5832710042e4"));
129+
Arrays.asList("33f7da2e0b711a9ad28cb49c60e648be"));
130130
executeTest("testLongInsertSuccess", spec);
131131
}
132132

0 commit comments

Comments
 (0)