forked from CyberPoint/libem
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGaussMix.cpp
1513 lines (1296 loc) · 56 KB
/
GaussMix.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*********************************************************************************
# Copyright (c) 2012, CyberPoint International, LLC
# All rights reserved.
#
# This software is offered under the NewBSD license:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the CyberPoint International, LLC nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL CYBERPOINT INTERNATIONAL, LLC BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*! \file GaussMix.cpp
\version 1.0
\brief core libGaussMix++ em algorithm method implementations.
\author Elizabeth Garbee & CyberPoint Labs, CyberPoint International LLC
\date Summer 2012
*/
/*! \mainpage libGaussMix: An Expectation Maximization Algorithm for Training Gaussian Mixture Models
*
*\authors Elizabeth Garbee & CyberPoint Labs
*\date February 7, 2013
*
* Copyright 2013 CyberPoint International LLC.
*
* The contents of libGaussMix are offered under the NewBSD license:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* (1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* (2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* (3) Neither the name of the CyberPoint International, LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL CYBERPOINT INTERNATIONAL, LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*\section intro_sec Introduction
*
* A Gaussian mixture model is method of approximating, as a linear combination of multivariate Gaussian density functions, an unknown density function from which a given
* data set is presumed to be drawn. The means and covariance matrices of these Gaussian densities are sometimes referred to as the "hidden" parameters of the data set.
*
* Expectation Maximization (EM) is perhaps the most popular technique for discovering the parameters of a mixture with a given number of components. Here's the usual scenario:
* one is given a set of N data points in an M-dimensional space. One seeks to fit to the data a set of k multivariate Gaussian distributions (or "clusters") that best represent the observed distribution of the data
* points, where k is specified, but the means and covariances of the Gaussians are unknown. The desired output includes the means and covariances of the Gaussians, along with the cluster weights
* (coefficients of the linear combination of Gaussians). One may think of the data as being drawn by first randomly choosing a particular Gaussian according to the cluster weights, and then
* choosing a data point according to the selected distribution. The overall probability density assigned to a point is then given by the sum over k of P(n|k)P(k) where k denotes the cluster, P(k) its probability, and P(n|k) denotes
* the probability density of the point given the cluster. The likelihood density L of the data is the product of these sums, taken over all data points (which are assumed to be independent and identically distributed).
* The EM algorithm produces an estimate of the P(k) and the P(n|k),by attempting to maximize L. Specifically, it yields the estimated means mu(k), covariance matrices sigma(k), and weights P(k) for each Gaussian.
*
* An outline of the EM calculations is best described by working backwards from L. Since the data points are (assumed) to be independent, L is the product of the probability densities of
* each observed data point, which splits into a contribution P(n|k) from each Gaussian (these contributions are sometimes called the data point "mixture weights").
* In the language of EM, the L and P(n|k) calculations, given known mu(k), sigma(k), and P(k), comprise the algorithm's "expectation step", or E step.
* Now if the P(n|k)'s are known, we can derive from them maximum likelihood estimates of the mu(k), sigma(k), and P(k), in a process called the "maximization step", or M step, from which we then obtain a new set of P(n|k), and a new (better) L.
*
* The power of the EM algorithm comes from a theorem (see the first reference) stating that, starting from any reasonable guess of parameter values, an iteration of an E step followed by an M
* step will always increase the likelihood value L, and that repeated iterations between these two steps will converge to (at least) a local maximum. Often, the convergence is indeed to the
* global maximum. The EM algorithm, in brief, goes like this:
* - Guess starting values for the mu(k), sigma(k) and P(k) for each Gaussian,
* - Repeat: an E step to get a new L and new P(n|k)s, followed by an M step to get new mu(k)s, sigma(k)s, and P(k)s,
* - Stop when the value of L is no longer meaningfully changing.
*
* The libGaussMix implementation of the EM algorithm uses the "KMeans" clustering algorithm to provide the initial guesses for the means of the K Gaussians, in order to increase the efficiency and efficacy of
* EM.
*
* The libGaussMix code base also includes support for adapation of a GMM to a specific subpopulation of the population on which it was trained. In a sense, this
* "biases" the model towards the subpopulation. If the population splits into distinct subpopulations, one may then classify a sample as belonging to a particular
* subpopulation, by assiging it to the subpopulation under whose adapted GMM it has the highest likelihood density. One often normalizes
* these scores by the unadapted GMM density. See reference 2 for details. This functionality is provided by the adapt() method of the GaussMix API.
*
* \section usage_sec Usage
*
* The libGaussMix library builds into both .a and .so files. The API is provided in GaussMix.h; API implementations are contained in GaussMix.cpp. The implementations
* use helper routines provided in Matrix.h/ Matrix.cpp, which in turn wrap LAPACK linear algebra functions, and in KMeans.h/ KMeans.cpp, to generate initial model parameter guesses.
* To build the lib/API into an executable using the sample driver function provided in sample_main.cpp, follow these steps:
*
* - 1. Install BLAS, LAPACK, and LAPACKE on your machine if they're not already there (c.f. http://www.netlib.org/lapack/, which bundles a reference vesion of BLAS).
* - 2. Update the environment variables in libgaussmix.inc to point to your environment's BLAS and LAPACK header and library locations.
* - 3. run make on the libGaussMix makefile.
* - 4. run the resulting executable via: "gaussmix <data_file> <num_dimensions> <num_data_points> <num_clusters>". \n
* Try using one of the sample *.csv or *.svm data files as the first argument: e.g. "gaussmix multid_data_1.csv 3 20 2".
*
* The libGaussMix source code has been built and tested on SL6.2 using gcc 4.4.5.
*
*
* \section example_sec Caller Example
*
* For an example "main" that invokes the libGaussMix API routines, see sample_main.cpp.
*
* \section Notes
*
* 1. Per the first reference, the values of the Gaussian density functions may be so small as to underflow to zero, and therefore, it is desirable to perform
* the EM algorithm in the log domain. This implementation takes that approach.
*
* 2. libGaussMix supports csv and svm-style data formats (c.f. www.csie.ntu.edu.tw/~cjlin/libsvm/). Sample files
* (multid_data_1.csv and multid_data_2.svm, respectively) are provided for convenience. The former consists of 20
* three-dimensional data points falling into two clusters centered on (1,1,1) and (100,100,100). The second is similarly
* distributed, with a "+1" cluster centered on (10,10,10) and "-1" cluster centered on (50,50,50).
*
* 3. The Matrix.h/ cpp code wraps LAPACK routines. To work with these routines efficiently, the Matrix class maintains
* internal buffers that may be modified by operations that, from a semantic point of view, are read-only. Hence many
* libGauss routines take non-const Matrix arguments, where const Matrix arguments would be typically be used.
*
* 4. For compilers that support it, the code is configured to use open mp (www.openmp.org) to parallelize various for loops,
* where the loop is over the cluster of the Gaussian Mixture Model.
* .
* \section References
*
* 1. Press, et. al., Numerical Recipes 3rd Edition: The Art of Scientific Computing,
* Cambridge University Press New York, NY, USA ©2007,
* ISBN:0521880688 9780521880688
* (c.f. chapter 16).
*
* 2. Douglas A. Reynolds, et. al., "Speaker Verication Using Adapated Gaussian Mixture Models,"
* Digital Signal Processing 10, 19-24 (2000).
*
*/
//header brick
#include <math.h>
#include <iostream>
#include <ostream>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
#include <cmath>
#include <vector>
#include <istream>
#include <list>
#include <numeric>
#include <functional>
#include <algorithm>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <set>
#include <exception>
#ifdef _OPENMP
#include <omp.h>
#endif /* _OPENMP */
static int myNode=0, totalNodes=1;
#ifdef UseMPI
#include <mpi.h>
#endif /* UseMPI */
//#undef OPENCL
#ifdef OPENCL
#define __NO_STD_VECTOR
#define __NO_STD_STRING
#include <CL/cl.h>
const char* estep_srcfile = "oclEstep.cl";
static char* estep_src = 0;
static int devType = CL_DEVICE_TYPE_GPU;
static cl_int clerr = CL_SUCCESS;
static cl_platform_id cpPlatform = 0;
static cl_device_id device_id = 0;
static cl_context context = 0;
static cl_command_queue commands = 0;
static cl_program program = 0;
static bool isProgBuilt = false;
#endif /* OPENCL */
#include <lapacke.h>
// for kmeans utils
#include "KMeans.h"
// for adaptation utils
#include "Adapt.h"
//API header file
#include "GaussMix.h"
// for error handling in C libraries
#include <errno.h>
#define MAX_CLUSTERS 50
//MAX_ITERATIONS is the while loop limiter for Kmeans
#define MAX_ITERATIONS 100
#define BIG_double (INFINITY)
#define MAX_LINE_SIZE 1000
/*
* SET THIS TO 1 FOR DEBUGGING STATEMENT SUPPORT (via std out)
*/
#define DEBUG 0
using namespace std;
/********************************************************************************************************
* PRIVATE FUNCTION PROTOTYPES
********************************************************************************************************/
// EM helper functions
double estep(int n, int m, int k, const double *X, Matrix &p_nk_matrix, const std::vector<Matrix *> &sigma_matrix, \
const Matrix &mu_matrix, const std::vector<double> &Pk_vec);
bool mstep(int n, int m, int k, const double *X, const Matrix &p_nk_matrix, Matrix *sigma_matrix, \
Matrix &mu_matrix, std::vector<double> &Pk_vec);
double * matrixToRaw(const Matrix & X);
/******************************************************************************************
* IMPLEMENTATION OF PRIVATE FUNCTIONS
*******************************************************************************************/
/*! \brief estep is the function that calculates the L and Pnk for a given data point(n) and gaussian(k).
*
@param n number of data points
@param m dimensionality of data
@param k number of clusters
@param X data
@param p_nk_matrix matrix generated by the caller of EM that holds the pnk's calculated
@param sigma_matrix vector of matrix pointers generated by the caller of EM that holds the sigmas calculated
@param mu_matrix matrix of mean vectors generated by caller
@param Pk_vec vector generated by the caller of EM that holds the Pk's calculated
*/
double estep(int n, int m, int k, const double *X, Matrix &p_nk_matrix, const std::vector<Matrix *> &sigma_matrix,
const Matrix &mu_matrix, const std::vector<double> & Pk_vec)
{
//initialize likelihood
double likelihood = 0.0;
#ifndef OPENCL /* non-OpenCL variant */
//initialize variables
std::vector<Matrix*> sigma_inverses;
std::vector<double> determinants;
for (int gauss = 0; gauss < k; gauss++)
{
Matrix *sigma_inv = sigma_matrix[gauss]->inv();
sigma_inverses.push_back(sigma_inv);
double determinant = sigma_matrix[gauss]->det();
determinants.push_back(determinant);
}
//for each data point in n
for (int data_point = 0; data_point < n; data_point++)
{
//initialize the x matrix, which holds the data passed in from double *X
Matrix x(1,m);
//initialize the P_xn to zero to start
double P_xn = 0.0;
//for each dimension
for (int dim = 0; dim < m; dim++)
{ //put the data stored in the double* in the x matrix you just created
x.update( X[m*data_point + dim],0,dim );
}
//z_max is the maximum cluster weighted density for the data point under any gaussian
double z_max = 0.0;
bool z_max_assigned = false;
#ifdef _OPENMP
#pragma omp parallel for
#endif /* _OPENMP */
for (int gaussian = 0; gaussian < k; ++gaussian)
{ //initialize the row representation of the mu matrix
Matrix mu_matrix_row(1,m);
//for each dimension
for (int dim = 0; dim < m; dim++)
{ //fill in that matrix
double temp = mu_matrix.getValue(gaussian,dim);
mu_matrix_row.update(temp,0,dim);
}
//(x - mu)
Matrix* difference_row = x.subtract(mu_matrix_row);
if (DEBUG)
difference_row->print();
//transpose(x - mu)
Matrix difference_column(m,1);
for (int i = 0; i < m; i++)
{ // fill it in
difference_column.update( difference_row->getValue(0,i), i, 0 );
}
//transpose(x - mu) * inv(sigma)
Matrix* term1 = sigma_inverses[gaussian]->dot( difference_column );
if (DEBUG)
{
std::cout << "sigma_inv dot difference_column" << std::endl;
term1->print();
}
//transpose(x - mu) * inv(sigma) * (x - mu)
Matrix *term2 = difference_row->dot(*term1);
if (DEBUG)
{
std::cout << "term2" << std::endl;
term2->print();
}
//create a double to represent term2, since it's a scalar
double term2_d = term2->getValue(0,0);
if( DEBUG )
printf("Datapoint: %d Gauss: %d Term2: %f\n", data_point, gaussian, term2_d);
// log norm factor is the normalization constant for the density functions
double log_norm_factor = -0.5*( m*log(2.0*M_PI) + log(determinants[gaussian]) );
//log density is the log of the density function for the kth gaussian evaluated on the nth data point
double log_density = log_norm_factor + (-0.5*term2_d);
//temp1 is the cluster weight for the current gaussian
double temp1 = Pk_vec[gaussian];
//temp2 is the log of the cluster weight for the current gaussian
double temp2 = log(temp1);
//current z is the log of the density function times the cluster weight
double current_z = temp2 + log_density;
//assign current_z
#ifdef _OPENMP
# pragma omp critical(z_max)
#endif /* _OPENMP */
if ((z_max_assigned == false) || current_z > z_max)
{
z_max = current_z;
z_max_assigned = true;
}
//calculate p_nk = density * Pk / weight
p_nk_matrix.update(current_z, data_point,gaussian);
if( DEBUG )
{
std::cout << "p_nk_matrix" << std::endl;
p_nk_matrix.print();
}
#ifdef _OPENMP
# pragma omp critical(sigma_inv_tracking)
#endif /* _OPENMP */
delete difference_row;
delete term1;
delete term2;
} // end gaussian
//calculate the P_xn
for (int gaussian = 0; gaussian < k; gaussian++)
P_xn += exp( p_nk_matrix.getValue(data_point, gaussian) - z_max );
//log of total density for data point
double tempa = log(P_xn);
double log_P_xn = tempa + z_max;
//normalize the probabilities per cluster for data point
for (int gaussian = 0; gaussian < k; gaussian++)
p_nk_matrix.update( p_nk_matrix.getValue(data_point,gaussian)-log_P_xn, data_point,gaussian );
//calculate the likelihood of this model
likelihood += log_P_xn;
if (DEBUG)
std::cout << "The likelihood for this iteration is " << likelihood << std::endl;
} // end data_point
#ifdef UseMPI
// Now reduce the likelihood over all data points:
double totalLikelihood;
if (DEBUG)
std::cout << "Reducing likelihood: " << likelihood << " on node "<< myNode << std::endl;
MPI_Allreduce(&likelihood, &totalLikelihood, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
likelihood = totalLikelihood;
#endif /* UseMPI */
for (int i = 0; i < k; i++)
delete sigma_inverses[i];
#else /* do OpenCL stuff */
if( !isProgBuilt )
{
ostringstream sout;
char* prg_opts = 0;
sout << " -D ESTEP_N=" << n;
sout << " -D ESTEP_M=" << m;
sout << " -D ESTEP_K=" << k;
prg_opts = (char*)(sout.str().c_str());
clerr = clBuildProgram(program, 0, 0, prg_opts, 0, 0);
if( clerr != CL_SUCCESS )
{
size_t len;
char buffer[2048];
clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
std::cerr << clerr << std::endl << buffer << std::endl;
throw std::runtime_error("Failed to build program");
}
else
{
isProgBuilt = true;
delete[] estep_src;
}
}
commands = clCreateCommandQueue(context, device_id, 0, &clerr);
if( clerr != CL_SUCCESS )
throw std::runtime_error("Failed to create command queue");
cl_kernel estep_krnl = clCreateKernel( program, "oclEstep", &clerr );
if( clerr != CL_SUCCESS )
throw std::runtime_error("Failed to create estep kernel");
//@NOTE We must use single precision floats here otherwise rely on ocl ext
// init variables
cl_mem h_likelihood = 0;
cl_mem cl_likelihood = 0;
cl_mem cl_X = 0;
cl_mem cl_p_nk = 0;
cl_mem cl_sigma_invs = 0;
cl_mem cl_dets = 0;
cl_mem cl_mus = 0;
cl_mem cl_Pk = 0;
float* pMapLikelihood = 0;
float* pMapX = 0;
float* pMapP_nk = 0;
float* pMapSigmaInvs = 0;
float* pMapDets = 0;
float* pMapMus = 0;
float* pMapPk = 0;
// create host and device buffers
h_likelihood = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, \
sizeof(float)*(n), 0, &clerr);
cl_likelihood = clCreateBuffer(context, CL_MEM_WRITE_ONLY, \
sizeof(float)*(n), 0, &clerr);
cl_X = clCreateBuffer(context, CL_MEM_READ_ONLY, \
sizeof(float)*(n*m), 0, &clerr);
cl_p_nk = clCreateBuffer(context, CL_MEM_READ_WRITE, \
sizeof(float)*(n*k), 0, &clerr);
cl_sigma_invs = clCreateBuffer(context, CL_MEM_READ_ONLY, \
sizeof(float)*(m*m*k), 0, &clerr);
cl_dets = clCreateBuffer(context, CL_MEM_READ_ONLY, \
sizeof(float)*k, 0, &clerr);
cl_mus = clCreateBuffer(context, CL_MEM_READ_ONLY, \
sizeof(float)*(k*m), 0, &clerr);
cl_Pk = clCreateBuffer(context, CL_MEM_READ_ONLY, \
sizeof(float)*(n*k), 0, &clerr);
// X buffer- init and copy to dev
pMapX = (float*)clEnqueueMapBuffer(commands, cl_X, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*(n*m), 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map data points");
for( int i=0; i<(n*m); ++i )
pMapX[i] = (float)X[i];
clerr = clEnqueueUnmapMemObject(commands, cl_X, pMapX, 0, 0, 0);
// p_nk buffer- init and copy to dev
pMapP_nk = (float*)clEnqueueMapBuffer(commands, cl_p_nk, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*(n*k), 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map p_nk");
for(int i=0; i<n; ++i)
for(int j=0; j<k; ++j)
pMapP_nk[i*k+j] = (float)p_nk_matrix.getValue(i,j);
clerr = clEnqueueUnmapMemObject(commands, cl_p_nk, pMapP_nk, 0, 0, 0);
// for each gaussian's weight, mu, det(cov), and inv(cov)- init and copy to dev
pMapSigmaInvs = (float*)clEnqueueMapBuffer(commands, cl_sigma_invs, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*(m*m*k), 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map sigma inverse");
pMapDets = (float*)clEnqueueMapBuffer(commands, cl_dets, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*k, 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map determinant");
pMapPk = (float*)clEnqueueMapBuffer(commands, cl_Pk, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*k, 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map Pk");
pMapMus = (float*)clEnqueueMapBuffer(commands, cl_mus, CL_TRUE, CL_MAP_WRITE, \
0, sizeof(float)*(k*1*m), 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map mu");
for( int gMatIdx=0; gMatIdx<k; ++gMatIdx )
{ // for each distribution cluster
pMapPk[gMatIdx] = (float)Pk_vec[gMatIdx]; // copy the distribution weight
Matrix* pMat = sigma_matrix[ gMatIdx ]; // get cov
pMapDets[ gMatIdx ] = (float)pMat->det(); // ... save det(cov)
Matrix* pMatInv = pMat->inv(); // ... save inv(cov)
//flatten mat inv to 1-D array
for( int rIdx=0; rIdx<m; ++rIdx )
{
pMapMus[gMatIdx*m + rIdx] = (float)mu_matrix.getValue(gMatIdx,rIdx);
for( int cIdx=0; cIdx<m; ++cIdx )
{
int idx = (gMatIdx * m*m) + (cIdx*m + rIdx);
pMapSigmaInvs[idx] = (float)pMatInv->getValue(rIdx,cIdx);
}
}
// the inv() return a heap object, so free it
delete pMatInv;
}
clerr = clEnqueueUnmapMemObject(commands, cl_sigma_invs, pMapSigmaInvs, 0, 0, 0);
clerr |= clEnqueueUnmapMemObject(commands, cl_dets, pMapDets, 0, 0, 0);
clerr |= clEnqueueUnmapMemObject(commands, cl_Pk, pMapPk, 0, 0, 0);
clerr |= clEnqueueUnmapMemObject(commands, cl_mus, pMapMus, 0, 0, 0);
if( clerr != CL_SUCCESS )
throw std::runtime_error("failed to unmap gaussian objects");
// TODO make use of a summation reduction and local scoped vars
clerr = clSetKernelArg(estep_krnl, 0, sizeof(cl_mem), &cl_likelihood);
clerr |= clSetKernelArg(estep_krnl, 1, sizeof(int), &n);
clerr |= clSetKernelArg(estep_krnl, 2, sizeof(int), &m);
clerr |= clSetKernelArg(estep_krnl, 3, sizeof(int), &k);
clerr |= clSetKernelArg(estep_krnl, 4, sizeof(cl_mem), &cl_X);
clerr |= clSetKernelArg(estep_krnl, 5, sizeof(cl_mem), &cl_p_nk);
clerr |= clSetKernelArg(estep_krnl, 6, sizeof(cl_mem), &cl_sigma_invs);
clerr |= clSetKernelArg(estep_krnl, 7, sizeof(cl_mem), &cl_dets);
clerr |= clSetKernelArg(estep_krnl, 8, sizeof(cl_mem), &cl_mus);
clerr |= clSetKernelArg(estep_krnl, 9, sizeof(cl_mem), &cl_Pk);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to set kernel arguments");
// TODO : find better parallelisms to saturate work threads
size_t globalSize[1];
size_t localSize[1];
globalSize[0] = (size_t)(n*k); // each work group is on a data point, and we want k groups
localSize[0] = (size_t)k; // each work instance is on a cluster
clerr = clEnqueueNDRangeKernel(commands, estep_krnl, 1, 0, globalSize, localSize, 0, 0, 0);
if( clerr != CL_SUCCESS )
throw std::runtime_error("failed to execute kernel");
// make likelihood values available to host
pMapLikelihood = (float*)clEnqueueMapBuffer(commands, cl_likelihood, CL_TRUE, CL_MAP_READ, \
0, sizeof(float)*(n), 0, 0, 0, &clerr);
if(clerr != CL_SUCCESS)
throw std::runtime_error("failed to map likelihood");
if( DEBUG )
{
std::cout << "================ LIKELIHOODS " << std::endl;
for(int i=0; i<n; ++i)
std::cout << i << " " << pMapLikelihood[i] << std::endl;
}
likelihood = 0.0;
for( int i=0; i<n; ++i )
likelihood += pMapLikelihood[i];
clerr = clEnqueueUnmapMemObject(commands, cl_likelihood, pMapLikelihood, 0, 0, 0);
// make p_nk values available to host to send to mstep
pMapP_nk = (float*)clEnqueueMapBuffer(commands, cl_p_nk, CL_TRUE, CL_MAP_READ, \
0, sizeof(float)*(n*k), 0, 0, 0, &clerr);
if( clerr != CL_SUCCESS )
throw std::runtime_error("failed to map p_nk");
for(int i=0; i<n; ++i)
for(int j=0; j<k; ++j)
p_nk_matrix.update( pMapP_nk[i*k+j], i, j);
clerr = clEnqueueUnmapMemObject(commands, cl_p_nk, pMapP_nk, 0, 0, 0);
// wait for command queue to finish running all kernels
clerr = clFinish(commands);
if( clerr != CL_SUCCESS )
throw std::runtime_error("failed to finish out command queue");
// release ocl memory buffers
clerr = clReleaseMemObject(h_likelihood);
clerr = clReleaseMemObject(cl_likelihood);
clerr = clReleaseMemObject(cl_X);
clerr = clReleaseMemObject(cl_p_nk);
clerr = clReleaseMemObject(cl_sigma_invs);
clerr = clReleaseMemObject(cl_dets);
clerr = clReleaseMemObject(cl_mus);
clerr = clReleaseMemObject(cl_Pk);
// cleanup OCL kernel
clerr = clReleaseKernel( estep_krnl );
#endif /* OPENCL */
if (DEBUG)
{
std::cout << "Likelihood after E-Step: " << likelihood << std::endl;
std::cout<<"P_nk for itr is";
p_nk_matrix.print();
}
//return the likelihood of this model
return likelihood;
}
/*! \brief mstep is the function that approximates the mu, sigma and P(k) paramters for a given Gaussian fit.
*
@param n number of data points
@param m dimensionality of data
@param k number of clusters
@param X data
@param p_nk_matrix matrix generated by the caller of EM that holds the pnk's calculated
@param sigma_matrix vector of matrix pointers generated by the caller of EM that holds the sigmas calculated
@param mu_matrix matrix of mean vectors generated by caller
@param Pk_vec vector generated by the caller of EM that holds the Pk's calculated
*/
bool mstep(int n, int m, int k, const double *X, Matrix &p_nk_matrix, std::vector<Matrix *> &sigma_matrix,
Matrix &mu_matrix, std::vector<double> & Pk_vec)
{
// Update Pk_vec and mu_matrix
int gaussian = 0;
#ifdef _OPENMP
# pragma omp parallel for
#endif /* _OPENMP */
for (gaussian = 0; gaussian < k; gaussian++)
{
//initialize the mean and covariance matrices that hold the mstep approximations
Matrix mu_hat(1,m);
//initialize the normalization factor - this will be the sum of the densities for each data point for the current gaussian
double norm_factor = 0;
//initialize the array of doubles of length m that represents the data with some modification
double x[m];
//do the mu calculation point by point
for (int data_point = 0; data_point < n; data_point++)
{
// No need to calculate this multiple times
double p_nk_local = p_nk_matrix.getValue(data_point,gaussian);
double exp_p_nk = exp(p_nk_local);
for (int dim = 0; dim < m; dim++)
{
x[dim] = X[m*data_point + dim]* exp_p_nk;
}
//sum up all the individual mu calculations
mu_hat.add(x, m, 0);
//calculate the normalization factor
norm_factor += exp_p_nk;
}
Pk_vec[gaussian] = norm_factor;
//fill in the mu hat matrix with your new mu calculations, adjusted by the normalization factor
// Update mu_hat and mu_matrix. Scale mu_matrix if and only if not using MPI.
for (int dim = 0; dim < m; dim++)
{
mu_matrix.update(mu_hat.getValue(0,dim),gaussian,dim);
}
} // parallel for loop over gaussians
// Reduce and scale PK and mu
// Temporary space for reduced Pk_vec - also used in calculation of sigma
double unscaled_Pk_vec[k];
double global_scale=0.0;
{
// Pk
#ifdef UseMPI
MPI_Allreduce(&(Pk_vec[0]),unscaled_Pk_vec,k,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
for (int i=0; i<k; i++)
global_scale += unscaled_Pk_vec[i];
for (int i=0; i<k; i++)
Pk_vec[i] = unscaled_Pk_vec[i]/global_scale;
#else
for (int i=0; i<k; i++)
global_scale += Pk_vec[i];
for (int i=0; i<k; i++)
{
// Fill in unscaled vector, because we didn't reduce into it.
unscaled_Pk_vec[i] = Pk_vec[i];
Pk_vec[i] = Pk_vec[i]/global_scale;
}
#endif /* UseMPI */
// Mu
#ifdef UseMPI
// MPI workspace
double global_work[k*m];
double local_work[k*m];
for (int gaussian=0; gaussian<k; gaussian++)
{
// extract, already denormalized
for (int dim = 0; dim < m; dim++)
{
local_work[gaussian*m+dim] = mu_matrix.getValue(gaussian,dim);
}
}
// Reduce
MPI_Allreduce(local_work,global_work,k*m,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
// Scale and restore reduced Mu
for (int gaussian=0; gaussian<k; gaussian++)
{
// replace and normalize
for (int dim = 0; dim < m; dim++)
{
mu_matrix.update(global_work[gaussian*m+dim] / unscaled_Pk_vec[gaussian],gaussian,dim);
}
}
#else
for (int gaussian=0; gaussian<k; gaussian++)
for (int dim = 0; dim < m; dim++)
mu_matrix.update(mu_matrix.getValue(gaussian,dim) / unscaled_Pk_vec[gaussian],gaussian,dim);
#endif /* UseMPI */
}
// Using new Pk_vec and mu_matrix, calculate updated sigma
int successflag = 0;
#ifdef _OPENMP
# pragma omp parallel for
#endif /* _OPENMP */
for (gaussian = 0; gaussian < k; gaussian++)
{
// Check success flag
#ifdef _OPENMP
#pragma omp flush(successflag)
#endif /* _OPENMP */
if (successflag == 0)
{
Matrix sigma_hat(m,m);
//calculate the new covariances, sigma_hat
for (int data_point = 0; data_point < n; data_point++)
{
const double *x = &(X[m*data_point]);
//magical kronecker tensor product calculation
double pk = exp(p_nk_matrix.getValue(data_point,gaussian));
for (int i = 0; i < m; i++)
{
for (int j = 0; j < m; j++)
{
double temp1 = (x[i]-mu_matrix.getValue(gaussian,i)) *
(x[j]-mu_matrix.getValue(gaussian,j)) *
pk;
sigma_hat.update(sigma_hat.getValue(i,j) + temp1, i, j);
}
}
}//end data point
//rest of the sigma calculation, adjusted by the normalization factor
for (int i = 0; i < m; i++)
{
for (int j = 0; j < m; j++)
{
sigma_hat.update(sigma_hat.getValue(i,j)/(2*Pk_vec[gaussian]),i,j);
}
}
//you can't have a negative determinant - if somehow you do, mstep throws up its hands and EM will terminate
double determinant;
{
determinant = sigma_hat.det();
}
if (determinant < 0)
{
successflag = 1;
#ifdef _OPENMP
#pragma omp flush(successflag)
#endif /* _OPENMP */
}
//assign sigma_hat to sigma_matrix[gaussian]
for (int i = 0; i < m; i++)
{
for (int j = 0; j < m; j++)
{
sigma_matrix[gaussian]->update(sigma_hat.getValue(i,j), i, j);
}
}
} // end if successflag == true
#ifdef UseMPI
#ifndef _OPENMP
// MPI Collectives don't work well inside OpenMP parallel regions.
// Move this outside the loop.
{
int global_successflag;
MPI_Allreduce(&successflag,&global_successflag,1,MPI_INT, MPI_LOR, MPI_COMM_WORLD);
if (DEBUG)
std::cout << "global_successflag: "<<global_successflag<<std::endl;
successflag = global_successflag;
}
#endif /* _OPEN_MP */
#endif /* UseMPI */
} //end gaussian
#ifdef UseMPI
#ifdef _OPENMP
// Moved outside the parallel loop if using OpenMP and MPI both
{
int global_successflag;
MPI_Allreduce(&successflag,&global_successflag,1,MPI_INT, MPI_LOR, MPI_COMM_WORLD);
if (DEBUG)
std::cout << "global_successflag: "<<global_successflag<<std::endl;
successflag = global_successflag;
}
#endif /* _OPENMP */
#endif /* UseMPI */
// Reduce and scale sigma
#ifdef UseMPI
{
// MPI workspace
double global_work[k*m*m];
double local_work[k*m*m];
// Sigma
for (int gaussian=0; gaussian<k; gaussian++)
{
for (int i = 0; i < m; i++)
{
for (int j = 0; j < m; j++)
{
local_work[gaussian*m*m+i*m+j] = sigma_matrix[gaussian]->getValue(i,j);
}
}
}
// Reduce
MPI_Allreduce(local_work,global_work,k*m*m,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
// Restore and normalize reduced sigma
for (int gaussian=0; gaussian<k; gaussian++)
for (int i = 0; i < m; i++)
for (int j = 0; j < m; j++)
sigma_matrix[gaussian]->update(global_work[gaussian*m*m+i*m+j] / unscaled_Pk_vec[gaussian], i, j);
}
#else
// Restore and normalize reduced sigma
for (int gaussian=0; gaussian<k; gaussian++)
for (int i = 0; i < m; i++)
for (int j = 0; j < m; j++)
sigma_matrix[gaussian]->update( sigma_matrix[gaussian]->getValue(i,j)/ unscaled_Pk_vec[gaussian], i, j );
#endif /* UseMPI */
if (DEBUG)
{
#ifdef UseMPI
MPI_Barrier(MPI_COMM_WORLD);
#endif /* UseMPI */
std::cout << "Finished M-Step - printing"<<std::endl;
// Print all the return values: mu, sigma,
//for (node=0; node < totalNodes; node++)
for (int node=0; node<totalNodes; node++)
for (int gaussian=0; gaussian<k; gaussian++)
for (int dim=0; dim<m ; dim++)
{
double tmp=mu_matrix.getValue(gaussian,dim);
if (myNode == node) std::cout << "mu_matrix: Node: "<<node<<", Gaussian: "<<gaussian<<", dim: "<<dim<<", Value: "<<tmp<<std::endl;
#ifdef UseMPI
MPI_Barrier(MPI_COMM_WORLD);
#endif /* UseMPI */
}
for (int node=0; node<totalNodes; node++)
for (int gaussian=0; gaussian<k; gaussian++)
for (int i = 0; i < m; i++)
for (int j = 0; j < m; j++)
if (myNode == node)
std::cout << "sigma: Node: "<<node<<",Gaussian: "<<gaussian<<", i,j: ("<<i<<", "<<j<<") :"<< sigma_matrix[gaussian]->getValue(i,j)<<std::endl;
#ifdef UseMPI
MPI_Barrier(MPI_COMM_WORLD);
#endif /* UseMPI */
for (int node=0; node<totalNodes; node++)
for (int i=0; i<k; i++)
std::cout << "Node: "<<myNode<<", Pk_vec["<<i<<"]: "<<Pk_vec[i]<<std::endl;
#ifdef UseMPI
MPI_Barrier(MPI_COMM_WORLD);
#endif /* UseMPI */
std::cout << "Finished Printing M-Step"<<std::endl;
}
return !successflag;
}
/*******************************************************************************************
* IMPLEMENTATIONS OF PUBLIC FUNCTIONS
******************************************************************************************/
int gaussmix::gaussmix_adapt(Matrix & X, int n, vector<Matrix*> &sigma_matrix,
Matrix &mu_matrix, std::vector<double> &Pks, vector<Matrix*> &adapted_sigma_matrix,
Matrix &adapted_mu_matrix, std::vector<double> &adapted_Pks)
{
int result = gaussmix::adapt(X,n,sigma_matrix,mu_matrix,Pks,adapted_sigma_matrix,adapted_mu_matrix,adapted_Pks);
return result;
}
double* gaussmix::gaussmix_matrixToRaw(const Matrix & X)
{
unsigned int rows = X.rowCount();
unsigned int cols = X.colCount();
double* ptr = new double[rows * cols];
for (unsigned int i = 0; i < rows; ++i)
for (unsigned int j = 0; j < cols; ++j)
ptr[i*cols + j] = X.getValue(i,j);
// returns pointer to heap allocated array
return ptr;
}
int* serializeIntVector(const std::vector<int> &vec)
{
// +1 to size to store vector size
int *array = new int[vec.size()+1];
array[0] = vec.size();
for (unsigned int i=1; i<=vec.size(); ++i)
array[i] = vec[i-1];
// returns a pointer to the heap allocated array
return array;
}
std::vector<int> deserializeIntVector(const int *array)
{
std::vector<int> vec(array[0]);
for (int i=0; i<array[0]; i++)
vec[i] = array[i+1];
// return a copy of the vector
return vec;
}
int gaussmix::parse_line(char * buffer, Matrix & X, std::vector<int> & labels, int row, int m)
{
if (DEBUG)
std::cout << "Parsing line: " << buffer << std::endl;
int cols = 0;
double temp;
// Reset errno from possible previous issues
errno = 0;
if (buffer[MAX_LINE_SIZE - 1] != 0)
{
if (DEBUG)
std::cout << "Max line size exceeded at zero relative line " << row << std::endl;
return GAUSSMIX_FILE_NOT_FOUND;
}
if (strstr(buffer,":"))
{ // we have svm format (labelled data)
char * plabel = strtok(buffer," ");
if(plabel)
sscanf(plabel,"%d",&(labels[row]));
if (errno != 0)
{
if (DEBUG)
std::cout << "Could not convert label at row " << row << ": " << strerror(errno) << std::endl;
return GAUSSMIX_FILE_NOT_FOUND;