IT++ Logo
mog_diag_kmeans.cpp
Go to the documentation of this file.
1
31#include <iostream>
32
33namespace itpp
34{
35
36inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const
37{
38 double acc = 0.0;
39 for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; }
40 return(acc);
41}
42
44{
45
46 for (int k = 0;k < K;k++) c_count[k] = 0;
47
48 for (int n = 0;n < N;n++) {
49
50 int k = 0;
51 double min_dist = dist(c_means[k], c_X[n]);
52 int k_winner = k;
53
54 for (int k = 1;k < K;k++) {
55 double tmp_dist = dist(c_means[k], c_X[n]);
56 if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; }
57 }
58
61 }
62}
63
64
66{
67
68 for (int k = 0;k < K;k++) {
69
70 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
71
72 int Nk = c_count[k];
73
74 for (int n = 0;n < Nk;n++) {
75 double * x = c_X[ c_partitions[k][n] ];
76 for (int d = 0;d < D;d++) c_tmpvec[d] += x[d];
77 }
78
79 if (Nk > 0) {
80 double * c_mean = c_means[k];
81 for (int d = 0;d < D;d++) c_mean[d] = c_tmpvec[d] / Nk;
82 }
83 }
84
85}
86
87
89{
90
91 static int counter = 0;
92
93 bool zombie_mean = false;
94
95 int k = 0;
96 int max_count = count[k];
97 int k_hog = k;
98
99 for (int k = 1;k < K;k++) if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; }
100
101 for (int k = 0;k < K;k++) {
102 if (c_count[k] == 0) {
103
104 zombie_mean = true;
105 if (verbose) {
106 it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean");
107 }
108 if (k_hog == k) {
109 it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k");
110 return(false);
111 }
112
113 if (counter >= c_count[k_hog]) counter = 0;
114
115 double * c_mean = c_means[k];
116 double * c_x = c_X[ c_partitions[k_hog][counter] ];
117
118 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]);
119 counter++;
120 }
121
122 }
123
125
126 return(true);
127}
128
129
131{
132
133 double tmp_dist = 0.0;
134 for (int k = 0;k < K;k++) tmp_dist += dist(c_means[k], c_means_old[k]);
135 return(tmp_dist);
136}
137
138
140{
141
142 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
143
144 for (int n = 0;n < N;n++) {
145 double * c_x = c_X[n];
146 for (int d = 0;d < D;d++) c_tmpvec[d] += c_x[d];
147 }
148
149 for (int d = 0;d < D;d++) c_tmpvec[d] /= N;
150
151 int step = int(floor(double(N) / double(K)));
152 for (int k = 0;k < K;k++) {
153 double * c_mean = c_means[k];
154 double * c_x = c_X[k*step];
155
156 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]);
157 }
158}
159
160
162{
163
164 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d];
165
166 for (int i = 0;i < max_iter;i++) {
167
169 if (!dezombify_means()) return;
171
172 double change = measure_change();
173
174 if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << " change = " << change << std::endl;
175 if (change == 0) break;
176
177 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d];
178 }
179
180}
181
182
188
189
191{
192
193 for (int k = 0;k < K;k++) {
194 int Nk = c_count[k];
195
196 if (Nk >= 2) {
197 double * c_mean = c_means[k];
198
199 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
200
201 for (int n = 0;n < Nk;n++) {
202 double * c_x = c_X[ c_partitions[k][n] ];
203 for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d]; c_tmpvec[d] += tmp * tmp; }
204 }
205
206 for (int d = 0;d < D;d++) c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0);
207 }
208 else {
209 for (int d = 0;d < D;d++) c_diag_covs[k][d] = 1.0;
210 }
211 }
212
213}
214
215
217{
218 for (int k = 0;k < K;k++) c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K);
219}
220
221
223{
224
225 for (int d = 0;d < D;d++) {
226 double acc = 0.0;
227 for (int n = 0;n < N;n++) acc += c_X[n][d];
228 c_norm_mu[d] = acc / N;
229 }
230
231 for (int d = 0;d < D;d++) {
232 double acc = 0.0;
233 for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d]; acc += tmp * tmp; }
234 c_norm_sd[d] = std::sqrt(acc / (N - 1));
235 }
236
237 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
238 c_X[n][d] -= c_norm_mu[d];
239 if (c_norm_sd[d] > 0.0) c_X[n][d] /= c_norm_sd[d];
240 }
241}
242
243
245{
246
247 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
248 if (c_norm_sd[d] > 0.0) c_X[n][d] *= c_norm_sd[d];
249 c_X[n][d] += c_norm_mu[d];
250 }
251}
252
253
255{
256
257 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) {
258 if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d];
259 c_means[k][d] += norm_mu[d];
260 }
261}
262
263
265{
266
267 it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid");
268 it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero");
269 it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)");
270
272
273 Array<vec> means_in = model_in.get_means();
274 Array<vec> diag_covs_in = model_in.get_diag_covs();
275 vec weights_in = model_in.get_weights();
276
278
282
283 it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality");
284
285 N = X_in.size();
286
287 if (K > N) {
288 it_warning("MOG_diag_kmeans_sup::run(): K > N");
289 }
290 else {
291 if (K > N / 10) {
292 it_warning("MOG_diag_kmeans_sup::run(): K > N/10");
293 }
294 }
295
297 trust = trust_in;
298
300 for (int k = 0;k < K;k++) means_old(k).set_size(D);
302 for (int k = 0;k < K;k++) partitions(k).set_size(N);
304 tmpvec.set_size(D);
305 norm_mu.set_size(D);
306 norm_sd.set_size(D);
307
312 c_tmpvec = enable_c_access(tmpvec);
313 c_norm_mu = enable_c_access(norm_mu);
314 c_norm_sd = enable_c_access(norm_sd);
315
317
318 calc_means();
320 calc_covs();
321 calc_weights();
322
324
329 disable_c_access(c_tmpvec);
330 disable_c_access(c_norm_mu);
331 disable_c_access(c_norm_sd);
332
335 count.set_size(0);
336 tmpvec.set_size(0);
337 norm_mu.set_size(0);
338 norm_sd.set_size(0);
339
340 cleanup();
341
342}
343
344//
345// convenience functions
346
352
353
354}
355
General array class.
Definition array.h:105
int size() const
Returns the number of data elements in the array object.
Definition array.h:155
void set_size(int n, bool copy=false)
Resizing an Array<T>.
Definition array.h:257
support class for MOG_diag_kmeans()
double measure_change() const
ADD DOCUMENTATION HERE.
void run(MOG_diag &model_in, Array< vec > &X_in, int max_iter_in=10, double trust_in=0.5, bool normalise_in=true, bool verbose_in=false)
ADD DOCUMENTATION HERE.
void unnormalise_vectors()
ADD DOCUMENTATION HERE.
void calc_means()
ADD DOCUMENTATION HERE.
bool dezombify_means()
ADD DOCUMENTATION HERE.
int * c_count
'C' pointer to the count vector
int N
number of training vectors
double ** c_X
'C' pointers to training vectors
void initial_means()
ADD DOCUMENTATION HERE.
void normalise_vectors()
ADD DOCUMENTATION HERE.
Array< vec > means_old
means from the previous iteration, used to measure progress
void calc_covs()
ADD DOCUMENTATION HERE.
int ** c_partitions
'C' pointers to partition vectors
void assign_to_means()
ADD DOCUMENTATION HERE.
void unnormalise_means()
ADD DOCUMENTATION HERE.
double ** c_means_old
'C' pointers to old means
double dist(const double *x, const double *y) const
ADD DOCUMENTATION HERE.
void iterate()
ADD DOCUMENTATION HERE.
int max_iter
Maximum number of iterations.
double trust
trust factor, where 0 <= trust <= 1.
ivec count
keeps a count of the number of vectors assigned to each mean
Array< ivec > partitions
contains indices of vectors assigned to each mean
void recalculate_means()
ADD DOCUMENTATION HERE.
bool verbose
Whether we print the progress.
void calc_weights()
ADD DOCUMENTATION HERE.
Diagonal Mixture of Gaussians (MOG) class.
Definition mog_diag.h:56
double ** c_means
pointers to the mean vectors
Definition mog_diag.h:197
double ** disable_c_access(double **A_in)
Disable C style access to an Array of vectors (vec)
Definition mog_diag.cpp:300
double ** enable_c_access(Array< vec > &A_in)
Enable C style access to an Array of vectors (vec)
Definition mog_diag.cpp:284
void cleanup()
Release memory used by the model. The model will be empty.
Definition mog_diag.h:111
double * c_weights
pointer to the weight vector
Definition mog_diag.h:206
double ** c_diag_covs
pointers to the covariance vectors
Definition mog_diag.h:200
bool check_size(const vec &x_in) const
Check if vector x_in has the same dimensionality as the model.
void init()
Initialise the model to be empty.
int K
number of gaussians
vec weights
weights
Array< vec > diag_covs
diagonal covariance matrices, stored as vectors
int D
dimensionality
Array< vec > means
means
void MOG_diag_kmeans(MOG_diag &model_in, Array< vec > &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
#define it_warning(s)
Display a warning message.
Definition itassert.h:173
#define it_assert(t, s)
Abort if t is not true.
Definition itassert.h:94
K-means based optimiser for Mixture of Gaussians - header file.
itpp namespace
Definition itmex.h:37
vec floor(const vec &x)
Round to nearest lower integer.
Definition converters.h:346

Generated on Tue Aug 17 2021 10:59:15 for IT++ by Doxygen 1.9.8