Browse code

starting som prediction fine-tuned class-performance visualisation

git-svn-id: https://svn.discofish.de/MATLAB/spmtoolbox/SVMCrossVal@112 83ab2cfd-5345-466c-8aeb-2b2739fb922d

Christoph Budziszewski authored on 21/01/2009 16:34:25
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,339 @@
function sMap = som_make(D, varargin)

%SOM_MAKE Create, initialize and train Self-Organizing Map.
%
% sMap = som_make(D, [[argID,] value, ...])
%
%  sMap = som_make(D);
%  sMap = som_make(D, 'munits', 20);
%  sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
%  sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
%
%  Input and output arguments ([]'s are optional):
%   D        (matrix) training data, size dlen x dim
%            (struct) data struct
%   [argID,  (string) See below. The values which are unambiguous can
%    value]  (varies) be given without the preceding argID.
%
%   sMap     (struct) map struct
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default', 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% For more help, try 'type som_make' or check out online documentation.
% See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
%          SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_make
%
% PURPOSE
%
% Creates, initializes and trains a SOM using default parameters.
%
% SYNTAX
%
%  sMap = som_make(D);
%  sMap = som_make(...,'argID',value,...);
%  sMap = som_make(...,value,...);
%
% DESCRIPTION
%
% Creates, initializes and trains a SOM with default parameters. Uses functions
% SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
% up with the default values.
%
% First, the number of map units is determined. Unless they are
% explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
% It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
% argument influences the final number of map units: a 'big' map has
% x4 the default number of map units and a 'small' map has x0.25 the
% default number of map units.
%
% After the number of map units has been determined, the map size is
% determined. Basically, the two biggest eigenvalues of the training
% data are calculated and the ratio between sidelengths of the map grid
% is set to this ratio. The actual sidelengths are then set so that
% their product is as close to the desired number of map units as
% possible.
%
% Then the SOM is initialized. First, linear initialization along two
% greatest eigenvectors is tried, but if this can't be done (the
% eigenvectors cannot be calculated), random initialization is used
% instead.  After initialization, the SOM is trained in two phases:
% first rough training and then fine-tuning. If the 'tracking'
% argument is greater than zero, the average quantization error and
% topographic error of the final map are calculated.
%
% REQUIRED INPUT ARGUMENTS
%
%  D           The data to use in the training.
%     (struct) A data struct. If a struct is given, '.comp_names' field as
%              well as '.comp_norm' field is copied to the map struct.
%     (matrix) A data matrix, size dlen x dim. The data matrix may
%              contain unknown values, indicated by NaNs.
%
% OPTIONAL INPUT ARGUMENTS
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default' or 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% An argID that is not followed by a value, an unknown 'init' or
% 'algorithm' value, or a numeric 'training' with fewer than two
% elements raises an error before any initialization or training is done.
%
% OUTPUT ARGUMENTS
%
%  sMap (struct) the trained map struct
%
% EXAMPLES
%
%  To simply train a map with default parameters:
%
%   sMap = som_make(D);
%
%  With the optional arguments, the initialization and training can be
%  influenced. To change map size, use 'msize', 'munits' or 'mapsize'
%  arguments:
%
%   sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
%   sMap = som_make(D,'munits', 100);
%   sMap = som_make(D,'msize', [20 10]);
%
%  Argument 'algorithm' can be used to switch between 'seq' and 'batch'
%  algorithms. 'batch' is the default, so to use 'seq' algorithm:
%
%   sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq');
%
%  The 'tracking' argument can be used to control the amount of reporting
%  during training. The argument is used in this function, and it is
%  passed to the training functions. To make the function work silently
%  set it to 0.
%
%   sMap = som_make(D,'tracking',0);
%
% SEE ALSO
%
%  som_map_struct   Create a map struct.
%  som_topol_struct Default values for SOM topology.
%  som_train_struct Default values for SOM training parameters.
%  som_randinit     Random initialization algorithm.
%  som_lininit      Linear initialization algorithm.
%  som_seqtrain     Sequential training algorithm.
%  som_batchtrain   Batch training algorithm.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 111199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% D: accept either a data struct or a plain data matrix
if isstruct(D)
  data_name = D.name;
  comp_names = D.comp_names;
  comp_norm = D.comp_norm;
  D = D.data;
else
  % inputname must be called here, in the function that received D
  data_name = inputname(1);
  % build a one-row data struct only to obtain default component
  % names and normalization fields
  sDummy = som_data_struct(D(1,:));
  comp_names = sDummy.comp_names;
  comp_norm = sDummy.comp_norm;
end
[dlen, dim] = size(D);

% defaults
mapsize = '';
sM = som_map_struct(dim);
sTopol = sM.topol;
munits = prod(sTopol.msize); % should be zero
mask = sM.mask;
name = sM.name;
neigh = sM.neigh;
tracking = 1;
algorithm = 'batch';
initalg = 'lininit';
training = 'default';

% varargin
i = 1;
while i <= length(varargin)
  argok = 1;
  arg = varargin{i};
  if ischar(arg)
    switch arg
      % argument IDs: each consumes the following element as its value
     case 'mask',       [mask, i] = next_value(varargin, i);
     case 'munits',     [munits, i] = next_value(varargin, i);
     case 'msize',      [value, i] = next_value(varargin, i);
                        sTopol.msize = value;
                        munits = prod(sTopol.msize);
     case 'mapsize',    [mapsize, i] = next_value(varargin, i);
     case 'name',       [name, i] = next_value(varargin, i);
     case 'comp_names', [comp_names, i] = next_value(varargin, i);
     case 'lattice',    [value, i] = next_value(varargin, i);
                        sTopol.lattice = value;
     case 'shape',      [value, i] = next_value(varargin, i);
                        sTopol.shape = value;
     case {'topol','som_topol','sTopol'}
                        [sTopol, i] = next_value(varargin, i);
                        munits = prod(sTopol.msize);
     case 'neigh',      [neigh, i] = next_value(varargin, i);
     case 'tracking',   [tracking, i] = next_value(varargin, i);
     case 'algorithm',  [algorithm, i] = next_value(varargin, i);
     case 'init',       [initalg, i] = next_value(varargin, i);
     case 'training',   [training, i] = next_value(varargin, i);
      % unambiguous values
     case {'hexa','rect'}, sTopol.lattice = arg;
     case {'sheet','cyl','toroid'}, sTopol.shape = arg;
     case {'gaussian','cutgauss','ep','bubble'}, neigh = arg;
     case {'seq','batch','sompak'}, algorithm = arg;
     case {'small','normal','big'}, mapsize = arg;
     case {'randinit','lininit'}, initalg = arg;
     case {'short','default','long'}, training = arg;
     otherwise, argok = 0;
    end
  elseif isstruct(arg) && isfield(arg,'type')
    switch arg(1).type
     case 'som_topol', sTopol = arg;
     otherwise, argok = 0;
    end
  else
    argok = 0;
  end
  if ~argok
    % +1 because D is the first argument of som_make
    disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]);
  end
  i = i + 1;
end

% Reject values that would otherwise be skipped silently by the switch
% statements below (returning an uninitialized or untrained map), or
% that would fail only after the rough training phase has already run.
if ~any(strcmp(initalg, {'randinit','lininit'}))
  error('(som_make) Argument ''init'' must be ''randinit'' or ''lininit''.');
end
if ~any(strcmp(algorithm, {'seq','batch','sompak'}))
  error('(som_make) Argument ''algorithm'' must be ''seq'', ''batch'' or ''sompak''.');
end
if isnumeric(training) && numel(training) < 2
  error('(som_make) Numeric ''training'' must hold two values: rough and finetune lengths.');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the map struct

%% map size
if isempty(sTopol.msize) || ~prod(sTopol.msize)
  if tracking>0, fprintf(1,'Determining map size...\n'); end
  if ~munits
    % no explicit unit count: take the default for this data length
    % and scale it according to 'mapsize'
    sTemp = som_topol_struct('dlen',dlen);
    munits = prod(sTemp.msize);
    switch mapsize
     case 'small', munits = max(9,ceil(munits/4));
     case 'big',   munits = munits*4;
     otherwise % nil
    end
  end
  sTemp = som_topol_struct('data',D,'munits',munits);
  sTopol.msize = sTemp.msize;
  if tracking>0
    fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));
  end
end

% map struct
sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ...
                      'comp_names', comp_names, 'comp_norm', comp_norm);

% function: 'sompak' uses the 'seq' training parameters but is
% dispatched to its own training function
if strcmp(algorithm,'sompak')
  algorithm = 'seq';
  func = 'sompak';
else
  func = algorithm;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initialization

if tracking>0, fprintf(1,'Initialization...\n'); end

switch initalg
 case 'randinit', sMap = som_randinit(D, sMap);
 case 'lininit',  sMap = som_lininit(D, sMap);
end
sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% training

if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end

% rough train
if tracking>0, fprintf(1,'Rough training phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
sTrain = som_set(sTrain,'data_name',data_name);
sTrain = set_trainlen(sTrain, training, 1);
sMap = run_training(func, sMap, D, sTrain, tracking, mask);

% finetune
if tracking>0, fprintf(1,'Finetuning phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
sTrain = set_trainlen(sTrain, training, 2);
sMap = run_training(func, sMap, D, sTrain, tracking, mask);

% quality
if tracking>0
  [mqe,tge] = som_quality(sMap,D);
  fprintf(1,'Final quantization error: %5.3f\n',mqe)
  fprintf(1,'Final topographic error:  %5.3f\n',tge)
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function [value, i] = next_value(args, i)
%NEXT_VALUE Fetch the value that follows the argID at position i.
%  Returns the value and the index advanced onto it. Raises an error if
%  the argID is the last element of args.
if i >= length(args)
  error('(som_make) Argument ''%s'' must be followed by a value.', args{i});
end
i = i + 1;
value = args{i};

function sTrain = set_trainlen(sTrain, training, phase)
%SET_TRAINLEN Apply the 'training' argument to one training phase.
%  phase is 1 for rough training and 2 for finetuning. A numeric
%  training gives the length directly; 'short' and 'long' scale the
%  length already in sTrain; anything else leaves it unchanged.
if isnumeric(training)
  sTrain.trainlen = training(phase);
else
  switch training
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end

function sMap = run_training(func, sMap, D, sTrain, tracking, mask)
%RUN_TRAINING Run one training phase with the selected training function.
switch func
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end