Browse code

starting som prediction fine-tuned class-performance visualisation

git-svn-id: https://svn.discofish.de/MATLAB/spmtoolbox/SVMCrossVal@112 83ab2cfd-5345-466c-8aeb-2b2739fb922d

Christoph Budziszewski authored on 21/01/2009 16:34:25
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,278 @@
1
function [sC,old2new,newi] = som_clset(sC,action,par1,par2)

% SOM_CLSET Create and/or set values in the som_clustering struct.
%
%   [sC,old2new,newi] = som_clset(sC,action,par1,par2)
%
%   first argument
%     sC       (struct) a som_clustering struct
%     Z        (matrix) size nb-1 x 3, as given by LINKAGE function
%     base     (vector) size dlen x 1, a partitioning of the data
%
%   NOTE: any non-struct input with exactly three columns is treated as Z,
%   so a partitioning must be given as a column vector.
%
%   actions    (may be omitted, in which case the struct is only created)
%     'remove'           removes the indicated clusters (par1: vector)
%     'add'              add a cluster by making a combination of the indicated
%                        clusters (par1: vector)
%     %'move'             moves a child cluster (par1: scalar) from a parent to another
%     %                   (par2: vector 1 x 2)
%     'merge'            like 'add', followed by removing the indicated clusters (par1: vector)
%     %'split'            the indicated cluster (par1: scalar) is partitioned into indicated
%     %                   parts (par2: vector), which are then added, while the indicated cluster
%     %                   (par1) is removed
%     'coord'            sets the coordinates of base clusters (par1: matrix nb x *), and 
%                        recalculates coordinates of the derived clusters (by averaging base cluster
%                        coordinates)
%     'color'            sets the colors of base clusters (par1: matrix nb x 3), and recalculates
%                        colors of the derived clusters (as averages of base cluster colors)
%
%   'move' and 'split' are accepted but not implemented yet (no effect).
%                        
%   outputs
%     sC       (struct) the created / updated som_clustering struct
%     old2new  (vector) old2new(i) is the new index of old cluster i, or 0 
%                       if that cluster was removed. When the first argument
%                       is a partitioning vector, it starts out as a sparse 
%                       column vector mapping the original labels to 1:nb.
%     newi     (scalar) index of the cluster created by 'add' or 'merge' 
%                       (0 if no cluster was created), [] for other actions
%
%   sC
%     .type     (string) 'som_clustering'
%     .name     (string) Identifier for the clustering.
%     .nb       (scalar) Number of base clusters in the clustering.
%     .base     (vector) Size dlen x 1, the basic groups of data 
%                        forming the base clusters, e.g. as a result 
%                        of partitive clustering. Allowed values are 
%                         1:nb   indicating the base cluster
%                                to which the data belongs to. 
%                         NaN    indicating that the data has
%                                been ignored in the clustering                        
%     .nc       (scalar) Number of clusters in the clustering (nb + derived clusters).
%     .children (cellarray) size nc x 1, each cell gives the list of indices
%                        of child clusters for the cluster
%     .parent   (vector) size nc x 1, the index of parent of each cluster 
%                        (or zero if the cluster does not have a parent)
%     .coord    (matrix) size nc x *, visualization coordinates for each cluster
%                        By default the coordinates are set so that 
%                        the base clusters are ordered on a line, and the
%                        position of each combined cluster is average of 
%                        the base clusters that constitute it.
%     .color    (matrix) size nc x 3, color for each cluster. 
%                        By default the colors are set so that the 
%                        base clusters are ordered on a line,
%                        and then colors are assigned from the 'hsv' 
%                        colormap to the base clusters. The color
%                        of each combined cluster is average as above.
%     .cldist   (string) Default cluster distance function.

newi = [];                          % the declared output (was never assigned before)
if nargin < 2, action = ''; end     % creation-only call: nothing to apply below

if isstruct(sC), 
    % it should be a som_clustering struct
    old2new = 1:sC.nc;
elseif size(sC,2)==3, 
    % assume it is a cluster hierarchy matrix Z 
    sC = Z2sC(sC); 
    old2new = 1:sC.nc;
else
    % assume it is a partitioning vector
    base    = sC; 
    valid   = isfinite(base);       % NaN marks ignored data and must stay NaN
    u       = unique(base(valid));
    old2new = sparse(u,1,1:length(u));
    base(valid) = full(old2new(base(valid)));
    sC = part2sC(base); 
end 

switch action, 
case 'remove',        
    for i=1:length(par1),         
        cur = full(old2new(par1(i)));
        if cur > 0,                 % skip clusters that are already gone
            [sC,o2n] = removecluster(sC,cur);
            old2new  = chain_index_maps(old2new,o2n);
        end
    end 
case 'add', 
    [sC,old2new,newi] = addmergedcluster(sC,par1);    
case 'move',
    % not implemented yet
case 'split', 
    % not implemented yet
case 'merge', 
    [sC,old2new,newi] = addmergedcluster(sC,par1);
    for i=1:length(par1), 
        cur = full(old2new(par1(i)));
        if cur > 0,                 % skip clusters that are already gone
            [sC,o2n] = removecluster(sC,cur);
            old2new  = chain_index_maps(old2new,o2n);
            % the freshly added cluster shifts when an earlier index is removed
            if newi > 0, newi = o2n(newi); end
        end
    end 
case 'color', 
    sC.color = derivative_average(sC,par1);
case 'coord',
    sC.coord = derivative_average(sC,par1);
end 

return;

function map = chain_index_maps(map,step)
% Compose two index maps: entries of MAP that are still valid (>0) are sent
% through STEP, entries that are already 0 (removed) stay 0.

    alive = map > 0;
    map(alive) = step(full(map(alive)));
    return;
97
+
98
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
99
+%% subfunctions
100
+
101
function sC = clstruct(nb,nc)
% CLSTRUCT Build an empty som_clustering struct.
%
%   nb   (scalar) number of base clusters
%   nc   (scalar) total number of clusters
%
%   sC   (struct) 'som_clustering' struct with .base = 1:nb, zero-filled
%                 .parent (nc x 1), .coord (nc x 2) and .color (nc x 3),
%                 an nc x 1 cell array of empty .children lists and
%                 .cldist set to 'centroid'

    % fields are assigned one by one (in the established field order) so
    % that the cell array is stored as a value instead of expanding the
    % struct into a struct array
    sC          = struct('type','som_clustering');
    sC.name     = '';
    sC.base     = 1:nb;
    sC.nb       = nb;
    sC.nc       = nc;
    sC.parent   = zeros(nc,1);
    sC.children = repmat({[]},nc,1);
    sC.coord    = zeros(nc,2);
    sC.color    = zeros(nc,3);
    sC.cldist   = 'centroid';
    return;
112
+
113
function Z = sC2Z(sC,height)
% SC2Z Convert a som_clustering struct into a linkage-style matrix.
%
%   sC      (struct) som_clustering struct; .parent, .children, .nb and
%                    .nc are read. Exactly one cluster is expected to 
%                    have parent 0 (the root).
%   height  (string) how the third column of Z is computed
%             'level'   (default) depth below the root, flipped so that
%                       the deepest clusters get 0
%             'level2'  0 for clusters without children, otherwise one
%                       more than the largest height among the children
%             other     heights are left at zero (not implemented)
%
%   Z       (matrix) size nb-1 x 3, filled from the last row backwards, 
%                    starting from the root

    if nargin<2, height = 'level'; end

    root   = find(sC.parent==0); 
    % breadth-first walk; prepending gives an order in which every cluster
    % appears before its parent
    order  = root; 
    ch     = sC.children{root};     % brace indexing: the list itself, not a 1x1 cell
    while any(ch), 
        i     = ch(1); 
        order = [i, order]; 
        ch    = [ch(2:end), sC.children{i}]; 
    end 

    he = zeros(sC.nc,1); 
    if strcmp(height,'level'), 
        ch = sC.children{root}; 
        while any(ch),
            i = ch(1); he(i) = he(sC.parent(i))+1; 
            ch = [ch(2:end), sC.children{i}]; 
        end 
        he = max(he)-he; 
    elseif strcmp(height,'level2'), 
        for i=order, 
            if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end
        end
    else
        %he = som_cldist ( between children )
    end 
    
    Z = zeros(sC.nb-1,3);    
    i = sC.nb-1; 
    inds = root; 
    % stop as well when no clusters are left to expand, so that a hierarchy
    % that yields fewer than nb-1 rows does not index an empty list
    while i>0 && ~isempty(inds), 
        ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch]; 
        if length(ch)>=2,
            % a cluster with more than two children is written as a chain 
            % of rows; NOTE(review): the first column of the chained rows 
            % is i-1, presumably referring to the preceding row of Z
            for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
            Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;             
        end 
    end 
    return;
147
+
148
function sC = Z2sC(Z)
% Z2SC Build a som_clustering struct from a cluster hierarchy matrix.
%
%   Z    (matrix) size nb-1 x 3; row j lists in its first two columns the
%                 two clusters that are joined into cluster nb+j
%
%   sC   (struct) som_clustering struct with nb base clusters and 
%                 nc = 2*nb-1 clusters in total; the last cluster is the root

    nb        = size(Z,1)+1;
    nc        = 2*nb-1;
    sC        = clstruct(nb,nc);
    sC.base   = 1:nb;
    % the root (cluster nc) is not listed in any row of Z, so it is left
    % out of the loop and keeps parent 0
    for i=1:nc-1, 
        j = find(Z(:,1)==i | Z(:,2)==i); 
        sC.parent(i) = nb+j;
        sC.children{nb+j}(end+1) = i; 
    end 
    % coords and color: expand the root into its children, repeatedly, 
    % until only base clusters are left; this gives the left-to-right 
    % ordering of the base clusters
    order = nc; 
    nonleaves = find(order>nb); 
    while any(nonleaves), 
        j      = nonleaves(1); 
        ch     = sC.children{order(j)};
        oleft  = order(1:(j-1));
        oright = order((j+1):length(order));
        order  = [oleft, ch, oright];
        nonleaves = find(order>nb); 
    end
    [dummy,co] = sort(order);       % co(k) = position of base cluster k on the line
    sC.coord   = derivative_average(sC,co');
    H          = hsv(nb+1);
    sC.color   = derivative_average(sC,H(co,:));    
    return;
175
+    
176
function sC = part2sC(part)
% PART2SC Build a two-level som_clustering struct from a partitioning.
%
%   part  (vector) partitioning; its largest value is taken as the number
%                  of base clusters
%
%   sC    (struct) som_clustering struct in which all base clusters are
%                  children of one extra cluster (index nb+1)

    nBase   = max(part); 
    rootInd = nBase+1; 
    leaves  = 1:nBase; 

    sC      = clstruct(nBase,rootInd);
    sC.base = part; 

    % every base cluster hangs directly under the extra top cluster
    sC.parent(leaves)    = rootInd; 
    sC.children{rootInd} = leaves; 

    % base clusters sit on a line at 1..nb and take the first nb colors of
    % an hsv map with nb+1 entries
    sC.coord = derivative_average(sC,leaves');
    palette  = hsv(rootInd);
    sC.color = derivative_average(sC,palette(leaves,:));
    return;
189
+
190
function [sC,old2new] = removecluster(sC,ind)
% REMOVECLUSTER Remove one derived cluster from the hierarchy.
%
%   sC       (struct) som_clustering struct
%   ind      (scalar) index of the cluster to remove
%
%   sC       (struct) updated struct; the children of the removed cluster
%                     are attached to its parent and all indices above 
%                     ind are shifted down by one
%   old2new  (vector) size 1 x (old nc), new index of each old cluster,
%                     0 for the removed one. It is the identity map when
%                     nothing was removed (root and base clusters are 
%                     never removed).
  
    old2new = 1:sC.nc; 
    parent_ind = sC.parent(ind);
    ch = sC.children{ind};
    if ~parent_ind, 
        % trying to remove root cluster - no go
        return; 
    end
    if ~any(ch), 
        % trying to remove a base cluster - no go
        return;
    end

    % ok, proceed
    old2new = [1:ind-1 0 ind:sC.nc-1];
    % update parent and child fields
    sC.parent(ch) = parent_ind;
    siblings = sC.children{parent_ind};
    sC.children{parent_ind} = setdiff([siblings(:)', ch(:)'],ind);
    % remove old cluster
    keep = [1:ind-1, ind+1:sC.nc]; 
    sC.parent   = sC.parent(keep);
    sC.children = sC.children(keep);
    sC.color    = sC.color(keep,:);
    sC.coord    = sC.coord(keep,:);
    sC.nc       = sC.nc-1; 
    % update old indices to new indices; the root has parent 0, which is 
    % not a valid subscript and must be left as it is
    has_parent = sC.parent > 0;
    sC.parent(has_parent) = old2new(sC.parent(has_parent));
    for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
    return;
219
+
220
function [sC,old2new,inew] = addmergedcluster(sC,inds)
% ADDMERGEDCLUSTER Insert a new cluster that combines sibling clusters.
%
%   sC       (struct) som_clustering struct
%   inds     (vector) indices of the clusters to combine; they must all 
%                     have the same parent
%
%   sC       (struct) updated struct. The new cluster takes the index of 
%                     the common parent; the parent and every cluster 
%                     after it move up by one.
%   old2new  (vector) size 1 x (old nc), new index of each old cluster
%                     (identity map if nothing was added)
%   inew     (scalar) index of the new cluster, 0 if nothing was added
%
%   Nothing is added when inds is empty, when the clusters are not
%   siblings, when they have no parent (root), or when they already make
%   up all children of their parent.

    old2new    = 1:sC.nc; 
    inew       = 0; 
    inds       = inds(:)';          % child lists are kept as row vectors
    if isempty(inds), 
        return; 
    end
    p_inds     = sC.parent(inds); 
    if ~all(p_inds(1)==p_inds),  
        % clusters are not siblings - no go
        return;
    end
    parent_ind = p_inds(1); 
    if parent_ind == 0, 
        % the root has no parent to put the new cluster under - no go
        return; 
    end
    if isempty(setdiff(sC.children{parent_ind},inds)),  
        % such a merged cluster exists already
        return;     
    end

    % ok, proceed
    inew = parent_ind;
    old2new = [1:inew-1,inew+1:sC.nc+1];
    % add the new cluster (=copy of parent_ind) 
    j = [1:inew,inew:sC.nc];
    sC.parent   = sC.parent(j);
    sC.children = sC.children(j);
    sC.color    = sC.color(j,:);
    sC.coord    = sC.coord(j,:);
    sC.nc       = sC.nc+1;
    % update old indices to new indices; the root has parent 0, which is
    % not a valid subscript and must be left as it is
    has_parent = sC.parent > 0;
    sC.parent(has_parent) = old2new(sC.parent(has_parent));
    for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
    inds = old2new(inds);
    parent_ind = old2new(parent_ind);
    % update parent, child, color and coord fields
    sC.parent(inds)         = inew; 
    sC.parent(inew)         = parent_ind;
    sC.children{inew}       = inds; 
    sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
    b = baseind(sC,inew); 
    % average over rows explicitly: with a single base cluster a plain
    % mean() would average across the columns instead
    sC.color(inew,:)        = mean(sC.color(b,:),1);
    sC.coord(inew,:)        = mean(sC.coord(b,:),1);
    return;
259
+    
260
function C = derivative_average(sC,Cbase)
% DERIVATIVE_AVERAGE Spread per-base-cluster values to all clusters.
%
%   sC     (struct) som_clustering struct
%   Cbase  (matrix) size nb x dim, one row of values (e.g. color or
%                   coordinates) for each base cluster
%
%   C      (matrix) size nc x dim; row i is the average of the Cbase rows
%                   of the base clusters found under cluster i
%
%   An error is raised if Cbase does not have nb rows.

    [n dim] = size(Cbase);
    if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
    C = zeros(sC.nc,dim);     
    for i=1:sC.nc, 
        % average over rows explicitly: for a single base cluster a plain 
        % mean() would average across the columns and give one scalar
        C(i,:) = mean(Cbase(baseind(sC,i),:),1); 
    end   
    return;
267
+    
268
function bi = baseind(sC,ind)
% BASEIND Indices of the base clusters under a cluster.
%
%   sC   (struct) som_clustering struct; .children and .nb are read
%   ind  (scalar) index of the cluster to start from
%
%   bi   (vector) row vector with the indices <= nb that are reached by 
%                 following the child lists from ind (ind itself included
%                 when it is a base cluster)

    bi = ind(:)'; 
    i = 1; 
    % bi doubles as the work list: each visited cluster appends its children
    while i<=length(bi), 
        kids = sC.children{bi(i)};
        bi   = [bi, kids(:)']; 
        i    = i + 1;               % advance, otherwise the loop never ends
    end 
    bi = bi(bi<=sC.nb);
    return;
275
+  
276
+
277
+      
278
+