somtoolbox2/som_clset.m
4dbef185
 function [sC,old2new,newi] = som_clset(sC,action,par1,par2)
 
 % SOM_CLSET Create and/or set values in the som_clustering struct.
 %
 %   [sC,old2new,newi] = som_clset(sC,action,par1,par2)
 %
 %   first argument (one of the following)
 %     sC       (struct) a som_clustering struct
 %     Z        (matrix) size nb-1 x 3, as given by LINKAGE function
 %     base     (vector) size dlen x 1, a partitioning of the data
 %              Note: any non-struct input with exactly three columns 
 %              is interpreted as Z.
 %
 %   actions    (optional; if omitted the struct is only created/returned)
 %     'remove'           removes the indicated clusters (par1: vector)
 %     'add'              adds a cluster by making a combination of the indicated
 %                        clusters (par1: vector)
 %     'merge'            like 'add', followed by removing the indicated clusters (par1: vector)
 %     'coord'            sets the coordinates of base clusters (par1: matrix nb x *), and 
 %                        recalculates coordinates of the derived clusters (by averaging base cluster
 %                        coordinates)
 %     'color'            sets the colors of base clusters (par1: matrix nb x 3), and recalculates
 %                        colors of the derived clusters (as averages of base cluster colors)
 %     'move', 'split'    accepted but not implemented: the struct is returned unchanged.
 %                        The intended semantics were
 %                          'move'  moves a child cluster (par1: scalar) from a parent 
 %                                  to another (par2: vector 1 x 2)
 %                          'split' the indicated cluster (par1: scalar) is partitioned into
 %                                  indicated parts (par2: vector), which are then added, 
 %                                  while the indicated cluster (par1) is removed
 %     Any other action string leaves the struct unchanged.
 %
 %   outputs
 %     sC       (struct) the (possibly modified) som_clustering struct
 %     old2new  (vector) maps cluster indices valid before the call to the
 %                       indices valid after it; 0 marks a removed cluster.
 %                       When the first argument is a partitioning vector and
 %                       the action does not replace it, this is instead a 
 %                       sparse vector mapping the original labels to base 
 %                       cluster indices 1:nb.
 %     newi     (scalar) index of the cluster created by 'add' or 'merge' 
 %                       (0 if no cluster was created), [] for other actions
 %                        
 %   sC
 %     .type     (string) 'som_clustering'
 %     .name     (string) Identifier for the clustering.
 %     .nb       (scalar) Number of base clusters in the clustering.
 %     .base     (vector) Size dlen x 1, the basic groups of data 
 %                        forming the base clusters, e.g. as a result 
 %                        of partitive clustering. Allowed values are 
 %                         1:nb   indicating the base cluster
 %                                to which the data belongs. 
 %                         NaN    indicating that the data has
 %                                been ignored in the clustering                        
 %     .nc       (scalar) Number of clusters in the clustering (nb + derived clusters).
 %     .children (cellarray) size nc x 1, each cell gives the list of indices
 %                        of child clusters for the cluster
 %     .parent   (vector) size nc x 1, the index of parent of each cluster 
 %                        (or zero if the cluster does not have a parent)
 %     .coord    (matrix) size nc x *, visualization coordinates for each cluster
 %                        By default the coordinates are set so that 
 %                        the base clusters are ordered on a line, and the
 %                        position of each combined cluster is average of 
 %                        the base clusters that constitute it.
 %     .color    (matrix) size nc x 3, color for each cluster. 
 %                        By default the colors are set so that the 
 %                        base clusters are ordered on a line,
 %                        and then colors are assigned from the 'hsv' 
 %                        colormap to the base clusters. The color
 %                        of each combined cluster is average as above.
 %     .cldist   (string) Default cluster distance function.
 
 % allow a plain "create the struct" call without an action
 if nargin<2, action = ''; end
 
 newi = []; 
 if isstruct(sC), 
     % it should be a som_clustering struct
     old2new = [1:sC.nc];
 elseif size(sC,2)==3, 
     % assume it is a cluster hierarchy matrix Z 
     sC = Z2sC(sC); 
     old2new = [1:sC.nc];
 else
     % assume it is a partitioning vector; relabel the finite values 
     % to 1:nb and keep NaNs (ignored data) as NaNs
     base    = sC(:); 
     known   = isfinite(base);
     u       = unique(base(known));
     old2new = sparse(u,1,1:length(u));
     newbase = repmat(NaN,size(base));
     newbase(known) = full(old2new(base(known)));
     sC = part2sC(newbase); 
 end 
 
 switch action, 
 case {'remove','merge'},
     if strcmp(action,'merge'), 
         [sC,old2new,newi] = addmergedcluster(sC,par1);
     end
     for i=1:length(par1),
         % current index of the cluster; 0 means it is already gone
         k = full(old2new(par1(i)));
         if k, 
             [sC,o2n] = removecluster(sC,k);
             % compose the maps, leaving the zeros of removed clusters alone
             % (indexing o2n with a zero would be an error)
             nz = find(old2new);
             old2new(nz) = o2n(full(old2new(nz)));
             % keep the index of the merged cluster up to date
             if newi, newi = o2n(newi); end
         end
     end 
 case 'add', 
     [sC,old2new,newi] = addmergedcluster(sC,par1);    
 case 'move',
     % not implemented yet
 case 'split', 
     % not implemented yet
 case 'color', 
     sC.color = derivative_average(sC,par1);
 case 'coord',
     sC.coord = derivative_average(sC,par1);
 end 
 
 return;
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% subfunctions
 
 function sC = clstruct(nb,nc)
 
     % CLSTRUCT Make a blank som_clustering struct with nb base clusters 
     % and nc clusters in total. No hierarchy is set up: every parent is
     % zero, every child list is empty, and coordinates and colors are zero.
 
     % the fields are created one by one, in the canonical field order
     sC.type     = 'som_clustering';
     sC.name     = '';
     sC.base     = 1:nb;            % default partitioning: one item per base cluster
     sC.nb       = nb;
     sC.nc       = nc;
     sC.parent   = zeros(nc,1);     % 0 = no parent
     sC.children = cell(nc,1);      % each cell holds an empty matrix
     sC.coord    = zeros(nc,2);
     sC.color    = zeros(nc,3);
     sC.cldist   = 'centroid';
     return;
 
 function Z = sC2Z(sC,height)
 
     % SC2Z Convert a som_clustering struct to a (nb-1) x 3 hierarchy matrix.
     %
     %   sC      (struct) som_clustering struct; the fields parent, children,
     %                    nb and nc are used
     %   height  (string) how the third column is calculated, 'level' by default
     %            'level'   depth of the deepest cluster minus depth of the cluster
     %            'level2'  one more than the largest height among the children,
     %                      zero for clusters without children
     %            other     all heights are left at zero (not implemented)
     %
     %   Z       (matrix) size nb-1 x 3, filled from the last row backwards while
     %                    traversing the tree breadth-first from the root. A cluster 
     %                    with m>=2 children produces m-1 rows.
     %
     % NOTE(review): the rows hold sC cluster indices and row numbers as 
     % written below; whether this matches the index convention of LINKAGE 
     % has not been verified.
 
     if nargin<2, height = 'level'; end
 
     root   = find(sC.parent==0); 
     order  = [root]; 
     % brace indexing: the child list itself is needed, not a 1x1 cell 
     % (ANY and numeric indexing below do not work on a cell)
     ch     = sC.children{root}; 
     % breadth-first traversal; every visited cluster is put in front, so
     % that in 'order' children come before their parents
     while any(ch), i = ch(1); order = [ch(1), order]; ch = [ch(2:end), sC.children{i}]; end 
 
     he = zeros(sC.nc,1); 
     if strcmp(height,'level'), 
         % first the depth from the root, then flipped so that the deepest 
         % clusters get zero height
         ch = sC.children{root}; 
         while any(ch),
             i = ch(1); he(i) = he(sC.parent(i))+1; 
             ch = [ch(2:end), sC.children{i}]; 
         end 
         he = max(he)-he; 
     elseif strcmp(height,'level2'), 
         for i=order, if any(sC.children{i}), he(i) = max(he(sC.children{i}))+1; end, end
     else
         %he = som_cldist ( between children )
     end 
     
     Z = zeros(sC.nb-1,3);    
     i = sC.nb-1; 
     inds = root; 
     while i>0, 
         % take the next cluster from the queue and put its children in the queue
         ch = sC.children{inds(1)}; h = he(inds(1)); inds = [inds(2:end), ch]; 
         if length(ch)>=2,
             % a cluster with more than two children is split into a chain of rows
             for k=1:length(ch)-2, Z(i,:) = [i-1, ch(k), h]; i = i - 1; end
             Z(i,:) = [ch(end-1) ch(end) h]; i = i - 1;             
         end 
     end 
     return;
 
 function sC = Z2sC(Z)
 
     % Z2SC Build a som_clustering struct from a cluster hierarchy matrix.
     %
     %   Z   (matrix) size nb-1 x 3; the first two columns of row j hold the 
     %                indices of the two clusters combined into cluster nb+j
     %
     %   sC  (struct) som_clustering struct with nb base clusters and 
     %                2*nb-1 clusters in total, the last one being the root.
     %                Coordinates of the base clusters are their positions in 
     %                the left-to-right order of the leaves of the tree, colors
     %                are taken from the 'hsv' colormap in the same order, and 
     %                the derived clusters get averages of these.
 
     nb        = size(Z,1)+1;
     nc        = 2*nb-1;
     sC        = clstruct(nb,nc);
     sC.base   = [1:nb];
     % the last cluster (nc) is the root: it does not appear in Z and it 
     % keeps the zero parent set by clstruct, so it is left out of the loop
     for i=1:nc-1, 
         j = find(Z(:,1)==i | Z(:,2)==i); 
         sC.parent(i) = nb+j;
         sC.children{sC.parent(i)}(end+1) = i; 
     end 
     % coords and color: expand the root step by step, replacing each derived
     % cluster by its children, until only base clusters are left
     order = nc; 
     nonleaves = find(order>nb);    % empty if the root is the only cluster
     while any(nonleaves), 
         j = nonleaves(1); 
         ch = sC.children{order(j)};
         if j==1, oleft = []; else oleft = order(1:(j-1)); end
         if j==length(order), oright = []; else oright = order((j+1):length(order)); end
         order = [oleft, ch, oright];
         nonleaves = find(order>nb); 
     end
     % co(k) = position of base cluster k in the leaf order
     [dummy,co] = sort(order);     
     sC.coord   = derivative_average(sC,co');
     H          = hsv(nb+1);
     sC.color   = derivative_average(sC,H(co,:));    
     return;
     
     
 function sC = part2sC(part)
 
     % PART2SC Build a two-level som_clustering struct from a partitioning 
     % vector: the base clusters 1:max(part) are all children of a single 
     % root cluster, which gets the index max(part)+1. Base cluster k gets
     % the coordinate k and the k:th color of the 'hsv' colormap with 
     % max(part)+1 colors; the root gets averages over the base clusters.
 
     nbase = max(part); 
     iroot = nbase+1; 
 
     sC      = clstruct(nbase,iroot);
     sC.base = part; 
 
     % flat hierarchy: the root is the parent of every base cluster
     sC.children{iroot}  = 1:nbase; 
     sC.parent(1:nbase)  = iroot; 
 
     palette  = hsv(iroot);
     sC.coord = derivative_average(sC,(1:nbase)');
     sC.color = derivative_average(sC,palette(1:nbase,:));
     return;
 
 
 function [sC,old2new] = removecluster(sC,ind)
   
     % REMOVECLUSTER Remove one derived cluster from the hierarchy.
     %
     %   sC       (struct) som_clustering struct
     %   ind      (scalar) index of the cluster to remove
     %
     %   sC       (struct) updated struct: the children of the removed cluster
     %                     become children of its parent, and all clusters 
     %                     after it move one index down
     %   old2new  (vector) size 1 x (old nc), new index of each old cluster, 
     %                     0 for the removed one
     %
     % The root (no parent) and clusters without children (base clusters) 
     % are never removed: sC is then returned as it was, together with an 
     % identity map.
 
     old2new = [1:sC.nc]; 
     parent_ind = sC.parent(ind);
     ch = sC.children{ind};
     if ~parent_ind, 
         % trying to remove root cluster - no go
         return; 
     elseif ~any(ch), 
         % trying to remove a base cluster - no go
         return;
     else
         % ok, proceed
         old2new = [1:ind-1 0 ind:sC.nc-1];
         % update parent and child fields
         sC.parent(ch) = parent_ind;
         sC.children{parent_ind} = setdiff([sC.children{parent_ind}, ch],ind);
         % remove old cluster
         j = [1:ind-1, ind+1:sC.nc]; 
         sC.parent   = sC.parent(j);
         sC.children = sC.children(j);
         sC.color    = sC.color(j,:);
         sC.coord    = sC.coord(j,:);
         sC.nc       = sC.nc-1; 
         % update old indices to new indices; the zero parent of the root 
         % is not an index and must be skipped (old2new(0) is an error).
         % Assigning through the mask also keeps the shape of sC.parent.
         nz = find(sC.parent>0);
         sC.parent(nz) = old2new(sC.parent(nz));
         for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
     end     
     return;
 
 
 function [sC,old2new,inew] = addmergedcluster(sC,inds)
 
     % ADDMERGEDCLUSTER Add a cluster that combines a set of sibling clusters.
     %
     %   sC       (struct) som_clustering struct
     %   inds     (vector) indices of the clusters to combine; they must all 
     %                     have the same parent
     %
     %   sC       (struct) updated struct: the new cluster is inserted at the 
     %                     index of the common parent (the parent and all 
     %                     clusters after it move one index up), it becomes 
     %                     the parent of the indicated clusters and a child of
     %                     their old parent. Its color and coordinates are the 
     %                     averages over the base clusters below it.
     %   old2new  (vector) size 1 x (old nc), new index of each old cluster
     %   inew     (scalar) index of the new cluster, 0 if none was added
     %
     % Nothing is added (identity map, inew = 0) if inds is empty, if the 
     % clusters are not siblings, if they have no parent, or if they already 
     % make up all the children of their parent.
 
     old2new    = [1:sC.nc]; 
     inew       = 0; 
     if isempty(inds), 
         % nothing to combine - no go
         return; 
     end
     inds       = inds(:)';   % child lists are kept as row vectors
     p_inds     = sC.parent(inds); 
     if ~all(p_inds(1)==p_inds),  
         % clusters are not siblings - no go
         return;
     end
     parent_ind = p_inds(1); 
     if ~parent_ind, 
         % the root has no parent under which to add a cluster - no go
         return; 
     end
     if isempty(setdiff(sC.children{parent_ind},inds)),  
         % such a merged cluster exists already
         return;     
     end
 
     % ok, proceed
     inew = parent_ind;
     old2new = [1:inew-1,inew+1:sC.nc+1];
     % add the new cluster (=copy of parent_ind) 
     j = [1:inew,inew:sC.nc];
     sC.parent   = sC.parent(j);
     sC.children = sC.children(j);
     sC.color    = sC.color(j,:);
     sC.coord    = sC.coord(j,:);
     sC.nc       = sC.nc+1;
     % update old indices to new indices; the zero parent of the root 
     % is not an index and must be skipped (old2new(0) is an error)
     nz = find(sC.parent>0);
     sC.parent(nz) = old2new(sC.parent(nz));
     for i=1:sC.nc, sC.children{i} = old2new(sC.children{i}); end
     inds = old2new(inds);
     parent_ind = old2new(parent_ind);
     % update parent, child, color and coord fields
     sC.parent(inds)         = inew; 
     sC.parent(inew)         = parent_ind;
     sC.children{inew}       = inds; 
     sC.children{parent_ind} = [setdiff(sC.children{parent_ind}, inds), inew];
     b = baseind(sC,inew); 
     % average along the first dimension explicitly: with a single base 
     % cluster MEAN would otherwise average over the columns of the row
     sC.color(inew,:)        = mean(sC.color(b,:),1);
     sC.coord(inew,:)        = mean(sC.coord(b,:),1);
     return;
     
     
 function C = derivative_average(sC,Cbase)
 
     % DERIVATIVE_AVERAGE Extend values given for base clusters to all clusters.
     %
     %   sC     (struct) som_clustering struct
     %   Cbase  (matrix) size nb x dim, one row of values (e.g. a color or 
     %                   coordinates) for each base cluster
     %
     %   C      (matrix) size nc x dim, row i is the average of the rows of 
     %                   Cbase over the base clusters found by baseind for 
     %                   cluster i (for a base cluster, its own row)
     %
     % An error is raised if Cbase does not have nb rows.
 
     [n dim] = size(Cbase);
     if n ~= sC.nb, error('Color / Coord matrix should have nb rows'); end
     C = zeros(sC.nc,dim);     
     % The dimension is given explicitly: for a single base cluster the 
     % selection is a 1 x dim row, and MEAN without a dimension would 
     % average over its columns and give one scalar for the whole row.
     for i=1:sC.nc, C(i,:) = mean(Cbase(baseind(sC,i),:),1); end   
     return;
     
     
 function bi = baseind(sC,ind)
 
     % BASEIND Indices of the base clusters that belong to a cluster.
     %
     %   sC   (struct) som_clustering struct; the fields children and nb are used
     %   ind  (scalar) index of a cluster
     %
     %   bi   (vector) row vector of the indices <= nb found among the 
     %                 cluster itself and all its descendants
 
     bi = [ind]; 
     i = 1; 
     % breadth-first traversal: bi is both the queue and the result
     while i<=length(bi), 
         ch = sC.children{bi(i)};
         bi = [bi, ch(:)']; 
         i = i + 1;    % go to the next cluster in the queue
     end 
     bi = bi(bi<=sC.nb);
     return;