function [Class,P]=knn_old(Data, Proto, proto_class, K) %KNN_OLD A K-nearest neighbor classifier using Euclidean distance % % [Class,P]=knn_old(Data, Proto, proto_class, K) % % [sM_class,P]=knn_old(sM, sData, [], 3); % [sD_class,P]=knn_old(sD, sM, class); % [class,P]=knn_old(data, proto, class); % [class,P]=knn_old(sData, sM, class,5); % % Input and output arguments ([]'s are optional): % Data (matrix) size Nxd, vectors to be classified (=classifiees) % (struct) map or data struct: map codebook vectors or % data vectors are considered as classifiees. % Proto (matrix) size Mxd, prototype vector matrix (=prototypes) % (struct) map or data struct: map codebook vectors or % data vectors are considered as prototypes. % [proto_class] (vector) size Nx1, integers 1,2,...,k indicating the % classes of corresponding protoptypes, default: see the % explanation below. % [K] (scalar) the K in KNN classifier, default is 1 % % Class (matrix) size Nx1, vector of 1,2, ..., k indicating the class % desicion according to the KNN rule % P (matrix) size Nxk, the relative amount of prototypes of % each class among the K closest prototypes for % each classifiee. % % If 'proto_class' is _not_ given, 'Proto' _must_ be a labeled SOM % Toolbox struct. The label of the data vector or the first label of % the map model vector is considered as class label for th prototype % vector. In this case the output 'Class' is a copy of 'Data' (map or % data struct) relabeled according to the classification. If input % argument 'proto_class' _is_ given, the output argument 'Class' is % _always_ a vector of integers 1,2,...,k indiacating the class. % % If there is a tie between representatives of two or more classes % among the K closest neighbors to the classifiee, the class is % selected randomly among these candidates. % % IMPORTANT % % ** Even if prototype vectors are given in a map struct the mask _is not % taken into account_ when calculating Euclidean distance % ** The function calculates the total distance matrix between all % classifiees and prototype vectors. This results to an MxN matrix; % if N is high it is recommended to divide the matrix 'Data' % (the classifiees) into smaller sets in order to avoid memory % overflow or swapping. Also, if K>1 this function uses 'sort' which is % considerably slower than 'max' which is used for K==1. % % See also KNN, SOM_LABEL, SOM_AUTOLABEL % Contributed to SOM Toolbox 2.0, February 11th, 2000 by Johan Himberg % Copyright (c) by Johan Himberg % http://www.cis.hut.fi/projects/somtoolbox/ % Version 2.0beta Johan 040200 %% Init %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % This must exist later classnames=''; % Check K if nargin<4 | isempty(K), K=1; end if ~vis_valuetype(K,{'1x1'}) error('Value for K must be a scalar.'); end % Take data from data or map struct if isstruct(Data); if isfield(Data,'type') & ischar(Data.type), ; else error('Invalid map/data struct?'); end switch Data.type case 'som_map' data=Data.codebook; case 'som_data' data=Data.data; end else % is already a matrix data=Data; end % Take prototype vectors from prototype struct if isstruct(Proto), if isfield(Proto,'type') & ischar(Proto.type), ; else error('Invalid map/data struct?'); end switch Proto.type case 'som_map' proto=Proto.codebook; case 'som_data' proto=Proto.data; end else % is already a matrix proto=Proto; end % Check that inputs are matrices if ~vis_valuetype(proto,{'nxm'}) | ~vis_valuetype(data,{'nxm'}), error('Prototype or data input not valid.') end % Record data&proto sizes and check their dims [N_data dim_data]=size(data); [N_proto dim_proto]=size(proto); if dim_proto ~= dim_data, error('Data and prototype vector dimension does not match.'); end % Check if the classes are given as labels (no class input arg.) % if they are take them from prototype struct if nargin<3 | isempty(proto_class) if ~isstruct(Proto) error(['If prototypes are not in labeled map or data struct' ... 'class must be given.']); % transform to interger (numerical) class labels else [proto_class,classnames]=class2num(Proto.labels); end end % Check class label vector: must be numerical and of integers if ~vis_valuetype(proto_class,{[N_proto 1]}); error(['Class vector is invalid: has to be a N-of-data_rows x 1' ... ' vector of integers']); elseif sum(fix(proto_class)-proto_class)~=0 error('Class labels in vector ''Class'' must be integers.'); end % Find all class labels ClassIndex=unique(proto_class); N_class=length(ClassIndex); % number of different classes % Calculate euclidean distances between classifiees and prototypes d=distance(proto,data); %%%% Classification %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if K==1, % sort distances only if K>1 % 1NN % Select the closest prototype [tmp,proto_index]=min(d); class=proto_class(proto_index); else % Sort the prototypes for each classifiee according to distance [tmp,proto_index]=sort(d); %% Select K closest prototypes proto_index=proto_index(1:K,:); knn_class=proto_class(proto_index); for i=1:N_class, classcounter(i,:)=sum(knn_class==ClassIndex(i)); end %% Vote between classes of K neighbors [winner,vote_index]=max(classcounter); %% Handle ties % set index to clases that got as amuch votes as winner equal_to_winner=(repmat(winner,N_class,1)==classcounter); % set index to ties tie_index=find(sum(equal_to_winner)>1); % drop the winner from counter % Go through equal classes and reset vote_index randomly to one % of them for i=1:length(tie_index), tie_class_index=find(equal_to_winner(:,tie_index(i))); fortuna=randperm(length(tie_class_index)); vote_index(tie_index(i))=tie_class_index(fortuna(1)); end class=ClassIndex(vote_index); end %% Build output %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Relative amount of classes in K neighbors for each classifiee if K==1, P=zeros(N_data,N_class); if nargout>1, for i=1:N_data, P(i,ClassIndex==class(i))=1; end end else P=classcounter'./K; end % xMake class names to struct if they exist if ~isempty(classnames), Class=Data; for i=1:N_data, Class.labels{i,1}=classnames{class(i)}; end else Class=class; end %%% Subfunctions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [nos,names] = class2num(class) % Change string labels in map/data struct to integer numbers names = {}; nos = zeros(length(class),1); for i=1:length(class) if ~isempty(class{i}) & ~any(strcmp(class{i},names)) names=cat(1,names,class(i)); end end tmp_nos = (1:length(names))'; for i=1:length(class) if ~isempty(class{i}) nos(i,1) = find(strcmp(class{i},names)); end end function d=distance(X,Y); % Euclidean distance matrix between row vectors in X and Y U=~isnan(Y); Y(~U)=0; V=~isnan(X); X(~V)=0; d=X.^2*U'+V*Y'.^2-2*X*Y';