Browse code

starting som prediction fine-tuned class-performance visualisation

git-svn-id: https://svn.discofish.de/MATLAB/spmtoolbox/SVMCrossVal@112 83ab2cfd-5345-466c-8aeb-2b2739fb922d

Christoph Budziszewski authored on 21/01/2009 16:34:25
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,288 @@
1
function sData = som_read_data(filename, varargin)

%SOM_READ_DATA Read data from an ascii file in SOM_PAK format.
%
% sD = som_read_data(filename, [dim], [missing])
% sD = som_read_data(filename, [missing])
%
%  sD = som_read_data('system.data');
%  sD = som_read_data('system.data',10);
%  sD = som_read_data('system.data','*');
%  sD = som_read_data('system.data',10,'*');
%
%  Input and output arguments ([]'s are optional): 
%   filename    (string) input file name
%   [dim]       (scalar) input space dimension; if omitted or empty,
%                        it is determined from the file
%   [missing]   (string) string which indicates a missing component
%                        value, 'NaN' by default
%
%   sD          (struct) data struct
%
% Reads data from an ascii file. The file must be in SOM_PAK format, 
% except that it may lack the input space dimension from the first
% line. 
%
% For more help, try 'type som_read_data' or check out online documentation.
% See also  SOM_WRITE_DATA, SOM_READ_COD, SOM_WRITE_COD, SOM_DATA_STRUCT.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_read_data
%
% PURPOSE
%
% Reads data from an ascii file in SOM_PAK format.
%
% SYNTAX
%
%  sD = som_read_data(filename)
%  sD = som_read_data(..., dim)
%  sD = som_read_data(..., 'missing')
%  sD = som_read_data(..., dim, 'missing')
%
% DESCRIPTION
%
% This function is offered for compatibility with SOM_PAK, a SOM software
% package in C. It reads data from a file in SOM_PAK format.
%
% The SOM_PAK data file format is as follows. The first line must
% contain the input space dimension and nothing else. The following
% lines are comment lines, empty lines or data lines. Unlike programs
% in SOM_PAK, this function can also determine the input dimension
% from the first data lines, if the input space dimension line is
% missing.  Note that the SOM_PAK format is not fully supported: data
% vector 'weight' and 'fixed' properties are ignored (they are treated
% as labels).
%
% Each data line contains one data vector and its labels. From the beginning
% of the line, first are values of the vector components separated by
% whitespaces, then labels also separated by whitespaces. If there are
% missing values in the vector, the missing value marker needs to be
% specified as the last input argument ('NaN' by default). The missing
% values are stored as NaNs in the data struct. 
% 
% Comment lines start with '#'. Comment lines as well as empty lines are
% ignored, except for comment lines that start with '#n' or '#l'. In that
% case the line should contain names of the vector components ('#n') or
% label names ('#l') separated by whitespaces.
%
% NOTE: The maximum value Matlab is able to deal with (realmax)
% should not appear in the input file. This is because the missing value
% marker is replaced by realmax before the line is parsed with sscanf,
% and every realmax in the parsed data is afterwards converted to NaN.
%
% REQUIRED INPUT ARGUMENTS
%
%  filename    (string) input filename
%
% OPTIONAL INPUT ARGUMENTS
%
%  dim         (scalar) input space dimension; a positive integer. If it 
%                       is omitted or empty, it is determined from the file.
%  missing     (string) string used to denote missing components (NaNs); 
%                       default is 'NaN'
%
% OUTPUT ARGUMENTS
%
%  sD   (struct) the resulting data struct
%
% ERRORS
%
%  An error is raised if the file cannot be opened or closed, if the
%  dimension is not a positive integer scalar, if the header line is
%  invalid, if a data line has too few components or text glued to its
%  last component, or if a '#n' line does not list exactly dim names.
%  The input file is closed before any such error is propagated.
%
% EXAMPLES
%
% The basic usage is:
%  sD = som_read_data('system.data');
%
% If you know the input space dimension beforehand, and the file does
% not contain it on the first line, it helps if you specify it as the
% second argument: 
%  sD = som_read_data('system.data',9);
%
% If the missing components in the data are marked with some other
% characters than with 'NaN', you can specify it with the last argument: 
%  sD = som_read_data('system.data',9,'*')
%  sD = som_read_data('system.data','NaN')
%
% Here's an example data file:
%
% 5
% #n one two three four five
% #l ID
% 10 2 3 4 5 1stline label
% 0.4 0.3 0.2 0.5 0.1 2ndline label1 label2
% # comment line: missing components are indicated by 'x':s
% 1 x 1 x 1 3rdline missing_components
% x 1 2 2 2 
% x x x x x 5thline emptyline
%
% SEE ALSO
%
%  som_write_data   Writes data structs/matrices to a file in SOM_PAK format.
%  som_read_cod     Read a map from a file in SOM_PAK format.
%  som_write_cod    Writes data struct into a file in SOM_PAK format.
%  som_data_struct  Creates data structs.

% Copyright (c) 1997-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 1.0beta ecco 221097
% Version 2.0beta ecco 060899, juuso 151199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% varargin hides the upper limit from Matlab, so check the count by hand
if nargin < 1
  error('Not enough input arguments.');
elseif nargin > 3
  error('Too many input arguments.');
end

dont_care       = 'NaN';  % default don't care string
comment_start   = '#';    % the char a SOM_PAK comment line starts with
comp_name_line  = '#n';   % string denoting a special comment line,
                          % which contains names of each component
label_name_line = '#l';   % string denoting a special comment line,
                          % which contains names of each label
block_size      = 1000;   % block size used in file read

% placeholder the missing value marker is replaced with before sscanf
kludge          = num2str(realmax, 100);

% process input arguments (before the file is opened, so that an
% argument error cannot leave an open file handle behind)

dim = [];                 % empty means: determine from the file
if nargin == 2 
  if ischar(varargin{1})
    dont_care = varargin{1};
  else
    dim       = varargin{1};
  end
elseif nargin == 3
  dim       = varargin{1};
  dont_care = varargin{2};
end

if ~ischar(dont_care)
  error('The missing value marker must be a string.');
end

% open input file

fid = fopen(filename);
if fid < 0
  error(['Cannot open ' filename]); 
end

% From here on every error must close the file before it propagates.
try

  % if the data dimension is not specified, find out what it is
  if isempty(dim)
    dim = detect_dimension(fid, dont_care, kludge);
  end

  % check the dimension is valid
  if ~isnumeric(dim) || numel(dim) ~= 1
    error('Illegal data dimension: it must be a numeric scalar.');
  end
  if ~isfinite(dim) || dim < 1 || dim ~= round(dim) 
    error(['Illegal data dimension: ' num2str(dim)]);
  end

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  %% read data

  sData       = som_data_struct(zeros(1, dim), 'name', filename); 
  lnum        = 0;                                  % data vector counter
  data_temp   = zeros(block_size, dim);
  labs_temp   = cell(block_size, 1);
  comp_names  = sData.comp_names;
  label_names = sData.label_names;

  % dim numbers followed by a conversion that fails on whitespace: the 
  % failure stops sscanf from cycling the format over the labels
  form        = [repmat('%g',[1 dim-1]) '%g%[^ \t]'];

  limit       = block_size;
  while 1
    li = fgetl(fid);                       % read next line
    if ~ischar(li), break, end             % is this the end of file? 

    % all missing values are replaced by value realmax, which is
    % converted to NaN once the whole file has been read
    li = strrep(li, dont_care, kludge);     
    [data, c, errmsg, n] = sscanf(li, form);

    if c < dim % fewer numbers than dim on the input file line
      if c == 0
        if strncmp(li, comp_name_line, 2)          % component name line?
          names = split_tokens(strrep(li(3:end), kludge, dont_care));
          for k = 1:length(names), comp_names{k} = names{k}; end
          if length(names) ~= dim 
            error(['Illegal number of component names: ' ...
                   num2str(length(names)) ...
                   ' (dimension is ' num2str(dim) ')']); 
          end
        elseif strncmp(li, label_name_line, 2)     % label name line?
          names = split_tokens(strrep(li(3:end), kludge, dont_care));
          for k = 1:length(names), label_names{k} = names{k}; end
        elseif ~strncmp(li, comment_start, 1)      % not a comment: error?
          % an empty or all-whitespace line is silently skipped
          if ~isempty(split_tokens(li))
            error(['Invalid vector on input file data line ' ...
                   num2str(lnum+1) ': [' ...
                   deblank(strrep(li, kludge, dont_care)) ']']);
          end
        end
      else
        error(['Only ' num2str(c) ...
               ' vector components on input file data line ' ...
               num2str(lnum+1) ' (dimension is ' num2str(dim) ')']);
      end

    else

      % If text is glued to the last component (e.g. '5abc'), the %[...] 
      % conversion succeeds and sscanf returns extra elements. 
      if numel(data) ~= dim
        error(['Invalid vector on input file data line ' ...
               num2str(lnum+1) ': [' ...
               deblank(strrep(li, kludge, dont_care)) ']']);
      end

      lnum = lnum + 1;                % this was a line containing data vector
      data_temp(lnum, 1:dim) = data'; % add data to struct

      if lnum == limit       % reserve more memory if necessary
        nl = size(labs_temp, 2);
        data_temp(lnum+1:lnum+block_size, 1:dim) = zeros(block_size, dim);
        labs_temp(lnum+1:lnum+block_size, 1:nl)  = cell(block_size, nl);
        limit = limit + block_size;
      end
    
      % read labels: everything after the vector components
      if n < length(li)
        labs = split_tokens(strrep(li(n:end), kludge, dont_care));
        for k = 1:length(labs), labs_temp{lnum, k} = labs{k}; end
      end
    end
  end

catch
  % close the file, then pass the original error on to the caller
  last_err = lasterror;
  fclose(fid);
  rethrow(last_err);
end

% close input file
if fclose(fid) < 0
  error(['Cannot close file ' filename]);
else
  fprintf(2, '\rdata read ok         \n');
end

% set values
data_temp(data_temp == realmax) = NaN;
sData.data        = data_temp(1:lnum,:);
sData.labels      = labs_temp(1:lnum,:);
sData.comp_names  = comp_names;
sData.label_names = label_names;

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function dim = detect_dimension(fid, dont_care, kludge)

%DETECT_DIMENSION Determine the data dimension from the first two 
% numeric lines of the file and position the file at the first line 
% that the data reading loop should process.

fpos1 = ftell(fid);       % read first non-comment line
[l1, c1, line1] = next_numeric_line(fid, dont_care, kludge);
if c1 == 0
  error('Cannot determine data dimension: no numeric lines in input file.');
end

fpos2 = ftell(fid);       % read second non-comment line
[l2, c2] = next_numeric_line(fid, dont_care, kludge);  % c2 == 0: hit EOF

if (c1 == 1 && c2 ~= 1) || (c1 == 1 && c2 == 1 && l1 == 1)
  % a single number followed by something else: a dimension header line
  dim = l1;
  fseek(fid, fpos2, -1);
elseif c1 == c2 || c2 == 0
  % no header line: the first numeric line is already a data vector
  % (c2 == 0 means it is the only one), so re-read from the beginning
  dim = c1;
  fseek(fid, fpos1, -1);
  warning(['Automatically determined data dimension is ' ...
           num2str(dim) '. Is it correct?']); 
else
  error(['Invalid header line: ' line1]);
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [vals, count, raw] = next_numeric_line(fid, dont_care, kludge)

%NEXT_NUMERIC_LINE Read lines until one starts with at least one number.
% Returns the numbers, their count and the line as read from the file. 
% count is 0 if the end of file was reached first.

vals = []; count = 0; raw = '';
while count == 0
  raw = fgetl(fid);
  if ~ischar(raw)         % fgetl returns -1 at the end of file
    vals = []; raw = '';
    return;
  end
  [vals, count] = sscanf(strrep(raw, dont_care, kludge), '%f ');
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function tokens = split_tokens(str)

%SPLIT_TOKENS Split a string into whitespace separated tokens.
% Returns a 1xN cell array of strings, empty if there are no tokens.

tokens = {};
pos    = 1;
while 1
  % %s reads one token; %[^ \t] then fails on the following whitespace,
  % which stops sscanf from cycling the format over the next token
  [s, c, errmsg, next] = sscanf(str(pos:end), '%s%[^ \t]');
  if c == 0 || isempty(s), break, end
  tokens{end+1} = s;
  pos = pos + next - 1;
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0 289
\ No newline at end of file