function d=dataset(varargin)
%DATASET Creator of the dataset class
%    
%    DATASET creates an empty dataset object.
%    
%    DATASET(D) clones the dataset object D, or rebuilds a broken
%    dataset object from its struct form.
%    
%    DATASET(DATA,'ATTR1',VAL1,'ATTR2',VAL2,...) creates a dataset
%    object. DATA are the data (each piece of data is on a
%    different line). The couples ATTR1, VAL1, ... are used
%    to set the attributes (they are forwarded to SET).
%    Variables are named 'f1', 'f2', ..., typed 'continuous'
%    and labelled 1..N unless attributes say otherwise.
%    
%    DATASET(DATA,SYMBOLS,VARTYPES,LABELS,VARIABLES) creates a
%    dataset object. DATA are the data (each piece of data is on
%    a different line). LABELS is a column vector of the
%    labels (optional) corresponding to each piece of data.
%    VARIABLES [cell] are the names of the columns of the
%    data. SYMBOLS [cell] are the symbolic correspondence for
%    the symbolic data. VARTYPES is a cell array containing
%    the type of the variable for each column
%    ('continuous','discrete','symbolic','class'). There can
%    only be one 'class' variable at a time. If VARTYPES is
%    not specified, the types are deduced from the DATA and
%    the symbol field.
%    
%    DATASET(CLASSES,DATA,SYMBOLS,VARTYPES,LABELS,VARIABLES)
%    creates a dataset object. DATA are the data (each piece of
%    data is on a different line). LABELS is a column vector
%    of the labels (optional) corresponding to each piece of
%    data. CLASSES contains the class associated with each
%    piece of data. VARIABLES [cell] are the names of
%    the columns of the data. SYMBOLS [cell] are the symbolic
%    correspondence for the symbolic data.
%    
%    NOTE(review): the two positional forms above do not appear
%    to match the argument signatures checked in this file,
%    which expect 'ATTR',VAL pairs after the leading
%    arguments - confirm before relying on them.
%    
%    DATASET(DATAFILE,VARIABLES,'ATTR1',VAL1,'ATTR2',VAL2,...)
%    creates a dataset object from a mat datafile. DATAFILE is
%    the name of the mat file where the data are saved. The
%    file must contain a series of variables referenced by
%    VARIABLES [cell]. Each of these variables must be a column
%    vector ([double] or [cell]) of length equal to the number
%    of data points. A [cell] variable X is treated as
%    symbolic: its integer codes are appended to DATAFILE under
%    the name X_ and the dataset refers to X_ instead of X.
%    
%    The class defines the following methods:
%    
%      ADDCLASS: add a class to the data set (obsolete)
%      ADDFEATURE: Adds a new feature to the data set
%      ADDPOINT: Add a point to the dataset
%      ADDSFEATURE: Adds a new symbolic feature to the data
%          set
%      ADDVARIABLE: Add a new variable(feature) to the data
%          set
%      BOOTSTRAP: BOOTSTRAP creates n new datasets ready for
%          Bagging purpose or cross validation
%      BTSTRAP2: BTSTRAP2 creates n new datasets ready for
%          Bagging purpose or cross validation
%      CLASSGROUP: Groups a few classes into more general
%          classes
%      CLASSPART: selects a part (of the classes) of the
%          data set
%      CSPLIT: Cut the dataset into n pieces for x
%          validation purposes
%      CUT: Cut the dataset into n pieces for x validation
%          purposes
%      DENORMALISE: performs a denormalisation of the
%          continuous data field of a dataset
%      DISPLAY: displays the dataset object
%      DOUBLE: converts the dataset object to double
%      EXPORT: Export a data set to a comma delimited text
%          file
%      FS: FS performs a features selection on the dataset d
%      FUZZYKNN: performs fuzzy nearest neighbours
%          algorithm
%      FUZZYKNN2: performs fuzzy nearest neighbours
%          algorithm (matlab release)
%      GET: Access/query dataset property values
%      GETMISS: get missing values from a dataset
%      GFKCLUS: performs Gustafson-Kessel clustering
%      HORZCAT: Horizontal concatenation operator
%      JOIN: joins two data sets
%      KMEANS: performs kMeans clustering
%      LEAVE1OUT: performs the leave-one-out validation and
%          computes the confusion matrix, error rates and
%          kappa inside a report structure
%      MKMEANS: performs multi prototypes clustering
%      NORMALISE: performs the linear normalisation of a
%          learning dataset and transforms the corresponding
%          testing dataset if it is provided
%      NORMSTD: performs a normalisation of the continuous
%          features of a learning set and a testing set
%      PICK: picks randomly chosen examples out of a data
%          set
%      PLACEKERNELS: place kernels to be used with smoothknn
%      PLOT: plot a dataset
%      POSTMINING: compute the confusion matrix and error
%          rates between supervision and prediction
%      RANDFS1: a features subsets builder by sampling with
%          replacement
%      RANDFS2: a features subsets builder by sampling
%          without replacement
%      REMPOINT: remove a point from a data set
%      SET: set object properties
%      SIMPLE_KNN: classifies a testing set using kNN
%      SIMPLE_KNN2: classifies the query using kNN (matlab
%          release)
%      SIMPLE_MLP: performs a one-hidden-layer neural network
%          classification
%      SIMPLEKNN: classifies the query using kNN
%      SIZE: get the size of the data set
%      SMOOTHKNN: classifies the query using smoothknn
%      SUBSREF: overload the subscript reference operator
%      SVM: support vectors machine classification
%      SVMINIT: initialise the support vectors for the SVM
%          classification
%      VERTCAT: vertical concatenation operator
%    
%    

%MAN_PAGE_BEGIN
%
%	 @purpose	Creator of the dataset class
%
%	 @synopsis	dataset
%	 @description	creates an empty dataset object.
%
%	 @synopsis	dataset(d)
%	 @description	clone the dataset object or fix a broken dataset object from its struct form.
%
%	 @synopsis	dataset(data,'attr1',val1,'attr2',val2,...)
%	 @description	create a dataset object. <code>data</code> are the data
%	(each piece of data is on a different line). The couples of <code>attr1</code>,
%	<code>val1</code>, ... are used to set the attributes. 
%
%	 @synopsis	dataset(datafile,variables,'attr1',val1,'attr2',val2)
%	 @description	create a dataset object from a mat datafile. <code>datafile</code> is the
%	name of the mat file where the data are saved. The file must contain a series of
%	variables referenced by <CODE>variables</CODE>[cell]. Each of these variables must be a
%	column vector([double] or [cell]) of length equal to the number of data points.
%
%MAN_PAGE_END

% By Antoine Duchateau. All Rights Reserved.
% Written: 19/03/99, update 12/09/99 (Patrice Latinne).

% Match the inputs against each supported call signature. As used below,
% checkargs is expected to return an empty value when the inputs match and
% an error message otherwise (see the isempty() tests and the final error()).
calling0=checkargs(varargin,1,'dataset');	% dataset(d)
calling0b=checkargs(varargin,1,'struct');	% dataset(s), s being a struct
calling1=checkargs(varargin,[1 23],'numeric','char','any','char','any','char','any','char','any','char','any',...
							'char','any','char','any','char','any','char','any','char','any','char','any');	% dataset(data,'attr',val,...)
calling2=checkargs(varargin,[2 24],'char','cell','char','any','char','any','char','any','char','any','char','any',...
							'char','any','char','any','char','any','char','any','char','any','char','any');	% dataset(datafile,variables,'attr',val,...)

% Default (empty) field values; the field set defines the object layout.
d.data=[];
d.symbols={};
d.vartypes = {};
d.variables={};
d.labels=[];
d.filename = '';
d.opt = '';

% Cached number of variables (nv) and number of samples (ns).
d.nv = 0;
d.ns = 0;

if nargin==0,
	% Empty object
	d=class(d,'dataset');

elseif isempty(calling0),	%Trivial situation (call with a dataset object as input)
	d=varargin{1};

elseif isempty(calling0b),	%Emergency situation (rescue of a damaged/old dataset object)
	arg = struct(varargin{1});
	fn=fieldnames(d);

	% Copy only the fields known to the current layout; anything else in the
	% struct is dropped and missing fields keep their default value.
	for i=1:size(fn,1),
		if isfield(arg,fn{i}),
			d=setfield(d,fn{i},getfield(arg,fn{i}));
		end
	end

	d=class(d,'dataset');
	d = synccache(d);

elseif isempty(calling2),	%Data stored in a file

	datafile = varargin{1};
	variables = varargin{2};
	% Single source of truth for the number of variables, so that symbols,
	% vartypes and nv agree whatever the orientation of the cell array.
	nvar = length(variables);

	d.filename = datafile;
	d.variables = variables;
	d.symbols = cell(1,nvar);
	vartypes = cell(1,nvar);
	[vartypes{:}] = deal('continuous');
	d.vartypes = vartypes;

	% Check the content of the file
	vars = who('-file',datafile);
	typeinfile = whos('-file',datafile);
	nl = [];	% common number of rows, fixed by the first variable

	for i = 1:nvar,
		if ~any(strcmp(variables{i},vars)),
			error(['The variable ' variables{i} ' is missing in the file ' datafile]);
		end
		num = find(strcmp(variables{i},vars));

		if isempty(nl),
			nl = typeinfile(num).size(1);
		else
			if typeinfile(num).size(1) ~= nl,
				error(['The variable ' variables{i} ' doesn''t have the same dimension as the other variables']);
			end
		end

		if typeinfile(num).size(2) ~= 1,
			error(['The variable ' variables{i} ' is not a column vector']);
		end

		if strcmp(typeinfile(num).class , 'double'),
			%Do nothing
		elseif strcmp(typeinfile(num).class , 'cell'),
			% Symbolic variable: replace each distinct string by an integer
			% code and store the coded column back into the file.
			s = load(datafile,variables{i});
			dd = getfield(s,variables{i});
			clear s
			classcodes = zeros(size(dd,1),1);
			d.vartypes{i} = 'symbolic';
			d.symbols{i} = sortrows(compress(dd))';
			% Iterate over the symbols of THIS variable. Indexing with 'end'
			% here would look at the last (possibly still empty) entry and
			% leave all the codes at zero.
			for j=1:length(d.symbols{i}),
				classcodes(strcmp(dd,d.symbols{i}{j})) = j;
			end
			% The coded column is saved under '<name>_'. eval is only used to
			% create a workspace variable with that name for save(); the name
			% comes from a variable that already exists in the mat file.
			eval([variables{i} '_' ' = classcodes;']);
			save(datafile,[variables{i} '_'],'-APPEND');
			d.variables{i} = [variables{i} '_'];
		else
			error(['The variable ' variables{i} ' is not of type double or cell in the file ' datafile]);
		end
	end


	% Cache the information regarding the size here because later on
	% it won't be available
	d.nv = nvar;
	d.ns = nl;
	d.labels = (1:nl)';

	d=class(d,'dataset');
	d = synccache(d);

	% Remaining inputs are attribute/value pairs
	if nargin>2,
		d = set(d,varargin{3:end});
	end

elseif isempty(calling1),	%Data given as a numeric matrix
	d.data = varargin{1};

	% Default variable names: f1, f2, ...
	for i=1:size(varargin{1},2),
		d.variables{i} = ['f' num2str(i)];
	end
	% Default labels: the row index of each sample
	d.labels = (1:size(varargin{1},1))';

	vartypes = cell(1,size(varargin{1},2));
	[vartypes{:}] = deal('continuous');
	d.vartypes = vartypes;

	d.symbols = cell(1,size(varargin{1},2));
	[d.symbols{:}] = deal({});
	d=class(d,'dataset');
	d = synccache(d);

	% Remaining inputs are attribute/value pairs
	if nargin>1,
		d = set(d,varargin{2:end});
	end
else
	% No signature matched: report the diagnostic of the numeric-data form.
	error(calling1);
end
