/*
%CLQLL  Lazy Learning Algorithm: Local combination of models.
%  
%   Ultra-fast version of the lazy learning algorithm.
%  
%  
%   Local models                   Combination of constant, 
%                                  linear, and quadratic models
%   Kernel functions               rectangular
%   Identification                 recursive mean and variance
%   Metric                         L1
%   Model selection                Minimum of leave-one-out error
%  
%   -------------------------------------------------------------------------
%  
%     
%     How to compile:
%     If a C compiler is correctly installed, and if matlab knows 
%     its path, the function can be compiled from the matlab
%     prompt, as follows:
%     
%  		 >> mex -O clqLL.c
%     
%     How to use:
%     From matlab, the resulting mex file can be called using 
%     the following syntax.
%     
%  		 >> [h,t] = clqLL(X,Y,Q,id_par);
%     
%     where 
%       INPUT:
%              X[n,m]              Examples: Input
%              Y[n,1]              Examples: Output
%              Q[q,m]              Query points
%              id_par[_,_]         Identification parameters 
%  				   (Details on dimensions follow)
%        OPTIONAL INPUT:
%  	       cmb_par[_,_]        Combination parameters
%  				   Default --> 1 
%  				   (Details on dimensions follow)
%  	       LAMBDA[1,1]         **ONLY PARTIALLY DOCUMENTED**
%  				   Initialization of the diagonal elements
%  				   of the local variance/covariance matrix
%  				   Default --> 1E6 
%  	       W[1,m]              **ONLY PARTIALLY DOCUMENTED**
%  				   Weights used to evaluate the distances
%  				   Default --> ones(1,m)
%  
%  
%  	  OUTPUT:
%  	       h[q,1]              Prediction with the selected number of 
%  				   neighbors and with the selected
%  				   combination
%              t[_,q]              Selected model for each query  
%  				   (Details on dimensions follow)
%  
%   -------------------------------------------------------------------------
%  
%     The function needs 4 mandatory inputs. If an optional input 
%     parameter is given, all the preceding parameters MUST also be
%     given.  E.g. if W is specified, LAMBDA and cmb_par must be too.
%  
%     
%     The identification parameter can assume the following forms:
%   
%  	      1)   id_par[3,3]         | idm0  idM0 valM0 |   
%  				       | idm1  idM1 valM1 |
%  				       | idm2  idM2 valM2 |
%     
%  		   where [idmX,idMX] is the range in which the best
%  		   number of neighbors is searched when identifying
%  		   the local model of degree X and where valMX is the
%  		   maximum number of neighbors used in validation
%  		   for the model of degree X. This means that the
%  		   model of degree X identified with k neighbors,
%  		   is validated on the first v neighbors, where
%  		   v=min(k,valMX).
%
%  	      2)   id_par[3,2]         | idm0  idM0 |   
%  				       | idm1  idM1 |
%  				       | idm2  idM2 |
%     
%  		   where idmX and idMX have the same role they have in 
%  		   point 1, and valMX is by default set to idMX:
%  		   each model is validated on all the neighbors
%  		   used in identification. 
%     
%     
%  	      3)   id_par[3,1]         | c0 |
%  				       | c1 |
%  				       | c2 | 
%     
%  		   Here idmX and idMX are obtained according to the 
%  		   following formulas:
%  		    idmX = 3 * no_parX * cX     idMX = 5 * no_parX * cX 
%  		   where no_parX is the number of parameter of the model
%  		   of degree X. Recommended choice: cX = 1.
%                  As far as the valMX are concerned, they get the
%                  default value as in point 2.
%
%
%     If cmb_par is not given, the best model is selected among those
%     identified (as specified by id_par). In this case, the model
%     combination reduces to a simple "model selection".
%     The default value for cmb_par is 1 as it will be clear from what
%     follows.   If given, cmb_par can assume the following two forms:
%  
%  	      1)   cmb_par[3,1]        | cmb0 |
%  				       | cmb1 |
%  				       | cmb2 |
%     
%  		   where cmbX is the number of models of degree X
%  		   that will be included in the local combination. 
%  		   Each local model will be therefore a combination of 
%  		   "the best cmb0 models of degree 0", 
%  		   "the best cmb1 models of degree 1", and
%  		   "the best cmb2 models of degree 2" identified 
%                  as specified by id_par.
%     
%  	      2)   cmb_par[1,1]        | cmb |
%     
%  		   where cmb is the number of models that will be
%  		   combined, disregarding any constraint on the degree
%  		   of the models that will be considered.  Each local
%  		   model will be therefore a combination of "the best
%  		   cmb models", identified as specified by id_par.
%     
%  	      
%     The second output t is a matrix in which the i-th column contains
%     the parameters of the local model used for the i-th query.
%  
%     *** IMPORTANT REMARK *** a translation of the axes is considered 
%     which centers all the local models in the respective query point.
%  
%      1)  If according to id_par and cmb_par only constant models are 
%      	   considered, t[1,q] reduces to a vector in which each column 
%          is the single parameter of a constant model.
%
%      2)  If at least one model of degree 1 and no model of degree 2
%          are considered, each column of t[d+1,q] is a vector that 
%          contains d+1 parameters: 
%           
%                                | a0 a1 a2 ... | 
%
%          where a0 is the constant term of the model and  a1 is
%          the parameter associated with the 1-st input
%          variable x1, etc...
%
%      3)  If at least one model of degree 2 is considered, each 
%      	   column of t[z,q], where z = (d+1)*(d+2)/2, is a vector that 
%          contains the parameters of a local model with the 
%          following convention:
%      	
%      	      | a0 a1 a2 ... a11 a12 ... a22 a23 ... a33 a34 ... |
%      	
%      	   where a0 a1 a2 ... have the same meaning as in the previous
%      	   case and a11 is the parameter of the quadratic term (x1)^2,
%      	   and a12 is the parameter of the cross-term x1*x2, etc...
%

%   The Lazy Learning Toolbox --- Version 1.0:
%   -------------------------------------------------------------------------
%   Copyright (c) 1999 by Mauro Birattari & Gianluca Bontempi
%   -------------------------------------------------------------------------
%  
%               Mauro Birattari                   Gianluca Bontempi
%                   IRIDIA                             IRIDIA 
%       Universite' Libre de Bruxelles     Universite' Libre de Bruxelles
%              mbiro@ulb.ac.be                    gbonte@ulb.ac.be
%  
%   -------------------------------------------------------------------------
%   Mao: May 7, 1999
*/


#include <mex.h>
#include<math.h>
#include <string.h>


/*
 * Insert a candidate model into a ranking kept sorted by increasing error.
 *
 *   err    err[0] is a zero sentinel that stops the scan, err[1..n] are the
 *          errors of the n retained models, err[n+1] is a scratch slot that
 *          receives whatever falls off the end of the ranking.
 *   pred   pred[0..n-1] are the predictions of the retained models,
 *          pred[n] is the matching scratch slot.
 *   par    NULL, or (n+1) rows of nz parameters laid out like pred.
 *   n      number of models retained in the ranking.
 *   nz     width of one row of par.
 *   eC     error of the candidate.
 *   y_c    prediction of the candidate.
 *   par_c  the npar parameters of the candidate (ignored when par is NULL);
 *          the row stored in par is zero-padded from npar up to nz.
 */
static void insertModel(double *err, double *pred, double *par, int n, int nz,
                        double eC, double y_c, const double *par_c, int npar)
{
  int p, r;

  /* Shift worse models one slot down until the candidate's place is found. */
  for (p = n; eC < err[p]; p--) {
    err[p+1] = err[p];
    pred[p] = pred[p-1];
    if (par)
      for (r = 0; r < nz; r++)
        par[p*nz + r] = par[(p-1)*nz + r];
  }

  err[p+1] = eC;
  pred[p] = y_c;
  if (par) {
    for (r = 0; r < npar; r++)
      par[p*nz + r] = par_c[r];
    for ( ; r < nz; r++)
      par[p*nz + r] = 0;
  }
}


/*
 * One recursive-least-squares step: fold the example (z, yk) into the
 * n-by-n matrix v and the n-vector of parameters t.  a is scratch space
 * of length n.
 */
static void rlsUpdate(double **v, double *t, double *a,
                      const double *z, double yk, int n)
{
  double e = yk;      /* a-priori residual of the new example */
  double b = 1;       /* 1 + z' v z                           */
  double tmp;
  int i, j;

  for (i = 0; i < n; i++) {
    tmp = 0;
    for (j = 0; j < n; j++)
      tmp += v[j][i] * z[j];
    a[i] = tmp;
    b += z[i] * tmp;
    e -= z[i] * t[i];
  }
  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      v[j][i] -= a[i] * a[j] / b;
  for (i = 0; i < n; i++) {
    tmp = 0;
    for (j = 0; j < n; j++)
      tmp += v[j][i] * z[j];
    t[i] += e * tmp;
  }
}


/*
 * Sum of squared leave-one-out residuals of the model (v, t) over the
 * first vl rows of the regressor matrix Z (targets W).  Each residual is
 * e / (1 - z' v z).
 */
static double looSse(double **v, const double *t, double **Z,
                     const double *W, int vl, int n)
{
  double sse = 0;
  double e, b, tmp;
  int m, i, j;

  for (m = 0; m < vl; m++) {
    e = W[m];
    b = 1;
    for (i = 0; i < n; i++) {
      tmp = 0;
      for (j = 0; j < n; j++)
        tmp += v[j][i] * Z[m][j];
      b -= Z[m][i] * tmp;
      e -= Z[m][i] * t[i];
    }
    sse += pow(e/b, 2);
  }
  return sse;
}


/*
 * Accumulate the inverse-error weighted sum of the n best models of a
 * ranking (layout as in insertModel) into *y (predictions), *w (weights)
 * and, when t is non-NULL, into the parameter vector t.  A ranking without
 * parameter rows (par == NULL) contributes its prediction to t[0] only.
 * A zero error is replaced by 1E-20 so that the weight stays finite.
 */
static void combineModels(const double *err, const double *pred,
                          const double *par, int n, int nz,
                          double *t, double *y, double *w)
{
  double e;
  int i, j;

  for (i = 0; i < n; i++) {
    e = err[i+1];
    e = (e == 0) ? 1E-20 : e;
    if (t) {
      if (par) {
        for (j = 0; j < nz; j++)
          t[j] += par[i*nz + j] / e;
      } else {
        t[0] += pred[i] / e;
      }
    }
    *y += pred[i] / e;
    *w += 1 / e;
  }
}


/*
 * MEX gateway:  [h,t] = clqLL(X,Y,Q,id_par[,cmb_par[,LAMBDA[,W]]])
 *
 *   prhs[0]  X[mx,nx]   example inputs
 *   prhs[1]  Y[mx,1]    example outputs
 *   prhs[2]  Q[mq,nx]   query points
 *   prhs[3]  id_par     3-by-1, 3-by-2 or 3-by-3 identification parameters
 *   prhs[4]  cmb_par    (optional) 1 or 3 combination parameters
 *   prhs[5]  LAMBDA     (optional) initial diagonal of the RLS matrix
 *   prhs[6]  W          (optional) per-dimension distance weights
 *
 *   plhs[0]  h[mq,1]    prediction for each query
 *   plhs[1]  t[nz,mq]   (only if requested) combined local-model parameters,
 *                       one column per query
 *
 * Invalid arguments are reported through mexErrMsgTxt.  All work buffers
 * come from mxCalloc and are not freed explicitly here.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[]){

  double LAMBDA;
  int mx, nx, my, ny, mq, nq, mw, nw;
  int mz1 = 0, nz1 = 0, mz2 = 0, nz2 = 0, nz = 0;

  double* id;
  int mid, nid;
  int idm0, idM0, idm1, idM1, idm2, idM2;
  int val0, val1, val2, vl;

  double* cb;
  int tcb, cbA, cb0, cb1, cb2, noI;
  int Go0, Go1, Go2;
  int wantT;

  double initDist;
  double* Xvec;
  double* Qvec;
  double* Cvec;
  double* Wvec;
  double* Y;
  double* W1 = NULL;
  double* W2 = NULL;
  double* y_hat;
  double* t_hat = NULL;
  double* t = NULL;
  double** X;
  double** Q;
  double** C;
  double** Z1 = NULL;
  double** Z2 = NULL;
  double* Zvec1;
  double* Zvec2;
  double* zrow;
  double c_id0, c_id1, c_id2;

  int* BestIndx;
  double* BestDist;
  int noBestDistIndx;
  double dist;

  double* Best_yA = NULL;
  double* Best_sA = NULL;
  double* Best_tA = NULL;
  int noBestA = 0;

  double* Best_y0 = NULL;
  double* Best_s0 = NULL;
  int noBest0 = 0;

  double* Best_y1 = NULL;
  double* Best_s1 = NULL;
  double* Best_t1 = NULL;
  int noBest1 = 0;

  double* Best_y2 = NULL;
  double* Best_s2 = NULL;
  double* Best_t2 = NULL;
  int noBest2 = 0;

  double e, eC;
  double* Vvec1 = NULL;
  double** v1 = NULL;
  double* t1 = NULL;
  double* a1 = NULL;
  double* Vvec2 = NULL;
  double** v2 = NULL;
  double* t2 = NULL;
  double* a2 = NULL;
  int indx;
  double y, w;
  int i, j, k, q, p, m;


  initDist = mxGetInf();
  LAMBDA = 1E6;
  Wvec = NULL;

  if (nrhs<4)
    mexErrMsgTxt("Not enough arguments.");

  // Examples: input
  mx = mxGetM(prhs[0]);                //number of examples
  nx = mxGetN(prhs[0]);

  // Examples: output
  my = mxGetM(prhs[1]);                //number of examples
  ny = mxGetN(prhs[1]);

  // Queries
  mq = mxGetM(prhs[2]);                //number of queries
  nq = mxGetN(prhs[2]);

  if (  (ny  != 1)                 ||
        (mx  != my)                ||
        (nq  != nx)    )
    mexErrMsgTxt("Matrix dimensions must agree.");

  Xvec = mxGetPr(prhs[0]);
  Y = mxGetPr(prhs[1]);
  Qvec = mxGetPr(prhs[2]);


  /* ---- Identification parameters ------------------------------------ */

  mid = mxGetM(prhs[3]);
  nid = mxGetN(prhs[3]);

  if ((mid!=3)||(nid>3)||(nid<1))
    mexErrMsgTxt("Illegal id_par.");

  id   = mxGetPr(prhs[3]);

  c_id0 = *(id++);
  c_id1 = *(id++);
  c_id2 = *(id++);

  c_id0 = (c_id0>0)? c_id0 : 0;
  c_id1 = (c_id1>0)? c_id1 : 0;
  c_id2 = (c_id2>0)? c_id2 : 0;

  Go0 = 0;
  Go1 = 0;
  Go2 = 0;

  if (nid==1){

    /* id_par = [c0;c1;c2]: ranges are derived from the number of
       parameters of each model.  The lower bounds start at 0 so that a
       degree switched off with cX = 0 never reads an unset value. */
    idm0 = 0;
    idm1 = 0;
    idm2 = 0;
    idM0 = 0;
    idM1 = 0;
    idM2 = 0;
    val0 = 0;
    val1 = 0;
    val2 = 0;

    if (c_id0){
      idM0 = ceil(5*c_id0);
      idm0 = floor(3*c_id0);
    }
    if (c_id1){
      i = nx+1;
      idM1 = ceil(5*i*c_id1);
      idm1 = floor(3*i*c_id1);
    }
    if (c_id2){
      i = (nx+1)*(nx+2)/2;
      idM2 = ceil(5*i*c_id2);
      idm2 = floor(3*i*c_id2);
    }

  }else{

    /* Column 1: lower bounds, column 2: upper bounds. */
    idm0 = (int)c_id0;
    idm1 = (int)c_id1;
    idm2 = (int)c_id2;

    idM0 = (int)*(id++);
    idM1 = (int)*(id++);
    idM2 = (int)*(id++);

    idM0 = (idM0>0)? idM0 : 0;
    idM1 = (idM1>0)? idM1 : 0;
    idM2 = (idM2>0)? idM2 : 0;

    if (nid==2){

      val0 = 0;
      val1 = 0;
      val2 = 0;

    }else{

      /* Column 3: validation sizes; 0 means "validate on all the
         neighbours", any other value is raised to at least 2. */
      val0 = (int)*(id++);
      val1 = (int)*(id++);
      val2 = (int)*(id++);

      val0 = (val0>0)? val0 : 0;
      val1 = (val1>0)? val1 : 0;
      val2 = (val2>0)? val2 : 0;

      val0 = (val0*(val0<2))? 2 : val0;
      val1 = (val1*(val1<2))? 2 : val1;
      val2 = (val2*(val2<2))? 2 : val2;
    }

  }

  /* At least 2 neighbours, at most all the examples; a degree is active
     only if its range is not empty. */
  idm0 = (idm0<2)? 2 : idm0;
  idM0 = (idM0>mx)? mx : idM0;
  Go0  = ((idM0-idm0+1)>0)? 1 : 0;

  idm1 = (idm1<2)? 2 : idm1;
  idM1 = (idM1>mx)? mx : idM1;
  Go1  = ((idM1-idm1+1)>0)? 1 : 0;

  idm2 = (idm2<2)? 2 : idm2;
  idM2 = (idM2>mx)? mx : idM2;
  Go2  = ((idM2-idm2+1)>0)? 1 : 0;

  if (Go0+Go1+Go2==0)
    mexErrMsgTxt("Identification is empty.");


  /* ---- Optional parameters (each case falls through to the next) ---- */

  cbA = 1;
  cb0 = 0;
  cb1 = 0;
  cb2 = 0;

  switch (nrhs){
    case 7:
      mw = mxGetM(prhs[6]);
      nw = mxGetN(prhs[6]);
      Wvec = mxGetPr(prhs[6]);
      /* Wvec[j] is read for every j < nx in the neighbour search. */
      if (Wvec && mw*nw < nx)
        mexErrMsgTxt("W must contain one weight per input dimension.");
      /* fall through */
    case 6:
      LAMBDA = mxGetScalar(prhs[5]);
      /* fall through */
    case 5:
      tcb = mxGetM(prhs[4])*mxGetN(prhs[4]);

      if ( (tcb != 3)&&(tcb !=1) )
        mexErrMsgTxt("Combination parameter no good.");

      cb   = mxGetPr(prhs[4]);
      if (tcb == 3){
        /* One count per degree, each limited to the number of models
           actually identified for that degree. */
        cbA = 0;
        cb0 = abs((int)*(cb++));
        cb1 = abs((int)*(cb++));
        cb2 = abs((int)*(cb++));
        cb0 = (cb0<idM0-idm0+1)? cb0 : idM0-idm0+1;
        cb1 = (cb1<idM1-idm1+1)? cb1 : idM1-idm1+1;
        cb2 = (cb2<idM2-idm2+1)? cb2 : idM2-idm2+1;

        cb0 *= Go0;
        cb1 *= Go1;
        cb2 *= Go2;

        /* A degree with no model in the combination is not identified. */
        Go0 *= cb0;
        Go1 *= cb1;
        Go2 *= cb2;

        if ( Go0+Go1+Go2 == 0)
          mexErrMsgTxt("Combination is empty!.");
      }else{
        /* A single count, shared by all the degrees. */
        cbA = abs((int)*cb);
        noI = Go0*(idM0-idm0+1)+Go1*(idM1-idm1+1)+Go2*(idM2-idm2+1);
        cbA = (cbA< noI)? cbA : noI;
        cb0 = 0;
        cb1 = 0;
        cb2 = 0;
        if (cbA==0)
          mexErrMsgTxt("Combination is empty!.");
      }
      /* fall through */
    case 4:
      break;
    default:
      mexErrMsgTxt("Number of argument no good.");
  }

  /* The parameter output is computed only when it is requested. */
  wantT = (nlhs > 1);


  /* ---- Work space ---------------------------------------------------- */

  // Column views of the Matlab matrices; C holds the examples centred
  // on the current query.
  X = mxCalloc(nx,sizeof(double*));
  Q = mxCalloc(nq,sizeof(double*));
  C = mxCalloc(nx,sizeof(double*));
  Cvec = mxCalloc(nx*mx,sizeof(double));

  for (i=0; i<nx; i++){
    X[i] = Xvec + (size_t)i*mx;
    Q[i] = Qvec + (size_t)i*mq;
    C[i] = Cvec + (size_t)i*mx;
  }

  /* nz ends up as the parameter count of the highest active degree. */
  if (Go0){

    nz = 1;

    if (!cbA){
      noBest0 = cb0;
      Best_y0 = mxCalloc(noBest0+1,sizeof(double));
      Best_s0 = mxCalloc(noBest0+2,sizeof(double));
      *Best_s0 = 0;
    }

  }

  if (Go1){

    nz1 = nx+1;
    mz1 = idM1;
    nz = nz1;

    Zvec1 = mxCalloc(mz1*nz1,sizeof(double));
    Z1 = mxCalloc(mz1,sizeof(double*));
    W1 = mxCalloc(mz1,sizeof(double));

    for (i=0; i<mz1; i++)
      Z1[i] = Zvec1 + (size_t)i*nz1;

    Vvec1 = mxCalloc(nz1*nz1,sizeof(double));
    v1 = mxCalloc(nz1,sizeof(double*));
    t1  = mxCalloc(nz1,sizeof(double));
    a1  = mxCalloc(nz1,sizeof(double));

    for (i=0; i<nz1; i++)
      v1[i] = Vvec1 + i * nz1;

    if (!cbA){
      noBest1 = cb1;
      Best_y1 = mxCalloc(noBest1+1,sizeof(double));
      Best_s1 = mxCalloc(noBest1+2,sizeof(double));
      *Best_s1 = 0;
      if (wantT)
        Best_t1 = mxCalloc(nz1*(noBest1+1),sizeof(double));
    }

  }

  if (Go2){

    nz2 = (nx+1)*(nx+2)/2;
    mz2 = idM2;
    nz = nz2;

    Zvec2 = mxCalloc(mz2*nz2,sizeof(double));
    Z2 = mxCalloc(mz2,sizeof(double*));
    W2 = mxCalloc(mz2,sizeof(double));

    for (i=0; i<mz2; i++)
      Z2[i] = Zvec2 + (size_t)i*nz2;

    Vvec2 = mxCalloc(nz2*nz2,sizeof(double));
    v2 = mxCalloc(nz2,sizeof(double*));
    t2  = mxCalloc(nz2,sizeof(double));
    a2  = mxCalloc(nz2,sizeof(double));

    for (i=0; i<nz2; i++)
      v2[i] = Vvec2 + i * nz2;

    if (!cbA){
      noBest2 = cb2;
      Best_y2 = mxCalloc(noBest2+1,sizeof(double));
      Best_s2 = mxCalloc(noBest2+2,sizeof(double));
      *Best_s2 = 0;
      if (wantT)
        Best_t2 = mxCalloc(nz2*(noBest2+1),sizeof(double));
    }

  }

  if (cbA){
    noBestA = cbA;
    Best_yA = mxCalloc(noBestA+1,sizeof(double));
    Best_sA = mxCalloc(noBestA+2,sizeof(double));
    *Best_sA = 0;
    if (wantT)
      Best_tA = mxCalloc(nz*(noBestA+1),sizeof(double));
  }

  // allocate output
  plhs[0] = mxCreateDoubleMatrix(mq,1,mxREAL);
  y_hat = mxGetPr(plhs[0]);

  if (wantT){
    plhs[1] = mxCreateDoubleMatrix(nz,mq,mxREAL);
    t_hat = mxGetPr(plhs[1]);
    t = mxCalloc(nz,sizeof(double));
  }

  /* Sorted list of the nearest neighbours: BestDist[0] is a zero
     sentinel, the last slot of both arrays is scratch space. */
  noBestDistIndx = (idM0<idM1)? idM1 : idM0;
  noBestDistIndx = (idM2>noBestDistIndx)? idM2 : noBestDistIndx;

  BestIndx = mxCalloc(noBestDistIndx+1,sizeof(int));
  BestDist = mxCalloc(noBestDistIndx+2,sizeof(double));
  *BestDist = 0;


  /* ---- One local model (or combination) per query -------------------- */

  for( q=0; q<mq; q++){

    /* -- Nearest neighbours of the query, (weighted) L1 distance -- */

    for (p=1; p<=noBestDistIndx; p++)
      BestDist[p] = initDist;

    for (i=0; i<mx; i++){
      dist = 0.0;
      // The sum is never cut short: C must be filled for every example.
      if (Wvec){
        for (j=0; j<nx; j++){
          C[j][i] = X[j][i]-Q[j][q];
          dist += Wvec[j]*fabs(C[j][i]);
        }
      } else {
        for (j=0; j<nx; j++){
          C[j][i] = X[j][i]-Q[j][q];
          dist += fabs(C[j][i]);
        }
      }
      for(p=noBestDistIndx; dist < BestDist[p] ; p--){
        BestDist[p+1] = BestDist[p];
        BestIndx[p] = BestIndx[p-1];
      }
      BestDist[p+1] = dist;
      BestIndx[p] = i;
    }

    // **Mao** check if it's necessary to initialize Best_tA
    if (cbA){
      for (p=1; p<=noBestA; p++)
        Best_sA[p] = initDist;
      if (wantT)
        for(p=0; p<nz*(noBestA+1) ; p++)
          Best_tA[p] = 0;
    }


    /* -- Constant models: running mean over the k+1 nearest examples -- */

    if (Go0){

      if (!cbA)
        for (p=1; p<=noBest0; p++)
          Best_s0[p] = initDist;

      y = Y[BestIndx[0]];
      eC = 1;

      for (k=1; k<idM0; k++){

        if (val0){
          /* Error measured on the first vl neighbours only.
             NOTE(review): when vl == k+1 this value differs from the
             recursive formula below by a factor (vl-1) -- confirm that
             the two scales are meant to be compared. */
          y = (k * y + Y[BestIndx[k]])/(k+1);
          e = 0;
          vl = (val0<k+1)? val0 : k+1;
          for (i=0;i<vl;i++)
            e += pow( y - Y[BestIndx[i]] , 2);
          eC = e * vl / (vl-1);
        }else{
          /* Recursive update of the error, then of the mean. */
          eC = eC*(k+1)*pow(k-1,2)/pow(k,3) + pow(Y[BestIndx[k]]-y,2)/k;
          y = (k * y + Y[BestIndx[k]])/(k+1);
        }

        if (k>=idm0-1){
          if (cbA)
            insertModel(Best_sA, Best_yA, Best_tA, noBestA, nz,
                        eC, y, &y, 1);
          else
            insertModel(Best_s0, Best_y0, NULL, noBest0, 1,
                        eC, y, NULL, 0);
        }
      }
    }


    /* -- Linear models: recursive least squares on [1, x-q] -- */

    if (Go1){

      if (!cbA)
        for (p=1; p<=noBest1; p++)
          Best_s1[p] = initDist;

      for (i=0; i<nz1*nz1; i++)
        Vvec1[i] = 0.0;
      for (j=0; j<nz1; j++)
        v1[j][j] = LAMBDA;

      /* Regressors and targets of the mz1 nearest examples. */
      zrow = Z1[0];
      for(i=0;i<mz1;i++){
        indx = BestIndx[i];
        W1[i] = Y[indx];
        *(zrow++) = 1.0;
        for(j=0;j<nx;j++)
          *(zrow++) = C[j][indx];
      }

      for(i=0;i<nz1;i++)
        t1[i] = 0.0;

      for (k=0; k<idM1; k++){

        rlsUpdate(v1, t1, a1, Z1[k], W1[k], nz1);

        if (k>=idm1-1){
          vl = (val1*(val1<k+1))? val1 : k+1;
          eC = looSse(v1, t1, Z1, W1, vl, nz1) / (k+1);

          /* The model is centred on the query, so its prediction is
             the constant term t1[0]. */
          if (cbA)
            insertModel(Best_sA, Best_yA, Best_tA, noBestA, nz,
                        eC, t1[0], t1, nz1);
          else
            insertModel(Best_s1, Best_y1, Best_t1, noBest1, nz1,
                        eC, t1[0], t1, nz1);
        }
      }
    }


    /* -- Quadratic models: as above, plus all the products x_p*x_m -- */

    if (Go2){

      if (!cbA)
        for (p=1; p<=noBest2; p++)
          Best_s2[p] = initDist;

      for (i=0; i<nz2*nz2; i++)
        Vvec2[i] = 0.0;
      for (j=0; j<nz2; j++)
        v2[j][j] = LAMBDA;

      zrow = Z2[0];
      for(i=0;i<mz2;i++){
        indx = BestIndx[i];
        W2[i] = Y[indx];
        *(zrow++) = 1.0;
        for(j=0;j<nx;j++)
          *(zrow++) = C[j][indx];
        for(p=0;p<nx;p++)
          for(m=p;m<nx;m++)
            *(zrow++) = C[p][indx]*C[m][indx];
      }

      for(i=0;i<nz2;i++)
        t2[i] = 0.0;

      for (k=0; k<idM2; k++){

        rlsUpdate(v2, t2, a2, Z2[k], W2[k], nz2);

        if (k>=idm2-1){
          vl = (val2*(val2<k+1))? val2 : k+1;
          eC = looSse(v2, t2, Z2, W2, vl, nz2) / (k+1);

          if (cbA)
            insertModel(Best_sA, Best_yA, Best_tA, noBestA, nz,
                        eC, t2[0], t2, nz2);
          else
            insertModel(Best_s2, Best_y2, Best_t2, noBest2, nz2,
                        eC, t2[0], t2, nz2);
        }
      }
    }


    /* -- Combination of the retained models, weights 1/error -- */

    y=0;
    w=0;

    if (wantT)
      for ( j=0; j<nz; j++ )
        t[j] = 0;

    if (cbA){
      combineModels(Best_sA, Best_yA, Best_tA, cbA, nz, t, &y, &w);
    }else{
      combineModels(Best_s0, Best_y0, NULL,    cb0, 1,   t, &y, &w);
      combineModels(Best_s1, Best_y1, Best_t1, cb1, nz1, t, &y, &w);
      combineModels(Best_s2, Best_y2, Best_t2, cb2, nz2, t, &y, &w);
    }

    /* plhs[1] is nz-by-mq and stored by columns: the parameters of
       query q fill column q. */
    if (wantT)
      for ( i=0 ; i<nz ; i++)
        t_hat[q*nz+i] = t[i]/w;

    y_hat[q] = y/w;

  }

}


