/***********************************************************************

    This file is part of
    FEINS, Finite Element Incompressible Navier-Stokes solver,
    which is expanding to a more general FEM solver and toolbox,
    Copyright (C) 2003--2008, Rene Schneider 
    <rene.schneider@mathematik.tu-chemnitz.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

    Minor contributions to this program (for example bug-fixes and
    minor extensions) by third parties automatically transfer the
    copyright to the general author of FEINS, to maintain the
    possibility of commercial re-licensing. If you contribute but wish
    to keep the copyright of your contribution, make that clear in
    your contribution!

    Non-GPL licenses to this program are available upon request from
    the author.

************************************************************************/
#include <math.h>
#ifdef fmax
#warning "math.h"
#endif

#include "feins_macros.h"
#include "datastruc.h"
#include "mesh.h"
#include "navsto_solver.h"

#include <unistd.h>
#include <string.h>

  /* for gettimeofday */
#include <sys/types.h>
#include <sys/time.h>


#define TIMEGET {\
   gettimeofday(&tv,&tz); \
   sec=tv.tv_sec; \
   musec=tv.tv_usec; \
   ti=((double)sec+((double)musec)*1e-6); }


int navsto_solver_FDR(struct mesh *msh1, FIDX lvl_max,
		      struct vector *solref,
		      struct vector *pccrit,
		      struct vector *DIDF,
		      struct vector *Aspar,
		      struct vector *bspar,
		      FIDX A_n, FIDX A_m,
		      struct vector *rpar,
		      double h);

int main(int argc, char *argv[])
{
  struct mesh msh1;

  struct vector solref;
  struct vector pccrit;
  struct vector spar;
  struct vector Aspar, bspar, rpar0, DIDrpar;
  struct vector *DIDF;
  struct vector DIDrpar_FD, DIDrpar_FDR;

  struct timeval tv;
  struct timezone tz;
  int sec, musec;
  double ti, tstart, tADJ, tFD, tFDR;
  FIDX i_par;

  double h=1e-5;
  
  char *buffer;

  int  err, ihlp;
  FIDX i, j, A_n, A_m, level, level0, lvl;

  if (argc<3+1) 
    {
      printf("\n"
	     "%s: not enough arguments\n\n"
	     "usage:\n\n"
	     "   %s meshfile.f1m lvl0 lvl1 [para_file]\n"
	     "\n", argv[0], argv[0]);
      printf("       lvl0 - number of refinements before seting base mesh\n"
	     "       lvl1 - total number of refinements, "
	     "lvl1>lvl0 is required\n"
	     "  para_file - if provided the shape parameters in the\n"
	     "              mesh file are overriden\n"
	     "\n");
      printf("The computed solution will be in meshfile.f1m_sol,\n"
	     "the criteria in meshfile.f1m_crit and the sensitivities\n"
	     "in meshfile.f1m_grad\n");
      return FAIL;
    }

  printf("meshfile: %s      STABTYPE:%d ", argv[1], STABTYPE);
#ifdef RAMAGE_NOT_TEZDUYAR
  printf("  stab-par: Ramage\n");
#else
  printf("  stab-par: Tezduyar\n");
#endif
  printf("          FIDX size= %d     pointer size = %d\n",
	 sizeof(FIDX), sizeof(FIDX *) );

  if (strcmp(argv[1],"visual/tobs_eval.f1m")!=0)
    {
      fprintf(stderr, "this is a special version of test_navsto_solver\n"
	      "to compute the derivatives example for the paper\n"
	      "Schneider and Jimack, On the evaluation of Finite Element\n"
	      "Sensitivities to nodal coordinates.\n"
	      "The meshfile must be visual/tobs_eval.f1m\n"
	      "with matching files visual/tobs_eval_Aspar,\n"
	      "visual/tobs_eval_bspar, visual/tobs_eval_rpar0.\n");
      exit(1);
    }

  vector_init(&spar);
  
  TRY_MALLOC(buffer, strlen(argv[1])+10, char, main);

  level0  = -1;
  level   = -1;
  if (sscanf(argv[2],"%d",&ihlp)==1) level0=(FIDX) ihlp;
  if (sscanf(argv[3],"%d",&ihlp)==1) level =(FIDX) ihlp;

  if ((level0<0)||(level<=level0))
    {
      fprintf(stderr, "test_navsto_solver:"
	      "lvl0 and/or lvl1 invalid\n"
	      "(call without arguments for help)\n");
      return FAIL;
    }

  /* in this version spar is always generated by 
     spar = Aspar*rpar0 +  bspar load these */
  err=vector_read_file(&Aspar, "visual/tobs_eval_Aspar");
  FUNCTION_FAILURE_HANDLE( err, vector_read_file, main);
  err=vector_read_file(&bspar, "visual/tobs_eval_bspar");
  FUNCTION_FAILURE_HANDLE( err, vector_read_file, main);
  err=vector_read_file(&rpar0, "visual/tobs_eval_rpar0");
  FUNCTION_FAILURE_HANDLE( err, vector_read_file, main);

  A_n=42; A_m=10;
  if (((Aspar.len!=A_n*A_m)||(bspar.len!=A_n))||(rpar0.len!=A_m))
    {
      fprintf(stderr, "hard coded A_n and A_m dont match data in files\n");
      return FAIL;
    }
  else
    {
      err=vector_alloc(&spar, A_n);
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);
      
      /* spar= bspar + Aspar*rpar0 */
      for (i=0; i<A_n; i++)
	{
	  spar.V[i]=bspar.V[i];
	  for (j=0; j<A_m; j++)
	    {
	      /* Aspar was read and thus stored line by line */
	      spar.V[i]+=Aspar.V[i*A_m + j]*rpar0.V[j];
	    }
	}
    }

  /* tell navsto_solver we don't have a reference solution */
  solref.len=0;
  /* { 
     err=vector_read_file(&solref, "divider_n.f1m_sol");
     FUNCTION_FAILURE_HANDLE( err, vector_read_file, main);
     } /* */
  
  err=mesh_read_file_t1( &msh1, argv[1] ); /* */
  FUNCTION_FAILURE_HANDLE( err, mesh_read_file_t1, main);

  for (lvl=0; lvl<level0 ; lvl++)
    {
      err=mesh_refine_uniform_t1( &msh1 );
      FUNCTION_FAILURE_HANDLE( err, mesh_refine_uniform_t1, main);
    }

  /* reset hierarchy */
  msh1.hi_nr=0;
  for (i=0; i<msh1.eg_nr; i++)
    {
      msh1.edge[i*msh1.eg_w+MCT1EGLVL]=-1;
    }

  /* if spar is set, see if it is compatible, and replace spar in the
     mesh */
  if (spar.len!=0)
    {
      if (spar.len==msh1.sp_nr)
	{
	  free(msh1.spar);
	  msh1.spar=spar.V;
	  msh1.sp_max=msh1.sp_nr;
	  vector_init(&spar);
	}
      else
	{
	  fprintf(stderr,"main: the provided shape parameter vector "
		  "has wrong size\n");
	  return FAIL;
	}
    }

  /* err=mesh_write_ssegs_svg_tx( &msh1, 18, "visual/shape_segs", 1);
     FUNCTION_FAILURE_HANDLE( err, mesh_write_ssegs_svg_tx, main);  
     fprintf(stderr,"plot only --> stop!\n"); exit(1); /* */

  /* allocate space for the performance criteria */
  err=vector_alloc(&pccrit, msh1.pc_nr);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);

  /* allocate memory for the sensitivities */
  TRY_MALLOC(DIDF, msh1.pc_nr, struct vector, main);


  TIMEGET;
  tstart=ti;

  /* solve */
  err=navsto_solver(&msh1, level-level0, &solref, &pccrit, DIDF);
  FUNCTION_FAILURE_HANDLE(err, navsto_solver, main);

  
  /* compute DIDrpar */
  err=vector_alloc(&DIDrpar, A_m);
  FUNCTION_FAILURE_HANDLE(err, vector_alloc, main);
  /* DIDrpar = Aspar^T*DIDF */
  for (j=0; j<A_m; j++)
    {
      DIDrpar.V[j]=0.0;
      for (i=0; i<A_n; i++)
	{
	  /* Aspar was read and thus stored line by line */
	  DIDrpar.V[j]+=Aspar.V[i*A_m + j]*DIDF[0].V[i];
	}
    }

  TIMEGET;
  tADJ=ti-tstart;


  /* finite difference check of DIDrpar */
  err=vector_alloc(&DIDrpar_FD, A_m);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);
  if (1==1)
  { 
    struct vector pccrit_ph, pccrit_mh;

    /* reset the reference solution, so it is recomputed in the FD test */
    vector_free(&solref);
    solref.len=0;

    TIMEGET;
    tstart=ti;

    
    err=vector_alloc(&pccrit_ph, msh1.pc_nr);
    FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);
    err=vector_alloc(&pccrit_mh, msh1.pc_nr);
    FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);

    /* initial solve of the nonlinear system */
    err=navsto_solver(&msh1, level-level0, &solref, &pccrit, NULL);
    FUNCTION_FAILURE_HANDLE(err, navsto_solver, main);

     for (i_par=0; i_par<A_m; i_par++)
      {
	double par_save;
	par_save=rpar0.V[i_par];

	rpar0.V[i_par] = par_save + h;
	/* spar= bspar + Aspar*rpar0 */
	for (i=0; i<A_n; i++)
	  { msh1.spar[i]=bspar.V[i];
	    for (j=0; j<A_m; j++)
	      msh1.spar[i]+=Aspar.V[i*A_m + j]*rpar0.V[j]; }
	err=navsto_solver(&msh1, level-level0, &solref, &pccrit_ph, NULL);
	FUNCTION_FAILURE_HANDLE(err, navsto_solver, main);


	rpar0.V[i_par] = par_save - h;
	/* spar= bspar + Aspar*rpar0 */
	for (i=0; i<A_n; i++)
	  { msh1.spar[i]=bspar.V[i];
	    for (j=0; j<A_m; j++)
	      msh1.spar[i]+=Aspar.V[i*A_m + j]*rpar0.V[j]; }
	err=navsto_solver(&msh1, level-level0, &solref, &pccrit_mh, NULL);
	FUNCTION_FAILURE_HANDLE(err, navsto_solver, main);

	DIDrpar_FD.V[i_par]=(pccrit_ph.V[0]-pccrit_mh.V[0])/(2.0*h);
	  
    	rpar0.V[i_par] = par_save;
      }

    TIMEGET;
    tFD=ti-tstart;
    printf("\n  tFD=%8.1e\n\n", tFD);

    for (i_par=0; i_par<A_m; i_par++)
	{
	  printf("[%4d]  DIDrpar=%10.3e   FD=%10.3e   diff=%8.1e  "
		 " rel=%8.1e\n",
		 (int) i_par,
		 DIDrpar.V[i_par],DIDrpar_FD.V[i_par],
		 DIDrpar.V[i_par]-DIDrpar_FD.V[i_par],
		 (DIDrpar.V[i_par]-DIDrpar_FD.V[i_par])/fabs(DIDrpar.V[i_par]));
	}


    printf("\n\nif results differ check first that "
	   "nu = nu_file in all calls\n");

    
    vector_free(&pccrit_ph);
    vector_free(&pccrit_mh);
  }




  /* reset the reference solution, so it is recomputed in the FDR test */
  vector_free(&solref);
  solref.len=0;

  TIMEGET;
  tstart=ti;

  /* Adjoint method with finite differences for the residual of the PDE */
  err=navsto_solver_FDR(&msh1, level-level0, &solref, &pccrit,
			&DIDrpar_FDR, &Aspar, &bspar, A_n, A_m, &rpar0, h);
  FUNCTION_FAILURE_HANDLE(err, navsto_solver_FDR, main);

  TIMEGET;
  tFDR=ti-tstart;
  printf("\n  tFDR=%8.1e\n\n", tFDR);


  printf("\n  tADJ=%8.1e   tFD=%8.1e  tFDR=%8.1e\n\n", tADJ, tFD, tFDR);
  
  for (i_par=0; i_par<A_m; i_par++)
    {
      printf("[%4d]  DIDrpar=%10.3e   FD=%10.3e   FDR=%10.3e   diff_FD=%8.1e  "
	     " rel=%8.1e\n",
	     (int) i_par,
	     DIDrpar.V[i_par],DIDrpar_FD.V[i_par],DIDrpar_FDR.V[i_par],
	     DIDrpar.V[i_par]-DIDrpar_FD.V[i_par],
	     (DIDrpar.V[i_par]-DIDrpar_FD.V[i_par])/fabs(DIDrpar.V[i_par]));
    }


  /* write results to files */
  /* criteria */
  strcpy(buffer, argv[1]);
  strcat(buffer,"_crit");
  err=vector_write_file(&pccrit, buffer);
  FUNCTION_FAILURE_HANDLE( err, vector_write_file, main);

  /* derivatives */
  strcpy(buffer, argv[1]);
  strcat(buffer,"_grad");
  err=vector_n_write_file(msh1.pc_nr, DIDF, buffer);
  FUNCTION_FAILURE_HANDLE( err, vector_n_write_file, main);

  /* reference solution */
  strcpy(buffer, argv[1]);
  strcat(buffer,"_sol");
  err=vector_write_file(&solref, buffer);
  FUNCTION_FAILURE_HANDLE( err, vector_write_file, main);


  vector_free(&pccrit);
  vector_free(&solref);
  vector_free(&Aspar);
  vector_free(&bspar);
  vector_free(&rpar0);

  vector_free(&DIDrpar);
  vector_free(&DIDrpar_FD);
  vector_free(&DIDrpar_FDR);

  for (i=0; i<msh1.pc_nr; i++)
    {
      vector_free(&DIDF[i]);
    }
  free(DIDF);

  mesh_free(&msh1);
  free(buffer);

  return SUCCESS;
}


/********************************************************************
 ********************************************************************
 **                                                                **
 ** modified navsto_solver routine to allow comparison FD_R        **
 **                                                                **
 ********************************************************************
 ********************************************************************/

#include <math.h>
#include "feins_macros.h"
#include "datastruc.h"
#include "mesh.h"
#include "sparse.h"
#include "lin_solver.h"

#include "navsto_struct.h"
#include "stokes_aux.h"
#include "navsto_aux.h"
#include "navsto_adj.h"
#include "navstoassem.h"

#include "assembly.h"
#include "mesh_deform.h"

#include <unistd.h>
#include <string.h> 


#define VERBOSE 4

/*FUNCTION*/
int navsto_solver_FDR(struct mesh *msh1, FIDX lvl_max,
		      struct vector *solref,
		      struct vector *pccrit,
		      struct vector *DIDrpar,
		      struct vector *Aspar,
		      struct vector *bspar,
		      FIDX A_n, FIDX A_m,
		      struct vector *rpar,
		      double h
/*
   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid */
		  ){
  struct navsto_matrix K;
  struct vector rhs_stokes, rhs, x, x_old, y, z, sol, soli;
  struct mesh msh2;

  struct timeval tv;
  struct timezone tz;
  int sec, musec;
  double t1, t2, t3, ti, tstart, tsolve, tadj;
  FILE *mark;

  char solver[15];
  FIDX dim, vx_nr, i, j, d, step, linmeth, nu_chng;
  int  iter, err;

  FIDX lvl, lvl_min;

  double resi, resnl_old, resnl, normy, deltax;
  double leps, nleps, nleps_coarse, nleps_final, nleps_oldresnl;

  double impeps, innereps;
  double adjeps, adjinnereps;
  
  double nu, nu0, nu_akt, dnu, maxPe;

  double *vx_new;
  FIDX    vx_max;

  int use_SDFEM=0;

  int m=100;


  TIMEGET;
  tstart=ti;


  nu=msh1->para[MC2XPANUPO];

  /* set the goals and methods */
  linmeth = 1;


  nleps_coarse   = 1e-1;
  nleps_final    = 1e-6;
  nleps_oldresnl = 1e-8;
  impeps   = 1e-5;
  innereps = 1e-6; /* 1e-6 appears to be more robust for high Re than 1e-3 */ 

  adjeps      = 1e-6; 
  adjinnereps = 1e-6;
  /* fprintf(stderr,"for tests: adjeps=1e-5\n"); adjeps      = 1e-5; */

  dnu     = 0.5;
  dim=msh1->dim;

  /* spar= bspar + Aspar*rpar */
  for (i=0; i<A_n; i++)
    { msh1->spar[i]=bspar->V[i];
      for (j=0; j<A_m; j++)
	msh1->spar[i]+=Aspar->V[i*A_m + j]*rpar->V[j]; }

  /* initialise the deformation */
  err=mesh_deform_t1_t2(msh1, lvl_max, &vx_new, &vx_max, 0, NULL);
  FUNCTION_FAILURE_HANDLE( err, mesh_deform_t2_t1, navsto_solver);

  /* null define msh2 */
  mesh_init( &msh2);

  /* init the stiffness matrix */
  err=navsto_matrix_init( &K, dim, lvl_max+1);
  FUNCTION_FAILURE_HANDLE( err, navsto_matrix_init, navsto_solver);

  
  if (solref->len==0)
    {
      /* start from scratch */
      lvl_min=0;
    }
  else
    {
      /* use the the provided initial solution */
      lvl_min=lvl_max;
    }

  for (lvl=lvl_min; lvl<=lvl_max; lvl++)
    {

      if (lvl==lvl_min)
	{
	  /* initial assem init */
	  if (!use_SDFEM)
	    {

	      if (solref->len==0)
		{
		  /* get the maximal stable Re on the coarse mesh */
		  err=navsto_maxnu_init_t21(msh1, &nu0);
		  FUNCTION_FAILURE_HANDLE(err, navsto_maxnu_init_t21,
					  navsto_solver);
	      
		  nu_akt=fmax(nu, nu0); 
		  msh1->para[MC2XPANUPO]=nu_akt; 
		  if (VERBOSE>0)
		    {
		      printf("nu=%8.1e     nu0=%8.1e     nu_akt=%8.1e\n",
			     nu, nu0, nu_akt);
		    }
		  nu_chng=0;
		}
	      else 
		{
		  nu_akt  = nu;
		  nu0     = nu;
		  nu_chng = 0;
		}

	      err=navsto_MG_init_assem_t21( msh1, lvl, &K, &rhs_stokes,
					    &rhs, &x, &msh2,
					    vx_new, vx_max );
	      FUNCTION_FAILURE_HANDLE( err, navsto_MG_init_assem_t21,
				       navsto_solver);
	    }
	  else
	    {
	      nu_akt  = nu;
	      err=navsto_MG_init_assem_SDFEM_t21( msh1, lvl, &K, &rhs,
						  &x, &msh2,
						  vx_new, vx_max );
	      FUNCTION_FAILURE_HANDLE( err, navsto_MG_init_assem_SDFEM_t21,
				       navsto_solver);
	      
	      err=vector_alloc( &rhs_stokes, x.len );
	      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
	      for (i=0; i<rhs_stokes.len; i++) { rhs_stokes.V[i]=0.0; }
	    }
	  vx_nr=msh2.vx_nr;

	  err=vector_alloc( &x_old, x.len );
	  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);

	  /* if solref is provided do
	     x_old       = sol_ref,
	     x_old(diri) = x(diri) (just to be sure)
	     x           = x_old */
	  if (solref->len!=0)
	    {
	      if (solref->len!=x.len)
		{
		  fprintf(stderr, "navsto_solver: "
			  "provided solref has wrong length\n");
		  return FAIL;
		}
	      /* x_old       = sol_ref */
	      for (i=0; i<x.len; i++)
		x_old.V[i]=solref->V[i];

	      /* x_old(diri) = x(diri) (just to be sure) */
	      for (i=0; i<K.bn_nr; i++)
		for (d=0; d<dim; d++)
		  {
		    if (x_old.V[K.nodes[i]+d*vx_nr]!=x.V[K.nodes[i]+d*vx_nr])
		      printf("xref node %d, dim %d is strange\n",
			     (int) K.nodes[i], (int) d);
		    x_old.V[K.nodes[i]+d*vx_nr]=x.V[K.nodes[i]+d*vx_nr];
		  }

	      /* x           = x_old */
	      for (i=0; i<x.len; i++)
		x.V[i]=x_old.V[i];
	    } /* end init with solref */
	}
      else /* if (lvl!=lvl_min) */
	{
	  /* copy the old solution, used for the interpolation */
	  for (i=0; i<x.len; i++)
	    { x_old.V[i]=x.V[i];}

	  /* update nu */
	  nu_akt=fmax(nu, maxPe*1.3*nu_akt*dnu);
	  /* #warning "higher Pe"
	     nu_akt=fmax(nu, 0.125*maxPe*1.3*nu_akt*dnu); /* */
	  if (msh1->para[MC2XPANUPO]!=nu_akt)
	    {
	      msh1->para[MC2XPANUPO]=nu_akt; 
	      nu_chng=1;
	    }

	  /* refine, init on finer mesh, with larger Re */
	  err=navsto_MG_ref_intp_ass_t21( msh1, lvl, &msh2, vx_new, vx_max,
					  &K, &rhs_stokes, &rhs,
					  &x_old, &x );
	  FUNCTION_FAILURE_HANDLE( err, navsto_MG_ref_intp_ass_t21,
				   navsto_solver); 

	  vx_nr=msh2.vx_nr;
	} /* end if (lvl>0) */
    
      err=vector_alloc( &y, x.len );
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
      err=vector_alloc( &z, x.len );
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
      err=vector_alloc( &sol, (dim+1)*K.vx_nr );
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);

      if (lvl==lvl_max)
	nleps=nleps_final;
      else
	nleps=nleps_coarse;

      printf("nleps=%e\n",nleps);

      /* the nonlinear loop */
      resi=1.0; resnl=1.0; deltax=1.0; step=0; resnl_old=1e300;
      while (((deltax>nleps)||(resi>nleps))
	     &&((resnl>nleps_oldresnl)&&(step<100)))
	{
	  /* save the old x for the computation of deltax */
	  for (i=0; i<x.len; i++)
	    { x_old.V[i]=x.V[i]; }

	  TIMEGET;
	  t1=ti;

	  if (!use_SDFEM)
	    {
	      err=navsto_MG_C_assem_t21( msh1, lvl, &K, &rhs, &maxPe, 
					 &rhs_stokes, &x, &msh2, 
					 vx_new, vx_max,
					 linmeth );
	      FUNCTION_FAILURE_HANDLE( err, navsto_MG_C_assem_t21,
				       navsto_solver);
	    }
	  else
	    {      
	      err=navsto_MG_assem_SDFEM_t21( msh1, lvl, &K, &rhs, &x, 
					     &msh2, vx_new, vx_max );
	      FUNCTION_FAILURE_HANDLE( err, navsto_assem_SDFEM_t21,
				       navsto_solver);
	      maxPe=0.0;
	    }

	  TIMEGET;
	  t2=ti;

	  /* test the residual */
	  /* y=K*x */
	  err=navsto_matrix_tim_vec( &K, &x, &y);
	  FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec,
				   navsto_solver);  
	  /* z=K*x-rhs, get norm(y) */
	  for (i=0; i<x.len; i++)
	    {
	      z.V[i]=y.V[i]-rhs.V[i];
	    }
	  /* apply the projector */
	  err=navsto_projector_no_precon( &K, &z, NULL, &y);
	  FUNCTION_FAILURE_HANDLE( err, navsto_projector_no_precon, 
				   navsto_solver);  
	  normy=0.0;
	  for (i=0; i<x.len; i++)
	    {
	      normy+= y.V[i]*y.V[i];
	    }
	  resnl=sqrt(normy); /* */
	  
	  if (VERBOSE>3)
	    {
	      printf("maxPe= %8.1e ", maxPe);
	      printf("|res_nl|=%12.6e, ", resnl); /* */
	      printf("t_a=%8.2e ", t2-t1); /* */
	      printf("\n"); /* */
	    }

	  /* before we solve, output the matrices to files (for
	     external analysis */
	  /* { 
	     char matname[50];
	     sprintf(matname,  "visual/savemat_%d",lvl); 
	     err=navsto_matrices_write_files( &K, matname);
	     FUNCTION_FAILURE_HANDLE(err, navsto_matrices_write_files,
	     navsto_solver);
	     }/* */


	  resi=0.0;
	  iter=0;

	  /* set stoping criterion for linear solver */
	  leps=fmax(impeps, 1e-2 * nleps_oldresnl/ resnl); 

	  if (!use_SDFEM)
	    {
	      K.innereps=innereps;
	      K.mg->vcycles=1;
	      K.mg->smooths=1;
	      K.mg->stop_eps=0.0;

	      strcpy(solver, "GMRES_Fp");
	      err=GMRES( m, 100, 2, leps, 1, &x, &resi, &iter,
			 navsto_matrix_tim_vec, navsto_projector_w_precon,
			 &K, &rhs, NULL ); /* */
	    }
	  else
	    {
	      strcpy(solver, "GMRES_mg");
	      err=GMRES( m, 100, 2, leps, 1, &x, &resi, &iter,
			 navsto_matrix_tim_vec, navsto_mg_SDFEM_precon,
			 &K, &rhs, NULL ); /* */
	    }
	  FUNCTION_FAILURE_HANDLE(err, GMRES, navsto_solver);
	  

	  TIMEGET;
	  t3=ti;

	  /* test the residual */
	  /* y=K*x */
	  err=navsto_matrix_tim_vec( &K, &x, &y);
	  FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec, 
				   navsto_solver);  
	  /* z=K*x-rhs, get norm(y) */
	  normy=0.0;
	  for (i=0; i<x.len; i++)
	    {
	      z.V[i]=y.V[i]-rhs.V[i];
	    }
	  /* apply the projector */
	  err=navsto_projector_no_precon( &K, &z, NULL, &y);
	  FUNCTION_FAILURE_HANDLE( err, navsto_projector_no_precon, 
				   navsto_solver);  
	  for (i=0; i<x.len; i++)
	    {
	      normy+= y.V[i]*y.V[i];
	    }
	  resi=sqrt(normy); /* */


	  /* compute deltax */
	  deltax=0.0;
	  for (i=0; i<x.len; i++)
	    { deltax+=(x_old.V[i]-x.V[i])*(x_old.V[i]-x.V[i]); }
	  deltax=sqrt(deltax);

	  /* increase the step counter */
	  step++;

	  if ((VERBOSE>3)
	      ||((VERBOSE>1)
		  &&( (deltax<=nleps)&&(resi<=nleps) ) ))
	    {
	      printf("nu %8.1e st %3d  %8s: N=%6d, it=%4d, "
		     "i_it: %4d %4d %4d %4d err=%2d, |res_l|=%12.6e, "
		     "|dx|=%9.3e, ",
		     nu_akt, (int) step, solver, (int) x.len, iter,
		     K.innercount[0], K.innercount[1],
		     K.innercount[2], K.innercount[3],
		     err, resi, deltax); /* */
	      printf("t_s=%8.2e ", t3-t2); /* */
	    }

	  if (VERBOSE>-1) /* write the solution for visualization */
	    {
	      for (i=0; i<dim*vx_nr; i++)
		{ sol.V[i]=x.V[i]; }
	      for (i=0; i<vx_nr; i++)
		{ sol.V[dim*vx_nr+i]=0.0; }
	      for (i=0; i<vx_nr; i++)
		{ 
		  if (K.pdof[i]!=-1)
		    { sol.V[dim*vx_nr+i]= x.V[dim*vx_nr+K.pdof[i]]; }
		}
	      /* correct the pressure values of the edge-mid-nodes */
	      {
		FIDX eg, nod1, nod2, nodm;
		for (eg=0; eg<msh2.eg_nr; eg++)
		  {
		    if (msh2.edge[eg*msh2.eg_w+MCT2EGCHL1]==-1)
		      {
			nod1=msh2.edge[eg*msh2.eg_w+MCT2EGNOD1  ];
			nod2=msh2.edge[eg*msh2.eg_w+MCT2EGNOD1+1];
			nodm=msh2.edge[eg*msh2.eg_w+MCT2EGNODM  ];
		    
			sol.V[2*msh2.vx_nr+nodm]= 0.5 * 
			  (sol.V[2*msh2.vx_nr+nod1] 
			   + sol.V[2*msh2.vx_nr+nod2]); 
		      }
		  }
	      }
	      /* write the solution with grape */
	      soli.V=&sol.V[0];
	      soli.len=(dim+1)*vx_nr;
	      soli.n_max=(dim+1)*vx_nr;
	      err=mesh_write_solution_grape_t2( &msh2, &soli, dim+1, 22,
					      "visual/2d_mesh_grape_" );
	      FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_grape_t2,
				       navsto_solver); /* */

	      /* write the velocity components */
	      soli.V=&sol.V[0];
	      soli.len=dim*vx_nr;
	      soli.n_max=dim*vx_nr;
	      err=mesh_write_solution_exp_t2( &msh2, &soli, dim, 20,
					      "visual/2d_mesh_v" );
	      FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_exp_t2,
				       navsto_solver); /* */
	  
	      /* write the streamfunction */
	      /* err=stokes_write_streamfunc_t2( &msh2, &soli, 20,
		 "visual/2d_mesh_psi" );
		 FUNCTION_FAILURE_HANDLE( err, stokes_write_streamfunc_t2,
		 navsto_solver); /* */
	  
	      /* write the solution values for comparison */
	      {
		char fname[300];
		sprintf(fname, "visual/line.x.%d.txt", (int) lvl);
		err=navsto_write_line_u_t2( &msh2, &soli, 0, 0.5, fname );
		FUNCTION_FAILURE_HANDLE( err,
					 mesh_write_line_u_t2,
					 navsto_solver); 
		sprintf(fname, "visual/line.y.%d.txt", (int) lvl);
		err=navsto_write_line_u_t2( &msh2, &soli, 1, 0.5,
					    fname ); 
		FUNCTION_FAILURE_HANDLE( err,
					 mesh_write_line_u_t2,
					 navsto_solver);
	      } /* */ 
	  
	      /* write the pressure components */
	      soli.V=&sol.V[dim*vx_nr];
	      soli.len=vx_nr;
	      soli.n_max=vx_nr;
	      err=mesh_write_solution_exp_t2( &msh2, &soli, 1, 20,
					      "visual/2d_mesh_p" );
	      FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_exp_t2,
				       navsto_solver); /* */

	      /* write the magnitude of the velocity  */
	      for (i=0; i<vx_nr;i++)
		{
		  double normi=0.0;
		  for (j=0; j<dim; j++)
		    normi+=sol.V[i+j*vx_nr]*sol.V[i+j*vx_nr];
		  sol.V[i]=sqrt(normi);
		}
	      soli.V=&sol.V[0];
	      soli.len=vx_nr;
	      soli.n_max=vx_nr;
	      err=mesh_write_solution_exp_t2( &msh2, &soli, 1, 20,
					      "visual/2d_mesh_absv" );
	      FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_exp_t2,
				       navsto_solver); /* */

	      /* touch the marker file */
	      mark=fopen("visual/2d_mesh_mark", "w");
	      fprintf(mark, "next please!\n");
	      fclose(mark);
	    }
	}


      /* final check of nonlinear convergence */
      TIMEGET;
      t1=ti;
      
      if (!use_SDFEM)
	{
	  err=navsto_MG_C_assem_t21( msh1, lvl, &K, &rhs, &maxPe,
				     &rhs_stokes,&x, &msh2,
				     vx_new, vx_max, linmeth );
	  FUNCTION_FAILURE_HANDLE( err, navsto_MG_C_assem_t21,
				   navsto_solver);
	}
      else
	{
	  err=navsto_MG_assem_SDFEM_t21( msh1, lvl, &K, &rhs, &x,
					 &msh2, vx_new, vx_max );
	  FUNCTION_FAILURE_HANDLE( err, navsto_assem_SDFEM_t21, 
				   navsto_solver);
	}
  
      TIMEGET;
      t2=ti;

      /* test the residual */
      /* y=K*x */
      err=navsto_matrix_tim_vec( &K, &x, &y);
      FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec, navsto_solver);  
      /* z=K*x-rhs, get norm(y) */
      normy=0.0;
      for (i=0; i<x.len; i++)
	{
	  z.V[i]=y.V[i]-rhs.V[i];
	}
      /* apply the projector */
      err=navsto_projector_no_precon( &K, &z, NULL, &y);
      FUNCTION_FAILURE_HANDLE( err, navsto_projector_no_precon,
			       navsto_solver);  
      for (i=0; i<x.len; i++)
	{
	  normy+= y.V[i]*y.V[i];
	}
      normy=sqrt(normy); /* */

      if (VERBOSE>3)
	{
	  printf("maxPe= %8.1e ", maxPe);
	  printf("|res_nl|=%12.6e, ", normy); /* */
	  printf("t_a=%8.2e ", t2-t1); /* */
	  printf("\n");
	}

      vector_free(&y);
      vector_free(&z);
      vector_free(&sol);

      /* do the postprocessing */ 
      if (pccrit!=NULL)
	{
	  err=navsto_dIdX_t21( &msh2, &K, &x, msh2.pc_nr,
			       pccrit, NULL, NULL, NULL);
	  FUNCTION_FAILURE_HANDLE( err, navsto_dIdX_t21, navsto_solver);  

	  if (VERBOSE>2)
	    {
	      printf("\nperformance criteria eval: err=%d\n",err);
	      for (i=0; i<msh2.pc_nr; i++)
		{
		  printf("crit %d: %e\n", (int) i, pccrit->V[i]);
		}
	    }
	}

    } /* end mesh refinement + interpolation */




  /*********************************************************/
  /*                                                       */
  /* navier-stokes system is now solved on the finest mesh */
  /*                                                       */
  /*********************************************************/



  
  /* if no reference solution was provided, return one to the
     calling routine */
  if (solref->len==0)
    {
      err=vector_alloc(solref, x.len);
      FUNCTION_FAILURE_HANDLE( err, vecto_alloc, navsto_solver);
      /* copy x to solref */
      for (i=0; i<x.len; i++)
	solref->V[i]=x.V[i];
    }

  TIMEGET;
  tsolve=ti;

  /* if required, do the adjoint solve */
  if ((DIDrpar != NULL)&&(msh1->pc_nr>0))
    {
      FIDX tc, i_par;
      struct vector *dIdsol;
      struct vector Psi, y, z;

      TRY_MALLOC(dIdsol, msh2.pc_nr, struct vector, navsto_solver);

      for (tc=0; tc<msh2.pc_nr; tc++)
	{
	  err=vector_alloc( &dIdsol[tc], x.len); 
	  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
	}

      err=navsto_dIdX_t21( &msh2, &K, &x, msh2.pc_nr,
			   pccrit, dIdsol, NULL, NULL);
      FUNCTION_FAILURE_HANDLE( err, navsto_dIdX_t21, navsto_solver);

      /* transpose the system matrix */
      err=navsto_adj_convert( &K);
      FUNCTION_FAILURE_HANDLE( err, navsto_adj_convert, navsto_solver);

      /* allocate the adjoint solution vector Psi and a help vector y */
      err=vector_alloc( &Psi, x.len);
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
      err=vector_alloc( &y, x.len);
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);
      err=vector_alloc( &z, x.len);
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_solver);

      /* prepare to solve */
      K.innereps=adjinnereps;
      K.mg->vcycles=1;
      K.mg->smooths=1;
      K.mg->stop_eps=0.0;

      /* for each criterion */
      for (tc=0; tc<1; tc++)
	{
	  double norm_dIdsol, norm_adjres;
	  /* compute the norm of the adjoint rhs */
	  normy=0.0;
	  for (i=0; i<dIdsol[tc].len; i++)
	    normy += dIdsol[tc].V[i] * dIdsol[tc].V[i];
	  norm_dIdsol=sqrt(normy);

	  if (norm_dIdsol!=0.0)
	    {
	      /* actualy need to solve */

	      /* reset iteration counters */
	      for (i=0; i<5; i++) K.innercount[i]=0;

	      /* solve the adjoint system */
#ifdef NEW_ADJOINT
	      /* new adjoint handling: same preconditioner as for
		 forward solve, but do a projection step first */
	      {
		double Psi_lam;

		Psi_lam=0.0;
		for (i=0; i<vx_nr; i++)
		  {
		    if (K.pdof[i]!=-1)
		      {
			Psi_lam += dIdsol[tc].V[dim*vx_nr+ K.pdof[i] ];
		      }
		  }

		printf("Psi_lambda=%e\n", Psi_lam);

		for (i=0; i<vx_nr; i++)
		  {
		    if ( K.pdof[i]!=-1)
		      {
			dIdsol[tc].V[ dim*vx_nr+K.pdof[i] ] -= 
			  Psi_lam * K.weight[i] ;
		      }
		  }

		if (Psi_lam > 1e-8)
		  {
		    fprintf(stderr,"navsto_solver: "
			    "current adjoint handling can not deal\n"
			    "with e*dI/dp > 0, something wrong with\n"
			    "the criterion?\n");
		    return FAIL;
		  }
	      }

	      err=GMRES( m, 100, 2, adjeps, 0, &Psi, &resi, &iter,
			 navsto_matrix_tim_vec, navsto_projector_w_precon,
			 &K, &dIdsol[tc], NULL ); /* */ 
#else
	      /* old adjoint handling, special multiplication and
		 preconditioner routines */
	      err=GMRES( m, 100, 2, adjeps, 0, &Psi, &resi, &iter,
			 navsto_adjmat_tim_vec, navsto_adj_w_precon,
			 &K, &dIdsol[tc], NULL ); /* */ 
#endif
	      FUNCTION_FAILURE_HANDLE(err, GMRES, navsto_solver);

	      /* check the actual residual */
#ifdef NEW_ADJOINT
	      err=navsto_matrix_tim_vec( &K, &Psi, &y );
	      FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec, 
				       navsto_solver);

#else
	      err=navsto_adjmat_tim_vec( &K, &Psi, &y );
	      FUNCTION_FAILURE_HANDLE( err, navsto_adjmat_tim_vec, 
				       navsto_solver);
#endif

	      for (i=0; i<y.len; i++)
		{
		  y.V[i]-=dIdsol[tc].V[i];
		}
#ifdef NEW_ADJOINT
	      /* residual has to be projected */
	      for (i=0; i<y.len; i++)
		{
		  dIdsol[tc].V[i]=y.V[i];
		}

	      err=navsto_projector_no_precon( &K, &dIdsol[tc], NULL, &y );
	      FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec, 
				       navsto_solver);

#endif
	      normy=0.0;
	      for (i=0; i<y.len; i++)
		{
		  normy+=y.V[i]*y.V[i];
		}


	      norm_adjres=sqrt(normy);
	      printf("adjoint solved!  iter=%d, |rhs|=%8.1e, "
		     "|C^-1*res|=%8.1e  |res|=%8.1e"
		     " inner-it:  %4d %4d %4d %4d\n",
		     iter, norm_dIdsol, resi,
		     norm_adjres, K.innercount[0], K.innercount[1],
		     K.innercount[2], K.innercount[3]);

	      /* plot the adjoint solution */
		{
		  struct vector sol, soli;
		  FIDX eg, nod1, nod2, nodm;
		  
		  err=vector_alloc(&sol, 3*vx_nr);
		  FUNCTION_FAILURE_HANDLE( err, vector_alloc, 
					   navsto_solver);
		  for (i=0; i<dim*vx_nr; i++)
		    { sol.V[i]=Psi.V[i]; }
		  for (i=0; i<vx_nr; i++)
		    { sol.V[dim*vx_nr+i]=0.0; }
		  for (i=0; i<vx_nr; i++)
		    { 
		      if (K.pdof[i]!=-1)
			{ sol.V[dim*vx_nr+i]= Psi.V[dim*vx_nr+K.pdof[i]]; }
		    }

		  for (eg=0; eg<msh2.eg_nr; eg++)
		    {
		      if (msh2.edge[eg*msh2.eg_w+MCT2EGCHL1]==-1)
			{
			  nod1=msh2.edge[eg*msh2.eg_w+MCT2EGNOD1  ];
			  nod2=msh2.edge[eg*msh2.eg_w+MCT2EGNOD1+1];
			  nodm=msh2.edge[eg*msh2.eg_w+MCT2EGNODM  ];
		    
			  sol.V[2*msh2.vx_nr+nodm]= 0.5 * 
			    (sol.V[2*msh2.vx_nr+nod1] 
			     + sol.V[2*msh2.vx_nr+nod2]); 
			}
		    }
		
		  /* write the velocity components */
		  soli.V=&sol.V[0];
		  soli.len=dim*vx_nr;
		  soli.n_max=dim*vx_nr;
		  err=mesh_write_solution_exp_t2( &msh2, &soli, dim, 20,
						  "visual/2d_mesh_v" );
		  FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_exp_t2,
					   navsto_solver); /* */
	  
	  
		  /* write the pressure components */
		  soli.V=&sol.V[dim*vx_nr];
		  soli.len=vx_nr;
		  soli.n_max=vx_nr;
		  err=mesh_write_solution_exp_t2( &msh2, &soli, 1, 20,
						  "visual/2d_mesh_p" );
		  FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_exp_t2,
					   navsto_solver); /* */
		  /* touch the marker file */
		  mark=fopen("visual/2d_mesh_mark", "w");
		  fprintf(mark, "next please!\n");
		  fclose(mark);

		  printf("wrote adjoint\n");

		  vector_free(&sol);
		} /* end plot solution */
	    }
	  else
	    {
	      /* don't really need to compute Psi as Psi=0 anyway */
	      for (i=0; i<Psi.len; i++)
		Psi.V[i]=0.0;
	    }

	  /* now evaluate DIDrpar from the adjoint representation,
	        DI   dI         dR
		-- = -- - Psi^T -- .
		DF   dF         dF
	     where the derivatives wrt. F are evaluated by finite
	     differences
	  */
	  err=vector_alloc(DIDrpar, A_m);
	  FUNCTION_FAILURE_HANDLE( err, vector_alloc, 
				   navsto_solver);
	  

#define EVAL_ADJ_EXPR(__result__) {					\
	    free(vx_new);						\
	    if (msh2.vertex==vx_new) msh2.vertex=NULL;			\
	    vx_new=NULL;						\
	    err=mesh_deform_t1_t2(msh1, lvl_max, &vx_new, &vx_max, 0, NULL); \
	    FUNCTION_FAILURE_HANDLE( err, mesh_deform_t2_t1, navsto_solver); \
	    vector_free(&rhs_stokes); 					\
	    vector_free(&rhs);						\
	    err=navsto_MG_init_assem_t21( msh1, lvl_max, &K, &rhs_stokes, \
					  &rhs, &x_dummy, &msh2,	\
					  vx_new, vx_max );		\
	    FUNCTION_FAILURE_HANDLE( err, navsto_MG_init_assem_t21,	\
				     navsto_solver);			\
	    vector_free(&x_dummy);					\
	    err=navsto_MG_C_assem_t21( msh1, lvl_max, &K, &rhs, &maxPe,	\
				       &rhs_stokes, &x, &msh2,		\
				       vx_new, vx_max,			\
				       linmeth );			\
	    FUNCTION_FAILURE_HANDLE( err, navsto_MG_C_assem_t21,	\
				     navsto_solver);			\
	    /* y=K*x */							\
	    err=navsto_matrix_tim_vec( &K, &x, &y);			\
	    FUNCTION_FAILURE_HANDLE( err, navsto_matrix_tim_vec,	\
				     navsto_solver);			\
	    /* z=K*x-rhs */						\
	    for (i=0; i<x.len; i++)					\
	      {								\
		z.V[i]=y.V[i]-rhs.V[i];					\
	      }								\
	    /* applying the projector is not necessary (and gives
	       wrong results */						\
	    { double *vx_old=msh2.vertex;				\
	      msh2.vertex=vx_new;					\
	      err=navsto_dIdX_t21( &msh2, &K, &x, msh2.pc_nr,		\
				   &pc_crit_pmh, NULL, NULL, NULL);	\
	      FUNCTION_FAILURE_HANDLE( err, navsto_dIdX_t21, navsto_solver); \
	      msh2.vertex=vx_old;					\
	    }								\
	    /* pc_crit-Psi^T*(K*x-rhs) */				\
	    __result__= pc_crit_pmh.V[0];					\
	    for (i=0; i<x.len; i++)					\
	      {								\
		__result__-=Psi.V[i]*z.V[i];				\
	      }								\
	  }


	  for (i_par=0; i_par<A_m; i_par++)
	    {
	      double par_save;
	      double adj_expr_ph, adj_expr_mh;
	      struct vector x_dummy,pc_crit_pmh;

	      
	      err=vector_alloc(&pc_crit_pmh, msh1->pc_nr);
	      FUNCTION_FAILURE_HANDLE( err, vector_alloc, 
				       navsto_solver);

	      par_save=rpar->V[i_par];

	      rpar->V[i_par] = par_save + h;
	      /* spar= bspar + Aspar*rpar */
	      for (i=0; i<A_n; i++)
		{ msh1->spar[i]=bspar->V[i];
		  for (j=0; j<A_m; j++)
		    msh1->spar[i]+=Aspar->V[i*A_m + j]*rpar->V[j]; }
	      /* eval pc_ph- Psi^T*(K_ph*x-rhs) */
	      EVAL_ADJ_EXPR(adj_expr_ph);


	      rpar->V[i_par] = par_save - h;
	      /* spar= bspar + Aspar*rpar */
	      for (i=0; i<A_n; i++)
		{ msh1->spar[i]=bspar->V[i];
		  for (j=0; j<A_m; j++)
		    msh1->spar[i]+=Aspar->V[i*A_m + j]*rpar->V[j]; }
	      /* eval pc_mh- Psi^T*(K_mh*x-rhs) */
	      EVAL_ADJ_EXPR(adj_expr_mh);
	      

	      DIDrpar->V[i_par]=(adj_expr_ph-adj_expr_mh)/(2.0*h);
	  
	      rpar->V[i_par] = par_save;

	      vector_free(&pc_crit_pmh);
	    }

	} /* end loop over cirteria */

      for (tc=0; tc<msh2.pc_nr; tc++)
	{
	  vector_free( &dIdsol[tc]); 
	}

      /* free intermediate help vectors */
      vector_free(&Psi);
      vector_free(&y);
      vector_free(&z);
      free(dIdsol);

    }
  TIMEGET;
  tadj=ti;

  printf("\n\n times: solve=%8.1e   adj_FDR=%8.1e \n\n",
	 tsolve-tstart, tadj-tsolve);

  normy=0.0;
  for(i=0; i<x.len; i++)
    normy+=(x.V[i]-solref->V[i])*(x.V[i]-solref->V[i]);
  printf("diff x-solref=%8.1e\n", sqrt(normy));

  /* free local data */
  navsto_matrix_free(&K);
  vector_free(&rhs_stokes);
  vector_free(&rhs);
  vector_free(&x);

  vector_free(&x_old);

  mesh_free(&msh2); /* frees vx_new as well */

  if (nu==nu_akt)
    return SUCCESS;
  else
    {
      fprintf(stderr,"navsto_solver: "
	      "unable to use desired nu on this mesh\n");
      return FAIL;
    }
}
