/***********************************************************************

    This file is part of
    FEINS, Finite Element Incompressible Navier-Stokes solver,
    which is expanding to a more general FEM solver and toolbox,
    Copyright (C) 2003--2008, Rene Schneider 
    <rene.schneider@mathematik.tu-chemnitz.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

    Minor contributions to this program (for example bug-fixes and
    minor extensions) by third parties automatically transfer the
    copyright to the general author of FEINS, to maintain the
    possibility of commercial re-licensing. If you contribute but wish
    to keep the copyright of your contribution, make that clear in
    your contribution!

    Non-GPL licenses to this program are available upon request from
    the author.

************************************************************************/
/*
FILE navsto_aux.c
HEADER navsto_aux.h

TO_HEADER:


// useful text macros
#include "feins_macros.h"
// data structures 
#include "sparse_struct.h"
#include "navsto_struct.h"

*/

#include <math.h>


/* function prototypes */
#include "navsto_aux.h"

/* prototypes of external functions */
#include <string.h>

#include "feins_lapack.h"
#include "sparse.h"
#include "lin_solver.h"
#include "mesh.h"
#include "stokes_aux.h"
#include "navstoassem.h"

/*FUNCTION*/
int navsto_write_line_u_t2( struct mesh *m, struct vector *usol,
			    int xy, double dxy,
			    char *name
/* writes the values of the solution usol along a line specified
   (x=dxy or y=dxy),
   the values are stored for a set of points which will be sorted in
   descending order (of the y resp. x component) and written together
   with the velocity vector in a file named name
   
   Input:  m         - the mesh
           usol      - vector with containing the velocity field,
                       stored as usol[j*vx_nr+i] gives the j-th
                       velocity component in the i-th node of the mesh
	   xy        - specifies if dxy shall be matched by the x or y
	               component of the coordinates,
		       xy=0  ==> for x==dxy
		       xy=1  ==> for y==dxy
           dxy       - the position of the line
	   name      - name of the file

   Output: (writes the files)

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
				){
#define CATCHEPS (1.0e-8)
  FIDX i, j, vx_nr, vx_w, el_nr, el_w, fc_nr, fc_w, eg_nr, eg_w,
    nr, match_el, sorter_len;

  FILE *out;
  int *posmarker;            /* marks each point to be left(-1),
				right(+1) or on(0) of the line */
  FIDX *elemmarker;           /* the line intersecting elements */
  struct intdouble *sorter;


  if ((xy<0)||(xy>1))
    {
      fprintf(stderr,"navsto_write_line_u_t2: xy=%d invalid!\n", xy);
      return FAIL;
    }

  if ((*usol).len<2*(*m).vx_nr)
    {
      fprintf(stderr,
	      "navsto_write_line_u_t2: usol.len=%d doesn't fit vx_nr=%d !\n",
	      (int) (*usol).len, (int) (*m).vx_nr);
      return FAIL;
    }

  vx_nr=(*m).vx_nr; el_nr=(*m).el_nr; fc_nr=(*m).fc_nr; eg_nr=(*m).eg_nr; 
  vx_w =(*m).vx_w;  el_w =(*m).el_w;  fc_w =(*m).fc_w;  eg_w =(*m).eg_w;


  /* mark the points left/right/on the line, count the on the line ones */
  TRY_MALLOC( posmarker, vx_nr, int,  navsto_write_line_u_t2);
  nr=0;
  for (i=0; i<vx_nr; i++)
    {
      double the_xy;
      the_xy=(*m).vertex[i*vx_w+MCT2VXSTRT+xy];

      if (fabs(the_xy-dxy)<CATCHEPS)
	{
	  nr++;
	  posmarker[i]=0;
	}
      else
	{
	  if ( (the_xy-dxy)<0 )
	    {
	      posmarker[i]=-1;
	    }
	  else
	    {
	      posmarker[i]=+1;
	    }
	}
    }
  
  /* now find the elements which intersect the line */
  TRY_MALLOC( elemmarker, el_nr, FIDX,  navsto_write_line_u_t2);
  match_el=0;
  for (i=0; i<el_nr; i++)
    {
      int cpos, cneg;
      
      /* count the left/rights in this element */
      cpos=0;
      cneg=0;
      for (j=0; j<3; j++)
	{
	  FIDX node;
	  node=(*m).elem[i*el_w+MCT2ELNOD1+j];
	  if (posmarker[node]>0) cpos++;
	  if (posmarker[node]<0) cneg++;
	}

      /* if positive and negative are present, mark this element */
      if ( (cpos>0)&&(cneg>0) )
	{
	  elemmarker[match_el]=i;
	  match_el++;
	  nr +=2;
	}
    }

  /* allocate memory for the sorter field */
  TRY_MALLOC(sorter, nr, struct intdouble, navsto_write_line_u_t2);

  /* define the sorter field */
  sorter_len=0;
  /* first the exact matches */
  for (i=0; i<vx_nr; i++)
    {
      if (posmarker[i]==0) 
	{
	  sorter[sorter_len].i=i;
	  sorter[sorter_len].d=(*m).vertex[i*vx_w+MCT2VXSTRT+(xy+1)%2];
	  sorter_len++;
	}
    }
  /* then the intersecting elements */
  for (i=0; i<match_el; i++)
    {
      FIDX lines_found, the_el, edge;

      /* find two intersecting lines */
      lines_found=0;
      the_el=elemmarker[i];
      for (j=0; j<3; j++)
	{
	  FIDX node1, node2;

	  edge=(*m).face[the_el*fc_w+MCT2FCEDG1+j];
	  node1=(*m).edge[edge*eg_w+MCT2EGNOD1+0];
	  node2=(*m).edge[edge*eg_w+MCT2EGNOD1+1];
	  if ( (posmarker[node1] != posmarker[node2]) && (lines_found<2) )
	    {
	      double n1x, n2x, alpha, the_x;

	      /* find the exact position of the intersection point,
		 ofwhich we need  the component which is sorted */
	      n1x=(*m).vertex[node1*vx_w+MCT2VXSTRT+xy];
	      n2x=(*m).vertex[node2*vx_w+MCT2VXSTRT+xy];
	      if ( (n2x-n1x)!=0.0 )
		{
		  alpha= (dxy-n1x)/(n2x-n1x);
		}
	      else
		{
		  /* coordinate identical => both are on the line */
		  alpha=0;
		}

	      n1x=(*m).vertex[node1*vx_w+MCT2VXSTRT+(xy+1)%2];
	      n2x=(*m).vertex[node2*vx_w+MCT2VXSTRT+(xy+1)%2];

	      /* the position is */
	      the_x=n1x+alpha*(n2x-n1x);

	      /* make an entry in the sorter,
	         
	         to mark it as an element rather than a point, give it
	         a negative id, also encoding the which edge it was */
	      sorter[sorter_len].i=-1 -(i + j*match_el);
	      sorter[sorter_len].d=the_x;
	      sorter_len++;

	      lines_found++;
	    }
	}
    }
  
  /* sort the field */
  qsort( sorter, nr, sizeof(struct intdouble), comp_intdouble_d);

  /* open the file */
  out=fopen(name, "w");
  if (out==NULL)
    {
      fprintf(stderr,
	      "navsto_write_line_u_t2: error opening file \"%s\"\n",
	      name);
      free(sorter);
      return FAIL;
    }
  
  /* write the output */
  fprintf(out, "%% %8s  %8s  %8s\n\n", " xy+1", "   u", "   v");
  for (j=0; j<nr; j++)
    {
      i=sorter[j].i;
      if (i>=0)
	{
	  /* it is the point itself */
	  fprintf(out, "%8.4f  %8.4f  %8.4f\n",
		  (*m).vertex[i*vx_w+MCT2VXSTRT+(xy+1)%2],
		  (*usol).V[i], (*usol).V[i+vx_nr]);
	}
      else
	{
	  /* it is an element edge */
	  
	  FIDX the_el, el_edge, edge, node1, node2;
	  double n1x, n2x, alpha, the_x, n1u, n2u, n1v, n2v, the_u, the_v;

	  /* decode the element and edge info */
	  the_el =(-(i+1))%match_el;
	  el_edge=(-(i+1))/match_el;
	  the_el=elemmarker[the_el];

	  edge=(*m).face[the_el*fc_w+MCT2FCEDG1+el_edge];
	  node1=(*m).edge[edge*eg_w+MCT2EGNOD1+0];
	  node2=(*m).edge[edge*eg_w+MCT2EGNOD1+1];

	  /* find the exact position of the intersection point,
	     ofwhich we need  the component which is sorted */
	  n1x=(*m).vertex[node1*vx_w+MCT2VXSTRT+xy];
	  n2x=(*m).vertex[node2*vx_w+MCT2VXSTRT+xy];
	  if ( (n2x-n1x)!=0.0 )
	    {
	      alpha= (dxy-n1x)/(n2x-n1x);
	    }
	  else
	    {
	      /* coordinate identical => both are on the line */
	      alpha=0;
	    }

	  /* the position is */
	  the_x=sorter[j].d;
	  
	  n1u=(*usol).V[node1];
	  n1v=(*usol).V[node1+vx_nr];

	  n2u=(*usol).V[node2];
	  n2v=(*usol).V[node2+vx_nr];

	  /* we ignore that it is a t2 element and just do linear
	     interpolation (for now) */
	  the_u=n1u+alpha*(n2u-n1u);
	  the_v=n1v+alpha*(n2v-n1v);
	  
	  fprintf(out, "%8.4f  %8.4f  %8.4f\n", the_x, the_u, the_v);
	}
    }

  /* close the file */
  fclose(out);


  /* free the temporary data */
  free(posmarker);
  free(elemmarker);
  free(sorter);

  return SUCCESS;
}


/*FUNCTION*/
int navsto_matrix_init(struct navsto_matrix *K, FIDX dim, FIDX lvlmax
/* allocates memory in K
   
   Input:  dim     - problem dimension (d=2 or 3)
	   lvlmax  - maximal number of levels for which the stiffness
	             matrices are stored

   Output: K       - (K is given by reference), memory in K is
                     allocated, dimensions of K are initialised

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i;

  TRY_MALLOC( (*K).Fs, lvlmax, struct sparse, navsto_matrix_init);
  TRY_MALLOC( (*K).Bs, dim, struct sparse, navsto_matrix_init);

  for (i=0; i<lvlmax; i++)
    sparse_init( &(*K).Fs[i]);

  for (i=0; i<dim; i++)
    sparse_init( &(*K).Bs[i]);


  (*K).dim    = dim;
  (*K).vx_nr  = 0;
  (*K).pvx_nr = 0;
  (*K).pdof   = NULL;
  (*K).lvl    = 0;
  (*K).lvlmax = lvlmax;

  (*K).bn_nr  = 0;
  (*K).nodes  = NULL;
  (*K).weight = NULL;

  sparse_init(&(*K).M);
  vector_init(&(*K).Minvdiag);
  sparse_init(&(*K).Ap);
  sparse_init(&(*K).Cp);
  coarse_mat_null_def(&(*K).cmat);

  (*K).cmat_Ap       = NULL;
  (*K).cmat_Ap_pdof  = NULL;
  (*K).cmat_Ap_vxnr  = 0;
  (*K).cmat_Ap_pvxnr = 0;

  (*K).msh    = NULL;
  (*K).ml1    = NULL;
  (*K).mld    = NULL;
  (*K).bpx    = NULL;
  (*K).mg     = NULL;
  (*K).mg1    = NULL;

  (*K).mlsorters = NULL;

  for (i=0; i<5; i++)
    (*K).innercount[i]=0;

  (*K).innereps=1.0e-2;

  for (i=0; i<5; i++)
    (*K).innersteps[i]=-1;

  TRY_MALLOC( (*K).MGscale, lvlmax, double, navsto_matrix_init);
  for (i=0; i<lvlmax; i++)
    (*K).MGscale[i]=0.0;


  TRY_MALLOC( (*K).vx_nr_lvl, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).pdof_vels, lvlmax, struct ilist**, navsto_matrix_init);
  TRY_MALLOC( (*K).pvx_nr_lvl, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).pdof_lvl, lvlmax, FIDX*, navsto_matrix_init);
  TRY_MALLOC( (*K).bn_nr_lvl, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).nodes_lvl, lvlmax, FIDX*, navsto_matrix_init);
  TRY_MALLOC( (*K).pdof_sorter_n, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).pdof_sorter, lvlmax, FIDX*, navsto_matrix_init);
  TRY_MALLOC( (*K).weight_lvl, lvlmax, double*, navsto_matrix_init);
  TRY_MALLOC( (*K).vel_weight, lvlmax, double*, navsto_matrix_init);

  TRY_MALLOC( (*K).elem_lvl, lvlmax, FIDX*, navsto_matrix_init);
  TRY_MALLOC( (*K).el_nr_lvl, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).bc_marker_lvl, lvlmax, FIDX*, navsto_matrix_init);
  TRY_MALLOC( (*K).elem_sorter_n, lvlmax, FIDX, navsto_matrix_init);
  TRY_MALLOC( (*K).elem_sorter, lvlmax, FIDX*, navsto_matrix_init);

  for (i=0; i<lvlmax; i++)
    {
      (*K).vx_nr_lvl[i] = 0;
      
      (*K).pdof_vels[i]  = NULL;

      (*K).pvx_nr_lvl[i] = 0;
      (*K).pdof_lvl[i]   = NULL;
      (*K).weight_lvl[i] = NULL;
      (*K).vel_weight[i] = NULL;

      (*K).bn_nr_lvl[i]  = 0;
      (*K).nodes_lvl[i]  = NULL;

      (*K).pdof_sorter_n[i] = 0;
      (*K).pdof_sorter[i]   = NULL;
      

      (*K).elem_lvl[i]      = NULL;
      (*K).el_nr_lvl[i]     = 0;
      (*K).bc_marker_lvl[i] = NULL;
      (*K).elem_sorter_n[i] = 0;
      (*K).elem_sorter[i]   = NULL;
    }

  (*K).max_n_pdof_vels = 0;

  return SUCCESS;
}


/*FUNCTION*/
int navsto_matrix_reinit(struct navsto_matrix *K, FIDX vx_nr
/* reallocates memory in K as necessary if the mesh changes
   (e.g. refined)
   
   Input:  vx_nr   - number of vertices in the corresponding mesh

   Output: K       - (K is given by reference), memory in K is
                     reallocated, dimensions of K are reinitialised

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i, dim;

  dim         = (*K).dim;

  for (i=0; i<dim; i++)
    sparse_free( &(*K).Bs[i]);

  (*K).vx_nr  = vx_nr;
  (*K).pvx_nr = 0;
  free((*K).pdof);
  (*K).pdof   = NULL;

  (*K).bn_nr  = 0;
  free((*K).nodes);
  TRY_MALLOC( (*K).nodes, vx_nr, FIDX, navsto_matrix_reinit);
  free((*K).weight);
  TRY_MALLOC( (*K).weight, vx_nr, double, navsto_matrix_reinit);

  sparse_free(&(*K).M);
  vector_free(&(*K).Minvdiag);
  sparse_free(&(*K).Ap);
  sparse_free(&(*K).Cp);
  coarse_mat_free(&(*K).cmat);

  /* cmat_Ap and cmat_Ap_pdof are not changed until destroyed */

  (*K).msh    = NULL;

  if ((*K).ml1 != NULL)
    multilvl_free((*K).ml1);
  free((*K).ml1);
  (*K).ml1     = NULL;

  if ((*K).mld != NULL)
    multilvl_free((*K).mld);
  free((*K).mld);
  (*K).mld     = NULL;

  if ((*K).bpx != NULL)
    bpx_free((*K).bpx);
  free((*K).bpx);
  (*K).bpx    = NULL;

  if ((*K).mg != NULL)
    mg_free((*K).mg);
  free((*K).mg);
  (*K).mg     = NULL;

  if ((*K).mg1 != NULL)
    mg_free((*K).mg1);
  free((*K).mg1);
  (*K).mg1     = NULL;

  if ((*K).mlsorters != NULL)
    {
      for (i=0; i<dim; i++)
	{
	  if ((*K).mlsorters[i] != NULL)
	    free((*K).mlsorters[i]);
	}
      free((*K).mlsorters);
    }
  (*K).mlsorters=NULL;

  for (i=0; i<5; i++)
    (*K).innercount[i]=0;

  (*K).innereps=1.0e-2;

  for (i=0; i<5; i++)
    (*K).innersteps[i]=-1;

  for (i=0; i<(*K).lvlmax; i++)
    (*K).MGscale[i]=0.0;

  return SUCCESS;
}





/*FUNCTION*/
void navsto_matrix_free(struct navsto_matrix *K
/* memory allocated in K is freed, so that K can be freed

   Output: K       - (K is given by reference) inside memory is
                     released 
*/
		  ){
  FIDX i, dim, lvlmax;

  dim         = (*K).dim;
  lvlmax      = (*K).lvlmax;

  for (i=0; i<lvlmax; i++)
    sparse_free( &(*K).Fs[i]);
  free( (*K).Fs );
  (*K).Fs     = NULL;

  for (i=0; i<dim; i++)
    sparse_free( &(*K).Bs[i]);
  free( (*K).Bs );
  (*K).Bs     = NULL;

  (*K).dim    = 0 ;
  (*K).lvl    = 0 ;
  (*K).lvlmax = 0 ;

  (*K).vx_nr  = 0;
  (*K).pvx_nr = 0;

  sparse_free(&(*K).M);
  vector_free(&(*K).Minvdiag);
  sparse_free(&(*K).Ap);
  sparse_free(&(*K).Cp);
  coarse_mat_free(&(*K).cmat);


  if ((*K).cmat_Ap!=NULL)
    {
      coarse_mat_free((*K).cmat_Ap);
      free((*K).cmat_Ap);
      (*K).cmat_Ap=NULL;
    }

  if ((*K).cmat_Ap_pdof!=NULL)
    {
      free((*K).cmat_Ap_pdof);
      (*K).cmat_Ap_pdof=NULL;
    }

  (*K).cmat_Ap_vxnr  = 0;
  (*K).cmat_Ap_pvxnr = 0;



  (*K).msh    = NULL;

  if ((*K).ml1 != NULL)
    multilvl_free((*K).ml1);
  free((*K).ml1);
  (*K).ml1     = NULL;

  if ((*K).mld != NULL)
    multilvl_free((*K).mld);
  free((*K).mld);
  (*K).mld     = NULL;

  if ((*K).bpx != NULL)
    bpx_free((*K).bpx);
  free((*K).bpx);
  (*K).bpx    = NULL;

  if ((*K).mg != NULL)
    mg_free((*K).mg);
  free((*K).mg);
  (*K).mg     = NULL;

  if ((*K).mg1 != NULL)
    mg_free((*K).mg1);
  free((*K).mg1);
  (*K).mg1    = NULL;

  if ((*K).mlsorters != NULL)
    {
      for (i=0; i<dim; i++)
	{
	  if ((*K).mlsorters[i] != NULL)
	    free((*K).mlsorters[i]);
	}
      free((*K).mlsorters);
    }
  (*K).mlsorters=NULL;


  for (i=0; i<5; i++)
    (*K).innercount[i]=0;

  (*K).innereps=1.0e-2;

  for (i=0; i<5; i++)
    (*K).innersteps[i]=-1;

  free((*K).MGscale);
  (*K).MGscale=NULL;

  for (i=0; i<lvlmax; i++)
    {
      FIDX j;

      (*K).vx_nr_lvl[i]=0;

      for (j=0; j<(*K).pvx_nr_lvl[i]; j++)
	{
	  ilist_free( &(*K).pdof_vels[i][j] );
	  (*K).pdof_vels[i][j]=NULL;
	}

      free( (*K).pdof_vels[i] );
      (*K).pdof_vels[i]=NULL;

      if ( (*K).pdof_lvl[i] != (*K).pdof )
	{
	  free( (*K).pdof_lvl[i] );
	}
      (*K).pdof_lvl[i]=NULL;

      if ( (*K).weight_lvl[i] != (*K).weight )
	{
	  free( (*K).weight_lvl[i] );
	}
      (*K).weight_lvl[i]=NULL;
      (*K).pvx_nr_lvl[i]=0;

      free( (*K).vel_weight[i] );
      (*K).vel_weight[i] = NULL;

      if ( (*K).nodes_lvl[i] != (*K).nodes )
	{
	  free( (*K).nodes_lvl[i] );
	}
      (*K).nodes_lvl[i]=NULL;
      (*K).bn_nr_lvl[i]=0;

      free( (*K).pdof_sorter[i]);
      (*K).pdof_sorter[i]   = NULL;
      (*K).pdof_sorter_n[i] = 0;

      free( (*K).elem_lvl[i] );
      (*K).elem_lvl[i]=NULL;
      free( (*K).bc_marker_lvl[i] );
      (*K).bc_marker_lvl[i]=NULL;

      free( (*K).elem_sorter[i]);
      (*K).elem_sorter[i]   = NULL;
      (*K).elem_sorter_n[i] = 0;
    }

  free( (*K).vx_nr_lvl);
  (*K).vx_nr_lvl = NULL;

  free( (*K).pdof_vels );
  (*K).pdof_vels  = NULL;

  free( (*K).pdof_lvl  );
  (*K).pdof_lvl   = NULL;

  free( (*K).weight_lvl  );
  (*K).weight_lvl = NULL;

  free( (*K).vel_weight );
  (*K).vel_weight = NULL;

  free( (*K).pvx_nr_lvl);
  (*K).pvx_nr_lvl = NULL;

  free( (*K).nodes_lvl );
  (*K).nodes_lvl  = NULL;

  free( (*K).pdof_sorter );
  (*K).pdof_sorter= NULL;
  free( (*K).pdof_sorter_n );
  (*K).pdof_sorter_n= NULL;

  free( (*K).bn_nr_lvl );
  (*K).bn_nr_lvl = NULL;

  free( (*K).elem_lvl );
  (*K).elem_lvl  = NULL;

  free( (*K).el_nr_lvl );
  (*K).el_nr_lvl  = NULL;

  free( (*K).bc_marker_lvl );
  (*K).bc_marker_lvl = NULL;

  free( (*K).elem_sorter );
  (*K).elem_sorter= NULL;
  free( (*K).elem_sorter_n );
  (*K).elem_sorter_n= NULL;


  /* those need to be freed last as they are used in checks above */
  free((*K).pdof);
  (*K).pdof   = NULL;
  (*K).bn_nr  = 0;
  free((*K).nodes);
  (*K).nodes  = NULL;
  free((*K).weight);
  (*K).weight = NULL;

}




/*FUNCTION*/
int navsto_projector_no_precon(void *arg1, struct vector *in,
			       void *notused,
			       struct vector *out
/* performs the boudary condition projection for stokes problems,
     out = P*I*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there) and projects the pressure such that it has zero
   mean on the domain
   
   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection data 
	   in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     "preconditioner" 

   Output: out    - (given by reference), P*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i, j, dim, vx_nr;
  double pmean, sumw;
  struct navsto_matrix *K;

  K= (navsto_matrix*) arg1;

  vx_nr= (*K).vx_nr;
  dim  = (*K).dim;
  sumw=0.0;

  if (((*in).len!=(*out).len)||((*in).len<dim*vx_nr+(*K).pvx_nr))
    {
      fprintf(stderr,
	      "navsto_projector_no_precon: dimensions don't make sense!\n");
      return FAIL;
    }

  /* copy in to out */
  for (i=0; i<(*in).len; i++)
    {
      (*out).V[i]=(*in).V[i];
    }

  /* apply P^T */
  /* velocity components */
  for (j=0; j<dim; j++)
    {
      for (i=0; i<(*K).bn_nr; i++)
	{
	  (*out).V[ j*vx_nr + (*K).nodes[i] ]   = 0.0 ;
	}
    }


  /* pressure components */
  /* get the summ of all pressure components */
  pmean=0.0;
  for (i=0; i<vx_nr; i++)
    {
      sumw+= (*K).weight[i];
      if ((*K).pdof[i]!=-1)
	{
	  pmean+= (*out).V[dim*vx_nr + (*K).pdof[i] ];
	}
    }

  /* subtract weight[i] times the pressure summ */
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).pdof[i]!=-1)
	{
	  (*out).V[dim*vx_nr + (*K).pdof[i] ] -= pmean * (*K).weight[i];
	}
    }


  /* P^T done, now P */
  /* nothing to be done for the velocity components (P=P*P^T there) */
  /* pressure components */
  /* get the mean pressure */
  pmean=0.0;
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).pdof[i]!=-1)
	{
	  pmean+= (*out).V[dim*vx_nr + (*K).pdof[i] ] * (*K).weight[i];
	}
    }

  /* subtract the mean pressure */
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).pdof[i]!=-1)
	{
	  (*out).V[dim*vx_nr + (*K).pdof[i] ] -= pmean;
	}
    }

  /* done */

  return SUCCESS;
}





/*FUNCTION*/
int navsto_matrix_tim_vec(void *arg1, struct vector *vec,
			  struct vector *out
/* multiplies the matrix K from left to the vector vec,
   
   out = K * vec;

   Input:  arg1=
           K       - linearised Navier-Stokes-problem stiffness matrix
           vec     - vector

   Output: out     - resulting vector

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		     ){
  struct vector x, y;
  struct navsto_matrix *K;

  int  err;
  FIDX i;
  FIDX dim, lvl, vx_nr, pvx_nr;

  K      =  (navsto_matrix*) arg1;
  dim    = (*K).dim;
  lvl    = (*K).lvl;
  vx_nr  = (*K).vx_nr;
  pvx_nr = (*K).pvx_nr;
  
  /* distinguish between SDFEM and F_p-taylored  matrix types */
  if ( (*K).Bs[0].row_nr !=0 )
    {
      /* is old style (F_p taylored matrix) */
      /* for the velocity part of the output vector, first apply the
	 changing part (Fs), then the pressure part (B^T)
      */

      /* first the Fs part */
      x.len   = dim*vx_nr;
      x.n_max = dim*vx_nr;
      x.V     = &(*vec).V[0];
  
      y.len   = dim*vx_nr;
      y.n_max = dim*vx_nr;
      y.V     = &(*out).V[0];
  
      err=sparse_mul_mat_vec( &(*K).Fs[lvl], &x, &y);
      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
			       navsto_matrix_tim_vec);

      /* now B^T */
      for (i=0; i<dim; i++)
	{
	  /* out_i += B_i^T * vec_p */
	  x.len   = pvx_nr;
	  x.n_max = pvx_nr;
	  x.V     = &(*vec).V[dim*vx_nr];
      
	  y.len   = vx_nr;
	  y.n_max = vx_nr;
	  y.V     = &(*out).V[i*vx_nr];
      
	  err=sparse_mul_mat_vec_add_trans( &(*K).Bs[i], &x, &y);
	  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec_add_trans, 
				   navsto_matrix_tim_vec);
	}


      /* out_p =  sum (B_i * vec_i) */
      /* i=0 */
      x.len   = vx_nr;
      x.n_max = vx_nr;
      x.V     = &(*vec).V[0];

      y.len   = pvx_nr;
      y.n_max = pvx_nr;
      y.V     = &(*out).V[dim*vx_nr];

      err=sparse_mul_mat_vec( &(*K).Bs[0], &x, &y);
      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
			       navsto_matrix_tim_vec);


      /* i=1..dim-1 */
      for (i=1; i<dim; i++)
	{
	  x.len   = vx_nr;
	  x.n_max = vx_nr;
	  x.V     = &(*vec).V[i*vx_nr];

	  err=sparse_mul_mat_vec_add( &(*K).Bs[i], &x, &y);
	  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec_add, 
				   navsto_matrix_tim_vec);
	}
    }
  else
    {
      /* is SDFEM style matrix, just do a simple multiply */
      err=sparse_mul_mat_vec( &(*K).Fs[lvl], vec, out);
      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
			       navsto_matrix_tim_vec);
    }

  return SUCCESS;
}

/*FUNCTION*/
int navsto_matrices_write_files(struct navsto_matrix *K, char *basename
/* write all the top-level matrices into files
   (useful to analyse stuff in matlab)

   Input:  K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
           basename- the names of the individual files are derived
                     from this by appending "_XX.txt", where XX
                     specifies which of the matrices it is, i.e. one
                     of F, B1, B2, A_p, C_p, M_p, w, Dir
		     (F_p = A_p + C_p)

   Output: (the files)

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
				){
  int  err;
  FIDX i, nlen, lvl, vx_nr;
  FIDX *pdof;

  char *name;

  FILE *out;

  lvl    = (*K).lvl;

  /* allocate space for the name (appropriate length) */
  nlen=strlen(basename)+10; /* +8 for appending stuff +2 safety */
  TRY_MALLOC( name, nlen, char, navsto_matrices_write_files);

  /* output of F, B's, A_p, C_p, M_p straight via sparse_mat_write_file,
     only need to set up the name */
  
  /* write F */
  strcpy(name,basename);
  strcat(name,"_F.txt");
  err=sparse_mat_write_file( &(*K).Fs[lvl], name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 

  /* write B1 */
  strcpy(name,basename);
  strcat(name,"_B1.txt");
  err=sparse_mat_write_file( &(*K).Bs[0], name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 

  /* write B2 */
  strcpy(name,basename);
  strcat(name,"_B2.txt");
  err=sparse_mat_write_file( &(*K).Bs[1], name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 

  
  /* write Ap */
  strcpy(name,basename);
  strcat(name,"_A_p.txt");
  err=sparse_mat_write_file( &(*K).Ap, name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 

  /* write Cp */
  strcpy(name,basename);
  strcat(name,"_C_p.txt");
  err=sparse_mat_write_file( &(*K).Cp, name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 


  /* write Mp */
  strcpy(name,basename);
  strcat(name,"_M_p.txt");
  err=sparse_mat_write_file( &(*K).M, name);
  FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, 
			   navsto_matrices_write_files); 

  /* write the weight vector w */
  strcpy(name,basename);
  strcat(name,"_w.txt");
  /* open the file */
  out=fopen(name, "w");
  if (out==NULL)
    {
      fprintf(stderr,
	      "navsto_matrices_write_files: error opening file \"%s\"\n",
	      name);
      return FAIL;
    }

  vx_nr  = (*K).vx_nr;
  pdof   = (*K).pdof;
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  /* they might be unordered, so make sure we store the index
	     as well (matlab counts indices from 1, we from 0) */
	  fprintf(out, " %7d   %+24.16e\n", (int) (pdof[i]+1),
		  (*K).weight[i]);
	}
    }

  /* close the file */
  fclose(out);

  /* write the list of Dirichlet nodes Dir */
  strcpy(name,basename);
  strcat(name,"_Dir.txt");

  /* open the file */
  out=fopen(name, "w");
  if (out==NULL)
    {
      fprintf(stderr,
	      "navsto_matrices_write_files: error opening file \"%s\"\n",
	      name);
      return FAIL;
    }

  /* write the node numbers */
  for (i=0; i<(*K).bn_nr; i++)
    {
      /* (matlab counts indices from 1, we from 0) */
      fprintf(out, " %7d\n", (int) ((*K).nodes[i]+1) );
    }

  /* close the file */
  fclose(out);

  /* free allocated memory */
  free(name);

  return SUCCESS;
}
   

/*FUNCTION*/
int navsto_projector_w_precon(void *arg1, struct vector *in,
			      void *notused,
			      struct vector *out
/* performs preconditioning and the boudary condition projection for
   the linearizations of Navier-Stokes problems, 
     out = P*C^-1*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there) and projects the pressure such that it has zero
   mean 

   C^-1 is the Wathen block preconditioner:
        
   C^-1=[I                   ] [ I   ] [F^-1  ] velocity space
        [  (Mp^-1)*Fp*(Ap^-1)]*[-B  I]*[     I] pressure space

   where F is the advection diffusion operator, Mp is the pressure
   space mass matrix, Fp the pressure space advection diffusion
   operator, Ap the pressure space laplace operator
   (F, Fp and Ap are discretisations of these operators)

   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
	   in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     preconditioner

   Output: out    - (given by reference), P*C^-1*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err, iter;
  FIDX i, j, dim, lvl, vx_nr, pvx_nr;
  double pmean, resi;
  FIDX *pdof;
  struct vector rhs, xi;
  struct navsto_matrix *K;
  int vel_solver; /* used to switch between different preconditioners
		     for the F part */

  K      =  (navsto_matrix*) arg1;
  dim    = (*K).dim;
  lvl    = (*K).lvl;
  vx_nr  = (*K).vx_nr;
  pvx_nr = (*K).pvx_nr;
  pdof   = (*K).pdof;

  if (((*in).len!=(*out).len)||((*in).len<dim*vx_nr+pvx_nr))
    {
      fprintf(stderr,
	      "navsto_projector_w_precon: dimensions don't make sense!\n");
      return FAIL;
    }

  err=vector_alloc( &rhs, dim*vx_nr);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_projector_w_precon);

  /* velocity components */
  /* apply P^T, rhs = P^T*in  */
  /* copy in to rhs */
  for (i=0; i<dim*vx_nr; i++)
    {
      rhs.V[i]=(*in).V[i];
    }
  for (j=0; j<dim; j++)
    for (i=0; i<(*K).bn_nr; i++)
      {
	rhs.V[ j*vx_nr + (*K).nodes[i] ]   = 0.0 ;
      }

  /* { double normrhs=0.0;
     for (i=0; i<dim*vx_nr; i++)
     normrhs+=rhs.V[i]*rhs.V[i];
     printf("normrhs=%e   ", normrhs); } /* */

  /* prepare to solve F out = rhs */
  xi.len=dim*vx_nr;
  xi.n_max=dim*vx_nr;
  xi.V=&(*out).V[0];

  /* solve F out = rhs */
  /* err=GMRES( 30, 500, 2, (*K).innereps, 0, &xi, &resi, &iter,
     navsto_advecdiff_tim_vec, navsto_projector_vel,
     K, &rhs, NULL); /* */
  /* {
     struct vector invdiaghelp;
     invdiaghelp.len   = dim*vx_nr;
     invdiaghelp.n_max = dim*vx_nr;
     invdiaghelp.V     = K->mg->invdiag;
     err=GMRES( 30, 500, 2, (*K).innereps, 0, &xi, &resi, &iter,
     navsto_advecdiff_tim_vec, vector_diag_scale,
     K, &rhs, &invdiaghelp); 
     } /* */

  vel_solver= FP_VELOCITY_SOLVER;
  if ((*K).innersteps[0]<0)
    {
      switch (vel_solver)
	{
	case 0:
	  err=GMRES( 80, 500, 2, (*K).innereps, 0, &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel,
		     K, &rhs, NULL); /* */
	  break;
	case 1:
#define MG_GMRES_RESTART 120
	  err=GMRES( MG_GMRES_RESTART, 500, 2, (*K).innereps, 0,
		     &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel_MG,
		     K, &rhs, NULL); /* */
	  break;
	case 2:
	  err=GMRES( MG_GMRES_RESTART, 500, 2, (*K).innereps, 0,
		     &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel_W_cycle,
		     K, &rhs, NULL); /* */
	  break;
	default:
	  fprintf(stderr, "navsto_projector_w_precon: "
		  "unknown velocity solver!\n");
	  return FAIL;
	}
      if (iter>1)
	{
	  (*K).innersteps[0]=iter;
	  printf("set iter=%d\n", iter);
	}
      else 
	printf("postpone setting iter\n");
    }
  else
    {
      switch (vel_solver)
	{
	case 0:
	  err=GMRES( 80, (*K).innersteps[0], 3, 0.0, 0, &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel,
		     K, &rhs, NULL); /* */
	  break;
	case 1:
	  err=GMRES( MG_GMRES_RESTART, (*K).innersteps[0], 3, 0.0, 0,
		     &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel_MG,
		     K, &rhs, NULL); /* */
	  break;
	case 2:
	  err=GMRES( MG_GMRES_RESTART, (*K).innersteps[0], 3, 0.0, 0,
		     &xi, &resi, &iter,
		     navsto_advecdiff_tim_vec, navsto_projector_vel_W_cycle,
		     K, &rhs, NULL); /* */
	  break;
	default:
	  fprintf(stderr, "navsto_projector_w_precon: "
		  "unknown velocity solver!\n");
	  return FAIL;
	}
    }
  /* if (err=10) err=SUCCESS; /* */
  FUNCTION_FAILURE_HANDLE( err, GMRES, navsto_projector_w_precon);
  (*K).innercount[0]+=iter;

  /* { double normres=0.0;
     struct vector res;
     err=vector_alloc(&res,rhs.len);
     err=navsto_advecdiff_tim_vec(K, &xi, &res);
     for (i=0; i<dim*vx_nr; i++)
     res.V[i] -= rhs.V[i];
     for (j=0; j<dim; j++)
     for (i=0; i<(*K).bn_nr; i++)
     res.V[ j*vx_nr + (*K).nodes[i] ]   = 0.0 ;
     for (i=0; i<dim*vx_nr; i++)
     normres+=res.V[i]*res.V[i];
     printf("normres=%e\n", normres);
     vector_free(&res); } /* */


  /* apply P, out = P*out  */
  for (j=0; j<dim; j++)
    for (i=0; i<(*K).bn_nr; i++)
      {
	(*out).V[ j*vx_nr+ (*K).nodes[i] ]   = 0.0 ;
      }


  /* pressure components */
  /* apply P^T, rhs = -P^T*in  */
  /* copy in to rhs */
  for (i=0; i<pvx_nr; i++)
    {
      rhs.V[i]=  -(*in).V[dim*vx_nr+i];
    }
  rhs.len=pvx_nr;
  /* get the summ of all pressure components */
  pmean=0.0;
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  pmean+= rhs.V[ pdof[i] ];
	}
    }

  /* subtract weight[i] times the pressure summ */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  rhs.V[pdof[i] ] -= pmean * (*K).weight[i];
	}
    }


  /* P^T done, now C^{-1} */ 
  /* rhs=[B -I]*[out.vel; rsh] */
  for (j=0; j<dim; j++)
    {
      xi.len=vx_nr;
      xi.n_max=vx_nr;
      xi.V=&(*out).V[j*vx_nr];
      err=sparse_mul_mat_vec_add( &(*K).Bs[j], &xi, &rhs);
      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec_add,
			       stokes_projector_w_precon);
    }
  /* rhs=-rhs (= [-B P^T]*[out.vel; in.p]) */
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).pdof[i]!=-1)
	{
	  rhs.V[(*K).pdof[i] ]= -rhs.V[(*K).pdof[i] ];
	}
    }

  /* solve Ap out = rhs */
  xi.len=pvx_nr;
  xi.n_max=pvx_nr;
  xi.V=&(*out).V[dim*vx_nr];
  /* err=PCG( 10000, 2, (*K).innereps, 0, &xi, &resi, &iter,
     sparse_mul_mat_vec, NULL, &(*K).Ap, &rhs, NULL); /* */
  if ((*K).innersteps[dim] <0)
    {
      err=PCG( 10000, 2, (*K).innereps, 0, &xi, &resi, &iter,
	       sparse_mul_mat_vec, navsto_precon_p_lapl_bpx,
	       &(*K).Ap, &rhs, K); 
      (*K).innersteps[dim]=iter;
    }
  else
    {
      err=PCG( (*K).innersteps[dim], 0, 0.0, 0, &xi, &resi, &iter,
	       sparse_mul_mat_vec, navsto_precon_p_lapl_bpx,
	       &(*K).Ap, &rhs, K); 
      if (err==10) err=SUCCESS; 
    }
  /* err=PCG( 20, 2, (*K).innereps, 0, &xi, &resi, &iter,
     sparse_mul_mat_vec, navsto_precon_p_lapl_bpx,
     &(*K).Ap, &rhs, K); /* */
  /* if (err=10) err=SUCCESS; /* */
  FUNCTION_FAILURE_HANDLE( err, PCG, navsto_projector_w_precon);
  (*K).innercount[dim]+=iter;

  /* multiply by Fp:
     rhs = Fp * out ( = (Ap + Cp)*out )*/
  err=sparse_mul_mat_vec(&(*K).Ap, &xi, &rhs);
  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec,
			   navsto_projector_w_precon);
  err=sparse_mul_mat_vec_add(&(*K).Cp, &xi, &rhs);
  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec_add,
			   navsto_projector_w_precon);

  /* solve Mp out = rhs */
  if ((*K).innersteps[dim+1] <0)
    {
      err=PCG( 10000, 2, (*K).innereps, 0, &xi, &resi, &iter,
	       sparse_mul_mat_vec, vector_diag_scale,
	       &(*K).M, &rhs, &(*K).Minvdiag);
      (*K).innersteps[dim+1]=iter;
    }
  else
    {
      err=PCG( (*K).innersteps[dim+1], 0, 0.0, 0, &xi, &resi, &iter,
	       sparse_mul_mat_vec, vector_diag_scale, 
	       &(*K).M, &rhs, &(*K).Minvdiag);
      if (err==10) err=SUCCESS; /* */
    }
  FUNCTION_FAILURE_HANDLE( err, PCG, navsto_projector_w_precon);
  (*K).innercount[dim+1]+=iter;

  /* C^{-1} done, now P */
  /* get the mean pressure */
  pmean=0.0;
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  pmean+= (*out).V[dim*vx_nr + pdof[i] ] * (*K).weight[i];
	}
    }

  /* subtract the mean pressure */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  (*out).V[dim*vx_nr + pdof[i] ] -= pmean;
	}
    }

  /* done */
  vector_free( &rhs );

  return SUCCESS;
}


/*FUNCTION*/
int navsto_advecdiff_tim_vec(void *arg1, struct vector *vec,
			     struct vector *out
/* multiplies the stiffness matrix K of the advection diffusion
   operator from left to the vector vec,
   
   out = K * vec;

   Input:  arg1=
           K       - Navier-Stokes matrix struct
           vec     - vector

   Output: out     - resulting vector

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		     ){
  struct navsto_matrix *K;

  int  err;
  FIDX lvl;

  K      =  (navsto_matrix*) arg1;
  lvl    = (*K).lvl;
  
  err=sparse_mul_mat_vec( &(*K).Fs[lvl], vec, out);
  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
			   navsto_advecdiff_tim_vec);


  return SUCCESS;
}



/*FUNCTION*/
int navsto_projector_vel(void *arg1, struct vector *in,
			 void *arg3, struct vector *out
/* performs the boudary condition projection for the advection
   diffusion subproblems of the Wathen block preconditioner for
   Navier-Stokes problems, 
     out = P*I*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there)
   
   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection data 
           in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     "preconditioner" 

   Output: out    - (given by reference), P*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i, j, dim, vx_nr, len;

  struct navsto_matrix *K;

  K      =  (navsto_matrix*) arg1;
  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr;
  len    = dim * vx_nr;

  if ((len!=(*in).len)||(len!=(*out).len))
    {
      fprintf(stderr,
	      "navsto_projector_vel: in or out length doesn't match\n");
      return FAIL;
    }

  /* copy in to out */
  for (i=0; i<len; i++)
    {
      (*out).V[i]=(*in).V[i]; 
    }


  /* in our case: P^T P = P, so just apply P once  */
  for (j=0; j<dim; j++)
    {
      for (i=0; i<(*K).bn_nr; i++)
	{
	  (*out).V[ j*vx_nr + (*K).nodes[i] ]   = 0.0 ;
	}
    }
  return SUCCESS;
}




/*FUNCTION*/
int navsto_projector_vel_MG(void *arg1, struct vector *in,
			    void *arg3, struct vector *out
/* performs the boudary condition projection and multigrid
   preconditioning for the advection diffusion subproblems of the
   Wathen block preconditioner for Navier Stokes problems, 
     out = P*C^-1*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there), and C^-1 repressents the action of the multigrid
   preconditioning, that is one V cycle
   
   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
           in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     preconditioner

   Output: out    - (given by reference), P*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err, dir2;
  FIDX i, j, d, len, dir1;

  struct navsto_matrix *K;

  FIDX dim, vx_nr, eh_w, eh_nr, eg_w, eg_nr;
  struct vector xi, bi, invdiagi, dxi;
  struct mesh *m;
  struct multilvl *ml;
  struct mgdata *mg;
  double *xl, *dxl, *bl, *invdiag;

  FIDX lvl, lmax, lmin, lvl_vx, smooths, vcycles, vccount;

  double drow;

  double normres, normb, stop_eps;

  double alpha, ennorm;

  const double damping = MG_SMOOTHER_DAMPING;

#if ((STABTYPE==3)||(STABTYPE==4))
  /* coarse meshes use linear elements  ==> use linear interpolation
     in MG, phi1[child*bas_n1+father]= value of the father's basis
     function at node child */
  double *phi2, phi1[9*6]={  0.5, 0, 0, 0.5, 0, 0, /* 0: 0,3 */
			     0, 0, 0, 0.5, 0, 0.5, /* 1: 3,5 */
			     0.5, 0, 0, 0, 0, 0.5, /* 2: 0,5 */
			     0, 0.5, 0, 0, 0.5, 0, /* 3: 1,4 */
			     0, 0, 0, 0.5, 0.5, 0, /* 4: 3,4 */
			     0, 0.5, 0, 0.5, 0, 0, /* 5: 1,3 */
			     0, 0, 0.5, 0, 0, 0.5, /* 6: 2,5 */
			     0, 0, 0, 0, 0.5, 0.5, /* 7: 4,5 */
			     0, 0, 0.5, 0, 0.5, 0, /* 8: 2,4 */ };
#endif

  K      =  (navsto_matrix*) arg1;
  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr;
  len    = dim * vx_nr;

  if ((len!=(*in).len)||(len!=(*out).len))
    {
      fprintf(stderr,
	      "navsto_projector_vel_MG: in or out length doesn't match\n");
      return FAIL;
    }

  m      = (*K).msh;
  ml     = (*K).mld;
  mg     = (*K).mg;


#if ((STABTYPE==3)||(STABTYPE==4))
/* coarse meshes use linear elements  ==> use linear interpolation,
   set up phi2 */
  phi2=mg->phih;
#endif


  xl      = (*mg).xl;
  dxl     = (*mg).dxl;
  bl      = (*mg).bl;
  invdiag = (*mg).invdiag;

  smooths  = (*mg).smooths;
  vcycles  = (*mg).vcycles;
  stop_eps = (*mg).stop_eps;

  vx_nr = (*K).vx_nr;
  dim   = (*K).dim;
  eg_nr = (*m).eg_nr;
  eg_w  = (*m).eg_w;
  eh_nr = (*m).eh_nr;
  eh_w  = (*m).eh_w;

  lmax = (*ml).lmax;

  lmin=0; /*lmax-2;*/

  if (lmin<0) lmin=0;

  if (((*in).len!=(*out).len)||((*in).len!=dim*vx_nr))
    {
      fprintf(stderr,
	      "navsto_projector_vel_MG: dimensions don't make sense!\n");
      return FAIL;
    }
  if ( (*K).lvl!=lmax ) 
    {
      fprintf(stderr, "navsto_projector_vel_MG: "
	      "K.lvl and K.ml.lmax mismatch!\n");
      return FAIL;
    }

  /* copy in to bl */
  for (i=0; i<(*in).len; i++)
    {
      bl[i]=(*in).V[i];
    }

  /* apply P^T */
  for (i=0; i<(*K).bn_nr; i++)
    {
      for (d=0; d<dim; d++)
	bl[ (*K).nodes[i]+d*vx_nr ] = 0.0 ;
    }

  /* apply C^-1 */

  /* set the vectors for multilpication with the stiffness matrix */
  dxi.V     = &drow;
  dxi.n_max = 1;
  dxi.len   = 1;


  if (vcycles==0)
    {
      fprintf(stderr, "navsto_projector_vel_MG: vcycles==0 not "
	      "defined\n");
      return FAIL;
    }
  if (stop_eps<0.0)
    {
      fprintf(stderr, "navsto_projector_vel_MG: stop_eps<0 not "
	      "sensible\n");
      return FAIL;
    }
 
  if (vcycles<0)
    {
      /* compute normb */
      normb=0.0;
      for (i=0; i<(*in).len; i++)
	{
	  normb+=bl[i]*bl[i];
	}
      normb=sqrt(normb);
    }
  else
    {
      normb=1.0;
      stop_eps=1;
    }
  /* make sure the process starts */
  normres=2*stop_eps*normb;

  /* perform at most vcycles V-cycles */
  for (vccount=0; (vccount<abs(vcycles))&&(normres>stop_eps*normb);
       vccount++)
    {
      /* V-cycle downward */
      for (lvl=lmax; lvl>=lmin; lvl--)
	{ 
	  lvl_vx=(*ml).nlevl[lvl]-(*ml).nlevl[lvl+1];
	  
	  if ((vccount==0)||(lvl<lmax))
	    {
	      /* set xl=0 */
	      for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
		xl[j]=0.0;
	    }

	  /* make x_l, b_l, invdiag_l accessible as vector */
	  xi.len=lvl_vx;
	  xi.n_max=lvl_vx;
	  xi.V=&xl[(*ml).nlevl[lvl+1]];

	  bi.len=lvl_vx;
	  bi.n_max=lvl_vx;
	  bi.V=&bl[(*ml).nlevl[lvl+1]];

	  invdiagi.len=lvl_vx;
	  invdiagi.n_max=lvl_vx;
	  invdiagi.V=&invdiag[(*ml).nlevl[lvl+1]];

	  if ((lvl==0)&&((*K).cmat.nr!=0))
	    {
	      /* use the coarse grid solver */
	      struct vector bcoarse;
	      bcoarse.len=lvl_vx;
	      bcoarse.n_max=lvl_vx;
	      bcoarse.V=&bl[(*ml).nlevl[lvl+1]];
	      err=coarse_mat_solve( &(*K).cmat, NoTrans,
				    &bcoarse, &xi);
	      FUNCTION_FAILURE_HANDLE( err, coarse_mat_solve,
				       navsto_projector_vel_MG);    
	    }
	  else
	    {
	      /* do smooths Gauss-Seidel sweeps forward */
#ifdef MG_FOUR_DIR_SWEEPS
	      for (dir2=-1; dir2<=+1; dir2+=2)
		for (dir1=0; dir1<dim; dir1++)
		  {
		    err=sparse_GS_sweep_sorted( &(*K).Fs[lvl], &bi,
						&invdiagi,
						&(*K).mlsorters[dir1][
					          (*ml).nlevl[lvl+1]],dir2,
						smooths, damping, &xi);
		    FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_fwd,
					     navsto_projector_vel_MG);
		  }
#else
	      err=sparse_GS_sweep_fwd( &(*K).Fs[lvl], &bi,
				       &invdiagi, smooths, &xi);
	      FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_fwd,
				       navsto_projector_vel_MG);
#endif
		    
	      if (lvl==0)
		{
		  /* do smooths Gauss-Seidel sweeps backward */
#ifdef MG_FOUR_DIR_SWEEPS
		  for (dir2=-1; dir2<=+1; dir2+=2)
		    for (dir1=0; dir1<dim; dir1++)
		      {
			err=sparse_GS_sweep_sorted( &(*K).Fs[lvl], &bi,
						    &invdiagi,
						    &(*K).mlsorters[dir1][
						       (*ml).nlevl[lvl+1]],
						    dir2,
						    smooths, damping, &xi);
			FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_fwd,
						 navsto_projector_vel_MG);
		      }
#else
		  err=sparse_GS_sweep_bwd( &(*K).Fs[lvl], &bi,
					   &invdiagi, smooths, &xi);
		  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_bwd,
					   navsto_projector_vel_MG);
#endif
		}
	    }
	  
	  if (lvl>0)
	    {
	      /* compute the residual on this lvl */
	      /* compute the matrix vector product,
		 Cm*x */
	      dxi.V   = &dxl[(*ml).nlevl[lvl+1]];
	      dxi.len = lvl_vx;
	      dxi.n_max = lvl_vx;
	      err=sparse_mul_mat_vec( &(*K).Fs[lvl], &xi, &dxi);
	      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec,
				       navsto_projector_vel_MG);

	      /* now change dxl to rl=bl-A*xl */
	      for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
		dxl[j]=bl[j]-dxl[j];

	      /* restrict rl to r_l-1 */
#if ((STABTYPE==3)||(STABTYPE==4))
/* coarse meshes use linear elements  ==> use linear interpolation */
	      if (lvl==lmax) mg->phih=phi2; else mg->phih=phi1; 
#endif
	      err=mg_restrict_t2( mg, lvl, lvl-1, dxl);
	      FUNCTION_FAILURE_HANDLE( err, mg_restrict_2,
				       navsto_projector_vel_MG);
	      /* copy r_l-1 to b_l-1 */
	      for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
		{
		  bl[j]=dxl[j];
		}

	      /* apply the projector */
	      for (i=0; i<(*K).bn_nr; i++)
		for (d=0; d<dim; d++)
		  {
		    FIDX node, nodel;
		    node=d*vx_nr+(*K).nodes[i];
		    MLVLFINDENTRY(nodel, node, lvl-1, *ml);
		    if (nodel>=0)
		      {
			bl[nodel]=0.0;
		      }
		  }

	      dxi.V     = &drow;
	      dxi.n_max = 1;
	      dxi.len   = 1;
	    }
	} /* end V-cycle downward */

      /* V-cycle upward */
      for (lvl=lmin+1; lvl<=lmax; lvl++)
	{ 
	  lvl_vx=(*ml).nlevl[lvl]-(*ml).nlevl[lvl+1];

	  /* apply the update computed in the lower level */
	  /* copy the update to dx */
	  for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
	    dxl[j]=xl[j];
	  /* interpolate dx to lvl */
#if ((STABTYPE==3)||(STABTYPE==4))
/* coarse meshes use linear elements  ==> use linear interpolation */
	  if (lvl==lmax) mg->phih=phi2; else mg->phih=phi1; 
#endif

	  err=mg_interpolate_t2( mg, lvl-1, lvl, dxl);
	  FUNCTION_FAILURE_HANDLE( err, mg_interpolate_t2,
				   navsto_projector_vel_MG);

	  /* if necessary, compute the scaling for the factor to give
	     min energy for the residual */
	  if ((*K).MGscale[lvl]==0.0)
	    {
#ifdef MG_MIN_EN
	      if (lvl==lmax)
		{
		  (*K).MGscale[lvl]=1.0;
		}
	      else
		{
		  alpha=0.0;
		  for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
		    alpha+=bl[j]*xl[j];
		  /* make dx_l, accessible as vector */
		  xi.len=lvl_vx;
		  xi.n_max=lvl_vx;
		  xi.V=&dxl[(*ml).nlevl[lvl+1]];
		  
		  err=sparse_vec_mat_vec(&(*K).Fs[lvl], &xi, &xi, &ennorm);
		  FUNCTION_FAILURE_HANDLE( err, sparse_vec_mat_vec,
					   navsto_projector_vel_MG);
		  if (ennorm!=0.0)
		    alpha=alpha/ennorm;
		  else
		    alpha=1.0;
		  
		  printf("\nlvl=%d   alpha=%e", (int) lvl, alpha);
		  /* negative alphas are not to be trusted, thus replaced
		     by alpha=1*/
		  if (alpha<0.25) alpha=1.0;

		  (*K).MGscale[lvl]=alpha;
		}
#else
	      (*K).MGscale[lvl]=1.0;
#endif
	    }

	  alpha=(*K).MGscale[lvl];

	  /* apply the update to xl */
	  for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
	    xl[j]+=alpha*dxl[j];

	  /* make x_l, b_l, invdiag_l accessible as vector */
	  xi.len=lvl_vx;
	  xi.n_max=lvl_vx;
	  xi.V=&xl[(*ml).nlevl[lvl+1]];

	  bi.len=lvl_vx;
	  bi.n_max=lvl_vx;
	  bi.V=&bl[(*ml).nlevl[lvl+1]];

	  invdiagi.len=lvl_vx;
	  invdiagi.n_max=lvl_vx;
	  invdiagi.V=&invdiag[(*ml).nlevl[lvl+1]];
	  
	  /* do smooths Gauss-Seidel sweeps backward */
#ifdef MG_FOUR_DIR_SWEEPS
	  for (dir2=+1; dir2>=-1; dir2-=2)
	    for (dir1=dim-1; dir1>=0; dir1--)
	      {
		err=sparse_GS_sweep_sorted( &(*K).Fs[lvl], &bi,
					    &invdiagi,
					    &(*K).mlsorters[dir1][
					      (*ml).nlevl[lvl+1]],dir2,
					    smooths, damping, &xi);
		FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_fwd,
					 navsto_projector_vel_MG);
	      }
#else
	  err=sparse_GS_sweep_bwd( &(*K).Fs[lvl], &bi,
				   &invdiagi, smooths, &xi);
	  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_bwd,
				   navsto_projector_vel_MG);
#endif

	} /* end V-cycle upward */

      if (vcycles<0)
	{
	  /* compute the residual norm */
	  /* out=A*x */
	  err=sparse_mul_mat_vec( &(*K).Fs[lmax], &xi, out);
	  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec,
				   navsto_projector_vel_MG);
	  /* out -= rhs = in */
	  for (i=0; i<(*out).len; i++)
	    {
	      (*out).V[i]-=bl[i];
	    }
	  /* apply P */
	  for (i=0; i<(*K).bn_nr; i++)
	    {
	      (*out).V[ (*K).nodes[i] ]   = 0.0 ;
	    }
	  normres=0.0;
	  for (i=0; i<(*out).len; i++)
	    {
	      normres+=(*out).V[i]*(*out).V[i];
	    }
	  normres=sqrt(normres);
	  /* printf("vccount=%3d    normres=%+8.1e\n", vccount, normres); /* */
	}
    } /* end V-cycles loop */
  
  (*mg).vccount+=vccount;



  /* copy xl to out */
  for (i=0; i<(*out).len; i++)
    {
      (*out).V[i]=xl[i];
    }

  /* apply P */
  for (i=0; i<(*K).bn_nr; i++)
    {
      for (d=0; d<dim; d++)
	(*out).V[ (*K).nodes[i] +d*vx_nr ] = 0.0 ;
    }

  /* done */
#if ((STABTYPE==3)||(STABTYPE==4))
/* coarse meshes use linear elements  ==> use linear interpolation,
   make sure phi2 is in place */
  mg->phih=phi2;
#endif

  return SUCCESS;
}









/*****************************************************************/
/*****************************************************************/
/*****************************************************************/
/*****************************************************************/
/*****************************************************************/









/*FUNCTION*/
int navsto_projector_vel_W_cycle(void *arg1, struct vector *in,
				 void *arg3, struct vector *out
/* performs the boudary condition projection and multigrid
   preconditioning for the advection diffusion subproblems of the
   Wathen block preconditioner for Navier Stokes problems, 
     out = P*C^-1*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there), and C^-1 repressents the action of the multigrid
   preconditioning, that is one W cycle
   
   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
           in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     preconditioner

   Output: out    - (given by reference), P*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err;
  FIDX i, j, d, len;

  struct navsto_matrix *K;

  FIDX dim, vx_nr;

  struct multilvl *ml;
  struct mgdata *mg;
  double *xl, *dxl, *bl, *invdiag;

  FIDX lvl, lmax, lvlmin, vcycles;

#if ((STABTYPE==3)||(STABTYPE==4))
  fprintf(stderr, "navsto_projector_vel_W_cycle: "
	  "called for incompatible stabtype!\n");
  return FAIL;
#endif

  K      =  (navsto_matrix*) arg1;
  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr;
  len    = dim * vx_nr;

  if ((len!=(*in).len)||(len!=(*out).len))
    {
      fprintf(stderr,
	      "navsto_projector_vel_W_cycle: "
	      "in or out length doesn't match\n");
      return FAIL;
    }

  ml     = (*K).mld;
  mg     = (*K).mg;

  xl      = (*mg).xl;
  dxl     = (*mg).dxl;
  bl      = (*mg).bl;
  invdiag = (*mg).invdiag;

  vcycles  = (*mg).vcycles;

  vx_nr = (*K).vx_nr;
  dim   = (*K).dim;

  lmax = (*ml).lmax;
  lvl  = lmax;

  lvlmin=0;

  if (((*in).len!=(*out).len)||((*in).len!=dim*vx_nr))
    {
      fprintf(stderr,
	      "navsto_projector_vel_W_cycle: dimensions don't make sense!\n");
      return FAIL;
    }
  if ( (*K).lvl!=lmax ) 
    {
      fprintf(stderr, "navsto_projector_vel_W_cycle: "
	      "K.lvl and K.ml.lmax mismatch!\n");
      return FAIL;
    }

  /* copy in to bl */
  for (i=0; i<(*in).len; i++)
    {
      bl[i]=(*in).V[i];
    }

  /* apply P^T */
  for (i=0; i<(*K).bn_nr; i++)
    {
      for (d=0; d<dim; d++)
	bl[ (*K).nodes[i]+d*vx_nr ] = 0.0 ;
    }

  /* apply C^-1 */


  if (vcycles==0)
    {
      fprintf(stderr, "navsto_projector_vel_W_cycle: vcycles==0 not "
	      "defined\n");
      return FAIL;
    }

  /* set xl=0 */
  for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
    xl[j]=0.0;
  
  /* perform vcycles CGCs */
  /*  err=navsto_projector_vel_W_CGC( K, lvl, lvlmin, vcycles); /* */
  err=navsto_projector_vel_W_CGC( K, lvl, lvlmin, 1);
  FUNCTION_FAILURE_HANDLE( err, navsto_projector_vel_W_CGC,
			   navsto_projector_vel_W_cycle);


  /* copy xl to out */
  for (i=0; i<(*out).len; i++)
    {
      (*out).V[i]=xl[i];
    }

  /* apply P, just to make sure */
  for (i=0; i<(*K).bn_nr; i++)
    {
      for (d=0; d<dim; d++)
	(*out).V[ (*K).nodes[i] +d*vx_nr ] = 0.0 ;
    }

  /* done */
  return SUCCESS;
}






/*FUNCTION*/
int navsto_projector_vel_W_CGC(struct navsto_matrix *K,
			       FIDX lvl, FIDX lvlmin, FIDX W_arms
/* forms a W cycle multigrid scheme by recursively doing

   loop W_arms times:
      smooth_bwd
          calc residual, restrict
               navsto_projector_vel_W_CGC 
          inject solution, add to current level
      smooth_fwd

   

   
   Input:  K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
           lvl     - current level
           lvlmin  - lowest level of the hierarchy to be used
	   W_arms  - number of CGC loops, see description above

   In/Out: K.mg    - the lvl part contains in and output: bl=rhs(in),
                     xl=solution(in(should be initialised,e.g.=0)+out),
		     dxl is modified, ALL LOWER LEVELS ARE MODIFIED

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err;
  FIDX i, j, d;

  FIDX dim, vx_nr;
  struct vector xi, bi, invdiagi, dxi;
  struct mesh *m;
  struct multilvl *ml;
  struct mgdata *mg;
  double *xl, *dxl, *bl, *invdiag;

  FIDX lmax, lvl_vx, smooths, Wacount;

  double alpha;
  double damping=MG_SMOOTHER_DAMPING;


  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr;

  m      = (*K).msh;
  ml     = (*K).mld;
  mg     = (*K).mg;

  xl      = (*mg).xl;
  dxl     = (*mg).dxl;
  bl      = (*mg).bl;
  invdiag = (*mg).invdiag;

  smooths  = (*mg).smooths;

  vx_nr = (*K).vx_nr;
  dim   = (*K).dim;

  lmax   = (*ml).lmax;
  lvl_vx = (*ml).nlevl[lvl]-(*ml).nlevl[lvl+1];

  if (( lvl>lmax ) || ( lvl<lvlmin ))
    {
      fprintf(stderr, "navsto_projector_vel_W_CGC: "
	      "lvl out of range! lvl=%d\n", (int) lvl);
      return FAIL;
    }

  /* rhs_lvl is in bl, initialised x_lvl is in xl */

  if (W_arms<=0)
    {
      fprintf(stderr, "navsto_projector_vel_W_CGC: "
	      "W_arms<=0 does not make sense\n");
      return FAIL;
    }

  /* test if we can apply the coarse grid solver */
  if ((lvl==0)&&((*K).cmat.nr!=0))
    {
      struct vector bcoarse;

      /* make x_l accessible as vector */
      xi.len=lvl_vx;
      xi.n_max=lvl_vx;
      xi.V=&xl[(*ml).nlevl[lvl+1]];

      /* use the coarse grid solver */
      bcoarse.len=lvl_vx;
      bcoarse.n_max=lvl_vx;
      bcoarse.V=&bl[(*ml).nlevl[lvl+1]];
      err=coarse_mat_solve( &(*K).cmat, NoTrans,
			    &bcoarse, &xi);
      FUNCTION_FAILURE_HANDLE( err, coarse_mat_solve,
			       navsto_projector_vel_W_CGC);    
    }
  else
    {
      /* perform W_arms smooths anc coarse grid corrections */
      for (Wacount=0; Wacount<W_arms; Wacount++)
	{
	  /* make x_l, b_l, invdiag_l accessible as vector */
	  xi.len=lvl_vx;
	  xi.n_max=lvl_vx;
	  xi.V=&xl[(*ml).nlevl[lvl+1]];

	  bi.len=lvl_vx;
	  bi.n_max=lvl_vx;
	  bi.V=&bl[(*ml).nlevl[lvl+1]];

	  invdiagi.len=lvl_vx;
	  invdiagi.n_max=lvl_vx;
	  invdiagi.V=&invdiag[(*ml).nlevl[lvl+1]];


	  /* do smooths Gauss-Seidel sweeps backward */
#ifdef USE_SORTED_GS
	  err=sparse_GS_sweep_sorted( &(*K).Fs[lvl], &bi, &invdiagi,
				      &(*mg).sorterl[(*ml).nlevl[lvl+1]],
				      -1, smooths, damping, &xi);
	  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_sorted,
				   navsto_projector_vel_W_CGC);
#else
	  err=sparse_GS_sweep_bwd( &(*K).Fs[lvl], &bi,
				   &invdiagi, smooths, &xi);
	  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_fwd,
				   navsto_projector_vel_W_CGC);
#endif

	  /* do the CGC, coarse grid correction */
	  if (lvl>lvlmin)
	    {
	      /* compute the residual on this lvl */
	      /* compute the matrix vector product,
		 Cm*x */
	      dxi.V   = &dxl[(*ml).nlevl[lvl+1]];
	      dxi.len = lvl_vx;
	      dxi.n_max = lvl_vx;
	      err=sparse_mul_mat_vec( &(*K).Fs[lvl], &xi, &dxi);
	      FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec,
				       navsto_projector_vel_W_CGC);

	      /* now change dxl to rl=bl-A*xl */
	      for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
		dxl[j]=bl[j]-dxl[j];

	      /* restrict the residual in dxl to lvl-1 */
	      err=mg_restrict_t2( mg, lvl, lvl-1, dxl);
	      FUNCTION_FAILURE_HANDLE( err, mg_restrict_2,
				       navsto_projector_vel_W_CGC);
	      /* copy the coarse residual from dx_l-1 to b_l-1 */
	      for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
		{
		  bl[j]=dxl[j];
		}

	      /* set xl=0 on lvl-1 */
	      for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
		xl[j]=0.0;

	      /* apply the projector */
	      for (i=0; i<(*K).bn_nr; i++)
		for (d=0; d<dim; d++)
		  {
		    FIDX node, nodel;
		    node=d*vx_nr+(*K).nodes[i];
		    MLVLFINDENTRY(nodel, node, lvl-1, *ml);
		    if (nodel>=0)
		      {
			bl[nodel]=0.0;
		      }
		  }
	      
	      /* do the CGC (recursive call) */
	      err=navsto_projector_vel_W_CGC( K, lvl-1, lvlmin, MG_W_ARMS);
	      FUNCTION_FAILURE_HANDLE( err, navsto_projector_vel_W_CGC,
				       navsto_projector_vel_W_CGC);

	      /* result is in xl (lvl-1) */
	      /* copy result to dxl (lvl-1), interpolate to lvl */
	      for (j=(*ml).nlevl[lvl]; j<(*ml).nlevl[lvl-1]; j++)
		dxl[j]=xl[j];
	      /* interpolate dxl to lvl */
	      err=mg_interpolate_t2( mg, lvl-1, lvl, dxl);
	      FUNCTION_FAILURE_HANDLE( err, mg_interpolate_t2,
				       navsto_projector_vel_W_CGC);

	      /* if necessary, compute the scaling for the factor to give
		 min energy for the residual */
	      if ((*K).MGscale[lvl]==0.0)
		{
#ifdef MG_W_DAMPING		  
		  if (lvl==lmax-1)
		    (*K).MGscale[lvl]=0.9;
		  else
		    (*K).MGscale[lvl]=0.7;
#else
		  (*K).MGscale[lvl]=1.0;
#endif
		}

	      alpha=(*K).MGscale[lvl];

	      /* apply the update to xl */
	      for (j=(*ml).nlevl[lvl+1]; j<(*ml).nlevl[lvl]; j++)
		xl[j]+=alpha*dxl[j];
	      
	    } /* end CGC */


		    
	  /* do smooths Gauss-Seidel sweeps forward */
#ifdef USE_SORTED_GS
	  err=sparse_GS_sweep_sorted( &(*K).Fs[lvl], &bi, &invdiagi,
				      &(*mg).sorterl[(*ml).nlevl[lvl+1]],
				      +1, smooths, damping, &xi);
	  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_sorted,
				   navsto_projector_vel_W_CGC);
#else
	  err=sparse_GS_sweep_fwd( &(*K).Fs[lvl], &bi,
				   &invdiagi, smooths, &xi);
	  FUNCTION_FAILURE_HANDLE( err, sparse_GS_sweep_bwd,
				   navsto_projector_vel_W_CGC);
#endif
	} /* end loop W-arms */
    }/* end if coarse grid solver */

  return SUCCESS;
}










/*****************************************************************/
/*****************************************************************/
/*****************************************************************/
/*****************************************************************/
/*****************************************************************/





/*FUNCTION*/
int navsto_precon_p_lapl_bpx(void *notused, struct vector *in,
			     void *arg3, struct vector *out
/* performs BPX preconditioning for the pressure space Laplacian
   subproblems which occure in the Wathen block preconditioner for
   Navier Stokes problems, 
     out = C^{-1}* in
   where C^{-1} is the BPX preconditioner sum(l=level)Q_l*D_l*Q_l^T 
   
   Input:  notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     "preconditioner" 
           in      - input vector
	   arg3=
           K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 

   Output: out    - (given by reference), P*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err;
  FIDX i, j;
  FIDX vx_nr, pvx_nr, hi_w, hi_nr, fath1, fath2, child, eg_w, eg_nr;
  FIDX level, level_old, lvlmax, hi_end, hi_start; 
  
  struct navsto_matrix *K;
  struct mesh          *m;
  struct multilvl      *ml;
  struct bpxdata       *bpx;
  FIDX                 *pdof;

  K      =  (navsto_matrix*) arg3;
  m      = (*K).msh;
  ml     = (*K).ml1;
  lvlmax = (*ml).lmax;
  bpx    = (*K).bpx;


  pvx_nr = (*K).pvx_nr;
  vx_nr  = (*K).vx_nr;
  pdof   = (*K).pdof;
  eg_nr  = (*m).eg_nr;
  eg_w   = (*m).eg_w;
  hi_nr  = (*m).hi_nr;
  hi_w   = (*m).hi_w;

  if (((*in).len!=(*out).len)||((*in).len!=pvx_nr))
    {
      fprintf(stderr,
	      "navsto_precon_p_lapl_bpx: dimensions don't make sense!\n");
      return FAIL;
    }

  /* the pressure space does not include the highest level of
     hierarchy, but only those below the highest level, and the bpx
     data structure can not use level 0 hierarchy entries, this
     restricts the hierarchy entries which can be used, find the begin
     and end of those which can*/
  i=0;
  while ((i<hi_nr)&&((*m).hier[i*hi_w+MCT2HILVL]<1)) i++;
  hi_start=i;

  i=hi_nr-1;
  while ((i>=hi_start)&&((*m).hier[i*hi_w+MCT2HILVL]>lvlmax-1)) i--;
  hi_end=i;


  if (lvlmax<=0)
    {
      /* no hierarchy, no hierarchical preconditioner,
         copy in to out, eventually apply the coarse grid solver and
         return */ 
      for (i=0; i<(*in).len; i++)
	(*out).V[i]=(*in).V[i];

#ifdef LAPLACIAN_COARSE_GRID_SOLVER
      if (K->cmat_Ap != NULL) 
	{
	  double rhs_mean;

	  /* eliminate the constant part from the rhs */
	  rhs_mean=0.0;
	  for (i=0; i<(*out).len; i++)
	    rhs_mean+=(*out).V[i];
	  rhs_mean/=(*out).len;
	  for (i=0; i<(*out).len; i++)
	    (*out).V[i]-=rhs_mean;

	  err=coarse_mat_solve( K->cmat_Ap, NoTrans, out, out);
	  FUNCTION_FAILURE_HANDLE(err, coarse_mat_solve,
				  navsto_precon_p_lapl_bpx);
	  /* printf("have cmat_Ap but no use\n"); /* */
	}
#endif

      return SUCCESS;
    }
      

  /* copy in to hvec */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]>=0)
	{
	  child=i;
	  MLVLFINDENTRY( child, child, lvlmax-1, *ml );
	  (*bpx).hvec[child]=(*in).V[pdof[i]];
	}
    }

  /* calculate r_l = Q_l^T r = [I P_l^T] r_(l+1), r_lmax=r for all
     levels l, r_l is stored in hvec[ nlevl[l+1]...nlevl[l]-1 ], */
  level_old=lvlmax;
  for (i=hi_end; i>=hi_start; i--)
    {
      level=(*m).hier[i*hi_w+MCT2HILVL];
      if (level<=0) printf("error, level=%d\n", (int) level);
      if (level_old!=level)
	{
	  /* initialise the new (coarser) level */
	  for (j=(*ml).nlevl[level+1]; j<(*ml).nlevl[level]; j++)
	    {
	      /* get the entry number of the coarser level */
	      fath2 = j-(*ml).nlevl[level+1];
	      if (fath2>=(*ml).nlevl[level-1]-(*ml).nlevl[level]) fath2=-1;
	      if (fath2!=-1)
		{
		  /* the I (identity) part of the multiplication,
		     r_l = r_(l+1) */
		  (*bpx).hvec[fath2+(*ml).nlevl[level]]=(*bpx).hvec[j];
		}
	    }
	  level_old=level;
	}

      child=(*m).hier[i*hi_w+MCT2HICHLD];
      fath1=(*m).hier[i*hi_w+MCT2HIFAT1  ];
      fath2=(*m).hier[i*hi_w+MCT2HIFAT1+1];      

      MLVLFINDENTRY(fath1, fath1, level-1, *ml);
      MLVLFINDENTRY(fath2, fath2, level-1, *ml);
      MLVLFINDENTRY(child, child, level,   *ml);

      /* the P_l^T part */
      (*bpx).hvec[fath1] += 0.5*(*bpx).hvec[child];
      (*bpx).hvec[fath2] += 0.5*(*bpx).hvec[child];
    }


#ifdef LAPLACIAN_COARSE_GRID_SOLVER
  if (K->cmat_Ap != NULL) 
    {
      int err;
      FIDX dof;
      struct vector rhs_x;
      double rhs_mean;

      /* move the pressure coarse grid vector into the right
	 representation */
      err=vector_alloc( &rhs_x, K->cmat_Ap_pvxnr);
      FUNCTION_FAILURE_HANDLE(err, vector_alloc, navsto_precon_p_lapl_bpx);

      for (i=0; i< K->cmat_Ap_vxnr; i++)
	{
	  if ( K->cmat_Ap_pdof[i] >=0)
	    {
	      dof=i;
	      MLVLFINDENTRY(dof, dof, 0, *ml);
	      
	      rhs_x.V[ K->cmat_Ap_pdof[i] ]= (*bpx).hvec[dof];
	    }
	}

      /* eliminate the constant part from the rhs */
      rhs_mean=0.0;
      for (i=0; i<rhs_x.len; i++)
	rhs_mean+=rhs_x.V[i];
      rhs_mean/=rhs_x.len;
      for (i=0; i<rhs_x.len; i++)
	rhs_x.V[i]-=rhs_mean;
      rhs_x.V[0]=0.0;

      /* coarse grid solve */
      err=coarse_mat_solve( K->cmat_Ap, NoTrans, &rhs_x, &rhs_x);
      FUNCTION_FAILURE_HANDLE(err, coarse_mat_solve,
			      navsto_precon_p_lapl_bpx);

      /* eliminate the constant part from the solution */
      rhs_mean=0.0;
      for (i=0; i<rhs_x.len; i++)
	rhs_mean+=rhs_x.V[i];
      rhs_mean/=rhs_x.len;
      for (i=0; i<rhs_x.len; i++)
	rhs_x.V[i]-=rhs_mean;

      /* write the solution to w_0 */
      for (i=0; i< K->cmat_Ap_vxnr; i++)
	{
	  if ( K->cmat_Ap_pdof[i] >=0)
	    {
	      dof=i;
	      MLVLFINDENTRY(dof, dof, 0, *ml);
	      
	      (*bpx).hvec[dof] = rhs_x.V[ K->cmat_Ap_pdof[i] ];
	    }
	}

      vector_free( &rhs_x );
    }
#endif

  /* calculate w_l = [I ; P_l] w_(l-1) + D_l r_l, w_0=K_0^(-1) r_0 */
  /* initialise the coarsest level, 
     nothing has to be done in 2d as D_0 = I */

  /* calculate w_l = [I ; P_l] w_(l-1) + D_l r_l, */
  level_old=0; 
  for (i=hi_start; i<=hi_end; i++)
    {
      level=(*m).hier[i*hi_w+MCT2HILVL];

      if (level_old!=level)
	{
	  /* initialise the new (finer) level */
	  for (j=(*ml).nlevl[level+1]; j<(*ml).nlevl[level]; j++)
	    {
	      /* get the entry number of the coarser level */
	      fath2 = j-(*ml).nlevl[level+1];
	      if (fath2>=(*ml).nlevl[level-1]-(*ml).nlevl[level]) fath2=-1;
	      if (fath2!=-1)
		{
		  /* the I (identity) part of the multiplication,
		     w_l = w_(l-1) + D_l r_l,
		     D_l=I in 2d, r_l=hvec[j], so just add w_(l-1)
		  */
		  (*bpx).hvec[j]+=(*bpx).hvec[fath2+(*ml).nlevl[level]];
		}
	    }
	  level_old=level;
	}
      
      child=(*m).hier[i*hi_w+MCT2HICHLD];
      fath1=(*m).hier[i*hi_w+MCT2HIFAT1  ];
      fath2=(*m).hier[i*hi_w+MCT2HIFAT1+1];      
      
      MLVLFINDENTRY(fath1, fath1, level-1, *ml);
      MLVLFINDENTRY(fath2, fath2, level-1, *ml);
      MLVLFINDENTRY(child, child, level,   *ml);
      
      /* the P_(l-1) part */
      (*bpx).hvec[child]+=0.5*( (*bpx).hvec[fath1] + (*bpx).hvec[fath2] );
    }

  /* copy to out */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]>=0)
	{
	  child=i;
	  MLVLFINDENTRY( child, child, lvlmax-1, *ml );
	  (*out).V[pdof[i]]=(*bpx).hvec[child];
	}
    }

  /* done */

  return SUCCESS;
}










/*FUNCTION*/
int navsto_mg_SDFEM_precon(void *arg1, struct vector *in,
			   void *notused,
			   struct vector *out
/* performs preconditioning and the boudary condition projection for
   the linearizations of Navier-Stokes problems, 
     out = P*C^-1*P^T * in
   where P projects the velocity components of boundary nodes to zero,
   (such that addition of a projected vector doesn't change the
   velocity there) and projects the pressure such that it has zero
   mean 

   C^-1 is a W-multigrid cycle 

   Input:  arg1=
           K       - navsto_matrix struct holding the boundary
                     projection and preconditioner data 
	   in      - input vector
	   notused - well, it is not used but in the interface to
                     allow this function to be used as a
                     preconditioner

   Output: out    - (given by reference), P*C^-1*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  int  err;
  FIDX dim, lvl, vx_nr, pvx_nr;
  struct vector rhs, x, res;
  struct navsto_matrix *K;
  struct mgdata        *mg, *mg1;

  K      =  (navsto_matrix*) arg1;
  mg     = (*K).mg;
  mg1    = (*K).mg1;

  lvl    = (*K).lvl;
  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr;
  pvx_nr = (*K).pvx_nr;

  if (((*in).len!=(*out).len)||((*in).len!=dim*vx_nr+pvx_nr))
    {
      fprintf(stderr,
	      "navsto_mg_SDFEM_precon: dimensions don't make sense!\n");
      return FAIL;
    }

  err=vector_alloc( &rhs, (*in).len);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_mg_SDFEM_precon);
  err=vector_alloc( &x, (*in).len);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_mg_SDFEM_precon);
  err=vector_alloc( &res, (*in).len);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_mg_SDFEM_precon);

  /* test interpolation operator */
  /* if (lvl>0)
     {
     int i, bigNl, bigNL;
     bigNL=dim*vx_nr+pvx_nr;
     bigNl=dim*(*K).vx_nr_lvl[lvl-1]+(*K).pvx_nr_lvl[lvl-1];
     x.len=bigNl;
     for (i=0; i<bigNl; i++)
     x.V[i]=1.0;
     err=navsto_mg_push_vec_SDFEM( &x, K->mg->dxl, K->mg1->dxl, lvl-1, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM, test);
     err=mg_interpolate_t2(K->mg,lvl-1,lvl,K->mg->dxl);
     FUNCTION_FAILURE_HANDLE(err,mg_interpolate_t2, test);
     err=mg_interpolate_t1_t2(K->mg1,lvl-1,lvl,
     K->mg1->dxl);
     FUNCTION_FAILURE_HANDLE(err,mg_interpolate_t1_t2, test);
     err=navsto_mg_pull_vec_SDFEM( &res, K->mg->dxl, K->mg1->dxl, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM, test);
     printf("begin test\n");
     for (i=0; i<bigNL; i++)
     printf("  ones[%3d]=%e\n",i,res.V[i]);
     printf("end   test\n");

     err=navsto_mg_push_vec_SDFEM( &res, K->mg->xl, K->mg1->xl, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,test);
     err=mg_restrict_t2(K->mg,lvl,lvl-1,K->mg->xl);
     FUNCTION_FAILURE_HANDLE(err,mg_restrict_t2, test);
     err=mg_restrict_t1_t2(K->mg1,lvl,lvl-1,K->mg1->xl);
     FUNCTION_FAILURE_HANDLE(err,mg_restrict_t1_t2,test);
     err=navsto_mg_pull_vec_SDFEM( &x, K->mg->xl, K->mg1->xl, lvl-1, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM, test);
     printf("begin test\n");
     for (i=0; i<bigNl; i++)
     printf("  twos[%3d]=%e\n",i,x.V[i]);
     printf("end   test\n");

     x.len=bigNL;
     exit(1);
     }/* */

  /* apply P*C^{-1}*P^T */ 
  err=navsto_mg_push_vec_SDFEM(in, mg->bl, mg1->bl, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,
			   navsto_mg_SDFEM_precon);

  err=navsto_mg_Wcycle_SDFEM(K, &rhs, &x, &res, lvl);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_Wcycle_SDFEM,
			   navsto_mg_SDFEM_precon);

  err=navsto_mg_pull_vec_SDFEM(out, mg->dxl, mg1->dxl, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM,
			   navsto_mg_SDFEM_precon);

  /* // test the smoother only 
     {
     FIDX i,k, bigN;
     bigN=x.len;
     for (i=0; i<bigN; i++)
     rhs.V[i]=(*in).V[i];
     for (i=0; i<bigN; i++)
     x.V[i]= 0.0;
     err=navsto_mg_projector_trans_SDFEM( &rhs, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
     test);
     for (k=0; k<15; k++)
     {
     err=navsto_mg_smoother_SDFEM( &x, &rhs, -1,  lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_smoother_SDFEM,
     test);
     err=navsto_mg_projector_SDFEM( &x, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
     test); 
     err=navsto_mg_smoother_SDFEM( &x, &rhs, +1,  lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_smoother_SDFEM,
     test);
     err=navsto_mg_projector_SDFEM( &x, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
     test); 
     {
     FIDX i;
     double normres=0.0, normx=0.0;
     err=sparse_mul_mat_vec( &(*K).Fs[lvl], &x, &res);
     FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
     test);
     for (i=0; i<bigN; i++)
     res.V[i] = rhs.V[i] - res.V[i];
     for (i=0; i<bigN; i++)
     normres+= res.V[i] * res.V[i];
     for (i=0; i<bigN; i++)
     normx+= x.V[i] * x.V[i];
     printf(" smooth  k=%d   norm(r)=%e  norm(x)=%e\n",
     k,sqrt(normres),sqrt(normx));
     }
     }
     err=navsto_mg_projector_SDFEM( &x, lvl, K);
     FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
     test);
     for (i=0; i<bigN; i++)
     (*out).V[i]= x.V[i];
     }*/
 

  /* done */
  vector_free( &res );
  vector_free( &x );
  vector_free( &rhs );

  return SUCCESS;
}

/*FUNCTION*/
int navsto_mg_Wcycle_SDFEM(struct navsto_matrix *K, struct vector *bh,
			   struct vector *xh, struct vector *rh, FIDX lvl
/* performs a W-multigrid cycle to approximate the solution of 
     K*xl = bl
   bl and xl are stored in mg and mg1: mg->velocities, mg1->pressure,
   read and write of these vectors should be done with
   navsto_mg_pull_vec_SDFEM and navsto_mg_push_vec_SDFEM respectively

   the W-cycle is performed by recursively
      1. setting xl=0
      2. smoothing 
      3. if lvl>0
           a. restrict
           b. W-cycle on lvl-1
           c. interpolate
      4. smoothing
      5. define resulting xl, store in dxl.

   Input:  lvl     - the current level

   In/Out: K       - navsto_matrix struct,
                     the help vectors in mg and mg1 are modified,
                     otherwise only input
           bh, xh, rh
                   - help vectors used for interaction with the matrix
                     on th current level

   Output: out    - (given by reference), P*C^-1*P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			   ){
  int  err;
  FIDX i, cgc;
  FIDX dim, vx_nr, pvx_nr, bigN;
  FIDX bh_len, xh_len, rh_len;

  int number_cgcs=2;

  struct mgdata        *mg, *mg1;

  mg     = (*K).mg;
  mg1    = (*K).mg1;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];
  pvx_nr = (*K).pvx_nr_lvl[lvl];

  bigN   = dim * vx_nr + pvx_nr;

  /* save old lengths */
  bh_len=bh->len; xh_len=xh->len; rh_len=rh->len;

  if ( ( (bh_len<bigN)||(xh_len<bigN) )||(rh_len<bigN) )
    {
      fprintf(stderr,"navsto_mg_Wcycle_SDFEM: help vectors to small\n");
      return FAIL;
    }

  /* set the help vector lengts to the current level needs */
  bh->len = bigN; xh->len = bigN; rh->len = bigN;

  /* set xl=0 */
  for (i=0; i<bigN; i++)
    (*xh).V[i]=0.0;

  /* get bl */
  err=navsto_mg_pull_vec_SDFEM( bh, mg->bl, mg1->bl, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM,
			   navsto_mg_Wcycle_SDFEM);

  /* do the projection P^T */
  err=navsto_mg_projector_trans_SDFEM( bh, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
			   navsto_mg_Wcycle_SDFEM);

  /* push the projected residual */
  err=navsto_mg_push_vec_SDFEM( bh, mg->bl, mg1->bl, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,
			   navsto_mg_Wcycle_SDFEM);

  /* #define DEBUG_MG */
#ifdef DEBUG_MG  
  {
    FIDX i;
    double normres=0.0;
    for (i=0; i<bigN; i++)
      normres+= (*bh).V[i] * (*bh).V[i];
    printf("W-cycle in   lvl=%d   norm(b)=%e\n",(int) lvl,sqrt(normres));
  }
#endif

  if (lvl>0)
    for (cgc=0; cgc<number_cgcs; cgc++)
    {
      /* apply the smoother backwards */
      err=navsto_mg_smoother_SDFEM( xh, bh, -1,  lvl, K);
      FUNCTION_FAILURE_HANDLE( err, navsto_mg_smoother_SDFEM,
			       navsto_mg_Wcycle_SDFEM);

#ifdef DEBUG_MG  
      {
	FIDX i;
	double normres=0.0;

	/* compute residual */
	/* rl=K*xl */
	err=sparse_mul_mat_vec( &(*K).Fs[lvl], xh, rh);
	FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
				 navsto_mg_Wcycle_SDFEM);
	/* rl = bl - K*xl */
	for (i=0; i<bigN; i++)
	  (*rh).V[i] = (*bh).V[i] - (*rh).V[i];
	/* do the projection P^T */
	err=navsto_mg_projector_trans_SDFEM( rh, lvl, K);
	FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
				 navsto_mg_Wcycle_SDFEM);
	for (i=0; i<bigN; i++)
	  normres+= (*rh).V[i] * (*rh).V[i];

	printf(" pre smooth  lvl=%d   norm(r)=%e\n",(int) lvl,sqrt(normres));
      }
#endif

      /* if coarser grid exists, do coarse grid correction */
      if (lvl>0)
	{
	  /* compute residual */
	  /* rl=K*xl */
	  err=sparse_mul_mat_vec( &(*K).Fs[lvl], xh, rh);
	  FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
				   navsto_mg_Wcycle_SDFEM);
	  /* rl = bl - K*xl */
	  for (i=0; i<bigN; i++)
	    (*rh).V[i] = (*bh).V[i] - (*rh).V[i];

	  /* save rl, xl in the multilevel vectors */
	  err=navsto_mg_push_vec_SDFEM( rh, mg->dxl, mg1->dxl, lvl, K);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);

	  err=navsto_mg_push_vec_SDFEM( xh, mg->xl, mg1->xl, lvl, K);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);

	  /* restrict rl (in dxl) */
	  /* velocities */
	  err=mg_restrict_t2( mg, lvl,lvl-1,mg->dxl);
	  FUNCTION_FAILURE_HANDLE( err, mg_restrict_t2,
				   navsto_mg_Wcycle_SDFEM);
	  /* pressure */
	  err=mg_restrict_t1_t2( mg1, lvl, lvl-1,
				 mg1->dxl);
	  FUNCTION_FAILURE_HANDLE( err, mg_restrict_t1_t2,
				   navsto_mg_Wcycle_SDFEM);
  
	  /* copy rl-1(in dxl-1) to bl-1 */
	  err=navsto_mg_copy_vec_SDFEM( mg->dxl,mg1->dxl, mg->bl,mg1->bl,
					lvl-1, K );
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_copy_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);

	  /* do the coarse grid correction */
	  err=navsto_mg_Wcycle_SDFEM( K, bh, xh, rh, lvl-1);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_Wcycle_SDFEM,
				   navsto_mg_Wcycle_SDFEM);

	  /* the result is in dxl-1 */
	  /* interpolate dxl-1 */
	  /* velocities */
	  err=mg_interpolate_t2(mg,lvl-1,lvl,mg->dxl);
	  FUNCTION_FAILURE_HANDLE(err,mg_interpolate_t2,
				  navsto_mg_Wcycle_SDFEM);
	  /* pressure */
	  err=mg_interpolate_t1_t2(mg1,lvl-1,lvl,mg1->dxl);
	  FUNCTION_FAILURE_HANDLE(err,mg_interpolate_t1_t2,
				  navsto_mg_Wcycle_SDFEM);

	  /* copy bl, to bh */
	  err=navsto_mg_pull_vec_SDFEM( bh, mg->bl, mg1->bl, lvl, K);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);
	  /* copy xl, to xh */
	  err=navsto_mg_pull_vec_SDFEM( xh, mg->xl, mg1->xl, lvl, K);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);
	  /* copy dxl, to rh */
	  err=navsto_mg_pull_vec_SDFEM( rh, mg->dxl, mg1->dxl, lvl, K);
	  FUNCTION_FAILURE_HANDLE( err, navsto_mg_pull_vec_SDFEM,
				   navsto_mg_Wcycle_SDFEM);

	  /* so the coarse grid correction is in rh, add it to the
	     current solution in xh */
	  for (i=0; i<bigN; i++)
	    (*xh).V[i] += (*rh).V[i];
	}/* end if coarser grid exists, do coarse grid correction */

      /* apply the smoother forwards */
      err=navsto_mg_smoother_SDFEM( xh, bh, +1,  lvl, K);
      FUNCTION_FAILURE_HANDLE( err, navsto_mg_smoother_SDFEM,
			       navsto_mg_Wcycle_SDFEM);

#ifdef DEBUG_MG  
      {
	FIDX i;
	double normres=0.0;

	/* compute residual */
	/* rl=K*xl */
	err=sparse_mul_mat_vec( &(*K).Fs[lvl], xh, rh);
	FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
				 navsto_mg_Wcycle_SDFEM);
	/* rl = bl - K*xl */
	for (i=0; i<bigN; i++)
	  (*rh).V[i] = (*bh).V[i] - (*rh).V[i];
	/* do the projection P^T */
	err=navsto_mg_projector_trans_SDFEM( rh, lvl, K);
	FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
				 navsto_mg_Wcycle_SDFEM);
	for (i=0; i<bigN; i++)
	  normres+= (*rh).V[i] * (*rh).V[i];

	printf(" post smooth lvl=%d   norm(r)=%e\n",(int) lvl,sqrt(normres));
      }
#endif

    } /* one V of the W-cycle is now complete, do the next one ? */
  else /* is coarsest level, use coarse grid solver */
    {
      if ( (*K).cmat.nr!=0 )
	{
	  err=coarse_mat_solve( &(*K).cmat, NoTrans,
				bh, xh);
	  FUNCTION_FAILURE_HANDLE( err, coarse_mat_solve,
				   navsto_mg_Wcycle_SDFEM );
	}
      else
	{
	  /* just copy */
	  for (i=0; i<bigN; i++)
	    (*xh).V[i] = (*bh).V[i]; 
	}
    }

  /* do the projection P */
  err=navsto_mg_projector_SDFEM( xh, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_SDFEM,
			   navsto_mg_Wcycle_SDFEM);

  /* W-cycle is complete, store the resulting xl in the multilevel
     vectors, part dxl */
  err=navsto_mg_push_vec_SDFEM( xh, mg->dxl, mg1->dxl, lvl, K);
  FUNCTION_FAILURE_HANDLE( err, navsto_mg_push_vec_SDFEM,
			   navsto_mg_Wcycle_SDFEM);

#ifdef DEBUG_MG  
  {
    FIDX i;
    double normres=0.0;

    /* compute residual */
    /* rl=K*xl */
    err=sparse_mul_mat_vec( &(*K).Fs[lvl], xh, rh);
    FUNCTION_FAILURE_HANDLE( err, sparse_mul_mat_vec, 
			     navsto_mg_Wcycle_SDFEM);
    /* rl = bl - K*xl */
    for (i=0; i<bigN; i++)
      (*rh).V[i] = (*bh).V[i] - (*rh).V[i];
    /* do the projection P^T */
    err=navsto_mg_projector_trans_SDFEM( rh, lvl, K);
    FUNCTION_FAILURE_HANDLE( err, navsto_mg_projector_trans_SDFEM,
			   navsto_mg_Wcycle_SDFEM);
    for (i=0; i<bigN; i++)
      normres+= (*rh).V[i] * (*rh).V[i];

    printf("W-cycle out  lvl=%d   norm(r)=%e\n",(int) lvl,sqrt(normres));
  }
#endif

  /* set the help vector lengts to their original values */
  bh->len = bh_len; xh->len = xh_len; rh->len = rh_len;

  return SUCCESS;
}


/*FUNCTION*/
int navsto_mg_smoother_SDFEM(struct vector *x, struct vector *rhs,
			     int dir, FIDX lvl, struct navsto_matrix *K
/* performes a smoothing sweep using the box smoother (aka Vanka
   smoother), used in multigrid schemes to solve
     K*x = rhs

   The box smoother comprises of solving local equation systems
   corresponding to small patches of the mesh in a Gauss-Seidel like
   manner. (It is a generalisation of the Gauss-Seidel scheme to
   coupled equations.)

   Our patches are defined by the support of the pressure basis
   functions (linear). On each such patch we solve a local dirichlet
   problem, adjusting only those velocity dofs which are not on the
   boundary of the patch and the pressure dof that defines the patch.
   
   Input:  rhs     - the right hand side vector
           dir     - direction of the sweep,
	             dir=+1 -> forward, dir=-1 -> backward,
	   lvl     - level of the mesh that the given vectors
	             correspond to
           K       - navsto_matrix struct, holds the necessary data

   In/Out: x       - the solution vector, it is updated according to
                     the smoothing scheme
           
   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  int  err;
  FIDX i, j, d, thep, thei;
  FIDX dim, vx_nr, pvx_nr, ndofs, LDKloc;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr, pdof_sorter_n;
  FIDX *pdof, *ldofs, *pdof_sorter;
  int *ipiv;
  double *vel_weight;
  struct ilist *this_;

  struct vector lrhsv; /* Local rhs, after solving, the local
			  solutiuon update */
  double *Kloc;        /* local system matrix */
  double damping;      /* damping parameter for the smoother */


  FIDX begin, end, incr;

  struct sparse *Klvl;
  struct ilist** pdof_vels;

#ifndef BOXSMOOTHER_ELEMBASED
  /* p-node support based smoothing */
    /* damping = 0.1;       /* good for pressure node support dirichlet
       problem based smoothing */
  /* damping = 0.5;/*1.0;       /* good for pressure node support dirichlet
     problem based smoothing, sorted nodes */
  damping = BOXSMOOTHER_DAMPING;
#else
  FIDX *elem=K->elem_lvl[lvl];
  FIDX el_nr=K->el_nr_lvl[lvl];
  FIDX el_w =K->msh->el_w;
  FIDX *bc_marker=K->bc_marker_lvl[lvl];
  FIDX elem_sorter_n=K->elem_sorter_n[lvl];
  FIDX *elem_sorter=K->elem_sorter[lvl];

  /* damping = 0.45;       /* for element dofs based smoothing, unsorted */
  /* damping = 0.7;       /* for element dofs based smoothing, sorted */
  damping = BOXSMOOTHER_DAMPING;
#endif

  Klvl = &(*K).Fs[lvl];

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];

  pvx_nr        = (*K).pvx_nr_lvl[lvl];
  pdof          = (*K).pdof_lvl[lvl];
  pdof_vels     = (*K).pdof_vels[lvl];
  pdof_sorter_n = K->pdof_sorter_n[lvl];
  pdof_sorter   = K->pdof_sorter[lvl];
  vel_weight    = K->vel_weight[lvl];
  
  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( (( (*rhs).len != dim_vx_nr_p_pvx_nr )
	|| ( (*x).len != dim_vx_nr_p_pvx_nr ) )
       || ( (*Klvl).row_nr != dim_vx_nr_p_pvx_nr ) )
    {
      fprintf(stderr, "navsto_mg_smoother_SDFEM: "
	      "dimensions don't make sense!\n");
      return FAIL;
    }

#ifndef BOXSMOOTHER_ELEMBASED
  /* p-node support based smoothing */
  LDKloc = (*K).max_n_pdof_vels*dim+1;
#else
  /* element based smoothing */
  LDKloc = 6*dim+3;
#endif

  /* allocate local memory */
  err=vector_alloc( &lrhsv, LDKloc);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, navsto_mg_smoother_SDFEM);

  TRY_MALLOC( ldofs, LDKloc, FIDX, navsto_mg_smoother_SDFEM);
  TRY_MALLOC( ipiv,  LDKloc, int, navsto_mg_smoother_SDFEM);
  TRY_MALLOC( Kloc,  LDKloc*LDKloc, double, navsto_mg_smoother_SDFEM);
  

  if (dir==+1)
    {
      begin = 0;
#ifndef BOXSMOOTHER_ELEMBASED
      /* p-node support based smoothing */
      end   = pdof_sorter_n;
#else
      /* element based smoothing */
      end   = elem_sorter_n;
#endif
      incr  = +1;
    }
  else if (dir==-1)
    {
#ifndef BOXSMOOTHER_ELEMBASED
      /* p-node support based smoothing */
      begin = pdof_sorter_n-1;
#else
      /* element based smoothing */
      begin = elem_sorter_n-1;
#endif
      end   = -1;
      incr  = -1;
    }
  else
    {
      fprintf(stderr, "navsto_mg_smoother_SDFEM: "
	      "unknown direction dir=%d\n",dir);
      return FAIL;
    }

  /* the loop over all pressure dofs in direction dir */
  for (thei=begin; thei!=end; thei+=incr)
    {
      /* first collect the dofs for this element */
#ifndef BOXSMOOTHER_ELEMBASED
      /* p-node support based smoothing */
      thep = pdof_sorter[thei];
      ndofs=0;
      for (d=0; d<dim; d++)
	{
	  /* velocities */
	  this_= pdof_vels[thep];
	  while (this_!=NULL)
	    {
	      ldofs[ndofs] = d*vx_nr + this_->data;
	      ndofs++;
	      this_ = this_->next;

#ifdef DEBUGFEINS
	      if (ndofs >= LDKloc) /* >= as we add the pressure dof */
		{
		  fprintf(stderr,"navsto_mg_smoother_SDFEM: "
			  "max_n_ldof_vels must be wrong!\n");
		  return FAIL;
		}
#endif
	    }
	}
      /* pressure */
      ldofs[ndofs] = dim*vx_nr + thep;
      ndofs++;

#else
      /* element based smoothing */
      thep = elem_sorter[thei];
      ndofs=0;
      for (d=0; d<dim; d++)
	for (i=0; i<6; i++)
	  {
	    FIDX node=elem[thep*el_w+MCT2ELNOD1+i];
	    if (bc_marker[node])
	      {
		ldofs[ndofs]=d*vx_nr+node;
		ndofs++;
	      }
	  }
      for (i=0; i<3; i++)
	{
	  ldofs[ndofs]=dim*vx_nr+pdof[elem[thep*el_w+MCT2ELNOD1+i]];
	  ndofs++;
	}
#endif

      /* set the size of the vector to the expected value */
      lrhsv.len=ndofs;

      /* evaluate the local residual */
      /* lrhs= K*x -b */
      err=sparse_row_tim_vec( Klvl, ndofs, ldofs, x, &lrhsv);
      FUNCTION_FAILURE_HANDLE( err, sparse_row_tim_vec,
			       navsto_mg_smoother_SDFEM);
      for (i=0; i<ndofs; i++)
	lrhsv.V[i] -= (*rhs).V[ldofs[i]];

      /* extract the local matrix Kloc */
#ifdef BOXSMOOTHER_DIAG_ONLY_KLOC
      /* use only the diagonal of the velocity block of the local
	 matrix,
	 this implementation is inefficient, as the diagonal structure
	 is not exploited, but it suffices to see if this could be a
	 good smoother */
      /* set all zero */
      for (i=0; i<ndofs; i++)
	for (j=0; j<ndofs; j++)
	  {
	    Kloc[j*LDKloc+i]= 0.0;
	  }
      for (i=0; i<ndofs; i++)
	{
	  double *entry;
	  int j;
	    
	  /* diag block */
	  err=sparse_get_entry( Klvl, ldofs[i], ldofs[i], 0, &entry);
	  if (err==SUCCESS)
	    Kloc[i*LDKloc+i]= (*entry);

	  /* B blocks */
	  j=ndofs-1;
	  err=sparse_get_entry( Klvl, ldofs[j], ldofs[i], 0, &entry);
	  if (err==SUCCESS)
	    Kloc[i*LDKloc+j]= (*entry); 

	  err=sparse_get_entry( Klvl, ldofs[i], ldofs[j], 0, &entry);
	  if (err==SUCCESS)
	    Kloc[j*LDKloc+i]= (*entry);

	  }
#else
      /* use the whole local matrix */
      for (i=0; i<ndofs; i++)
	for (j=0; j<ndofs; j++)
	  {
	    double *entry;
	    
	    err=sparse_get_entry( Klvl, ldofs[i], ldofs[j], 0, &entry);

	    if (err==SUCCESS)
	      Kloc[j*LDKloc+i]= (*entry);
	    else
	      Kloc[j*LDKloc+i]= 0.0;
	  }
#endif

      /* apply the weighting on the velocity dofs of lrhs */
      for (i=0; i<ndofs; i++)
	{
	  if (ldofs[i] < dim*vx_nr)
	    {
	      FIDX j;
	      FIDX node = ldofs[i]%vx_nr;
	      double inc_wght=1.0/vel_weight[node];
	      
	      lrhsv.V[i] *= vel_weight[node];
	      /* scale the B blocks of the local problem */
	      /* for (j=0; j<ndofs-1; j++)
		 {
		 Kloc[j*LDKloc+i] *= inc_wght;
		 Kloc[i*LDKloc+j] *= inc_wght;
		 } */
	    }
	}
      /* delete C block */
      /* Kloc[(ndofs-1)*LDKloc+ndofs-1] = 0.0; */
      
      /* solve the local system */
      {
	int info;
	int one=1, neq=ndofs, LDA=LDKloc;
	
	/* DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
	dgesv_(&neq, &one, Kloc, &LDA, ipiv, lrhsv.V, &neq, &info);

	if (info!=0)
	  {
	    fprintf(stderr, "navsto_mg_smoother_SDFEM: "
		    "LAPACK routine dgesv returned with info=%d\n",
		    info);
	    return FAIL;
	  }
      }
      /* local solution update is now in lrhsv */
      if (ndofs==1) printf("me has ndofs==1: thep=%d\n", (int) thep);

      /* apply the weighting on the velocity dofs of lrhs */
      for (i=0; i<ndofs; i++)
	{
	  if (ldofs[i] < dim*vx_nr)
	    {
	      FIDX node = ldofs[i]%vx_nr;
	      lrhsv.V[i] *= vel_weight[node];
	    }
	}


      /* update the solution vector */
      for (i=0; i<ndofs; i++)
	(*x).V[ldofs[i]] -= damping*lrhsv.V[i];
    } /* end loop thep over pdofs */


  /* free local memory */
  free(Kloc);
  free(ipiv);
  free(ldofs);
  vector_free( &lrhsv);

  return SUCCESS;
}







/*FUNCTION*/
int navsto_mg_push_vec_SDFEM(struct vector *vec, double *vml, double *pml,
			     FIDX lvl, struct navsto_matrix *K
/* copies the vector vec into the multilevel vectors, seperated in the
   velocity (vml) and pressure parts (pml)

   Input:  vec     - vector to be copied
           lvl     - level of the mesh that the given vector
	             corresponds to
           K       - navsto_matrix struct, holds the necessary data

   Output: vml     - in level lvl part, the velocities from vec
           pml     - in level lvl part, the pressure from vec

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  FIDX i, dim, vx_nr, pvx_nr;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr;
  FIDX *pdof;

  struct multilvl *ml1, *mld;

  ml1= (*K).ml1;
  mld= (*K).mld;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];

  pvx_nr = (*K).pvx_nr_lvl[lvl];
  pdof   = (*K).pdof_lvl[lvl];

  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( (( (*vec).len != dim_vx_nr_p_pvx_nr )
	|| ( vx_nr != mld->levlvx[lvl] ) )
       || ( ( dim != mld->dim ) || (1 != ml1->dim))
       || ( vx_nr != ml1->levlvx[lvl] ) )
    {
      fprintf(stderr, "navsto_mg_push_vec_SDFEM: "
	      "dimensions don't make sense!\n");

      return FAIL;
    }

  /* velocity components */
  for (i=0; i<dim_vx_nr; i++)
    {
      vml[mld->nlevl[lvl+1] + i] = (*vec).V[i] ;
    }
  /* pressure components */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  pml[ml1->nlevl[lvl+1] + i] = (*vec).V[ dim_vx_nr + pdof[i] ] ;
	}
      else
	{
	  pml[ml1->nlevl[lvl+1] + i] = 0.0;
	}
    }

  return SUCCESS;
}

/*FUNCTION*/
int navsto_mg_pull_vec_SDFEM(struct vector *vec, double *vml, double *pml,
			     FIDX lvl, struct navsto_matrix *K
/* copies a vector stored in multilevel vectors, seperated in the
   velocity (vml) and pressure parts (pml), into the vector vec

   Input:  vml     - in level lvl part, the velocities for vec
           pml     - in level lvl part, the pressure for vec
	   lvl     - level of the mesh that the given vector
	             corresponds to
           K       - navsto_matrix struct, holds the necessary data

   Output: vec     - vector to be copied into
           
   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  FIDX i, dim, vx_nr, pvx_nr;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr;
  FIDX *pdof;
  
  struct multilvl *ml1, *mld;

  ml1= (*K).ml1;
  mld= (*K).mld;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];

  pvx_nr = (*K).pvx_nr_lvl[lvl];
  pdof   = (*K).pdof_lvl[lvl];

  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( (( (*vec).len != dim_vx_nr_p_pvx_nr )
	|| ( vx_nr != mld->levlvx[lvl] ) )
       || ( ( dim != mld->dim ) || (1 != ml1->dim))
       || ( vx_nr != ml1->levlvx[lvl] ) )
    {
      fprintf(stderr, "navsto_mg_pull_vec_SDFEM: "
	      "dimensions don't make sense!\n");
      return FAIL;
    }

  /* velocity components */
  for (i=0; i<dim_vx_nr; i++)
    {
      (*vec).V[i] = vml[mld->nlevl[lvl+1] + i];
    }
  /* pressure components */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  (*vec).V[ dim_vx_nr + pdof[i] ] = pml[ml1->nlevl[lvl+1] + i];
	}
    }

  return SUCCESS;
}




/*FUNCTION*/
int navsto_mg_copy_vec_SDFEM(double *vmlin, double *pmlin,
			     double *vmlout, double *pmlout,
			     FIDX lvl, struct navsto_matrix *K
/* copies multilevel vectors, seperated in the
   velocity (vml) and pressure parts (pml)

   Input:  vmlin   - in level lvl part, the velocities
           pmlin   - in level lvl part, the pressure
	   lvl     - level of the mesh that the given vector
	             corresponds to
           K       - navsto_matrix struct, holds the necessary data

   Output: vmlout  - in level lvl part, the velocities 
           pmlout  - in level lvl part, the pressure

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  FIDX i, dim, vx_nr, pvx_nr;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr;
  FIDX *pdof;

  struct multilvl *ml1, *mld;

  ml1= (*K).ml1;
  mld= (*K).mld;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];

  pvx_nr = (*K).pvx_nr_lvl[lvl];
  pdof   = (*K).pdof_lvl[lvl];

  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( ( vx_nr != mld->levlvx[lvl] ) 
       || ( ( dim != mld->dim ) || (1 != ml1->dim))
       || ( vx_nr != ml1->levlvx[lvl] ) )
    {
      fprintf(stderr, "navsto_mg_copy_vec_SDFEM: "
	      "dimensions don't make sense!\n");
      return FAIL;
    }

  /* velocity components */
  for (i=0; i<dim_vx_nr; i++)
    {
      vmlout[mld->nlevl[lvl+1] + i] = vmlin[mld->nlevl[lvl+1] + i];
    }
  /* pressure components */
  for (i=0; i<vx_nr; i++)
    {
      pmlout[ml1->nlevl[lvl+1] + i] = pmlin[ml1->nlevl[lvl+1] + i];
    }

  return SUCCESS;
}


/*FUNCTION*/
int navsto_mg_projector_trans_SDFEM( struct vector *vec, 
				     FIDX lvl, struct navsto_matrix *K
/* performs application of the transpose of the projector used to
   implement the Dirichlet boundary conditions and the zero mean
   pressure condition

   Input:  lvl     - level of the mesh that the given vector
	             corresponds to
           K       - navsto_matrix struct, holds the necessary data
           
   In/Out: vec     - out = P^T* in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  FIDX i, j, dim, vx_nr, pvx_nr, bn_nr;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr;
  FIDX *pdof, *nodes;

  double pmean, *weight;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];
  bn_nr  = (*K).bn_nr_lvl[lvl];
  nodes  = (*K).nodes_lvl[lvl];

  pvx_nr = (*K).pvx_nr_lvl[lvl];
  pdof   = (*K).pdof_lvl[lvl];
  weight = (*K).weight_lvl[lvl];

  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( (*vec).len != dim_vx_nr_p_pvx_nr )
    {
      fprintf(stderr, "navsto_mg_projector_trans_SDFEM: "
	      "dimensions don't make sense!\n");
      return FAIL;
    }

  /* velocity components */
  /* apply P^T, vec = P^T*vec  */
  for (j=0; j<dim; j++)
    for (i=0; i<bn_nr; i++)
      {
	(*vec).V[ j*vx_nr + nodes[i] ]   = 0.0 ;
      }
  /* pressure components */
  /* apply P^T, vec = P^T*vec  */
  /* get the summ of all pressure components */
  pmean=0.0;
  for (i=dim_vx_nr; i<dim_vx_nr_p_pvx_nr; i++)
    {
      pmean+= (*vec).V[i];
    }
  /* subtract weight[i] times the pressure summ */
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  (*vec).V[dim_vx_nr+pdof[i] ] -= pmean * weight[i];
	}
    }

  return SUCCESS;
}


/*FUNCTION*/
int navsto_mg_projector_SDFEM( struct vector *vec, 
			       FIDX lvl, struct navsto_matrix *K
/* performs application of the projector used to implement the
   Dirichlet boundary conditions and the zero mean pressure condition

   Input:  lvl     - level of the mesh that the given vector
	             corresponds to
           K       - navsto_matrix struct, holds the necessary data
           
   In/Out: vec     - out = P * in

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
			     ){
  FIDX i, j, dim, vx_nr, pvx_nr, bn_nr;
  FIDX dim_vx_nr, dim_vx_nr_p_pvx_nr;
  FIDX *pdof, *nodes;

  double pmean, *weight;

  dim    = (*K).dim;
  vx_nr  = (*K).vx_nr_lvl[lvl];
  bn_nr  = (*K).bn_nr_lvl[lvl];
  nodes  = (*K).nodes_lvl[lvl];

  pvx_nr = (*K).pvx_nr_lvl[lvl];
  pdof   = (*K).pdof_lvl[lvl];
  weight = (*K).weight_lvl[lvl];

  dim_vx_nr          = dim * vx_nr;
  dim_vx_nr_p_pvx_nr = dim_vx_nr + pvx_nr;

  if ( (*vec).len != dim_vx_nr_p_pvx_nr )
    {
      fprintf(stderr, "navsto_mg_projector_SDFEM: "
	      "dimensions don't make sense!\n");
      return FAIL;
    }

  /* pressure components */
  /* apply P, vec = P*vec  */
  /* get the mean pressure */
  pmean=0.0;
  for (i=0; i<vx_nr; i++)
    {
      if (pdof[i]!=-1)
	{
	  pmean+= (*vec).V[dim_vx_nr + pdof[i] ]*weight[i];
	}
    }
  /* subtract the mean pressure */
  for (i=dim_vx_nr; i<dim_vx_nr_p_pvx_nr; i++)
    {
      (*vec).V[ i ] -= pmean;
    }
  /* velocity components */
  /* apply P, vec = P*vec  */
  for (j=0; j<dim; j++)
    for (i=0; i<bn_nr; i++)
      {
	(*vec).V[ j*vx_nr + nodes[i] ]   = 0.0 ;
      }


  return SUCCESS;
}
