/***********************************************************************

    This file is part of
    FEINS, Finite Element Incompressible Navier-Stokes solver,
    which is expanding to a more general FEM solver and toolbox,
    Copyright (C) 2003--2008, Rene Schneider 
    <rene.schneider@mathematik.tu-chemnitz.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

    Minor contributions to this program (for example bug-fixes and
    minor extensions) by third parties automatically transfer the
    copyright to the general author of FEINS, to maintain the
    possibility of commercial re-licensing. If you contribute but wish
    to keep the copyright of your contribution, make that clear in
    your contribution!

    Non-GPL licenses to this program are available upon request from
    the author.

************************************************************************/
/*
FILE stokesassem.c
HEADER stokesassem.h

TO_HEADER:


// useful text macros
#include "feins_macros.h"
// data structures 
#include "datastruc.h"
#include "sparse_struct.h"
#include "navsto_struct.h"

*/


/* prototypes of external functions */
#include <math.h>
#include "elements.h"
#include "cubature.h"
#include "sparse.h"
#include "mesh.h"
#include "stokes_aux.h"
#include "navsto_aux.h"
#include "feins_lapack.h"    /* link against LAPACK! */


/* function prototypes */
#include "stokesassem.h"


/*FUNCTION*/
int stokes_assem_t21(struct navsto_matrix *K, struct vector *rhs,
		     struct vector *u0, 
		     struct mesh *m, FIDX lvl, int noFs
/* performs the assembly of the stiffness matrix K and the right hand
   side vector rhs which result from finite element discretisation of
   the Stokes equation on the T2 mesh m, such that 
       K x = rhs
   defines the approximate solution x=(u_x, u_y, p)  of the Stokes equation
       -nu Laplace(u) + grad p = f
                         div u = 0

   with boundary conditions as given in the mesh m    
   (p2 triangles)

   Input:  m       - the mesh
           lvl     - specifying the current refinement level, used to
                     define where in K.Fs[] the matrices are stored
           noFs    - specifies if the the Fs blocks of K are to be
                     computed or not, noFs==1 ==> they are not,
                     noFs==0 they are computed

   Output: K       - stiffness matrix and preconditioner data in
                     specialised storage, the structure K needs
                     to be given with internal data initialised,
                     here the given level is initialised and memory
                     allocated as necessary
           rhs     - righthand side vector, only the structure rhs 
	             needs to be given, all internal data is
	             initialised here and memory allocated 
	   u0      - initial guess for the solution of the equation
 	             system which satisfies the Dirichlet BC, only the
 	             structure u0 needs to be given, all internal
 	             data is initialised here and memory allocated 

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/ 
		     ){
  int  err;
  FIDX el, i, j, k, r;

  FIDX dim=2, bas_n1, bas_n2, vx_nr, eg_nr, hi_nr, 
    el_w, vx_w, eg_w, fc_w, bd_w, hi_w, fu_w,
    bigN;
  FIDX subtypes[2];

  /* for FORTRAN calls: */
  char fTrans='T', fNoTrans='N';
  double dhelp, done=1.0, dzero=0.0;
  int fone=1, fdim, fbas_n1, fbas_n2; 


  struct sparse *Fs;     /* pointer to the stiffness matrices for this
			    level */

  struct int_data iform; /* integration formula 2d   */
  double *elF;           /* element viscous stiffness matrix */
  double *elB;           /* element pressure stiffness matrix */
  double *elM;           /* element pressure mass matrix */
  double *Jac, *Jacinv;  /* Jacobian of the element mapping and its
			    inverse */
  double x_k[2];         /* world coordinates of the quadrature point
			 */
  double *Jinvgrad;      /* inverse Jacobian times gradphi */
  double detJac;         /* determinant of the Jacobian */
  FIDX   *dofs1, *dofs2; /* degrees of freedom to which the local
			    matrices correspond */
  double nu;             /* nondimensional parameter describing the
			    flow, nu= 1/Re, where Re is the Reynolds
			    number, thus nu=mu/(rho*U*d), where mu is
			    the viscosity of the fluid, rho the
			    density, U is the velocity scale (the
			    solution velocities relate to this scale),
			    d is the lenght scale (the scale by which
			    lengths in the mesh are given),
			    nu is taken from m.param[MC2XPANUPO] */

  FIDX pvx_nr;           /* number of pressure dofs */

  double *phi1, *phi2, *gradp1, *gradp2;



  /****************   init ******************************************/
  /* get integration formula */
  subtypes[0]=1;
  subtypes[1]=2;

  err=cubature_bases( dim, 4, tria, 2, subtypes, &iform); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, stokes_assem_t21);

  /* make phi and gradphi better accessible */
  phi1   = (iform.bases[0]->phi);
  gradp1 = (iform.bases[0]->gradphi);
  bas_n1 = (iform.bases[0]->num_basis);
  phi2   = (iform.bases[1]->phi);
  gradp2 = (iform.bases[1]->gradphi);
  bas_n2 = (iform.bases[1]->num_basis);

  fdim    = (int) dim;
  fbas_n1 = (int) bas_n1;
  fbas_n2 = (int) bas_n2;

  vx_nr = (*m).vx_nr;
  eg_nr = (*m).eg_nr;
  hi_nr = (*m).hi_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;
  fc_w  = (*m).fc_w;
  bd_w  = (*m).bd_w;
  hi_w  = (*m).hi_w;
  fu_w  = (*m).fu_w;

  nu    = (*m).para[MC2XPANUPO];

  if (dim > 2)
    {
      /* cry */
      fprintf(stderr,
	      "stokes_assem_t21: dim >2 not implemented (Jacinv)\n");
      return FAIL;
    }

  /* reinitialise the parts of the matrix struct which will be
     redefined */
  err=navsto_matrix_reinit( K, vx_nr );
  FUNCTION_FAILURE_HANDLE( err, navsto_matrix_reinit, stokes_assem_t21);
  (*K).msh=m;

  TRY_MALLOC( (*K).ml1, 1, struct multilvl, stokes_assem_t21);
  err=multilvl_init_t2( m, 1, (*K).ml1 );
  FUNCTION_FAILURE_HANDLE( err, multilvl_init_t2, stokes_assem_t21);

  TRY_MALLOC( (*K).bpx, 1, struct bpxdata, stokes_assem_t21);
  err=bpx_init_tx( (*K).bpx, (*K).ml1 );
  FUNCTION_FAILURE_HANDLE( err, bpx_init_tx, stokes_assem_t21);

  /* initialise K */
  (*K).vx_nr  = vx_nr;
  (*K).dim    = dim;

  err=stokes_pdof_init_t21( m, K);
  FUNCTION_FAILURE_HANDLE( err, stokes_pdof_init_t21, stokes_assem_t21);
  pvx_nr=(*K).pvx_nr;

  /* allocate space for Fs if needed */
  if (noFs!=1)
    {
      Fs=&(*K).Fs[lvl];
      err=sparse_flex_alloc( Fs, vx_nr);
      FUNCTION_FAILURE_HANDLE( err, sparse_flex_alloc, stokes_assem_t21);

      TRY_MALLOC( (*K).mg, 1, struct mgdata, stokes_assem_t21);
    }
  else Fs=NULL;

  for (i=0; i<dim; i++)
    {
      err=sparse_flex_alloc( &(*K).Bs[i], pvx_nr);
      FUNCTION_FAILURE_HANDLE( err, sparse_flex_alloc, stokes_assem_t21);
    }
  err=sparse_flex_alloc( &(*K).M, pvx_nr);
  FUNCTION_FAILURE_HANDLE( err, sparse_flex_alloc, stokes_assem_t21);


  /* initialise rhs, u0 */
  err=vector_alloc( rhs, dim*vx_nr+pvx_nr );
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, stokes_assem_t21);
  err=vector_alloc( u0,  dim*vx_nr+pvx_nr );
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, stokes_assem_t21);

  /* allocate memory for elF */
  TRY_MALLOC( elF, bas_n2*bas_n2, double, stokes_assem_t21);
  /* allocate memory for elB */
  TRY_MALLOC( elB, bas_n1*(dim*bas_n2), double, stokes_assem_t21);
  /* allocate memory for elM */
  TRY_MALLOC( elM, bas_n1*bas_n1, double, stokes_assem_t21);
  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, stokes_assem_t21);
  TRY_MALLOC( Jacinv, dim*dim, double, stokes_assem_t21);
  /* allocate memory for Jinvgrad */
  TRY_MALLOC( Jinvgrad, dim*bas_n2, double, stokes_assem_t21);
  /* allocate memory for dofs1 */
  TRY_MALLOC( dofs1, bas_n1, FIDX, stokes_assem_t21);
  /* allocate memory for dofs2 */
  TRY_MALLOC( dofs2, bas_n2, FIDX, stokes_assem_t21);
  bigN = dim*vx_nr+pvx_nr;

  /* clear rhs, u0 */
  for (i=0; i<bigN; i++)
    {
      (*rhs).V[i]=0.0;
      (*u0).V[i] =0.0;
    }

  /* loop over all elements */
  for (el=0; el<(*m).el_nr; el++)
    {
      /* set elF to zero */
      for (i=0; i<bas_n2*bas_n2; i++) 
	{
	  elF[i]=0.0;
	}
      /* set elB to zero */
      for (i=0; i<bas_n1*dim*bas_n2; i++) 
	{
	  elB[i]=0.0;
	}
      /* set elM to zero */
      for (i=0; i<bas_n1*bas_n1; i++) 
	{
	  elM[i]=0.0;
	}


      /* use only linear element mapping => Jac is const on the
	 element */
      /* Jac=0 */
      for (i=0; i<dim*dim; i++)
	Jac[i]=0.0;
	  
      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
      for (i=0;i<bas_n1; i++)
	{
	  for (j=0; j<dim; j++)
	    for (r=0; r<dim; r++)
	      {
		Jac[j*dim+r]+= 
		  (*m).vertex[(*m).elem[el*el_w+MCT2ELNOD1+i]*vx_w
			      +MCT2VXSTRT+j]
		  * gradp1[0*bas_n1*dim +i*dim +r];
	      }
	}
	
      /* get detJac */
      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
      
      /* get Jacinv (here direct) */
      Jacinv[0]=1.0/detJac*Jac[3];
      Jacinv[1]=-1.0/detJac*Jac[1];
      Jacinv[2]=-1.0/detJac*Jac[2];
      Jacinv[3]=1.0/detJac*Jac[0];
      
      /* loop over all integration points */
      for (k=0; k<iform.num_points; k++)
	{
	  /* compute x_k and the Jacobian at this point */
	  /* x_k=0 */
	  for (i=0; i<dim; i++)
	    x_k[i]=0.0;
	  
	  /* x_k = sum_{i=nodes} vertex(i)*phi_i, */
	  for (i=0;i<bas_n2; i++)
	    {
	      for (j=0; j<dim; j++)
		{
		  x_k[j]+= 
		    (*m).vertex[(*m).elem[el*el_w+MCT2ELNOD1+i]*vx_w
				+MCT2VXSTRT+j]
		    * phi2[k*bas_n2+i];		  
		}
	    }

	  /* Jinvgrad= Jacinv * gradphi2[k,:,:]  (=real world gradient)
	  */
	  dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n2, &fdim,
		  &done, Jacinv, &fdim, &(gradp2[k*bas_n2*dim]), &fdim,
		  &dzero, Jinvgrad, &fdim );

	  dhelp=iform.weights[k]*fabs(detJac)*nu;
	  /* elF += |detJac|*weigth[k] * nu* Jinvgrad^T*Jinvgrad */
	  dgemm_( &fTrans, &fNoTrans, &fbas_n2, &fbas_n2, &fdim,
		  &dhelp,  Jinvgrad, &fdim, Jinvgrad, &fdim,
		  &done, elF, &fbas_n2 );

	  for (i=0; i<dim; i++)
	    {
	      dhelp=-iform.weights[k]*fabs(detJac);
	      /* elB_i += - |detJac|*weigth[k] * pbi1*Jinvgrad[i,:] */
	      dgemm_( &fNoTrans, &fNoTrans, &fbas_n1, &fbas_n2, &fone,
		      &dhelp, &phi1[k*bas_n1], &fbas_n1, &Jinvgrad[i],
		      &fdim, &done, &elB[i*bas_n1*bas_n2], &fbas_n1 );
	    }

	  dhelp=iform.weights[k]*fabs(detJac)/nu;
	  /* elM += |detJac|*weigth[k] / nu * pbi1*phi1 */
	  dgemm_( &fNoTrans, &fTrans, &fbas_n1, &fbas_n1, &fone,
		  &dhelp, &phi1[k*bas_n1], &fbas_n1, &phi1[k*bas_n1],
		  &fbas_n1, &done, elM, &fbas_n1 );
#ifdef FEINS_have_warning 
#warning "stokes_t21: no rhs assembly yet!"
#endif
	} /* end loop over all integration points */
      
      /* elF, elB and elM are ready, add to K
      */
      for (i=0; i<bas_n1; i++)
	{ /* pressure dofs */
	  dofs1[i]=(*K).pdof[(*m).elem[el*el_w+MCT2ELNOD1+i]];
	}

      /* velocity dofs */
      for (i=0; i<bas_n2; i++)
	{ dofs2[i]=(*m).elem[el*el_w+MCT2ELNOD1+i]; }
      for (j=0; j<dim; j++)
	{
	   if (noFs!=1)
	     {
	       /* elF to Fs[j]   (velocity_j-velocity_j) */
	       err=sparse_add_local(Fs, NoTrans,
				    bas_n2, dofs2, bas_n2, dofs2,
				    elF, bas_n2 );
	       FUNCTION_FAILURE_HANDLE( err, sparse_add_local,
					stokes_assem_t21);
	     }

	  /* elB_j to Bs[j] (pressure-velocity_j) */
	  err=sparse_add_local(&(*K).Bs[j], NoTrans,
			       bas_n1, dofs1, bas_n2, dofs2,
			       &elB[j*bas_n1*bas_n2], bas_n1 );
	  FUNCTION_FAILURE_HANDLE( err, sparse_add_local, stokes_assem_t21);
	}

      /* elM to M (pressure-pressure) */
      err=sparse_add_local( &(*K).M, NoTrans,
			   bas_n1, dofs1, bas_n1, dofs1,
			   elM, bas_n1 );
      FUNCTION_FAILURE_HANDLE( err, sparse_add_local, stokes_assem_t21);

    } /* end loop over all elements */

#ifdef FEINS_have_warning 
#warning "stokes_T21: Neumann BC not accounted for in rhs yet!"
#endif
  /* first Neumann BC, otherwise they would collide with Dirichlet BC
   */

  /* K itself is ready, now implement boundary conditions  */

  err=stokes_diri_bc_init_t2( m, u0, rhs, K);
  FUNCTION_FAILURE_HANDLE( err, stokes_diri_bc_init_t2, stokes_assem_t21);

  /* set the level info in K */
  (*K).lvl=lvl;

  if (noFs!=1)
    {
      if ( (*Fs).type == SP_TYPE_FLEX )
	{
	  err = sparse_convert_compressed_row( Fs );
	  FUNCTION_FAILURE_HANDLE( err, sparse_convert_compressed_row, 
				   stokes_assem_t21 );
	}

      /* initialise the multigrid stuff */
      err=mg_init_tx( (*K).Fs, m, (*K).ml1, (*K).mg, NULL );
      FUNCTION_FAILURE_HANDLE( err, mg_init_tx, stokes_assem_t21);

      for (i=0; i<(*K).bn_nr; i++)
	for (j=0; j<=(*(*K).ml1).lmax; j++)
	  {
	    FIDX node;
	    node=(*K).nodes[i];
	    MLVLFINDENTRY(node, node, j, *(*K).ml1);
	    if (node>=0)
	      {
		(*(*K).mg).invdiag[node]=0.0;
	      }
	  }
    }


  for (i=0; i<dim; i++)
    {
      if ( (*K).Bs[i].type == SP_TYPE_FLEX )
	{
	  err = sparse_convert_compressed_row( &(*K).Bs[i] );
	  FUNCTION_FAILURE_HANDLE( err, sparse_convert_compressed_row, 
				   stokes_assem_t21);
	}
    }
  if ( (*K).M.type == SP_TYPE_FLEX )
    {
      err = sparse_convert_compressed_row( &(*K).M );
      FUNCTION_FAILURE_HANDLE( err, sparse_convert_compressed_row, 
			       stokes_assem_t21 );
    }

  /* store the inverse of the diagonal of M in Minvdiag, to use it in
     a Jacobi preconditioner */
  /* printf("vx_nr=%d    pvx_nr=%d\n", (int) vx_nr, (int) pvx_nr); /* */
  err=sparse_extract_invdiag( &K->M, &K->Minvdiag);
  FUNCTION_FAILURE_HANDLE( err, sparse_extract_invdiag, stokes_assem_t21);

  /* free local data */
  free_intdata (&iform);
  free(elF);
  free(elB);
  free(elM);
  free(Jac);
  free(Jacinv);
  free(Jinvgrad);
  free(dofs1);
  free(dofs2);

  return SUCCESS;
}
  

/*FUNCTION*/
int stokes_diri_bc_init_t2( struct mesh *m, struct vector *uinit, 
			    struct vector *rhs, struct navsto_matrix *K
/* initialises the solution vector uinit to fulfill the Dirichlet
   boundary conditions, it is verified that the boundary conditions
   fulfil balancing in and outflow

   In/Out: m       - the mesh (2d T2), mostly input, only the boundary
                     condition data in the hierarchy entries is corrected
           uinit   - the solution vector, stored as uinit[j*vx_nr+i]
                     gives the j-th velocity component in the i-th
                     node of the mesh, the velocity at dirichlet BC -
                     nodes is set to the bescribed value, 
		     if uinit==NULL it is ignored
           rhs     - rhighthandside vector, the entries in lines
                     corresponding to Dirichlet BC are set to zero,
		     if rhs==NULL it is ignored
           K       - stiffness matrix struct, only K.bn_nr and K.nodes
                     are set (list of Dirichlet BC nodes)

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
		    ){
  int  err;
  FIDX bd, i, j, k;

  FIDX dim=2, vx_nr, hi_nr, bas_n1d1, vx_w, eg_w, bd_w, hi_w;

  struct int_data iform1;/* integration formula 1d   */
  double bint, bmax;     /* for verifying if the DiriBC are
			    consistent, bint = integral[boundary](u*n),
			    bmax = max[boundary](u)
			 */
  FIDX subtypes[2];
  double *phi1d1, *gradp1d1;

  vx_nr = (*m).vx_nr; hi_nr = (*m).hi_nr; vx_w  = (*m).vx_w;
  eg_w  = (*m).eg_w;  bd_w  = (*m).bd_w;  hi_w  = (*m).hi_w;

  /* get integration formula */
  subtypes[0]=1;
  subtypes[1]=2;
  err=cubature_bases( dim-1, 3, inter, 2, subtypes, &iform1); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, stokes_diri_bc_init_t2);

  /* make phi and gradphi better accessible */
  phi1d1   = (iform1.bases[1]->phi);
  gradp1d1 = (iform1.bases[1]->gradphi);
  bas_n1d1 = (iform1.bases[1]->num_basis);

  /* loop over boundary, Dirichlet BC implementation */
  for (bd=0; bd<(*m).bd_nr; bd++)
    {
      /* take Dirichlet BC into account */
      if (((*m).bound[bd*bd_w + MCT2BDTYPE]==1)&&
	  ((*m).edge[(*m).bound[bd*bd_w+MCT2BDEDGE]*eg_w+MCT2EGCHL1]==-1))
	{
	  int  err1, err2, errm;
	  FIDX edi, node1, node2, nodem, fu;
	  double lu0[6];

	  /* check out which nodes */
	  edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
	  node1 = (*m).edge[edi*eg_w + MCT2EGNOD1];
	  node2 = (*m).edge[edi*eg_w + MCT2EGNOD1 +1];
	  nodem = (*m).edge[edi*eg_w + MCT2EGNODM];
    

	  /* take care of the velocity components */

	  /* get the set boundary values */
	  fu=(*m).bound[bd*bd_w + MCT2BDFNCT];
	  err1=mesh_func_eval(m, fu,
			      &(*m).vertex[node1*vx_w+MCT2VXSTRT],
			      0.0, 2, &lu0[0], NULL, NULL );
	  err2=mesh_func_eval(m, fu,
			      &(*m).vertex[node2*vx_w+MCT2VXSTRT],
			      0.0, 2, &lu0[2], NULL, NULL);
	  errm=mesh_func_eval(m, fu,
			      &(*m).vertex[nodem*vx_w+MCT2VXSTRT],
			      0.0, 2, &lu0[4], NULL, NULL);
	  if (((err1!=SUCCESS)||(err2)!=SUCCESS)||((errm)!=SUCCESS))
	    {
	      fprintf(stderr, "stokes_diri_bc_init_t2: "
		      "function_eval error in DiriBC\n");
	      return FAIL;
	    }
	  
	  /* incorporate these BC */
	  for (j=0; j<dim; j++)
	    {
	      if (uinit!=NULL)
		{
		  (*uinit).V[j*vx_nr+node1]= lu0[0+j];
		  (*uinit).V[j*vx_nr+node2]= lu0[2+j];
		  (*uinit).V[j*vx_nr+nodem]= lu0[4+j];
		}

	      
	      if (rhs!=NULL)
		{
		  /* delete the rhs */
		  (*rhs).V[j*vx_nr+node1]= 0.0;
		  (*rhs).V[j*vx_nr+node2]= 0.0;
		  (*rhs).V[j*vx_nr+nodem]= 0.0;
		}
	    }

	  /* mark the nodes as boundary nodes */
	  (*K).nodes[node1]=1;
	  (*K).nodes[node2]=1;
	  (*K).nodes[nodem]=1;
	} /* end this BD is Dirichlet */
      
    } /* end take Dirichlet BC into account */

  if (uinit!=NULL)
    {
      /* check for conforming BC */
      bint = 0.0;
      bmax = 0.0;
      /* integrate u0*n over the boundary, break if a node with no
	 DiriBC is found */
      for (bd=0; bd<(*m).bd_nr; bd++)
	{
	  FIDX edi, node[3];
	  double t[2], n[2], u[2], ds, udotn, help;

	  /* get the nodes */
	  edi     = (*m).bound[bd*bd_w + MCT2BDEDGE];
	  node[0] = (*m).edge[edi*eg_w + MCT2EGNOD1];
	  node[1] = (*m).edge[edi*eg_w + MCT2EGNOD1 +1];
	  node[2] = (*m).edge[edi*eg_w + MCT2EGNODM];

	  /* only for edges of the finest mesh */
	  if ((*m).edge[edi*eg_w+MCT2EGCHL1]==-1)
	    {
	      /* check if node without DiriBC is pressent */
	      if ((((*K).nodes[node[0]]!=1)||((*K).nodes[node[1]]!=1))
		  ||((*K).nodes[node[2]]!=1))
		{
		  bint = 0.0;
		  if (bmax < 1.0)
		    { bmax=1.0; } /* to avoid div by zero */
		  printf("stop checking for conforming BC\n");
		  break; /* stop the boundry loop */
		}

	      /* integrate over the edge */
	      for (k=0; k<iform1.num_points; k++)
		{
		  /* get tangential vector t and BC vector u at this
		     integration poixnt */
		  t[0]=0.0; t[1]=0.0; u[0]=0.0; u[1]=0.0;
		  for (i=0; i<bas_n1d1; i++)
		    {
		      t[0]+=(*m).vertex[node[i]*vx_w+MCT2VXSTRT  ]
			*(gradp1d1[k*bas_n1d1+i]);
		      t[1]+=(*m).vertex[node[i]*vx_w+MCT2VXSTRT+1]
			*(gradp1d1[k*bas_n1d1+i]);
		      u[0]+=(*uinit).V[       node[i]]*(phi1d1[k*bas_n1d1+i]);
		      u[1]+=(*uinit).V[vx_nr+ node[i]]*(phi1d1[k*bas_n1d1+i]);
		    }

		  /* ds= norm(t) */
		  ds=sqrt( t[0]*t[0]+t[1]*t[1] );
	      
		  /* update bmax=max(norm(u0)) */
		  help= sqrt( u[0]*u[0]+u[1]+u[1]);
		  if (bmax<help)
		    { bmax=help; }
	      
		  /* normal vector n= 1/ds *orientation* rotmatrix(pi/2)*t */
		  n[0]= -(*m).bound[bd*bd_w+MCT2BDORIE]*t[1]/ds;
		  n[1]=  (*m).bound[bd*bd_w+MCT2BDORIE]*t[0]/ds;
	      
		  /* u0*n */
		  udotn= u[0]*n[0]+u[1]*n[1];
	      
		  /* integration step */
		  bint = bint + iform1.weights[k]*ds*udotn;
		}
	    }
	}
    } /* end check BC for allowing mass conservation */
  else
    {
      /* if uinit not set, the above check can not be performed, just
	 make sure it is passed */
      bint= 0.0;
      bmax= 1.0;
    }

  /* now correct the projector data to a list of Dirichlet DOFs */
  j=0;
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).nodes[i]==1)
	{
	  /* this is a Dirichlet node, append it to the list */
	  (*K).nodes[j]=i;
	  j++;
	}
    }
  (*K).bn_nr=j;


  /* mark the hierarchy entries according to their dirichlet boundary
     relation */
  for (i=0; i<hi_nr; i++)
    {
      FIDX node1, node2;

      node1=(*m).hier[i*hi_w+MCT2HIFAT1  ];
      node2=(*m).hier[i*hi_w+MCT2HIFAT1+1];
      if ( ((*K).nodes[node1]==1) || ((*K).nodes[node2]==1) )
	{
	  (*m).hier[i*hi_w+MCT2HIBDMK]=1;
	}
      else
	{
	  (*m).hier[i*hi_w+MCT2HIBDMK]=0;
	}
    }

  /* free local data */
  free_intdata (&iform1);

  if (fabs(bint/bmax)>1e-12)
    {
      printf("%s\nbint= %e, bmax= %e, bint/bmax= %e\n",
	     "stokes_diri_bc_init_t2: nonconforming DiriBC:",
	     bint, bmax, bint/bmax );
      return FAIL;
    }
  else
    {
      return SUCCESS;
    }
}



/*FUNCTION*/
int stokes_pdof_init_t21( struct mesh *m, struct navsto_matrix *K
/* initialises the pdof part of K and the projector data for the
   pressure space (weights for the pressure nodes)

   Input:  m       - the mesh (2d T2), mostly input, only the boundary
                     condition data in the hierarchy entries is corrected
   Output: K       - stiffness matrix struct, K.pvx_nr, K.pdof and
                     K.weight are set

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
		    ){
  int  err;
  FIDX el,i,j,k,r;


  struct int_data iform; /* integration formula */
  double elw;            /* element pressure weight */
  double sumweight;      /* sum of the weights for the pressure
			    projector, weights are scaled such that
			    their sum is 1 */
  double *Jac;           /* Jacobian of the element mapping and its
			    inverse */
  double detJac;         /* determinant of the Jacobian */

  FIDX subtypes[1];
  double *phi1, *gradp1;


  FIDX dim=2, bas_n1, vx_nr, pvx_nr, eg_nr, 
    el_w, vx_w, eg_w;

  vx_nr = (*m).vx_nr;
  eg_nr = (*m).eg_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;

  /* get integration formula */
  subtypes[0]=1;
  err=cubature_bases( dim, 4, tria, 1, subtypes, &iform); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, stokes_pdof_init_t21);
  phi1   = (iform.bases[0]->phi);
  gradp1 = (iform.bases[0]->gradphi);
  bas_n1 = (iform.bases[0]->num_basis);

  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, stokes_pdof_init_t21);
  
  /* count the number of pressure DOFs, mark all nodes in K.pdof as
     non-pressure, then mark all pressure nodes as such, then count
     them and assign pdof accordingly */
  TRY_MALLOC( (*K).pdof, vx_nr, FIDX, stokes_pdof_init_t21);
  /* mark all as non-pressure */
  for (i=0; i<vx_nr; i++)
    { (*K).pdof[i]=0; }
  /* run over all edges, mark the pressure nodes */
  for (i=0; i<eg_nr; i++)
    { 
      FIDX node1, node2;
      node1= (*m).edge[i*eg_w+MCT2EGNOD1  ];
      node2= (*m).edge[i*eg_w+MCT2EGNOD1+1];
      (*K).pdof[node1]=1;
      (*K).pdof[node2]=1;
    }
  /* count them and correctly declare declare pdof */
  pvx_nr=0;
  for (i=0; i<vx_nr; i++)
    {
      if ((*K).pdof[i])
	{
	  (*K).pdof[i]=pvx_nr;
	  pvx_nr++;
	}
      else
	{
	  (*K).pdof[i]=-1;
	}

    }

  /* now we know everything we need to initialise K */
  (*K).pvx_nr = pvx_nr;

  /* clear P */
  for (i=0; i<(*m).vx_nr; i++)
    {
      (*K).nodes[i]  = 0  ;
      (*K).weight[i] = 0.0;
    }
  sumweight=0.0;

  /* determine the sum of the integration weigths */
  /* set elw to zero */
  elw=0.0;
  for (k=0; k<iform.num_points; k++)
    {
      /* elW += weigth[k] */
      elw += iform.weights[k];
    } 

  /* loop over all elements */
  for (el=0; el<(*m).el_nr; el++)
    {
      double elwdetJac;
      /* the Jacobian of the element maping (considering linear shape
	 functions) */
      /* Jac=0 */
      for (i=0; i<dim*dim; i++)
	Jac[i]=0.0;
	  
      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
      for (i=0;i<bas_n1; i++)
	for (j=0; j<dim; j++)
	  for (r=0; r<dim; r++)
	    {
	      Jac[j*dim+r]+= 
		(*m).vertex[(*m).elem[el*el_w+MCT2ELNOD1+i]*vx_w
			    +MCT2VXSTRT+j]
		* gradp1[0*bas_n1*dim +i*dim +r];
	    }
	    
      /* get detJac */
      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
      elwdetJac=elw*fabs(detJac);

      for (i=0; i<bas_n1; i++)
	{ 
	  (*K).weight[(*m).elem[el*el_w+MCT2ELNOD1+i]] += elwdetJac;
	}
      sumweight+=bas_n1*elwdetJac;
    } /* end loop over all elements */


  /* now correct the projector data */
  sumweight=1.0/sumweight;
  for (i=0; i<vx_nr; i++)
    {
      (*K).weight[i]=sumweight*(*K).weight[i];
    } /* */
  /* #warning "p_weights=const"
     sumweight=1.0/pvx_nr;
     for (i=0; i<vx_nr; i++)
     {
     if ((*K).weight[i]!=0.0)	(*K).weight[i]=sumweight;
     } /* */

  /* free local data */
  free_intdata (&iform);
  free(Jac);

  return SUCCESS;
} 


/*FUNCTION*/
int stokes_ass_streamfunc_poison_t2(struct sparse *K, struct vector *rhs,
				    struct vector *psi0, struct projector1 *P,
				    struct mesh *m, struct vector *usol
/* performs the assembly of the stiffness matrix K and the right hand
   side vector rhs for the recovery of the streamfunction form the
   velocty field uslo and the finite element discretisation of the
   equation
       -Laplace psi = dv/dx - du/dy
   on the mesh m, such that 
       K psi = rhs

   Input:  m       - the mesh (2d T2)
           usol    - the velocity field stored as usol[j*vx_nr+i]
                     gives the j-th velocity component in the i-th
                     node of the mesh 

   Output: K       - stiffness matrix
           rhs     - righthand side vector
	   psi0    - initial guess for the solution of the equation
 	             system which satisfies the Dirichlet BC
	   P       - data for a projector which projects onto the
           	     linear subspace which doesn't change the values
           	     of DOFs related to Dirichlet BC

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
		    ){
  int  err;
  FIDX el,bd,i,j,k,r;

  FIDX dim=2, bas_n, vx_nr, bd_nr, el_w, vx_w, eg_w, fc_w, bd_w, fu_w;
  FIDX subtypes[1];

  /* for FORTRAN calls: */
  char fTrans='T', fNoTrans='N';
  double dhelp, done=1.0, dzero=0.0;
  int fdim, fbas_n;

  struct int_data iform; /* integration formula    */
  double *elK;           /* element stiffnes matrx */
  double *Jac, *Jacinv;  /* Jacobian of the element mapping and its
			    inverse */
  double *Jinvgrad;      /* inverse Jacobian times gradphi */
  double detJac;         /* determinant of the Jacobian */
  double *phi, *gradp;
  double nab[2];         /* for the BC, nab=|b-a|*n */
  FIDX nod0, noda, nodb, nodm, nodt1, nodt2, found;
  /* init */

  if (((*m).dim!=2)||((*usol).len!=2*(*m).vx_nr))
    {
      fprintf( stderr,
	       "stokes_ass_streamfunc_poison_t2: cubature_bases returned "
	       "error!\n");
      return FAIL;
    }


  /* get integration formula */
  subtypes[0]=2;

  err=cubature_bases( dim, 4, tria, 1, subtypes, &iform); 

  if (err!=SUCCESS)
    {
      fprintf( stderr,
	       "stokes_ass_streamfunc_poison_t2: cubature_bases returned "
	       "error!\n");
      return FAIL;
    }

  /* make phi and gradphi better accessible */
  phi   = (iform.bases[0]->phi);
  gradp = (iform.bases[0]->gradphi);
  bas_n = (iform.bases[0]->num_basis);

  fdim   = (int) dim;
  fbas_n = (int) bas_n;

  vx_nr = (*m).vx_nr;
  bd_nr = (*m).bd_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;
  fc_w  = (*m).fc_w;
  bd_w  = (*m).bd_w;
  fu_w  = (*m).fu_w;

  /* allocate memory for elK */
  TRY_MALLOC( elK, bas_n*bas_n, double, assem_poison_t1);
  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, assem_poison_t1);
  TRY_MALLOC( Jacinv, dim*dim, double, assem_poison_t1);
  /* allocate memory for Jinvgrad */
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, assem_poison_t1);

  /* clear K */
  sparse_empty(K);
  /* clear rhs, u0, P */
  for (i=0; i<vx_nr; i++)
    {
      (*rhs).V[i]  = 0.0;
      (*psi0).V[i] = 0.0;
      (*P).V[i]    = 0;
    }

  /* loop over all elements */
  for (el=0; el<(*m).el_nr; el++)
    {
      /* set elK to zero */
      for (i=0; i<bas_n*bas_n; i++) 
	{
	  elK[i]=0.0;
	}
      
      /* loop over all integration points */
      for (k=0; k<iform.num_points; k++)
	{
	  /* compute the Jacobian at this point */
	  /* Jac=0 */
	  for (i=0; i<dim*dim; i++)
	    Jac[i]=0.0;
	  
	  /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	  for (i=0;i<bas_n; i++)
	    {
	      for (j=0; j<dim; j++)
		{
		  for (r=0; r<dim; r++)
		    {
		      Jac[j*dim+r]+= 
			(*m).vertex[(*m).elem[el*el_w+MCT2ELNOD1+i]*vx_w
				    +MCT2VXSTRT+j]
		      * gradp[k*bas_n*dim +i*dim +r];
		    }
		}
	    }
	  /* get detJac */
	  detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];

	  /* get Jacinv (here direct) */
	  Jacinv[0]=1.0/detJac*Jac[3];
	  Jacinv[1]=-1.0/detJac*Jac[1];
	  Jacinv[2]=-1.0/detJac*Jac[2];
	  Jacinv[3]=1.0/detJac*Jac[0];

	  /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	  dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim,
		  &done, Jacinv, &fdim, &(gradp[k*bas_n*dim]), &fdim,
		  &dzero, Jinvgrad, &fdim );

	  dhelp=iform.weights[k]*fabs(detJac);
	  /* elK += |detJac|*weigth[k] * Jinvgrad^T*Jinvgrad */
	  dgemm_( &fTrans, &fNoTrans, &fbas_n, &fbas_n, &fdim,
		  &dhelp, Jinvgrad, &fdim, Jinvgrad, &fdim,
		  &done, elK, &fbas_n );

	  /* rhs += |detJac|*weigth[k] * (dv/dx-du/dy) */
	  for (i=0; i<bas_n; i++)
	    for (j=0; j<bas_n; j++)
	      {
		(*rhs).V[(*m).elem[el*el_w+MCT2ELNOD1+i]]-=
		  iform.weights[k]*fabs(detJac)*phi[k*bas_n+i]
		  *(Jinvgrad[j*dim+0]*
		    (*usol).V[(*m).elem[el*el_w+MCT2ELNOD1+j]+vx_nr]
		    -Jinvgrad[j*dim+1]*
		    (*usol).V[(*m).elem[el*el_w+MCT2ELNOD1+j]      ]);
	      }

	} /* end loop over all integration points */
     
      /* elK is ready, add to K */
      err=sparse_add_local(K, NoTrans,
			   bas_n, &((*m).elem[el*el_w+MCT2ELNOD1]),
			   bas_n, &((*m).elem[el*el_w+MCT2ELNOD1]),
			   elK, bas_n );
      FUNCTION_FAILURE_HANDLE( err, sparse_add_local, 
			       stokes_ass_streamfunc_poison_t2);
      
    }
  /* end loop over all elements */
  
  /* Dirichlet BC implementation, 

     psi(x)=int(x_0 along \Gamma to x)(u \dot n) 
        on the boundary \Gamma
  */
  if (bd_nr<1)
    {
      fprintf(stderr,
	      "stokes_ass_streamfunc_poison_t2: no boundary?\n");
      return FAIL;
    }

  nod0=(*m).edge[(*m).bound[MCT2BDEDGE]*eg_w+MCT2EGNOD1  ];
  (*psi0).V[nod0]=0.0;
  (*P).V[nod0]=1;
  noda=-1;
  nodb=nod0;
  while ((nodb!=nod0)||(noda==-1))
    {
      /* look for a boundary entry which connects nodb to any node but
	 noda */
      bd=0;
      found=0;
      while ((bd<bd_nr)&&(found==0))
	{
	  if ((*m).edge[(*m).bound[bd*bd_w+MCT2BDEDGE]*eg_w+
			MCT2EGCHL1]==-1)
	    {
	      nodt1=(*m).edge[(*m).bound[bd*bd_w+MCT2BDEDGE]*eg_w+
			      MCT2EGNOD1  ];
	      nodt2=(*m).edge[(*m).bound[bd*bd_w+MCT2BDEDGE]*eg_w+
			      MCT2EGNOD1+1];
	      nodm =(*m).edge[(*m).bound[bd*bd_w+MCT2BDEDGE]*eg_w+
			      MCT2EGNODM  ];
	      if ((nodt1==nodb)&&(nodt2!=noda))
		{
		  found=1;
		  noda=nodb;
		  nodb=nodt2;
		}
	      else if ((nodt2==nodb)&&(nodt1!=noda))
		{
		  found=-1;
		  noda=nodb;
		  nodb=nodt1;
		}
	      else
		{
		  bd++;
		}
	    }
	  else
	    {
	      bd++;
	    }
	}
      if (found==0)
	{
	  fprintf( stderr, "stokes_ass_streamfunc_poison_t2: "
		   "boundary not closed?\n"); 
	  return FAIL;
	}
      /* so we found a boundary element which extents the path form
	 noda onwards,
	 integrate over it to get the new boundary values for nodm and
	 nodb

	 if the nodm is really the midnode, we get (see notes)
      */
      nab[1]= - found*(*m).bound[bd*bd_w+MCT2BDORIE]
	*((*m).vertex[nodb*vx_w+MCT2VXSTRT  ]
	  -(*m).vertex[noda*vx_w+MCT2VXSTRT  ]);
      nab[0]= + found*(*m).bound[bd*bd_w+MCT2BDORIE]
	*((*m).vertex[nodb*vx_w+MCT2VXSTRT+1]
	  -(*m).vertex[noda*vx_w+MCT2VXSTRT+1]);
      (*psi0).V[nodm]=(*psi0).V[noda]
	+nab[0]*( 5.0/24.0 * (*usol).V[noda      ]
		 -1.0/24.0 * (*usol).V[nodb      ]
		 +1.0/3.0  * (*usol).V[nodm      ])
	+nab[1]*( 5.0/24.0 * (*usol).V[noda+vx_nr]
		 -1.0/24.0 * (*usol).V[nodb+vx_nr]
		 +1.0/3.0  * (*usol).V[nodm+vx_nr]);
      (*psi0).V[nodb]=(*psi0).V[noda]
	+nab[0]*( 1.0/6.0  * (*usol).V[noda      ]
		 +1.0/6.0  * (*usol).V[nodb      ]
		 +2.0/3.0  * (*usol).V[nodm      ])
	+nab[1]*( 1.0/6.0  * (*usol).V[noda+vx_nr]
		 +1.0/6.0  * (*usol).V[nodb+vx_nr]
		 +2.0/3.0  * (*usol).V[nodm+vx_nr]);
      (*P).V[nodm]=1;
      (*P).V[nodb]=1;
    }
  /* if we got to this point we found a closed boundary path */

  /* now correct the projector to a list of Dirichlet DOFs */
  j=0;
  for (i=0; i<vx_nr; i++)
    {
      if ((*P).V[i]!=0)
	{
	  /* this is a Dirichlet node, append it to the list */
	  (*P).V[j]=i;
	  j++;
	}
    }
  (*P).len=j;



  /* free local data */
  free_intdata (&iform);
  free(elK);
  free(Jac);
  free(Jacinv);
  free(Jinvgrad);

  return SUCCESS;
}
  
  
  
  
