/***********************************************************************

    This file is part of
    FEINS, Finite Element Incompressible Navier-Stokes solver,
    which is expanding to a more general FEM solver and toolbox,
    Copyright (C) 2003--2013, Rene Schneider 
    <rene.schneider@mathematik.tu-chemnitz.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

    Minor contributions to this program (for example bug-fixes and
    minor extensions) by third parties automatically transfer the
    copyright to the general author of FEINS, to maintain the
    possibility of commercial re-licensing. If you contribute but wish
    to keep the copyright of your contribution, make that clear in
    your contribution!

    Non-GPL licenses to this program are available upon request from
    the author.

************************************************************************/

/*
FILE lame_adj.c
HEADER lame_adj.h

TO_HEADER:


// useful text macros
#include "feins_macros.h"
// data structures 
#include "datastruc.h"
*/

#include <math.h>
#include "feins_macros.h"
#include "feins_lapack.h"
#include "lame_adj.h"
#include "sparse.h"
#include "mesh.h"
#include "cubature.h"
#include "elements.h"

/*FUNCTION*/
int lame_dIdu_t2( struct mesh *m,
                  struct vector *u,
		  struct vector *dIdu,
		  double *Is,
		  double lambda,
		  double mu
/* evaluates the performance criteria (given with the mesh) for the
   supplied solution vector sol, stores the values in pcvec

   Input:  m       - the mesh
           u       - vector containing the solution of a lame problem, 
	             calculated using the T2/T1 element pair
	   lambda,
           mu      - lame parameters

   Output: dIdu    - vector containing the values of the derivates 
                     of Is with respect to the solution u, ignored if ==NULL
           Is      - the value of the performance criteria (by
                     reference), ignored if ==NULL


   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
			 ){

  int err;
  /* ONLY FOR 2D ! */
  FIDX dim=2, bas_n, vx_nr, el_w, vx_w, eg_w, fc_w, bd_w, fu_w, i, j, el, k, r, s;
  FIDX subtypes[1];
  FIDX crit, type;

  /* for FORTRAN calls: */
  char fNoTrans='N';
  double dhelp, done=1.0, dzero=0.0;
  int fdim, fbas_n;

  double *Jac, *Jacinv, *Jinvgrad;   /* Jacobian of the element mapping */
  double ux[2];                    /* function value in the given point*/
  double detJac;                     /* determinant of the Jacobian */
  double AdetJac;                    /* absolut of the determinant of the Jacobian */
  double *gradu;
  struct int_data iform;             /* integration formula    */
  double *phi, *gradp;
  struct int_data iform1;            /* integration formula 1d   */
  double *phi1d1, *gradp1d1;
  FIDX  bas_n1d1, bd;
  FIDX edi, *node, fu;
  int assem_el, assem_bd;
  
  /* init */
  /* get integration formulas */
  subtypes[0]=2;
  
  err=cubature_bases( dim-1, 3, inter, 1, subtypes, &iform1); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, lame_dIdu_t2);

  /* make phi and gradphi better accessible */
  phi1d1   = (iform1.bases[0]->phi);
  gradp1d1 = (iform1.bases[0]->gradphi);
  bas_n1d1 = (iform1.bases[0]->num_basis);
  
  err=cubature_bases( dim, 4, tria, 1, subtypes, &iform); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, lame_dIdu_t2);

  /* make phi and gradphi better accessible */
  phi   = (iform.bases[0]->phi);
  gradp = (iform.bases[0]->gradphi);
  bas_n = (iform.bases[0]->num_basis);
  
  fdim  = (int) dim;
  fbas_n= (int) bas_n;

    
  vx_nr = (*m).vx_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;
  fc_w  = (*m).fc_w;
  bd_w  = (*m).bd_w;
  fu_w  = (*m).fu_w;

  /* allocate memory for Jac */
  TRY_MALLOC( node, bas_n, FIDX, lame_dIdu_t2);
  TRY_MALLOC( Jac, dim*dim, double, lame_dIdu_t2);
  TRY_MALLOC( gradu, dim*dim, double, lame_dIdu_t2);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_dIdu_t2); 
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, lame_dIdu_t2); 

  /* set Is and  dIdu zero */
  if (Is != NULL)
    {
      *Is = 0.0;
    }
  if (dIdu != NULL)
    {
      for (i=0; i<vx_nr*dim; i++) (*dIdu).V[i] = 0.0;
    }

  /* Check which kind (elements and boundary) are needed
     to assemble the performance function */
  assem_el=0;
  assem_bd=0;
  for (crit=0; crit<(*m).pc_nr; crit++)
    {
      type=(FIDX)(*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE];
      switch(type)
	{
	  case 11: /* area */
	    assem_el=1;
	    break;
	  case 12: /* potential energy */
	    assem_el=1;
	    assem_bd=1;
	    break;
	  case 13: /* displacement in given point */
	    break;
	  default: /* unknown type for Lame */
	    fprintf(stderr,"lame_dIdu_t2: "
	      "unknown pccrit type=%"dFIDX"\n",type);
	    return FAIL;
	}
    }
  /* Assembling over boundary */
  if (assem_bd==1)
    {
      /* loop over boundary, for Neumann part of energy criterion */
      for (bd=0; bd<(*m).bd_nr; bd++)
        {
          edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
	  /* check if it is Neumann boundary */
          if (((*m).bound[bd*bd_w + MCT2BDTYPE]==2)&&
	      ((*m).edge[edi*eg_w+MCT2EGCHL1]==-1))
	    {
	      double g[2], x[2], t[2];
	      double ds;

	      /* check out which nodes */
	      node[0] = (*m).edge[edi*eg_w + MCT2EGNOD1];
	      node[1] = (*m).edge[edi*eg_w + MCT2EGNOD1 +1];
	      node[2] = (*m).edge[edi*eg_w + MCT2EGNODM];

	      /* get the function values for the boundary conditions */
	      fu=(*m).bound[bd*bd_w + MCT2BDFNCT];

	      /* loop over integration points */
	      for (k=0; k<iform1.num_points; k++)
	        {
	          /* the integration point and tangential */
	          for (r=0; r<dim; r++)
		    {
		      x[r]=0;
		      t[r]=0;
		      ux[r]=0.0;
		      for (i=0; i<bas_n1d1; i++)
		        {
		          x[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			    *phi1d1[k*bas_n1d1 +i];
			  t[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			    *gradp1d1[k*bas_n1d1+i];
			  ux[r] += (*u).V[node [i]+r*vx_nr]
			      *phi1d1[k*bas_n1d1+i];
		        }
		    }
	          err=mesh_func_eval(m, fu, x, 0.0, 2, g, NULL, NULL );
		  FUNCTION_FAILURE_HANDLE( err, mesh_func_eval, lame_dIdu_t2);
	          /* ds= norm(t) */
	          ds=sqrt( t[0]*t[0]+t[1]*t[1] );
		    
	          /* evaluate the performance functions */
		  for (crit=0; crit<(*m).pc_nr; crit++)
		    {
		      type=(FIDX)(*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE];
		      switch(type)
			{
			case 12: /* build the integral */
			  dhelp=(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
			  if (Is != NULL)
			    {
			      for (r=0; r<dim; r++)
				{
				  *Is += g[r]*ux[r]*ds
				    *iform1.weights[k]*dhelp;
				}
			    }
			  if (dIdu != NULL)
			    {
			      for (i=0; i<bas_n1d1; i++)
				for (r=0; r<dim; r++)
				  {
				    (*dIdu).V[node[i]+r*vx_nr] +=
				      g[r]*phi1d1[k*bas_n1d1+i]*ds
				      *iform1.weights[k]*dhelp;
				  }
			    }
			  break;
			default:
			  break;
			}
		    } /* end loop over pccrits */
		  
		}/* end loop integration points */
	    } /* end this BD is Neumann (for energy criterion) */
        } /* end loop over boundary elements */
    } /* end assembling over boundary */

  /* Assembling over elements */
  if (assem_el==1)
    {
      for (el=0; el<(*m).el_nr; el++)
        {
	  /* set nodes for easier access */
	  for (i=0; i<bas_n; i++) 
	    {
	      node[i]=(*m).elem[el*el_w+MCT1ELNOD1+i];
	    }

	  for (k=0; k<iform.num_points; k++)
            {
	      /* compute  the Jacobian at this point */
	      /* Jac=0 */
	      for (i=0; i<dim*dim; i++) Jac[i]=0.0;
	      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	      for (i=0;i<bas_n; i++)
	        for (r=0; r<dim; r++)
	          for (s=0; s<dim; s++)
		    Jac[s*dim+r]+= 
		        (*m).vertex[node[i]*vx_w+MCT1VXSTRT+s] 
			*gradp[k*bas_n*dim +i*dim +r];
		    
	      /* get detJac */
	      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	      AdetJac = fabs(detJac);
	      /* get Jacinv (here direct) */
	      Jacinv[0]=1.0/detJac*Jac[3];
	      Jacinv[1]=-1.0/detJac*Jac[1];
	      Jacinv[2]=-1.0/detJac*Jac[2];
	      Jacinv[3]=1.0/detJac*Jac[0];
	      AdetJac = fabs(detJac);

	      /* real world gradient, not constant for quadratic elements */
	      /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	      dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim, 
		  &done, Jacinv, &fdim, &(gradp[k*bas_n*dim]), &fdim,
		  &dzero, Jinvgrad, &fdim );

	      /* gradu */
	      for (i=0; i<dim*dim; i++) gradu[i] = 0.0;
	      /* gradu = sum u_j * gradphi_j */
	      for (s=0; s<dim; s++) /* x or y component */
		for (r=0; r<dim; r++) /* derivate with respect to x or y */
		  for (i=0; i<bas_n; i++)
		      gradu[s*dim+r] += Jinvgrad[i*dim+r]
			*(*u).V[node[i]+s*vx_nr];	  
		  
	      /* evaluate performance functions */
	      for (crit=0; crit<(*m).pc_nr; crit++)
		{
		  type=(FIDX)(*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE];
		  switch(type)
		    {
		    case 11: /* area */
		      dhelp=(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
		      if (Is != NULL)
			{
			  *Is += iform.weights[k]*AdetJac*dhelp;
			}
		      break;
		    case 12: /* potential energy */
		      dhelp=iform.weights[k]*AdetJac/2
			*(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
		      if (Is != NULL)
			{
			  *Is +=dhelp*(lambda+2*mu)
			    *(gradu[0*dim+0]*gradu[0*dim+0]
			      +gradu[1*dim+1]*gradu[1*dim+1]);		  
			  *Is +=dhelp*2*lambda
			    *gradu[0*dim+0]*gradu[1*dim+1];	
			  *Is +=dhelp*mu
			    *(gradu[0*dim+1]+gradu[1*dim+0])
			    *(gradu[0*dim+1]+gradu[1*dim+0]);
			}
		      if (dIdu != NULL)
			{
			  for (i=0; i<bas_n; i++)
			    for (r=0; r<dim; r++)
			      {
				(*dIdu).V[node[i]+r*vx_nr] += dhelp
				  *(lambda+2*mu)*2*gradu[r*dim+r]
				  *Jinvgrad[i*dim+r];
				(*dIdu).V[node[i]+r*vx_nr] += dhelp
				  *2*lambda*gradu[(1-r)*dim+(1-r)]
				  *Jinvgrad[i*dim+r];
				(*dIdu).V[node[i]+r*vx_nr] += dhelp
				  *mu*2*(gradu[0*dim+1]+gradu[1*dim+0])
				  *Jinvgrad[i*dim+(1-r)];
			      }
			}
		      break;
		    default:
		      break;
		    }
		} /* end loop over pccrits */
	  
            } /* end loop over integration points */                          
	} /* end loop over elements */
    } /* end assembling over elements */

  /* evaluate performance functions that do not need assembling
     over elements or edges*/
  for (crit=0; crit<(*m).pc_nr; crit++)
    {
      /* square of deformation in set of points */
      if ( (*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE] == 13 )
	{
	  double u_rj;
	  r=(int) (*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
	  dhelp=(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1+1];
	  for (j=0; j<dim; j++)
	    {
	      u_rj= (*u).V[j*vx_nr+r];       
	      if (Is != NULL)
		{
		  *Is += u_rj*u_rj*dhelp;
		}
	      if (dIdu != NULL)
		{
		  (*dIdu).V[j*vx_nr+r]+=2*u_rj*dhelp;
		}
	    }
	}
    } 
    
  /* free local data */
  free_intdata (&iform1);
  free_intdata (&iform);
  free(Jac);
  free(gradu);
  free(Jacinv);
  free(Jinvgrad);
    
  return SUCCESS;
}







/*FUNCTION*/
int lame_DIDx_t2( struct mesh *m,
                   double lambda,
                   double mu,
		   struct vector *psi,
                   struct vector *u,
                   struct vector *DIDx
    
/* evaluates the derivatives of the performance criteria (given with
   the mesh) with respect to the nodal coordinates, using the adjoint
   solution psi

     DI   dI         dR
     -- = -- - psi^T --
     Dx   ds         ds

   Input:  m         - the mesh
           lambda    - Lame' constant 
           mu        - Lame' constant 
	   psi       - vector containing the solution of the adjoint equation
           u         - vector containing the solution of a lame problem

   Output: DIDx     - total gradient of I with respect to the node positions

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
			 ){

  int err;
  /* ONLY FOR 2D ! */
  FIDX dim=2, bas_n, vx_nr, el_w, vx_w, eg_w, fc_w, bd_w, fu_w, i, j, el, k, r, s;
  FIDX subtypes[1];

  /* for FORTRAN calls: */
  char fNoTrans='N';
  double dhelp, done=1.0, dzero=0.0;
  int fdim, fbas_n;


  struct int_data iform;  /* integration formula    */
  struct int_data iform1; /* integration formula 1d   */
  FIDX  bas_n1d1, bd, crit, type;
  FIDX edi, node[6], fu;
  double *Jac, *Jacinv;   /* Jacobian of the element mapping and its inverse */
  double *Jinvgrad;       /* inverse Jacobian times gradphi */
  double ux[2];           /* function value at the integration point */
  double detJac;          /* determinant of the Jacobian */
  double absdetJac;       /* absolut of the determinant of the Jacobian */
  double sgndetJac;  	  /* signum of the determinant of the Jacobian */
  double *phi, *gradp;
  double *phi1d1, *gradp1d1;
  double *gradu;          /* gradient of solution u on the element el in 
                             the integration point x_k */
  double *eldRds;         /* eldRds on the element */
  struct vector psi_dRds; /* product Psi^T * dRds */
  struct vector dIds;     /* derivate of Is with respect to the node positions */
  double *graddetJac;     /* derivate of detJac wrt the node positions */
  double *dfds_loc;       /* local derivate of the right-hand-side f
			     with respect to the node positions*/
  
  /* init */
  /* get integration formula */
  subtypes[0]=2;

  err=cubature_bases( dim, 4, tria, 1, subtypes, &iform); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, lame_dIdx_t2);
  /* make phi and gradphi better accessible */
  phi   = (iform.bases[0]->phi);
  gradp = (iform.bases[0]->gradphi);
  bas_n = (iform.bases[0]->num_basis);
  fdim  = (int) dim;
  fbas_n= (int) bas_n;

  err=cubature_bases( dim-1, 3, inter, 1, subtypes, &iform1); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, lame_dIdx_t2);
  /* make phi and gradphi better accessible */
  phi1d1   = (iform1.bases[0]->phi);
  gradp1d1 = (iform1.bases[0]->gradphi);
  bas_n1d1 = (iform1.bases[0]->num_basis);

  vx_nr = (*m).vx_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;
  fc_w  = (*m).fc_w;
  bd_w  = (*m).bd_w;
  fu_w  = (*m).fu_w;  
  
  
  /* allocate memory for Jac, Jacinv, gradu, eldRds und the psi's */
  TRY_MALLOC( Jac, dim*dim, double, lame_dIdx_t2);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_dIdx_t2);
  TRY_MALLOC( gradu, dim*dim, double, lame_dIdx_t2);
  TRY_MALLOC( eldRds, dim*bas_n*dim*bas_n, double, lame_dIdx_t2);
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, lame_dIdx_t2);
  TRY_MALLOC( graddetJac, dim*bas_n, double, lame_dIdx_t2);
  TRY_MALLOC( dfds_loc, bas_n1d1*dim*bas_n1d1*dim, double, lame_dIdX_t2);

  err=vector_alloc( &psi_dRds, dim*vx_nr );
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, lame_dIdx_t2);
  err=vector_alloc( &dIds, dim*vx_nr );
  FUNCTION_FAILURE_HANDLE( err, vector_alloc, lame_dIdx_t2);


  /* set dIds, dIdx, psi_dRds = 0 */
  for (i=0;i<vx_nr*dim;i++)
    {
      dIds.V[i] = 0.0;
      (*DIDx).V[i]= 0.0;
      psi_dRds.V[i] = 0.0;
    }  

  /* loop over boundary */
  for (bd=0; bd<(*m).bd_nr; bd++)
    {
      edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
      /* take Neumann BC into account */
      if (((*m).bound[bd*bd_w + MCT2BDTYPE]==2)&&
	  ((*m).edge[edi*eg_w+MCT2EGCHL1]==-1))
	{
	  double g[2], x[2], t[2];
	  double ds;

	  /* check out which nodes */
	  node[0] = (*m).edge[edi*eg_w + MCT2EGNOD1];
	  node[1] = (*m).edge[edi*eg_w + MCT2EGNOD1 +1];
	  node[2] = (*m).edge[edi*eg_w + MCT2EGNODM];
    
	  /* get the set boundary values */
	  fu=(*m).bound[bd*bd_w + MCT2BDFNCT];

	  /* set rhs_loc to zero */
	  for (i=0; i<bas_n1d1*dim*bas_n1d1*dim; i++)
	    dfds_loc[i]=0.0;

	  /* loop over integration points */
	  for (k=0; k<iform1.num_points; k++)
	    {
	      /* the integration point and tangential */
	      for (r=0; r<dim; r++)
		{
		  x[r]=0;
		  t[r]=0;
		  for (i=0; i<bas_n1d1; i++)
		    {
		      x[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			*phi1d1[k*bas_n1d1 +i];
		      t[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			*(gradp1d1[k*bas_n1d1+i]);
		    }
		}

	      err=mesh_func_eval(m, fu, x, 0.0, 2, g, NULL, NULL );
	      FUNCTION_FAILURE_HANDLE( err, mesh_func_eval, lame_dIdx_t2);
	  
	      /* ds= norm(t) */
	      ds=sqrt( t[0]*t[0]+t[1]*t[1] );

	      /* build the integral df/ds*/
              for (i=0; i<bas_n1d1; i++)
                for (r=0; r<dim; r++)
                  for (j=0; j<bas_n1d1; j++)
                    for (s=0; s<dim; s++)
		      {
			dfds_loc[ (s*bas_n1d1+j)*dim*bas_n1d1 + r*bas_n1d1+i] += 
			  g[r]*phi1d1[k*bas_n1d1+i] * iform1.weights[k] 
			  *t[s]*gradp1d1[k*bas_n1d1+j]/ds;
		      }
	      /* derivatives for the performance functions */
	      for (crit=0; crit<(*m).pc_nr; crit++)
		{
		  type=(FIDX)(*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE];
		  switch(type)
		    {
		    case 11: 
		      break;
		    case 12:
		      dhelp=(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1]/ds;
		      for (r=0; r<dim; r++)
			{
			  ux[r]=0.0;
			  for (i=0; i<bas_n1d1; i++)
			    ux[r] += (*u).V[node [i]+r*vx_nr]
			      *phi1d1[k*bas_n1d1+i];		    
			}
		      /* build the integral */
		      for (i=0; i<bas_n1d1; i++)
			for (r=0; r<dim; r++)
			  for (s=0; s<dim; s++)
			    dIds.V[node[i]+r*vx_nr] +=
			      g[s]*ux[s]*iform1.weights[k]*t[r]
			      *gradp1d1[k*bas_n1d1+i]*dhelp;
		      break;
		    default:
		      break;
		    }
	        }/* end loop over pccrit */
	    } /* end loop over integration points */

	  /* the local dfds is now complete, add it to the global psi_dRds */
          for (i=0; i<bas_n1d1; i++)
            for (r=0; r<dim; r++)
              for (j=0; j<bas_n1d1; j++)
                for (s=0; s<dim; s++)
		  {
		    psi_dRds.V[s*vx_nr+node[j]] -= (*psi).V[r*vx_nr+node[i]]*
                      dfds_loc[(s*bas_n1d1+j)*dim*bas_n1d1 + r*bas_n1d1+i];
		  }
	} /* end this BD is Neumann */
      
    } /* end loop over boundary elements */

  /* loop over all elements */
  for (el=0; el<(*m).el_nr; el++)
    {
      /* set nodes for easier access */
      for (i=0; i<bas_n; i++) node[i]=
		  (*m).elem[el*el_w+MCT1ELNOD1+i];
      /* set eldRds to zero */
      for (i=0; i < dim*bas_n*dim*bas_n; i++) eldRds[i]=0.0;
      
      /* loop over all integration points */
      for (k=0; k<iform.num_points; k++)
	{
	  /* compute the Jacobian at this point */
	  for (i=0; i<dim*dim; i++) Jac[i]=0.0;
	  /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	  for (i=0; i<bas_n; i++)
	    for (j=0; j<dim; j++)
	      for (r=0; r<dim; r++)
		Jac[j*dim+r]+= 
		    (*m).vertex[node[i]*vx_w+MCT2VXSTRT+j]
		    *gradp[k*bas_n*dim +i*dim +r];
		    
	  /* get detJac */
	  detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	  absdetJac = fabs(detJac);

	  /* get sgndetJac, case detJac=0 is not possible */
	  if (detJac>0) {sgndetJac = 1;}
	    else {sgndetJac = -1;}
	  
	  /* get Jacinv (here direct) */
	  Jacinv[0]=1.0/detJac*Jac[3];
	  Jacinv[1]=-1.0/detJac*Jac[1];
	  Jacinv[2]=-1.0/detJac*Jac[2];
	  Jacinv[3]=1.0/detJac*Jac[0];
     
	  /* real world dIdxent, not constant for quadratic elements */
	  /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	  dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim, 
		  &done, Jacinv, &fdim, &(gradp[k*bas_n*dim]), &fdim,
		  &dzero, Jinvgrad, &fdim );	  
	    
	  /* compute the function value of u on the element 
	     in the integration point x_k */
	  for (r=0; r<dim; r++)
	    {
	      ux[r] = 0.0;
	      for (i=0; i<bas_n; i++)
		  ux[r] += (*u).V[node[i]+r*vx_nr]
                       *phi[k*bas_n+i];     
	    }
	    
	  /* compute the gradient of u on the element 
	     in the integration point x_k */
	  for (i=0; i<dim*dim; i++) 
	    {
	      gradu[i] = 0.0;
	    }
	  /* gradu = sum u_j * gradphi_j */
	  for (r=0; r<dim; r++) /* x or y component */
	    for (s=0; s<dim; s++) /* derivate wrt x or y */
	      for (i=0; i<bas_n; i++)
		{
		  gradu[r*dim+s] += 
		    (*u).V[node[i]+r*vx_nr]*Jinvgrad[i*dim+s];
		}

	  /* compute d(detJac)/ds */        
	  for (j=0; j<bas_n; j++)
	    {
	      /* First, the x-coorindate */
	      graddetJac[0*bas_n+j]=sgndetJac*
		(gradp[k*bas_n*dim+j*dim+0]*Jac[3]
		 -gradp[k*bas_n*dim+j*dim+1]*Jac[2]);

	      /* Now, the y-coordinate */
	      graddetJac[1*bas_n+j]=sgndetJac*
		(gradp[k*bas_n*dim+j*dim+1]*Jac[0]
		 -gradp[k*bas_n*dim+j*dim+0]*Jac[1]);
	    }
	  /* derivatives for the performance functions */
	  for (crit=0; crit<(*m).pc_nr; crit++)
	    {
	      type=(FIDX)(*m).pccrit[crit*(*m).pc_w+MCXXPCTYPE];
	      switch(type)
		{
		case 11: 
		  dhelp= iform.weights[k]
		    *(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
		    for (i=0; i<bas_n; i++)
		      for (r=0; r<dim; r++)
		        dIds.V[node[i]+r*vx_nr] +=
		          dhelp*graddetJac[r*bas_n+i];  
		    break;
		case 12:
		  /* Derivate with the product rule  
		     First: the part where detJac is constant 
		            and the rest is derivated */    
		  dhelp=iform.weights[k]*absdetJac/2
		    *(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];
		  
		  for (i=0; i<bas_n; i++) /* derivate wrt the point*/
		    for (r=0; r<dim; r++) /* derivate wrt the component*/
		      {	
			for (s=0; s<dim; s++) 
			  dIds.V[node[i]+r*vx_nr] -= dhelp*(lambda+2*mu)
			    *2*gradu[s*dim+s]*Jinvgrad[i*dim+s]*gradu[s*dim+r]; 
			
			dIds.V[node[i]+r*vx_nr] -= dhelp*2*lambda
			  *gradu[0*dim+0]*(Jinvgrad[i*dim+1]*gradu[1*dim+r]);
			dIds.V[node[i]+r*vx_nr] -= dhelp*2*lambda
			  *gradu[1*dim+1]*(Jinvgrad[i*dim+0]*gradu[0*dim+r]);		  
			
			dIds.V[node[i]+r*vx_nr] -= dhelp*mu
			  *2*(gradu[0*dim+1]+gradu[1*dim+0])
			  *(Jinvgrad[i*dim+1]*gradu[0*dim+r]
			    +Jinvgrad[i*dim+0]*gradu[1*dim+r]); 
		      }
		  /* Second: the part where detJac is derivated 
		     and the rest is constant */    
		    for (i=0; i<bas_n; i++) /* derivate wrt the point*/
		      for (r=0; r<dim; r++) /* derivate wrt the component*/
			{	
			  dhelp=iform.weights[k]*graddetJac[r*bas_n+i]/2
			    *(*m).pccrit[crit*(*m).pc_w+MCXXPCDAT1];;
			  dIds.V[node[i]+r*vx_nr] +=dhelp*(lambda+2*mu)
			    *(gradu[0*dim+0]*gradu[0*dim+0]
			      +gradu[1*dim+1]*gradu[1*dim+1]);		  
			  dIds.V[node[i]+r*vx_nr] +=dhelp*2*lambda
			    *(gradu[0*dim+0]*gradu[1*dim+1]);	
			  dIds.V[node[i]+r*vx_nr] +=dhelp*mu
			    *(gradu[0*dim+1]+gradu[1*dim+0])
			    *(gradu[0*dim+1]+gradu[1*dim+0]);
			}
		    break;
		case 13:
		  break;
		}		
	    }
	    
	  /* get eldRds, then multlply psi_local^T * eldRds */
	  /* Derivate with the product rule  
	     First: the part where detJac is constant 
	            and the rest is derivated */    
	  dhelp=iform.weights[k]*absdetJac*lambda;
	  for (i=0; i<bas_n; i++) /* i-th intergration point */
	    for (r=0; r<dim; r++) /* component of the point */
	      for (j=0; j<bas_n; j++) /* derivate wrt the point*/
	        for (s=0; s<dim; s++) /* derivate wrt the component*/
	          {
		    eldRds[ (s*bas_n+j) * dim*bas_n + (r*bas_n+i)] -= 
			  dhelp*( (Jinvgrad[j*dim+0]*gradu[0*dim+s]+
			    Jinvgrad[j*dim+1]*gradu[1*dim+s])*Jinvgrad[i*dim+r]  
			  +(gradu[0*dim+0]+gradu[1*dim+1])
			    *Jinvgrad[j*dim+r]*Jinvgrad[i*dim+s]); 
		  }
		     
	  dhelp=iform.weights[k]*absdetJac*mu;
	  for (i=0; i<bas_n; i++) /* i-th intergration point */
	    for (r=0; r<dim; r++) /* component of the point */
	      for (j=0; j<bas_n; j++) /* derivate wrt the point*/
	        for (s=0; s<dim; s++) /* derivate wrt the component*/
		  {
	    	    eldRds[ (s*bas_n+j) * dim*bas_n + (r*bas_n+i)] -= dhelp*
		      (2*Jinvgrad[j*dim+r]*gradu[r*dim+s]*Jinvgrad[i*dim+r]
		       +2*gradu[r*dim+r]*Jinvgrad[j*dim+r]*Jinvgrad[i*dim+s]
		       +(Jinvgrad[j*dim+1]*gradu[0*dim+s]
			 +Jinvgrad[j*dim+0]*gradu[dim+s])*Jinvgrad[i*dim+(1-r)]
		       +(gradu[0*dim+1]+gradu[dim+0])*Jinvgrad[j*dim+(1-r)]
		       *Jinvgrad[i*dim+s]);
		  }
		     
          /* Second: the part where detJac is derivated 
	     and the rest is constant */    
	  dhelp=iform.weights[k]*lambda;
	  for (i=0; i<bas_n; i++) /* i-th intergration point */
	    for (r=0; r<dim; r++) /* component of the point */
	      for (j=0; j<bas_n; j++) /* derivate wrt the point*/
	        for (s=0; s<dim; s++) /* derivate wrt the component*/
	          {
	            eldRds[ (s*bas_n+j) * dim*bas_n + (r*bas_n+i)] += dhelp*
			(gradu[0*dim+0]+gradu[1*dim+1])*(Jinvgrad[i*dim+r])
                       *graddetJac[s*bas_n+j];
	          }
	     
	  dhelp=iform.weights[k]*mu;
	  for (i=0; i<bas_n; i++) /* i-th intergration point */
	    for (r=0; r<dim; r++) /* component of the point */
	      for (j=0; j<bas_n; j++) /* derivate wrt the point*/
	        for (s=0; s<dim; s++) /* derivate wrt the component*/
		  {
		    eldRds[ (s*bas_n+j) * dim*bas_n + (r*bas_n+i)] += dhelp*
		      (2*gradu[r*dim+r]*Jinvgrad[i*dim+r]+
		       (gradu[0*dim+1]+gradu[1*dim+0])
		       *Jinvgrad[i*dim+(1-r)])*graddetJac[s*bas_n+j];
		  }
   
 	} /* end loop over all integration points */
	
      /* eldRds is ready, multiply psi^T * eldRds and add to psi_dRds */
      for (s=0; s<dim; s++)
        for (j=0; j<bas_n; j++)
          for (r=0; r<dim; r++)
            for (i=0; i<bas_n; i++)
              {
                psi_dRds.V[node[j]+s*vx_nr] += 
                         (*psi).V[node[i]+r*vx_nr]
                          * eldRds[(s*bas_n+j)*dim*bas_n+(r*bas_n+i)];
              }
      
    } /* end loop over all elements */

  /* Finally, compute psi = dIds - psi_dRds */
  for (i=0; i<vx_nr*dim; i++)
    
    {
      (*DIDx).V[i] = dIds.V[i] - psi_dRds.V[i];
    }

  /* free local data */
  free_intdata (&iform);
  free_intdata (&iform1);
  free(dfds_loc);
  free(Jac);
  free(Jacinv);
  free(gradu);
  free(eldRds);
  free(Jinvgrad);
  free(graddetJac);
  vector_free(&psi_dRds);
  vector_free(&dIds);

  return SUCCESS;
}









/******************************************************************
 ***                                                            ***
 *** tests of formulas from paper                               ***
 *** G. Allaire, F. Jouve, A.-M. Toader                         ***
 *** Structural optimization using sensitivity analysis and     ***
 *** a level-set method                                         ***
 *** Journal of Computational Physics, 194 (2004), pp. 363-393, ***
 *** Elsevier, 2004                                             ***
 ***                                                            ***
 ******************************************************************/


/*FUNCTION*/
int lame_Allaire_et_al_2004__formula_8_t2( struct mesh *m,
                   double lambda,
                   double mu,
                   struct vector *u,
                   struct vector *DIDx
    
/* evaluates the derivatives of the compliance (energy) according to
   formula (8) of [Allaire et.al. 2004]
   simplification: assume rhs function f=0  (no volume forces)

   Input:  m         - the mesh
           lambda    - Lame' constant 
           mu        - Lame' constant 
           u         - vector containing the solution of a lame problem

   Output: DIDx      - total gradient of I with respect to the node
                       positions (non-zero only on boundary)

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
			 ){

  int err;
  /* ONLY FOR 2D ! */
  FIDX dim=2, vx_nr, el_w, vx_w, eg_w, fc_w, bd_w, bd_nr, fu_w,
    i, j, el, k, r, s; 
  FIDX subtypes[1];

  /* for FORTRAN calls: */
  char fNoTrans='N';
  double done=1.0, dzero=0.0;
  int fdim, fbas_n1d2;


  struct int_data iform1; /* integration formula 1d   */
  
  FIDX  bas_n1d2, bd;
  FIDX edi, node[6], fu;
  double *Jac, *Jacinv;   /* Jacobian of the element mapping and its inverse */
  double *Jinvgrad;       /* inverse Jacobian times gradphi */
  double ux[2];           /* function value at the integration point */
  double tvec[2];         /* tangent vector at integration point */
  double nvec[2];         /* normal vector at integration point */
  double xvec[2];         /* the integration point (world coordinates) */
  double *elDIDx;         /* element contribution to DIDx */
  double detJac;          /* determinant of the Jacobian */
  double absdetJac;       /* absolut of the determinant of the Jacobian */
  double sgndetJac;  	  /* signum of the determinant of the Jacobian  */
  double neumann_sign;    /* to unify the calculation of the energy
			     part of the integrals between the Neumann
			     and Dirichlet boundaries, either -1 or +1 */
  double ds;              /* ds is the norm of the tangent vector */

  double *phi1d2, *gradp1d2;
  double *gradu;          /* gradient of solution u on the element el in 
                             the integration point x_k */
  double *epsu;           /* strain tensor of solution u on the element el in 
                             the integration point x_k */

  FIDX   *bound_elem;     /* bound_elem[i] stores the id of the
			     element to which the i-th boundary entry
			     belongs */

  /* for tests of curvature code: {
     double curv;
     double x1[2]={ M_PI+2*cos(0.3),    -M_PI/3+2*sin(0.3)};
     double x2[2]={ M_PI+2*cos(0.3000001), -M_PI/3+2*sin(0.3000001)};
     double x3[2]={ M_PI+2*cos(0.3000002), -M_PI/3+2*sin(0.3000002)};
     
     err=curvature_2d_3points( x1,x2,x3, &curv);
     FUNCTION_FAILURE_HANDLE( err, curvature_2d_3points, 
     ____test____);

     return FAIL;
     } */


  /* init */
  /* get integration formula */
  subtypes[0]=2;

  err=cubature_bases( 1, 3, tria, 1, subtypes, &iform1); 
  FUNCTION_FAILURE_HANDLE( err, cubature_bases, lame_Allair_et_al_2004__formula_8_t2);
  /* make phi and gradphi better accessible */
  phi1d2   = (iform1.bases[0]->phi);
  gradp1d2 = (iform1.bases[0]->gradphi);
  bas_n1d2 = (iform1.bases[0]->num_basis);
  fdim     = (int) dim;
  fbas_n1d2= (int) bas_n1d2;

  vx_nr = (*m).vx_nr;
  vx_w  = (*m).vx_w;
  el_w  = (*m).el_w;
  eg_w  = (*m).eg_w;
  fc_w  = (*m).fc_w;
  bd_w  = (*m).bd_w;
  bd_nr = (*m).bd_nr;
  fu_w  = (*m).fu_w;  

  /*************************************************************
     build the bound_elem list
  **************************************************************/

  TRY_MALLOC( bound_elem, bd_nr, FIDX, 
	      lame_Allair_et_al_2004__formula_8_t2);

  /* init to "not found" (= -1) */
  for (i=0; i<bd_nr; i++) 
    {
      bound_elem[i] = -1;
    }

  /* loop over all elements, find those with boundary edges */
  for (el=0; el<(*m).fc_nr; el++)
    {
      FIDX eg,bd;
      for (i=0; i<3; i++)
	{
	  /* the i-th edge */
	  eg=(*m).face[el*fc_w+MCT2FCEDG1+i];
	  /* belongs to boundary? */
	  bd=(*m).edge[eg*eg_w+MCT2EGBND];
	  if (bd>=0)
	    {
#ifdef DEBUGFEINS
	      /* check sanity */
	      if (bd>=bd_nr) 
		{
		  fprintf(stderr,"lame_Allair_et_al_2004__formula_8_t2: \n"
			  "bd out of range ???\n");
		  return FAIL;
		}
	      /* check sanity */
	      if (bound_elem[bd]!=-1) 
		{
		  fprintf(stderr,"lame_Allair_et_al_2004__formula_8_t2: \n"
			  "boundary part of more than one element ???\n");
		  return FAIL;
		}
#endif
	      /* store this element for the boundary bd */
	      bound_elem[bd] = el;
	    }
	}
    }
  /**************************************************************
   * end build bound_elem list                                  *
   **************************************************************/
  
  /* allocate memory for Jac, Jacinv, gradu, epsu, eldRds und the psi's */
  TRY_MALLOC( Jac, dim*dim, double, lame_Allair_et_al_2004__formula_8_t2);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_Allair_et_al_2004__formula_8_t2);
  TRY_MALLOC( gradu, dim*dim, double, lame_Allair_et_al_2004__formula_8_t2);
  TRY_MALLOC( epsu, dim*dim, double, lame_Allair_et_al_2004__formula_8_t2);
  TRY_MALLOC( Jinvgrad, dim*bas_n1d2, double, lame_Allair_et_al_2004__formula_8_t2);
  TRY_MALLOC( elDIDx, dim*bas_n1d2, double, lame_Allair_et_al_2004__formula_8_t2);

  /* set DIDx = 0 */
  for (i=0;i<vx_nr*dim;i++)
    {
      (*DIDx).V[i]= 0.0;
    }  

  /* loop over boundary */
  for (bd=0; bd<(*m).bd_nr; bd++)
    {
      edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
      /* only procede if edge belongs to current level */
      if ((*m).edge[edi*eg_w+MCT2EGCHL1]==-1)
	{
	  FIDX nodm;
	  el=bound_elem[bd];
	  if (el<0)
	    {
	      fprintf(stderr,"lame_Allair_et_al_2004__formula_8_t2: \n"
		      "boundary part without element ???\n");
	      return FAIL;
	    }	    

	  /* set nodes for easier access */
	  for (i=0; i<bas_n1d2; i++) 
	    {
	      node[i]= (*m).elem[el*el_w+MCT1ELNOD1+i];
	    }

	  /* make sure edi is the edge [0-3-1] (or reversed) of local
	     nodes */
	  nodm=(*m).edge[edi*eg_w+MCT2EGNODM];

	  /* fprintf(stderr,"orig elem=(%d %d %d %d %d %d)\n"
	     "nodm= %d\n", 
	     node[0],node[1],node[2],node[3],node[4],node[5],
	     nodm); /* */

	  if (nodm!=node[3])
	    {
	      FIDX rotate=0;
	      FIDX node_old[6];

	      /* find nodm, define how to rotate the element to make
		 this the node[3] */
	      if (nodm==node[4])
		{
		  rotate=2;
		}
	      else if (nodm==node[5])
		{
		  rotate=1;
		}
	      else
		{
		  fprintf(stderr,"lame_Allair_et_al_2004__formula_8_t2: \n"
			  "correct rotation of element not possible ???\n");
		  fprintf(stderr,"elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm);
		  return FAIL;
		}	

	      
	      /* copy node to node_old */
	      for (i=0; i<6; i++)
		{
		  node_old[i]=node[i];
		}
	      /* write rotated enumeration to node */
	      for (i=0; i<3; i++)
		{
		  int rot_i=(i+rotate)%3;
		  node[rot_i  ] = node_old[i  ];
		  node[rot_i+3] = node_old[i+3];
		}

	      if (rotate==2)
		{ fprintf(stderr,"rot  elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm); 
		}
	    } /* end rotate element to make edi edge [0-3-1] (or
		 reversed) of local nodes */

	  /* fprintf(stderr,"rot  elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
	     "nodm= %"dFIDX"\n", 
	     node[0],node[1],node[2],node[3],node[4],node[5],
	     nodm); /* */

	  
	  /* set elDIDx to zero */
	  for (i=0; i < dim*bas_n1d2; i++) elDIDx[i]=0.0;
      
	  /* loop over all integration points */
	  for (k=0; k<iform1.num_points; k++)
	    {
	      double deriv_core;

	      deriv_core = 0.0;

	      /* compute the Jacobian at this point */
	      for (i=0; i<dim*dim; i++) Jac[i]=0.0;
	      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	      for (i=0; i<bas_n1d2; i++)
		for (j=0; j<dim; j++)
		  for (r=0; r<dim; r++)
		    Jac[j*dim+r]+= 
		      (*m).vertex[node[i]*vx_w+MCT2VXSTRT+j]
		      *gradp1d2[k*bas_n1d2*dim +i*dim +r];
		    
	      /* get detJac */
	      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	      absdetJac = fabs(detJac);

	      /* get sgndetJac, case detJac=0 is not possible */
	      if (detJac>0) {sgndetJac = 1;}
	      else {sgndetJac = -1;}

	      /* get Jacinv (here direct) */
	      Jacinv[0]=1.0/detJac*Jac[3];
	      Jacinv[1]=-1.0/detJac*Jac[1];
	      Jacinv[2]=-1.0/detJac*Jac[2];
	      Jacinv[3]=1.0/detJac*Jac[0];
     
	      /* real world gradient, not constant for quadratic elements */
	      /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	      dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n1d2, &fdim, 
		      &done, Jacinv, &fdim, &(gradp1d2[k*bas_n1d2*dim]), &fdim,
		      &dzero, Jinvgrad, &fdim );	  

	      /* compute the integration point, tangent vector and ds */
	      ds = 0.0;
	      for (r=0; r<dim; r++)
		{
		  xvec[r]=0;
		  tvec[r]=0;
		  for (i=0; i<bas_n1d2; i++)
		    {
		      xvec[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			*phi1d2[k*bas_n1d2 +i];
		      tvec[r] += (*m).vertex[node[i]*vx_w+MCT2VXSTRT+r]
			*(gradp1d2[k*bas_n1d2*dim+i*dim+0]);
		    }
		  ds += tvec[r]*tvec[r];
		}
	      ds = sqrt(ds);

	      /* compute the normal vector,
                 
                 rotate the normalised tangent by minus 90 degree,
                 adjust sign according to sign of the element Jacobian */
	      nvec[0] =  sgndetJac*tvec[1]/ds;
	      nvec[1] = -sgndetJac*tvec[0]/ds;
	      
	      
	    
	      /* compute the function value of u on the element 
		 in the integration point x_k */
	      for (r=0; r<dim; r++)
		{
		  ux[r] = 0.0;
		  for (i=0; i<bas_n1d2; i++)
		    ux[r] += (*u).V[node[i]+r*vx_nr]
		      *phi1d2[k*bas_n1d2+i];     
		}
	    
	      /* compute the gradient of u on the element 
		 in the integration point x_k */
	      for (i=0; i<dim*dim; i++) 
		{
		  gradu[i] = 0.0;
		}
	      /* gradu = sum u_j * gradphi_j */
	      for (r=0; r<dim; r++) /* x or y component */
		for (s=0; s<dim; s++) /* derivate wrt x or y */
		  for (i=0; i<bas_n1d2; i++)
		    {
		      gradu[r*dim+s] += 
			(*u).V[node[i]+r*vx_nr]*Jinvgrad[i*dim+s];
		    }
	      /* compute the strain tensor of u on the element 
		 in the integration point x_k */
	      for (r=0; r<dim; r++) 
		for (s=0; s<dim; s++) 
		  {
		    epsu[r*dim+s]=0.5*(gradu[r*dim+s]+gradu[s*dim+r]);
		  }

	      
	      /* special part for Neumann boundary */
	      if ((*m).bound[bd*bd_w + MCT2BDTYPE]==2)
		{
		  double g[2], dudn[2], curv;

		  /* get the boundary function data */
		  fu=(*m).bound[bd*bd_w + MCT2BDFNCT];


#ifdef FEINS_have_warning 
#warning "can't handle derivatives of the Neumann data yet, only constant Neumann data supported for shape derivative code"
#endif
		  err=mesh_func_eval(m, fu, xvec, 0.0, 2, g, NULL, NULL );
		  FUNCTION_FAILURE_HANDLE( err, mesh_func_eval, 
					   lame_Allair_et_al_2004__formula_8_t2);

		  err=curvature_2d_3points( &(*m).vertex[node[0]*vx_w+MCT2VXSTRT],
					    &(*m).vertex[node[1]*vx_w+MCT2VXSTRT],
					    &(*m).vertex[node[3]*vx_w+MCT2VXSTRT],
					    &curv);
		  FUNCTION_FAILURE_HANDLE( err, curvature_2d_3points, 
					   lame_Allair_et_al_2004__formula_8_t2);
	  
		  /* compute the normal derivative of u */
		  for (r=0; r<dim; r++) /* x or y component */
		    {
		      dudn[r] = 0.0; 
		      for (s=0; s<dim; s++) /* derivate wrt x or y */
			{
			  dudn[r] +=  gradu[r*dim+s] * nvec[s]; // orig
			}
		    }
		  
		  /* compute the normal derivative part of the shape
		     derivative (formula (8)) */
		  for (r=0; r<dim; r++) /* x or y component */
		    {
		      deriv_core += 2*( g[r]*dudn[r]
					+ curv*g[r]*ux[r]); /* */
		    }

		  neumann_sign=-1;//-1;
		} /* end special part for Neumann boundary */
	      else
		{
		  neumann_sign=+1;
		}

	      /* energy part of the shape derivative */
	      /* Ae(u)*e(u) in the paper means the integrand of the
		 energy a(u,u), i.e. 
		 lambda*div(u)*div(u) + 2*mu*eps:eps */
	      {
		double divu, eps_colon_eps;

		divu          = 0.0;
		eps_colon_eps = 0.0;

		for (r=0; r<dim; r++) /* x or y component */
		  {
		    divu+= gradu[r*dim+r];
		  }
		
		for (r=0; r<dim; r++) /* x or y component */
		  for (s=0; s<dim; s++) /* derivate wrt x or y */
		    {
		      /* eps_colon_eps +=
			 gradu[r*dim+s]*gradu[r*dim+s]; /* wrong !*/
		      eps_colon_eps +=  epsu[r*dim+s]*epsu[r*dim+s];

		    }

		deriv_core += neumann_sign*(lambda*divu*divu
					    +2*mu*eps_colon_eps); /* orig */

	      } /* end energy part of integrand */


	      /* add contributions of this integration point to local
		 integral */
	      for (i=0;i<bas_n1d2; i++)
		for (r=0; r<dim; r++)
		  {
		    /* formula (8) form the paper:
		       the deformation ansatz function Theta is taken
		       to be the shape function of the element itself */

		    elDIDx[i*dim+r] += 
		      deriv_core*phi1d2[k*bas_n1d2 +i]*nvec[r]
		      *iform1.weights[k]*ds;  /* */
		  }

	    } /* end loop over integration points */

	  /* add local contribution to DIDx */
	  for (i=0;i<bas_n1d2; i++)
	    for (r=0; r<dim; r++)
	      {
		(*DIDx).V[r*vx_nr+node[i]]+=elDIDx[i*dim+r]; 
	      }
      
	} /* end of edge belongs to current level */
    } /* end loop over boundary elements */


  /* free local data */
  free_intdata (&iform1);
  free(elDIDx);
  free(Jac);
  free(Jacinv);
  free(gradu);
  free(bound_elem);
  free(Jinvgrad);

  return SUCCESS;
}


/*FUNCTION*/
int curvature_2d_3points( double *x1, double *x2, double *x3,
			  double *curv
/* evaluates the curvature curv of a line segment through
   three points by determining a circle through these points and
   using 1/radius 

   the circle is determined as in 
     http://www.arndt-bruenner.de/mathe/scripts/kreis3p.htm
   by considering a non-linear equation system in the variables
     xm (center x),ym (center y), radius 
   this is simplified to a linear system using the auxiliary variables
     [A,B,C]
   such that
     xm = B/2, ym = C/2, und r^2 = xm^2 + ym^2 - A


   Input:  x1,x2,x3  - two dimensional vectors, the three points which
                       are given

   Output: curv      - curvature of the circle through the three
                       points, ore zero if the three points are on a
                       line 

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
			  ){

  double radius;

  /* for FORTRAN calls: */
  int ione=1, nr=3, LDA=3;
  int info;

  double A[3*3],b[3];
  int ipiv[3];

  /* build the system matrix */
  A[0*LDA+0]=1.0;
  A[0*LDA+1]=1.0;
  A[0*LDA+2]=1.0;

  A[1*LDA+0]=-x1[0];
  A[1*LDA+1]=-x2[0];
  A[1*LDA+2]=-x3[0];

  A[2*LDA+0]=-x1[1];
  A[2*LDA+1]=-x2[1];
  A[2*LDA+2]=-x3[1];

  /* build the rhs b */
  b[0] = -(x1[0]*x1[0] + x1[1]*x1[1]);
  b[1] = -(x2[0]*x2[0] + x2[1]*x2[1]);
  b[2] = -(x3[0]*x3[0] + x3[1]*x3[1]);

  /* solve equation system for center of the circle on which those 3
     points are */
  dgesv_( &nr, &ione, A, &LDA, ipiv, b, &LDA, &info );
  if (info != 0) 
    {
      /* equation system could not be solved, that means x1,x2,x3 are
	 collinear, thus the curvature is zero */
      (*curv) = 0.0;
    }
  else
    {
      /* system solved, evaluate radius and thus curvature as 1/radius
       */
      radius= sqrt( (b[1]/2)*(b[1]/2)+(b[2]/2)*(b[2]/2) - b[0] );
      
      (*curv) = 1.0/radius;
    }
  /* printf("debug: curv=%8.2e info=%d\n", *curv,info); /* */
  

  return SUCCESS;
}



/********************************************************************
*****                                                           *****
*****                                                           *****
*****                                                           *****
*****  testing Epplers conjecture: Remark 3.3 from preprint     *****
*****   "On Hadamard shape gradient representations in          *****
*****    linear elasticity"                                     *****
*****                                                           *****
********************************************************************/



/*FUNCTION*/
int lame_zz_interpolate_sigma_tx(struct mesh *m,
				int type,
				FIDX ncomp, struct vector *u,
				double lambda, double mu,
				struct vector *sigmu
/* averages the values of the stress tensor at the nodes
   of the mesh, thus interpolation of these provides smoothed
   stress tensors.

   
   Input:  m       - the mesh, has to be T1 or T2 
           type    - defining which type of mesh this actually is
	             type=1 ==> linear,  type=2 quadratic triangles
           ncomp   - number of (solution) components in u, i.e. number
                     of variables per node in the mesh
	   u       - coefficient vector representing a function on
	             the current mesh (e.g. solution of PDE problem)
		     u.V[r*m->vx_nr+i] has to be the value of the r-th
		     component of the (possibly) vector valued
		     function u at node i of the mesh
	   lambda, mu
                   - Lame constants

   Output: sigmu   - the stress tensor (of u) averaged at the
                     nodes of the mesh, has to be of size
                     dim*ncomp*vx_nr,
		     sigmu.V[d*ncomp*vx_nr + r*vx_nr +i]
		     will be the averaged value of sigma_d,r at the
		     i-th node of the mesh 

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i, j, r, el, pt, cmp;
  int err;

  FIDX dim, basn, bas_n, vx_nr, el_w, vx_w;


  double points_t1[2*3]={ 0.0, 0.0,
			  1.0, 0.0,
			  0.0, 1.0 };
  double points_t2[2*6]={ 0.0, 0.0,
			  1.0, 0.0,
			  0.0, 1.0,
			  0.5, 0.0,
			  0.5, 0.5,
			  0.0, 0.5 };
  double *points;


  /* for FORTRAN calls: */
  char   fNoTrans='N';
  double done=1.0, dzero=0.0;
  int fdim, fbas_n;

  double *Jac, *Jacinv;  /* Jacobian of the element mapping and its
			    inverse */
  FIDX   *ldofs;         /* list of local dofs */
  double *Jinvgrad;      /* inverse Jacobian times gradphi */
  double detJac;         /* determinant of the Jacobian */
  double AdetJac;        /* abs(detJac) */
  double *phi, *gradp, *hessphi;
  double *lgradu;         /* the local gradient of u */
  double *lepsu;          /* the local strain tensor (of u) */
  double *lsigmu;         /* the local stess tensor (of u) */

  double *weight_count;  /* summs the weights of all contributions to
			    each node, (for now this will mean counts
			    the number of elements the node belongs to */

  FIDX l_mctxELNOD1, l_mctxVXSTRT;

  vx_nr = m->vx_nr;
  dim   = m->dim;
  vx_w  = m->vx_w;
  el_w  = m->el_w;

  if ((*u).len!=ncomp*vx_nr)
    {
      fprintf(stderr,
	      "lame_zz_interpolate_sigma_tx: "
	      "size of u does not match ncomp*vx_nr\n");
      return FAIL;
    }
  
  if ((*sigmu).len!=dim*ncomp*vx_nr)
    {
      fprintf(stderr,
	      "lame_zz_interpolate_sigma_tx: "
	      "size of sigmu does not match dim*ncomp*vx_nr\n");
      return FAIL;
    }


  switch (type)
    {
    case 1:
      l_mctxVXSTRT = MCT1VXSTRT;
      l_mctxELNOD1 = MCT1ELNOD1;
      points = points_t1;
      basn   = 3;
      break;
    case 2:
      l_mctxVXSTRT = MCT2VXSTRT;
      l_mctxELNOD1 = MCT2ELNOD1;
      points = points_t2;
      basn   = 6;
      break;
    default:
      fprintf(stderr,"lame_zz_interpolate_sigma_tx: unknown type=%d\n",type);
      return FAIL;
    }

  /* init */
  /* initialise interpolation info  */
  err= eval_basis( dim, tria, type, basn, points,
		   &bas_n, &phi, &gradp, &hessphi);
  FUNCTION_FAILURE_HANDLE( err, eval_basis, lame_zz_interpolate_sigma_tx);
  free( hessphi );

  if (basn!=bas_n)
    {
      fprintf(stderr,"lame_zz_interpolate_sigma_tx: basn!=bas_n\n");
      return FAIL;
    }

  TRY_MALLOC( weight_count, vx_nr, double, lame_zz_interpolate_sigma_tx);

  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, lame_zz_interpolate_sigma_tx);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_zz_interpolate_sigma_tx);
  /* allocate memory for Jinvgrad */
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, lame_zz_interpolate_sigma_tx);
  /* allocate memory for ldofs */
  TRY_MALLOC( ldofs, ncomp*bas_n, FIDX, lame_zz_interpolate_sigma_tx);
  TRY_MALLOC( lgradu, dim*dim, double, lame_zz_interpolate_sigma_tx);
  TRY_MALLOC( lepsu, dim*dim, double, lame_zz_interpolate_sigma_tx);
  TRY_MALLOC( lsigmu, dim*dim, double, lame_zz_interpolate_sigma_tx);

  /* seto gradu to zero */
  for(i=0; i<sigmu->len; i++)
    {
      sigmu->V[i]=0.0;
    }

  /* seto weight_count to zero */
  for(i=0; i<vx_nr; i++)
    {
      weight_count[i]=0.0;
    }

  /* loop over all elements */
  for (el=0; el<m->el_nr; el++)
    {
      /* define ldofs */
      for(i=0; i<ncomp; i++)
	for(j=0; j<bas_n; j++)
	  {
	    ldofs[i*bas_n+j]=i*vx_nr + m->elem[el*el_w+ l_mctxELNOD1 +j];
	  }

      /* loop over the nodes of the element */
      for(pt=0; pt<bas_n; pt++)
	{
	  /* evaluate the gradient at this point */
	  /* Jac=0 */
	  for (i=0; i<dim*dim; i++)
	    Jac[i]=0.0;
	  
	  /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	  for (i=0;i<bas_n; i++)
	    {
	      for (j=0; j<dim; j++)
		for (r=0; r<dim; r++)
		  {
		    Jac[j*dim+r]+= 
		      m->vertex[ldofs[i]*vx_w + l_mctxVXSTRT+j]
		      * gradp[pt*bas_n*dim +i*dim +r];
		  }
	    }

	  /* get detJac */
	  detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	  
	  /* get Jacinv (here direct) */
	  Jacinv[0]=1.0/detJac*Jac[3];
	  Jacinv[1]=-1.0/detJac*Jac[1];
	  Jacinv[2]=-1.0/detJac*Jac[2];
	  Jacinv[3]=1.0/detJac*Jac[0];

	  AdetJac = fabs(detJac);


	  /* real world gradient, not constant for quadratic elements */
	  /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	  fdim=dim;
	  fbas_n=bas_n;
	  dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim, 
		  &done, Jacinv, &fdim, &(gradp[pt*bas_n*dim]), &fdim,
		  &dzero, Jinvgrad, &fdim );

	  /* compute the gradient of the component of u */
	  for (i=0; i<dim*dim; i++) 
	    {
	      lgradu[i] = 0.0;
	      lepsu[i]  = 0.0;
	      lsigmu[i] = 0.0;
	    }

	  /* loop over all components of u */
	  for(cmp=0; cmp<ncomp; cmp++)
	    {
	      /* gradu = sum u_j * gradphi_j */
	      for (r=0; r<dim; r++) /* derivate wrt x or y */
		for (i=0; i<bas_n; i++)
		  {
		    /* du_cmp/dx_r */
		    lgradu[r*dim+cmp] += 
		      u->V[ldofs[cmp*bas_n+i]]*Jinvgrad[i*dim+r];
		  }
	    }

	  /* eps_i,j= 1/2( du_i/dx_j + du_j/dx_i ) */
	  for (r=0; r<dim; r++) /* derivate wrt x or y */
	    for (i=0; i<dim; i++)
	      {
		lepsu[r*dim+i]=0.5*(lgradu[r*dim+i] 
				    +lgradu[i*dim+r]);
	      }

	  /* [sigma_11]   [ lam+2*mu     lam      0  ] [eps_11]
	     [sigma_22] = [    lam    lam+2*mu    0  ]*[eps_22]
	     [sigma_12]   [     0         0     2*mu ] [eps_12] */
	  lsigmu[0*dim+0]=
	    (lambda+2*mu)*lepsu[0*dim+0]+lambda*lepsu[1*dim+1];
	  lsigmu[1*dim+1]=
	    lambda*lepsu[0*dim+0]+(lambda+2*mu)*lepsu[1*dim+1];
	  lsigmu[0*dim+1]= 2*mu*lepsu[0*dim+1];
	  lsigmu[1*dim+0]= lsigmu[0*dim+1]; /* symmetry */
	    
	  
	  /* add this to sigmu */
	  for(cmp=0; cmp<ncomp; cmp++)
	    {
	      for (r=0; r<dim; r++) /* derivate wrt x or y */
		{
		  sigmu->V[r*ncomp*vx_nr + cmp*vx_nr +ldofs[pt]]
		    +=lsigmu[cmp*dim+r];
		}
	    } /* end loop over components */
	      
	  weight_count[ldofs[pt]]+=1.0;

	} /* end loop over local nodes */

    } /* end loop over all elements */


  /* now divide by accumulated the weights */
  for(r=0; r<dim; r++)
    for(cmp=0; cmp<ncomp; cmp++)
      for(i=0; i<vx_nr; i++)
	{
	  sigmu->V[r*ncomp*vx_nr + cmp*vx_nr +i]/=weight_count[i];
	}

  free(lsigmu);
  free(lepsu);
  free(lgradu);
  free(ldofs);
  free(Jinvgrad);
  free(Jacinv);
  free(Jac);
  free(weight_count);
  free(gradp);
  free(phi);

  return SUCCESS;
}








/*FUNCTION*/
int lame_zz_interpolate_sigma_gamma_tx(struct mesh *m,
				      int type,
				      FIDX ncomp, struct vector *u,
				      double lambda, double mu,
				      struct vector *sigmGu
/* averages the values of the *surface* stress tensor at the nodes of
   the mesh, thus interpolation of these provides smoothed surface
   stress tensors.

   
   Input:  m       - the mesh, has to be T1 or T2 
           type    - defining which type of mesh this actually is
	             type=1 ==> linear,  type=2 quadratic triangles
           ncomp   - number of (solution) components in u, i.e. number
                     of variables per node in the mesh
	   u       - coefficient vector representing a function on
	             the current mesh (e.g. solution of PDE problem)
		     u.V[r*m->vx_nr+i] has to be the value of the r-th
		     component of the (possibly) vector valued
		     function u at node i of the mesh
	   lambda, mu
                   - Lame constants

   Output: sigmGu  - the surface stress tensor (of u) averaged at the
                     nodes of the mesh, has to be of size
                     dim*ncomp*vx_nr,
		     sigmu.V[d*ncomp*vx_nr + r*vx_nr +i]
		     will be the averaged value of sigma_G_d,r at the
		     i-th node of the mesh 

   Return: SUCCESS - success,
           FAIL    - failure, see error message
*/
		 ){
  FIDX i, j, r, el, bd, pt, cmp;
  int err;

  FIDX dim, basn, bas_n, vx_nr, el_w, vx_w, eg_w, bd_w, bd_nr, fc_w;


  double points_t1[2*3]={ 0.0, 0.0,
			  1.0, 0.0,
			  0.0, 1.0 };
  double points_t2[2*6]={ 0.0, 0.0,
			  1.0, 0.0,
			  0.0, 1.0,
			  0.5, 0.0,
			  0.5, 0.5,
			  0.0, 0.5 };
  double *points;


  /* for FORTRAN calls: */
  char   fNoTrans='N';
  double done=1.0, dzero=0.0;
  int fdim, fbas_n;

  double *Jac, *Jacinv;  /* Jacobian of the element mapping and its
			    inverse */
  FIDX   *node;          /* list of local nodes */
  FIDX   *ldofs;         /* list of local dofs */
  double *Jinvgrad;      /* inverse Jacobian times gradphi */
  double detJac;         /* determinant of the Jacobian */
  double AdetJac;        /* abs(detJac) */
  double *phi, *gradp, *hessphi;
  double *lgradu;         /* the local gradient of u */
  double *lepsu;          /* the local strain tensor (of u) */
  double *lsigmu;         /* the local stess tensor (of u) */

  FIDX   *bound_elem;     /* bound_elem[i] stores the id of the
			     element to which the i-th boundary entry
			     belongs */

  double *weight_count;  /* summs the weights of all contributions to
			    each node, (for now this will mean counts
			    the number of elements the node belongs to */

  FIDX l_mctxELNOD1, l_mctxVXSTRT;

  vx_nr = m->vx_nr;
  dim   = m->dim;
  vx_w  = m->vx_w;
  el_w  = m->el_w;
  eg_w  = m->eg_w;
  fc_w  = m->fc_w;
  bd_w  = m->bd_w;
  bd_nr = m->bd_nr;

  if ((*u).len!=ncomp*vx_nr)
    {
      fprintf(stderr,
	      "lame_zz_interpolate_sigma_gamma_tx: "
	      "size of u does not match ncomp*vx_nr\n");
      return FAIL;
    }
  
  if ((*sigmGu).len!=dim*ncomp*vx_nr)
    {
      fprintf(stderr,
	      "lame_zz_interpolate_sigma_gamma_tx: "
	      "size of sigmu does not match dim*ncomp*vx_nr\n");
      return FAIL;
    }


  switch (type)
    {
    case 1:
      l_mctxVXSTRT = MCT1VXSTRT;
      l_mctxELNOD1 = MCT1ELNOD1;
      points = points_t1;
      basn   = 3;
      break;
    case 2:
      l_mctxVXSTRT = MCT2VXSTRT;
      l_mctxELNOD1 = MCT2ELNOD1;
      points = points_t2;
      basn   = 6;
      break;
    default:
      fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: unknown type=%d\n",type);
      return FAIL;
    }

  /*************************************************************
     build the bound_elem list
  **************************************************************/

  TRY_MALLOC( bound_elem, bd_nr, FIDX, 
	      lame_zz_interpolate_sigma_gamma_tx);

  /* init to "not found" (= -1) */
  for (i=0; i<bd_nr; i++) 
    {
      bound_elem[i] = -1;
    }

  /* loop over all elements, find those with boundary edges */
  for (el=0; el<(*m).fc_nr; el++)
    {
      FIDX eg,bd;
      for (i=0; i<3; i++)
	{
	  /* the i-th edge */
	  eg=(*m).face[el*fc_w+MCT2FCEDG1+i];
	  /* belongs to boundary? */
	  bd=(*m).edge[eg*eg_w+MCT2EGBND];
	  if (bd>=0)
	    {
#ifdef DEBUGFEINS
	      /* check sanity */
	      if (bd>=bd_nr) 
		{
		  fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: \n"
			  "bd out of range ???\n");
		  return FAIL;
		}
	      /* check sanity */
	      if (bound_elem[bd]!=-1) 
		{
		  fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: \n"
			  "boundary part of more than one element ???\n");
		  return FAIL;
		}
#endif
	      /* store this element for the boundary bd */
	      bound_elem[bd] = el;
	    }
	}
    }
  /**************************************************************
   * end build bound_elem list                                  *
   **************************************************************/

  /* init */
  /* initialise interpolation info  */
  err= eval_basis( dim, tria, type, basn, points,
		   &bas_n, &phi, &gradp, &hessphi);
  FUNCTION_FAILURE_HANDLE( err, eval_basis, lame_zz_interpolate_sigma_gamma_tx);
  free( hessphi );

  if (basn!=bas_n)
    {
      fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: basn!=bas_n\n");
      return FAIL;
    }

  TRY_MALLOC( weight_count, vx_nr, double, lame_zz_interpolate_sigma_gamma_tx);

  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, lame_zz_interpolate_sigma_gamma_tx);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_zz_interpolate_sigma_gamma_tx);
  /* allocate memory for Jinvgrad */
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, lame_zz_interpolate_sigma_gamma_tx);
  /* allocate memory for ldofs */
  TRY_MALLOC( ldofs, ncomp*bas_n, FIDX, lame_zz_interpolate_sigma_gamma_tx);
  TRY_MALLOC( node,  bas_n, FIDX, lame_zz_interpolate_sigma_gamma_tx);
  TRY_MALLOC( lgradu, dim*dim, double, lame_zz_interpolate_sigma_gamma_tx);
  TRY_MALLOC( lepsu, dim*dim, double, lame_zz_interpolate_sigma_gamma_tx);
  TRY_MALLOC( lsigmu, dim*dim, double, lame_zz_interpolate_sigma_gamma_tx);

  /* seto gradu to zero */
  for(i=0; i<sigmGu->len; i++)
    {
      sigmGu->V[i]=0.0;
    }

  /* seto weight_count to zero */
  for(i=0; i<vx_nr; i++)
    {
      weight_count[i]=0.0;
    }

  /* loop over boundary */
  for (bd=0; bd<(*m).bd_nr; bd++)
    {
      FIDX edi;
      edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
      /* only procede if edge belongs to current level, and is of
	 Neumman type (TYPE is 0,2 or 3) */
      if (((*m).edge[edi*eg_w+MCT2EGCHL1]==-1)&&
	  ( ((*m).bound[bd*bd_w + MCT2BDTYPE]==0)
	    ||((*m).bound[bd*bd_w + MCT2BDTYPE]==2)
	    ||((*m).bound[bd*bd_w + MCT2BDTYPE]==3) ))
	{
	  FIDX nodm;
	  el=bound_elem[bd];

	  if (el<0)
	    {
	      fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: \n"
		      "boundary part without element ???\n");
	      return FAIL;
	    }	    

	  /* set nodes for easier access */
	  for (i=0; i<bas_n; i++) 
	    {
	      node[i]= (*m).elem[el*el_w+MCT1ELNOD1+i];
	    }

	  /* make sure edi is the edge [0-3-1] (or reversed) of local
	     nodes */
	  nodm=(*m).edge[edi*eg_w+MCT2EGNODM];

	  /* fprintf(stderr,"orig elem=(%d %d %d %d %d %d)\n"
	     "nodm= %d\n", 
	     node[0],node[1],node[2],node[3],node[4],node[5],
	     nodm); /* */

	  if (nodm!=node[3])
	    {
	      FIDX rotate=0;
	      FIDX node_old[6];

	      /* find nodm, define how to rotate the element to make
		 this the node[3] */
	      if (nodm==node[4])
		{
		  rotate=2;
		}
	      else if (nodm==node[5])
		{
		  rotate=1;
		}
	      else
		{
		  fprintf(stderr,"lame_zz_interpolate_sigma_gamma_tx: \n"
			  "correct rotation of element not possible ???\n");
		  fprintf(stderr,"elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm);
		  return FAIL;
		}	

	      
	      /* copy node to node_old */
	      for (i=0; i<6; i++)
		{
		  node_old[i]=node[i];
		}
	      /* write rotated enumeration to node */
	      for (i=0; i<3; i++)
		{
		  int rot_i=(i+rotate)%3;
		  node[rot_i  ] = node_old[i  ];
		  node[rot_i+3] = node_old[i+3];
		}

	      if (rotate==2)
		{ fprintf(stderr,"rot  elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm); 
		}
	    } /* end rotate element to make edi edge [0-3-1] (or
		 reversed) of local nodes */

	  /* define ldofs */
	  for(i=0; i<ncomp; i++)
	    for(j=0; j<bas_n; j++)
	      {
		ldofs[i*bas_n+j]=i*vx_nr + node[j];
	      }

	  /* loop over the nodes of the element */
	  for(pt=0; pt<bas_n; pt++)
	    {
	      int sgndetJac;
	      double ds;
	      double tvec[2];
	      double nvec[2];


	      /* evaluate the gradient at this point */
	      /* Jac=0 */
	      for (i=0; i<dim*dim; i++)
		Jac[i]=0.0;
	  
	      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	      for (i=0;i<bas_n; i++)
		{
		  for (j=0; j<dim; j++)
		    for (r=0; r<dim; r++)
		      {
			Jac[j*dim+r]+= 
			  m->vertex[ldofs[i]*vx_w + l_mctxVXSTRT+j]
			  * gradp[pt*bas_n*dim +i*dim +r];
		      }
		}

	      /* get detJac */
	      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	  
	      /* get Jacinv (here direct) */
	      Jacinv[0]=1.0/detJac*Jac[3];
	      Jacinv[1]=-1.0/detJac*Jac[1];
	      Jacinv[2]=-1.0/detJac*Jac[2];
	      Jacinv[3]=1.0/detJac*Jac[0];
	      
	      AdetJac = fabs(detJac);
	      /* get sgndetJac, case detJac=0 is not possible */
	      if (detJac>0) {sgndetJac = 1;}
	      else {sgndetJac = -1;}


	      /* real world gradient, not constant for quadratic elements */
	      /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	      fdim=dim;
	      fbas_n=bas_n;
	      dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim, 
		      &done, Jacinv, &fdim, &(gradp[pt*bas_n*dim]), &fdim,
		      &dzero, Jinvgrad, &fdim );

	      /* compute the tangent vector and ds=norm(tangent) */
	      ds=0.0;
	      for (r=0; r<dim; r++)
		{
		  tvec[r]=0;
		  tvec[r] += (*m).vertex[node[0]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+0*dim+0]);
		  tvec[r] += (*m).vertex[node[1]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+1*dim+0]);
		  tvec[r] += (*m).vertex[node[3]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+3*dim+0]);

		  ds += tvec[r]*tvec[r];
		}
	      ds = sqrt(ds);

	      /* compute the normal vector,
                 
                 rotate the normalised tangent by minus 90 degree,
                 adjust sign according to sign of the element Jacobian */
	      nvec[0] =  sgndetJac*tvec[1]/ds;
	      nvec[1] = -sgndetJac*tvec[0]/ds;

	      /* compute the gradient of the component of u */
	      for (i=0; i<dim*dim; i++) 
		{
		  lgradu[i] = 0.0;
		  lepsu[i]  = 0.0;
		  lsigmu[i] = 0.0;
		}

	      /* loop over all components of u */
	      for(cmp=0; cmp<ncomp; cmp++)
		{
		  /* gradu = sum u_j * gradphi_j */
		  for (r=0; r<dim; r++) /* derivate wrt x or y */
		    for (i=0; i<bas_n; i++)
		      {
			/* du_cmp/dx_r */
			lgradu[r*dim+cmp] += 
			  u->V[ldofs[cmp*bas_n+i]]*Jinvgrad[i*dim+r];
		      }
		}

	      /* eps_i,j= 1/2( du_i/dx_j + du_j/dx_i ) */
	      for (r=0; r<dim; r++) 
		for (i=0; i<dim; i++)
		  {
		    lepsu[r*dim+i]=0.5*(lgradu[r*dim+i] 
					+lgradu[i*dim+r]);
		  }

	      /* [sigma_11]   [ lam+2*mu     lam      0  ] [eps_11]
		 [sigma_22] = [    lam    lam+2*mu    0  ]*[eps_22]
		 [sigma_12]   [     0         0     2*mu ] [eps_12] */
	      lsigmu[0*dim+0]=
		(lambda+2*mu)*lepsu[0*dim+0]+lambda*lepsu[1*dim+1];
	      lsigmu[1*dim+1]=
		lambda*lepsu[0*dim+0]+(lambda+2*mu)*lepsu[1*dim+1];
	      lsigmu[0*dim+1]= 2*mu*lepsu[0*dim+1];
	      lsigmu[1*dim+0]= lsigmu[0*dim+1]; /* symmetry */


	      /* now project sigma to the tangential space Gamma 
		 sigma_Gamma := sigma - (sigma*n)*n^T */
	      {
		double sigm_n[2];
		for (r=0; r<dim; r++) 
		  {
		    sigm_n[r]=0.0;
		    for (i=0; i<dim; i++)
		      {
			sigm_n[r] += lsigmu[r*dim+i]*nvec[i];
		      }
		  }
		
		for (r=0; r<dim; r++) 
		  for (i=0; i<dim; i++)
		    {
		      lsigmu[r*dim+i] -= sigm_n[r]*nvec[i];
		    }
	      }
	      
	      /* add this to sigmGu */
	      for(cmp=0; cmp<ncomp; cmp++)
		{
		  for (r=0; r<dim; r++) /* derivate wrt x or y */
		    {
		      sigmGu->V[r*ncomp*vx_nr + cmp*vx_nr +ldofs[pt]]
			+=lsigmu[cmp*dim+r];
		    }
		} /* end loop over components */
	      
	      weight_count[ldofs[pt]]+=1.0;
	      
	    } /* end loop over local nodes */

	} /* end if Neumann boundary on finest mesh */
    } /* end loop over boundary */

  /* now divide by accumulated the weights */
  for(r=0; r<dim; r++)
    for(cmp=0; cmp<ncomp; cmp++)
      for(i=0; i<vx_nr; i++)
	{
	  if (weight_count[i]!=0.0)
	    {
	      sigmGu->V[r*ncomp*vx_nr + cmp*vx_nr +i]/=weight_count[i];
	    }
	}

  free(lsigmu);
  free(lepsu);
  free(lgradu);
  free(node);
  free(ldofs);
  free(Jinvgrad);
  free(Jacinv);
  free(Jac);
  free(weight_count);
  free(gradp);
  free(phi);

  free(bound_elem);

  return SUCCESS;
}



/*FUNCTION*/
int lame_eppler_conjecture_2010_rem3_3( struct mesh *m,
					double lambda,
					double mu,
					struct vector *u,
					struct vector *sdifference    
/* evaluates the square of the norm of the difference between the
   left-hand-side and right-hand-side in Remark 3.3 of 
     [K. Eppler, Hadamard shape gradients, 2010] 
   (pointwise) on the Neumann boundary i.e.

   difference = (f+H*g)-(-D(Ce(u))[n]*n - div_Gamma{Ce(u)_Gamma}

   sdifference = difference^T*difference

   if the conjecture holds true, this difference should converge to
   zero with mesh refinement, at least for sufficiently smooth bounded
   domains

   the higher order derivatives are evaluated using a ZZ-like
   postprocessing (averaging of gradients + interpolation of the
   averaged). the pointwise results are evaluated at the mesh points,
   thus can be interpolated to give an approximation of the
   distributions (or hopefully their function representants if they
   are smooth enough) 

   Input:  m         - the mesh
           lambda    - Lame' constant 
           mu        - Lame' constant 
           u         - vector containing the solution of a lame problem

   Output: sdifference
                     - square of norm of difference as described above
                       (non-zero only on Neumann boundary)

   Return: SUCCESS - success
           FAIL    - failure, see error message, output will not be
                     valid
*/
			 ){
  FIDX i, j, k, r, d, el, bd, pt, cmp, fu;
  int err, type;


  FIDX dim, basn, bas_n, vx_nr, el_w, vx_w, eg_w, bd_w, bd_nr, fc_w;


  /* double points_t1[2*3]={ 0.0, 0.0,
     1.0, 0.0,
     0.0, 1.0 }; */
  double points_t2[2*6]={ 0.0, 0.0,
			  1.0, 0.0,
			  0.0, 1.0,
			  0.5, 0.0,
			  0.5, 0.5,
			  0.0, 0.5 };
  double *points;

  int gamma_points[]={ 0, 1, 3};


  /* for FORTRAN calls: */
  char   fNoTrans='N';
  double done=1.0, dzero=0.0;
  int fdim, fbas_n;

  double *Jac, *Jacinv;  /* Jacobian of the element mapping and its
			    inverse */
  FIDX   *node;          /* list of local nodes */
  FIDX   *ldofs;         /* list of local dofs */
  double *Jinvgrad;      /* inverse Jacobian times gradphi */
  double detJac;         /* determinant of the Jacobian */
  double AdetJac;        /* abs(detJac) */
  double *phi, *gradp, *hessphi;
  double *DCeu;          /* the local derivatives of the stress tensor */
  double *div_gamma_Ceu; /* the local Gamma-divergence of the
			    Gamma-stress tensor */ 
  double *ldifference;   /* the local difference vector */
  double lsdiff;         /* the squared norm of local difference vector */

  FIDX   *bound_elem;     /* bound_elem[i] stores the id of the
			     element to which the i-th boundary entry
			     belongs */

  double *weight_count;  /* summs the weights of all contributions to
			    each node, (for now this will mean counts
			    the number of elements the node belongs to */
  struct vector sigmu;   /* for interpolation of sigma=Ce(u) by the
			    ZZ-like approach */
  struct vector sig_gamma;/*for interpolation of
			    sigma_gamma=Ce(u)_Gamma by the ZZ-like
			    approach */
 

  FIDX l_mctxELNOD1, l_mctxVXSTRT;

  type=2; /* other types not possible for now */

  vx_nr = m->vx_nr;
  dim   = m->dim;
  vx_w  = m->vx_w;
  el_w  = m->el_w;
  eg_w  = m->eg_w;
  fc_w  = m->fc_w;
  bd_w  = m->bd_w;
  bd_nr = m->bd_nr;

  if ((*u).len!=dim*vx_nr)
    {
      fprintf(stderr,
	      "lame_eppler_conjecture_2010_rem3_3: "
	      "size of u does not match dim*vx_nr\n");
      return FAIL;
    }

  if ((*sdifference).len!=vx_nr)
    {
      fprintf(stderr,
	      "lame_eppler_conjecture_2010_rem3_3: "
	      "size of sigmu does not match vx_nr\n");
      return FAIL;
    }

  switch (type)
    {
    case 2:
      l_mctxVXSTRT = MCT2VXSTRT;
      l_mctxELNOD1 = MCT2ELNOD1;
      points = points_t2;
      basn   = 6;
      break;
    default:
      fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: unknown type=%d\n",type);
      return FAIL;
    }
  

  /* get the ZZ-approximations of Ce(u) and Ce(u)_Gamma */
  err=vector_alloc(&sigmu, dim*dim*vx_nr);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc,
			   lame_eppler_conjecture_2010_rem3_3);
  err=lame_zz_interpolate_sigma_tx(m, type, dim, u, lambda, mu, &sigmu);
  FUNCTION_FAILURE_HANDLE( err, lame_zz_interpolate_sigma_tx,
			   lame_eppler_conjecture_2010_rem3_3);

  err=vector_alloc(&sig_gamma, dim*dim*vx_nr);
  FUNCTION_FAILURE_HANDLE( err, vector_alloc,
			   lame_eppler_conjecture_2010_rem3_3);
  err=lame_zz_interpolate_sigma_gamma_tx(m, type, dim, u, 
					 lambda, mu, &sig_gamma);
  FUNCTION_FAILURE_HANDLE( err, lame_eppler_conjecture_2010_rem3_3,
			   lame_eppler_conjecture_2010_rem3_3);


  /*
  {
    struct vector cheat_sigma;
    cheat_sigma.len=dim*vx_nr;

    cheat_sigma.V=&sigmu.V[0];
    err=mesh_write_solution_vtk_t2( m, &cheat_sigma, dim, NULL, 0,
				    25, "visual/eppler_sigma_u__1" );
    FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_t2,
			     lame_eppler_conjecture_2010_rem3_3);

    cheat_sigma.V=&sigmu.V[dim*vx_nr];
    err=mesh_write_solution_vtk_t2( m, &cheat_sigma, dim, NULL, 0,
				    25, "visual/eppler_sigma_u__2" );
    FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_t2,
			     lame_eppler_conjecture_2010_rem3_3);


    cheat_sigma.V=&sig_gamma.V[0];
    err=mesh_write_solution_vtk_t2( m, &cheat_sigma, dim, NULL, 0,
				    31, "visual/eppler_sigma_gamma_u__1" );
    FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_t2,
			     lame_eppler_conjecture_2010_rem3_3);
    cheat_sigma.V=&sig_gamma.V[dim*vx_nr];
    err=mesh_write_solution_vtk_t2( m, &cheat_sigma, dim, NULL, 0,
				    31, "visual/eppler_sigma_gamma_u__2" );
    FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_t2,
			     lame_eppler_conjecture_2010_rem3_3);
  }/*/
  

  /*************************************************************
     build the bound_elem list
  **************************************************************/

  TRY_MALLOC( bound_elem, bd_nr, FIDX, 
	      lame_eppler_conjecture_2010_rem3_3);

  /* init to "not found" (= -1) */
  for (i=0; i<bd_nr; i++) 
    {
      bound_elem[i] = -1;
    }

  /* loop over all elements, find those with boundary edges */
  for (el=0; el<(*m).fc_nr; el++)
    {
      FIDX eg,bd;
      for (i=0; i<3; i++)
	{
	  /* the i-th edge */
	  eg=(*m).face[el*fc_w+MCT2FCEDG1+i];
	  /* belongs to boundary? */
	  bd=(*m).edge[eg*eg_w+MCT2EGBND];
	  if (bd>=0)
	    {
#ifdef DEBUGFEINS
	      /* check sanity */
	      if (bd>=bd_nr) 
		{
		  fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: \n"
			  "bd out of range ???\n");
		  return FAIL;
		}
	      /* check sanity */
	      if (bound_elem[bd]!=-1) 
		{
		  fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: \n"
			  "boundary part of more than one element ???\n");
		  return FAIL;
		}
#endif
	      /* store this element for the boundary bd */
	      bound_elem[bd] = el;
	    }
	}
    }
  /**************************************************************
   * end build bound_elem list                                  *
   **************************************************************/

  /* init */
  /* initialise interpolation info  */
  err= eval_basis( dim, tria, type, basn, points,
		   &bas_n, &phi, &gradp, &hessphi);
  FUNCTION_FAILURE_HANDLE( err, eval_basis, lame_eppler_conjecture_2010_rem3_3);
  free( hessphi );

  if (basn!=bas_n)
    {
      fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: basn!=bas_n\n");
      return FAIL;
    }

  TRY_MALLOC( weight_count, vx_nr, double, lame_eppler_conjecture_2010_rem3_3);

  /* allocate memory for Jac, Jacinf */
  TRY_MALLOC( Jac, dim*dim, double, lame_eppler_conjecture_2010_rem3_3);
  TRY_MALLOC( Jacinv, dim*dim, double, lame_eppler_conjecture_2010_rem3_3);
  /* allocate memory for Jinvgrad */
  TRY_MALLOC( Jinvgrad, dim*bas_n, double, lame_eppler_conjecture_2010_rem3_3);
  /* allocate memory for ldofs */
  TRY_MALLOC( ldofs, dim*bas_n, FIDX, lame_eppler_conjecture_2010_rem3_3);
  TRY_MALLOC( node,  bas_n, FIDX, lame_eppler_conjecture_2010_rem3_3);
  TRY_MALLOC( DCeu, dim*dim*dim, double, lame_eppler_conjecture_2010_rem3_3);
  TRY_MALLOC( div_gamma_Ceu, dim, double, lame_eppler_conjecture_2010_rem3_3);
  TRY_MALLOC( ldifference, dim, double, lame_eppler_conjecture_2010_rem3_3);

  /* seto gradu to zero */
  for(i=0; i<sdifference->len; i++)
    {
      sdifference->V[i]=0.0;
    }

  /* seto weight_count to zero */
  for(i=0; i<vx_nr; i++)
    {
      weight_count[i]=0.0;
    }

  /* loop over boundary */
  for (bd=0; bd<(*m).bd_nr; bd++)
    {
      FIDX edi;
      edi   = (*m).bound[bd*bd_w + MCT2BDEDGE];
      /* only procede if edge belongs to current level, and is of
	 Neumman type (TYPE is 0,2 or 3) */
      if (((*m).edge[edi*eg_w+MCT2EGCHL1]==-1)&&
	  ( ((*m).bound[bd*bd_w + MCT2BDTYPE]==0)
	    ||((*m).bound[bd*bd_w + MCT2BDTYPE]==2)
	    ||((*m).bound[bd*bd_w + MCT2BDTYPE]==3) ))
	{
	  FIDX nodm;
	  el=bound_elem[bd];

	  if (el<0)
	    {
	      fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: \n"
		      "boundary part without element ???\n");
	      return FAIL;
	    }	    

	  /* set nodes for easier access */
	  for (i=0; i<bas_n; i++) 
	    {
	      node[i]= (*m).elem[el*el_w+MCT1ELNOD1+i];
	    }

	  /* make sure edi is the edge [0-3-1] (or reversed) of local
	     nodes */
	  nodm=(*m).edge[edi*eg_w+MCT2EGNODM];

	  /* fprintf(stderr,"orig elem=(%d %d %d %d %d %d)\n"
	     "nodm= %d\n", 
	     node[0],node[1],node[2],node[3],node[4],node[5],
	     nodm); /* */

	  if (nodm!=node[3])
	    {
	      FIDX rotate=0;
	      FIDX node_old[6];

	      /* find nodm, define how to rotate the element to make
		 this the node[3] */
	      if (nodm==node[4])
		{
		  rotate=2;
		}
	      else if (nodm==node[5])
		{
		  rotate=1;
		}
	      else
		{
		  fprintf(stderr,"lame_eppler_conjecture_2010_rem3_3: \n"
			  "correct rotation of element not possible ???\n");
		  fprintf(stderr,"elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm);
		  return FAIL;
		}	

	      
	      /* copy node to node_old */
	      for (i=0; i<6; i++)
		{
		  node_old[i]=node[i];
		}
	      /* write rotated enumeration to node */
	      for (i=0; i<3; i++)
		{
		  int rot_i=(i+rotate)%3;
		  node[rot_i  ] = node_old[i  ];
		  node[rot_i+3] = node_old[i+3];
		}

	      if (rotate==2)
		{ fprintf(stderr,"rot  elem=(%"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX" %"dFIDX")\n"
			  "nodm= %"dFIDX"\n", 
			  node[0],node[1],node[2],node[3],node[4],node[5],
			  nodm); 
		}
	    } /* end rotate element to make edi edge [0-3-1] (or
		 reversed) of local nodes */

	  /* define ldofs */
	  for(i=0; i<dim; i++)
	    for(j=0; j<bas_n; j++)
	      {
		ldofs[i*bas_n+j]=i*vx_nr + node[j];
	      }

	  /* loop over the nodes of the element */
	  for(k=0; k<3; k++)
	    {
	      int sgndetJac;
	      double ds;
	      double tvec[2];
	      double xvec[2];
	      double nvec[2];
	      double g[2], curv;

	      pt=gamma_points[k];

	      /* evaluate the gradient at this point */
	      /* Jac=0 */
	      for (i=0; i<dim*dim; i++)
		Jac[i]=0.0;
	  
	      /* Jac = sum_{i=nodes} vertex(i)*gradphi_i^T */
	      for (i=0;i<bas_n; i++)
		{
		  for (j=0; j<dim; j++)
		    for (r=0; r<dim; r++)
		      {
			Jac[j*dim+r]+= 
			  m->vertex[ldofs[i]*vx_w + l_mctxVXSTRT+j]
			  * gradp[pt*bas_n*dim +i*dim +r];
		      }
		}

	      /* get detJac */
	      detJac=Jac[0]*Jac[3]-Jac[2]*Jac[1];
	  
	      /* get Jacinv (here direct) */
	      Jacinv[0]=1.0/detJac*Jac[3];
	      Jacinv[1]=-1.0/detJac*Jac[1];
	      Jacinv[2]=-1.0/detJac*Jac[2];
	      Jacinv[3]=1.0/detJac*Jac[0];
	      
	      AdetJac = fabs(detJac);
	      /* get sgndetJac, case detJac=0 is not possible */
	      if (detJac>0) {sgndetJac = 1;}
	      else {sgndetJac = -1;}


	      /* real world gradient, not constant for quadratic elements */
	      /* Jinvgrad= Jacinv * gradphi[k,:,:]  */
	      fdim=dim;
	      fbas_n=bas_n;
	      dgemm_( &fNoTrans, &fNoTrans, &fdim, &fbas_n, &fdim, 
		      &done, Jacinv, &fdim, &(gradp[pt*bas_n*dim]), &fdim,
		      &dzero, Jinvgrad, &fdim );

	      /* compute the tangent vector and ds=norm(tangent) */
	      ds=0.0;
	      for (r=0; r<dim; r++)
		{
		  
		  xvec[r]=0;
		  xvec[r] += (*m).vertex[node[0]*vx_w+MCT2VXSTRT+r]
		    *phi[pt*bas_n +0];
		  xvec[r] += (*m).vertex[node[1]*vx_w+MCT2VXSTRT+r]
		    *phi[pt*bas_n +1];
		  xvec[r] += (*m).vertex[node[3]*vx_w+MCT2VXSTRT+r]
		    *phi[pt*bas_n +3];

		  tvec[r]=0;
		  tvec[r] += (*m).vertex[node[0]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+0*dim+0]);
		  tvec[r] += (*m).vertex[node[1]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+1*dim+0]);
		  tvec[r] += (*m).vertex[node[3]*vx_w+MCT2VXSTRT+r]
		    *(gradp[pt*bas_n*dim+3*dim+0]);

		  ds += tvec[r]*tvec[r];
		}
	      ds = sqrt(ds);

	      /* compute the normal vector,
                 
                 rotate the normalised tangent by minus 90 degree,
                 adjust sign according to sign of the element Jacobian */
	      nvec[0] =  sgndetJac*tvec[1]/ds;
	      nvec[1] = -sgndetJac*tvec[0]/ds;



	      /* get the boundary function data */
	      fu=(*m).bound[bd*bd_w + MCT2BDFNCT];


	      err=mesh_func_eval(m, fu, xvec, 0.0, 2, g, NULL, NULL );
	      FUNCTION_FAILURE_HANDLE( err, mesh_func_eval, 
				       lame_eppler_conjecture_2010_rem3_3);

	      err=curvature_2d_3points( &(*m).vertex[node[0]*vx_w+MCT2VXSTRT],
					&(*m).vertex[node[1]*vx_w+MCT2VXSTRT],
					&(*m).vertex[node[3]*vx_w+MCT2VXSTRT],
					&curv);
	      FUNCTION_FAILURE_HANDLE( err, curvature_2d_3points, 
				       lame_eppler_conjecture_2010_rem3_3);
	      

	      /* compute the second derivative incredients */
	      for (i=0; i<dim*dim*dim; i++) 
		{
		  DCeu[i] = 0.0;
		}

	      /* loop over all components of sigma u */
	      for(cmp=0; cmp<dim; cmp++) /* row of sigma */
		for (d=0; d<dim; d++)    /* column */
		  for (r=0; r<dim; r++) /* derivate wrt x or y */
		    for (i=0; i<bas_n; i++)
		      {
			/* d(Ceu_(cmp,d)/dx_r */
			DCeu[r*dim*dim+d*dim+cmp] += 
			  sigmu.V[d*dim*vx_nr+ldofs[cmp*bas_n+i]]
			  *Jinvgrad[i*dim+r];
		      }
		

	      /* div acts row wise on the tensors ??? , i.e. 
                 div(sigma)_i= sum_j d(sigma_i,j)/dx_j   ???
		 we assume that for now

		 we have
		 div_gamma(u) = div(u) - (sum_j n_j * du_j/dn)

		 thus
		 div_gamma(sigma)_i= 
   		   div(sigma)_i - (sum_j n_j * d(sigma_i,j)/dn) */
	      for (cmp=0; cmp<dim; cmp++)
		{
		  div_gamma_Ceu[cmp]=0.0;
		  
		  for (r=0; r<dim; r++) /* derivate wrt x or y */
		    for (i=0; i<bas_n; i++)
		      {
			/* +=d(Ceu_(cmp,r)/dx_r */
			div_gamma_Ceu[cmp] += 
			  sig_gamma.V[cmp*dim*vx_nr+ldofs[r*bas_n+i]]
			  *Jinvgrad[i*dim+r];
		      }

		  for (j=0; j<dim; j++) /* column of sig_gamma */
		    for (r=0; r<dim; r++) /* derivate wrt x or y */
		      for (i=0; i<bas_n; i++)
			{
			/* +=d(Ceu_(cmp,r)/dx_r */
			  div_gamma_Ceu[cmp] 
			    -= nvec[j] 
			    *sig_gamma.V[cmp*dim*vx_nr+ldofs[j*bas_n+i]]
			    *Jinvgrad[i*dim+r]*nvec[r];
			}
		}

	      
	      /* the local difference vector and its squared norm */
	      lsdiff=0.0;
	      for (cmp=0; cmp<dim; cmp++)
		{
		  ldifference[cmp]=0.0;

		  ldifference[cmp]+=div_gamma_Ceu[cmp];

		  for (r=0; r<dim; r++) 
		    for (i=0; i<dim; i++)
		      {
			ldifference[cmp]+=	
			  DCeu[r*dim*dim+i*dim+cmp] /* d(Ceu_(cmp,i)/dx_r */
			  * nvec[i] * nvec[r];
		      }

		  /* rhs in Lame equation: f=0 */
		  ldifference[cmp]+=curv*g[cmp];

		  lsdiff+=ldifference[cmp]*ldifference[cmp];
		}


	      sdifference->V[ldofs[pt]]+=lsdiff;
	      
	      weight_count[ldofs[pt]]+=1.0;
	      
	    } /* end loop over local nodes */

	} /* end if Neumann boundary on finest mesh */
    } /* end loop over boundary */
  
  /* now divide by accumulated the weights */
  for(i=0; i<vx_nr; i++)
    {
      if (weight_count[i]!=0.0)
	{
	  sdifference->V[i]/=weight_count[i];
	}
    }

  free(ldifference);
  free(div_gamma_Ceu);
  free(DCeu);
  free(node);
  free(ldofs);
  free(Jinvgrad);
  free(Jacinv);
  free(Jac);
  free(weight_count);
  free(gradp);
  free(phi);

  free(bound_elem);

  vector_free(&sigmu);
  vector_free(&sig_gamma);

  return SUCCESS;
}

