/***********************************************************************

    This file is part of
    FEINS, Finite Element Incompressible Navier-Stokes solver,
    which is expanding to a more general FEM solver and toolbox,
    Copyright (C) 2003--2013, Rene Schneider 
    <rene.schneider@mathematik.tu-chemnitz.de>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

    Minor contributions to this program (for example bug-fixes and
    minor extensions) by third parties automatically transfer the
    copyright to the general author of FEINS, to maintain the
    possibility of commercial re-licensing. If you contribute but wish
    to keep the copyright of your contribution, make that clear in
    your contribution!

    Non-GPL licenses to this program are available upon request from
    the author.

************************************************************************/
#include "feins_macros.h"
#include "datastruc.h"
#include "mesh.h"
#include "mesh3d.h"
#include "assembly.h"
#include "sparse.h"
#include "lin_solver.h"
#include "gen_aux.h"

#include <unistd.h>
#include <string.h>

/* for gettimeofday */
#include <sys/types.h>
#include <sys/time.h>

/* TIMEGET: read the current time with gettimeofday() and store it, in
   seconds (including the microsecond fraction), in `ti`.

   The macro works on variables of the calling scope, which must
   provide
     struct timeval tv;   int sec, musec;   double ti;
   `sec` and `musec` receive the whole seconds and the microseconds.

   The timezone argument is passed as NULL: POSIX leaves the behaviour
   unspecified for a non-NULL pointer.  The body is wrapped in
   do { } while (0) so that "TIMEGET;" is exactly one statement and can
   be used in an unbraced if/else. */
#define TIMEGET do {\
   gettimeofday(&tv,NULL); \
   sec=tv.tv_sec; \
   musec=tv.tv_usec; \
   ti=((double)sec+((double)musec)*1e-6); } while (0)


int main(int argc, char *argv[])
{
  char solver[50]="no_solver_set";
  struct sparse *Ks;
  struct vector rhs, *x, *old_x;
  struct mesh msh;
  struct projector1 P;
  
  struct multilvl ml;
  struct mgdata mg;
  struct coarse_mat *cmat;

  struct solver_settings set;

  int  err, iter, lvl, lmax, level0;
  FIDX i, vx_nr, vx_nr_old, my_sparse_cols_p_row;

  double resi, atol, rtol;
  double globest;
  FIDX nrmarked;
  int stop;

  struct timeval tv;
  struct timezone tz;
  int sec, musec;
  double t0, ti, t0l;

#ifdef HAVE_OPENMP
#pragma omp parallel
  {
#pragma omp master
    {
      printf("OpenMP num_threads=%d\n",omp_get_num_threads());
      printf("  should be number of phyisical cores, use environment variable "
	     " OMP_NUM_THREADS to modify,\n");
      printf("  e.g. (bash shell): \"export OMP_NUM_THREADS=4\" \n");
      printf("  e.g. (most other): \"set OMP_NUM_THREADS 4\" \n");
    }
  }
#endif


  if (argc>1)
    {
      printf("meshfile: %s \n", argv[1]);
    }
  else
    {
      printf("main: no mesh specified!\n");
      return FAIL;
    }
  err=mesh_read_file_e1( &msh, &set, argv[1] ); /* */
  FUNCTION_FAILURE_HANDLE( err, mesh_read_file_t1, main);

  my_sparse_cols_p_row= FEINS_SPARSE_COLS_P_ROW_E1;
  /* mesh_info(stdout, &msh); /* */

  TIMEGET;
  t0=ti;


  /* set refinement levels */
  level0  = set.refine_ini;
  lmax    = set.refine_steps;

  for (i=0; i<level0 ; i++)
    {
      err=mesh_refine_uniform_e1( &msh );
      FUNCTION_FAILURE_HANDLE( err, mesh_refine_uniform_e1, main);

      mesh_info(stdout, &msh);
    }
  if ( 1==1)
    {
      printf("switching to E2 (hierarchical P2) elements\n");
      err=mesh_switch_type_e1( &msh, 32);
      FUNCTION_FAILURE_HANDLE( err, mesh_switch_type_e1, main);
      my_sparse_cols_p_row=FEINS_SPARSE_COLS_P_ROW_E2;
    }


  /* reset hierarchy */
  msh.hi_nr=0;
  msh.lvl=-1;
  for (i=0; i<msh.eg_nr; i++)
    {
      msh.edge[i*msh.eg_w+MCE1EGLVL]=-1;
    }
  if (set.refine_type!=0)
    {
      err=mesh_prepare_adaptive_green_e1(&msh);
      FUNCTION_FAILURE_HANDLE( err, mesh_prepareadaptive_green_e1, main);
    }

  /* allocate memory for the sparse matrices  */
  TRY_MALLOC( Ks,  lmax+1, struct sparse, main);
  old_x=NULL;
  vx_nr_old=0;
  stop=0;
  lvl=-1;

  while ((stop==0)&&(lvl<=lmax))
    {
      lvl++;
      vx_nr=msh.vx_nr;

      
      /* allocate memory for x, rhs, K, P */
      TRY_MALLOC( x,  1, struct vector, main);
      err=vector_alloc( x, vx_nr );
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);
      
      err=vector_alloc( &rhs, vx_nr );
      FUNCTION_FAILURE_HANDLE( err, vector_alloc, main);

      err=sparse_flex_alloc( &Ks[lvl], vx_nr, my_sparse_cols_p_row );
      FUNCTION_FAILURE_HANDLE( err, sparse_flex_alloc, main);

      err=projector1_alloc( &P, vx_nr );
      FUNCTION_FAILURE_HANDLE( err, projector1_alloc, main);
      
      /* init multilvl data */
      err=multilvl_init_tx( &msh, 1, &ml, msh.type);
      FUNCTION_FAILURE_HANDLE( err, multilvl_init_tx, main);

      /* on coarse mesh, init x to zero, 
	 on finer meshes interpolate solution from old mesh to new */
      if (lvl==0)
	{
	  for (i=0; i<vx_nr; i++)
	    {
	      (*x).V[i]=0.0;
	    }
	}
      else
	{
	  FIDX node;

	  err=mg_init_tx( NULL, &msh, &ml, &mg, NULL);
	  FUNCTION_FAILURE_HANDLE( err, mg_init_tx, main);
      
	  /* Remark: vx_nr_old is the number of vertices on the coarser
	     mesh (lvl-1), vx_nr is the number on the finer mesh (lvl) */
	  for (i=0; i<vx_nr_old; i++)
	    {
	      MLVLFINDENTRY(node, i, lvl-1, ml);
	      if (node>=0)
		{
		  mg.xl[node]=(*old_x).V[i];
		}
	    }
	  
	  err=mg_interpolate_tx( &mg, lvl-1, lvl, mg.xl);	
	  FUNCTION_FAILURE_HANDLE( err, mg_interpolate_tx, main);  
	      
	  /* now copy back to the solution vector */    
	  for (i=0; i<vx_nr; i++)
	    {
	      MLVLFINDENTRY(node, i, lvl, ml);
	      if (node>=0)
		{
		  (*x).V[i]=mg.xl[node];
		}
	      else
		{
		  /* new hierarchical dofs are initialised to zero */
		  (*x).V[i]=0.0;
		  fprintf(stderr,"main: interpolation node not found\n");
		}
	    }
	  mg_free(&mg);

	  /* if (lvl==lmax)
	     {
	     err= mesh_write_solution_vtk_e1(&msh, NULL, 0, x, 1, 17+1,
	     "visual/mesh3D" );
	     FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_e1, main);
	     }/* */
	}

      TIMEGET;
      t0l=ti;

      /* assembly of equation system */
      err=assem_poisson_e1( &Ks[lvl], &rhs, x, &P, &msh );
      FUNCTION_FAILURE_HANDLE( err, assem_poisson_e1, main);

      /*err=sparse_mat_write_file(&Ks[lvl], "visual/3D_poisson_mat.txt");
      FUNCTION_FAILURE_HANDLE( err, sparse_mat_write_file, main); /* */


      /* define the multigrid data */
      err=mg_init_tx( Ks, &msh, &ml, &mg, &P);
      FUNCTION_FAILURE_HANDLE( err, mg_init_tx, main);

      /* use coarse grid solver */
      if (lvl==0)
	{
	  /* generate the coarse grid matrix */
	  FIDX *coarse_bc_nodes;
	  FIDX n_c_bc=0;
	  TRY_MALLOC(cmat, 1,  struct coarse_mat, main);
	  TRY_MALLOC(coarse_bc_nodes, P.len, FIDX, main);
	  for (i=0; i<P.len; i++)
	    {
	      FIDX dof, child;
	      dof=P.V[i];
	      MLVLFINDENTRY(child, dof, 0, ml);
	      if (child>=0)
		{
		  coarse_bc_nodes[n_c_bc]=child-ml.nlevl[0+1];
		  n_c_bc++;
		}
	    }
	  err=coarse_mat_set( &Ks[0], n_c_bc, coarse_bc_nodes,
			      1, cmat );
	  FUNCTION_FAILURE_HANDLE( err, coarse_mat_set, main );
	  free(coarse_bc_nodes);
	}

      /* define solver parameter for the V-cycle */
      mg.vcycles=1;
      mg.smooths=1;
      mg.CGC_scale=1.0;
      mg.cmat=cmat;

      atol = set.solver_atol;
      if (lvl == 0)
	{
	  rtol = set.solver_ini_rtol;
	}
      else
	{
	  rtol = set.solver_ref_rtol;
	} /* */


      /* solve equation system */
      resi=0.0;
      iter=0;

      struct vector rhs_hierarchy[100];
      if (0==1)
      {
	strcpy(solver,"test no-solver");
	struct vector *vecset, compare;
	double *vx_cpy;

	if (lvl==0)
	  {
	    vecset=x;
	  }
	else
	  {
	    vector_alloc(&compare, vx_nr);
	    vecset=&compare;
	  }	  
	vector_alloc(&rhs_hierarchy[lvl],vx_nr);
	for (i=0; i<msh.vx_nr; i++) rhs_hierarchy[lvl].V[i]=rhs.V[i];

	TRY_MALLOC(vx_cpy, vx_nr*3, double, main);

	for (i=0; i<msh.eg_nr; i++)
	  {
	    FIDX nodm=msh.edge[i*msh.eg_w+MCE2EGNODM];
	    if (msh.vertex[nodm*msh.vx_w+MCEXVXTYPE]==2.0)
	      {
		int j, dim=3;
		FIDX nod0=msh.edge[i*msh.eg_w+MCE1EGNOD1+0];
		FIDX nod1=msh.edge[i*msh.eg_w+MCE1EGNOD1+1];

		double xyz[3*3], value[3];

		for (j=0; j<dim; j++)
		  {
		    xyz[0*3+j]=msh.vertex[nod0*msh.vx_w+MCE1VXSTRT+j];
		    xyz[1*3+j]=msh.vertex[nod1*msh.vx_w+MCE1VXSTRT+j];
		    xyz[2*3+j]=0.5*(xyz[0*3+j]+xyz[1*3+j])
		      +msh.vertex[nodm*msh.vx_w+MCE1VXSTRT+j];

		    vx_cpy[nod0*3+j]=xyz[0*3+j];
		    vx_cpy[nod1*3+j]=xyz[1*3+j];
		    vx_cpy[nodm*3+j]=xyz[2*3+j];
		  }

		for (j=0; j<3; j++)
		  {
		    value[j]=
		      +xyz[j*3+0]
		      +xyz[j*3+0]*xyz[j*3+0]
		      +0.5*xyz[j*3+1]*xyz[j*3+1]
		      +1.5*xyz[j*3+2]*xyz[j*3+2]
		      +xyz[j*3+0]*xyz[j*3+1]
		      -xyz[j*3+1]*xyz[j*3+2]
		      ;
		  }
		(*vecset).V[nod0] = value[0];
		(*vecset).V[nod1] = value[1];
		(*vecset).V[nodm] = value[2]-0.5*(value[0]+value[1]);
	      }
	  }
	/* check if the elhier entries make sense */
	if (0==1)
	  {
	    for (i=0; i<msh.eh_nr; i++)
	      {
		int j;
		int *fatherpairs, nfatherpairs;
		int fatherpairs_uni[]={0,4, /* 0 */  4,6,   6,0,
				       1,5, /* 3 */  5,4,   4,1,
				       2,6, /* 6 */  6,5,   5,2,
				       0,7, /* 9 */  4,7,   6,7,
				       1,8, /* 12 */ 5,8,   4,8,
				       2,9, /* 15 */ 6,9,   5,9,
				       7,8, /* 18 */ 8,9,   9,7,
				       7,3, /* 21 */ 8,3,   9,3,
				       4,9  /* 24 */ };
		int fatherpairs_bi[] ={1,8, /* 0 */  3,8,   0,8,
				       2,8  /* 3 */ };
		int ntriples = 6;
		int hier_triples[]={4, 0, 1, /* mid-node, end-nodes 0, 1 */
				    5, 1, 2,
				    6, 2, 0,
				    7, 0, 3,
				    8, 1, 3,
				    9, 2, 3 };

		if (msh.elhier[i*msh.eh_w+MCE2EHCHL1+4] >-1)
		  {
		    fatherpairs=fatherpairs_uni;
		    nfatherpairs=25;
		  }
		else
		  {
		    fatherpairs=fatherpairs_bi;
		    nfatherpairs=4;
		  }
		for (j=0; j<ntriples; j++)
		  {
		    int d;
		    double diff_d;
		    double diff_sum=0.0;
		    FIDX nodm=msh.elhier[i*msh.eh_w+MCE2EHFAT1
					 +hier_triples[j*3+0]];
		    FIDX nod0=msh.elhier[i*msh.eh_w+MCE2EHFAT1
					 +hier_triples[j*3+1]];
		    FIDX nod1=msh.elhier[i*msh.eh_w+MCE2EHFAT1
					 +hier_triples[j*3+2]];

		    for (d=0; d<3; d++)
		      {
			diff_d=vx_cpy[nodm*3+d]
			  -0.5*(vx_cpy[nod0*3+d]+vx_cpy[nod1*3+d]);
			diff_sum+=diff_d*diff_d;
		      }
		    if (diff_sum>1e-10)
		      {
			fprintf(stderr,"elhier %d not OK fathers\n", i);
		      }
		  }
		for (j=0; j<nfatherpairs; j++)
		  {
		    int d;
		    double diff_d;
		    double diff_sum=0.0;
		    FIDX nodm=msh.elhier[i*msh.eh_w+MCE2EHCHL1+j];
		    FIDX nod0=msh.elhier[i*msh.eh_w+MCE2EHFAT1
					 +fatherpairs[j*2+0]];
		    FIDX nod1=msh.elhier[i*msh.eh_w+MCE2EHFAT1
					 +fatherpairs[j*2+1]];

		    for (d=0; d<3; d++)
		      {
			diff_d=vx_cpy[nodm*3+d]
			  -0.5*(vx_cpy[nod0*3+d]+vx_cpy[nod1*3+d]);
			diff_sum+=diff_d*diff_d;
		      }
		    if (diff_sum>0*1e-10)
		      {
			fprintf(stderr,"elhier %d not OK children\n", i);
		      }
		  }
	      }
	  } /* check elhier entries */

	/* check where the vectors differ */
	if (lvl!=0)
	  {
	    int compare_level;
	    for (i=0; i<vx_nr; i++)
	      {
		if (fabs((*x).V[i]-compare.V[i])>1e-10)
		  {
		    fprintf(stderr,
			    "compare vx%03d  x=%10.3e cmp=%10.3e diff=%10.3e\n",
			    i, (*x).V[i], compare.V[i], (*x).V[i]-compare.V[i]);
		  }
	      }
	    vector_free(&compare);

	    /* copy rhs to mg multilvl vector, restrict to level 0,
	       compare with original */
	    for (i=0; i<vx_nr; i++)
	      {
		FIDX node;
		MLVLFINDENTRY(node, i, lvl, ml);
		mg.xl[node]=rhs.V[i];
	      }
	    

	    compare_level=lvl-1;
	    err=mg_restrict_tx( &mg, lvl, compare_level, mg.xl);	
	    FUNCTION_FAILURE_HANDLE( err, mg_interpolate_tx, main);  

	    for (i=0; i<rhs_hierarchy[compare_level].len; i++)
	      {
		FIDX node;
		MLVLFINDENTRY(node, i, compare_level, ml);
		if (fabs(mg.xl[node]-rhs_hierarchy[compare_level].V[i])>1e-10)
		  {
		    fprintf(stderr,
			    "compare rhs vx%03d  rhs_restr=%10.3e "
			    "rhs0=%10.3e diff=%10.3e\n",
			    i, mg.xl[node], rhs_hierarchy[compare_level].V[i], 
			    mg.xl[node]-rhs_hierarchy[compare_level].V[i]);
		  }
	      }
	  }
      }
      
      /* strcpy(solver,"PCG_no");
	 err=PCG( 10000, 2, atol, rtol, 1, x, &resi, &iter, sparse_mul_mat_vec, 
	 projector1_no_precon, &Ks[lvl], &rhs, &P );
	 FUNCTION_FAILURE_HANDLE( err, PCG, main); /* */

      strcpy(solver,"PCG_MG");
      err=PCG( 10000, 2, atol, rtol, 1, x, &resi, &iter, sparse_mul_mat_vec, 
	       gen_proj_MG_tx, &Ks[lvl], &rhs, &mg );
      FUNCTION_FAILURE_HANDLE( err, PCG, main); /* */

      TIMEGET;
  
      printf("%s: %4d iterations, |res|=%8.2e, vx_nr= %9"dFIDX",  "
	     "time_level=%10.3e, time_total=%10.3e\n",
	     solver, iter, resi, vx_nr, ti-t0l, ti-t0); /* */


      mg.cmat=NULL; /* prevent it from beeing destroyed, so we don't
		       need to recompute it every time */
      mg_free(&mg);
      multilvl_free(&ml);


      projector1_free(&P);
      vector_free(&rhs);

      /* Apply mesh refinement */
      {
	FIDX *marker;
	TRY_MALLOC( marker, msh.el_nr, FIDX, main);
	switch(set.refine_type)
	  {
	  case 0:
	    /* Uniform mesh refinement */
	    globest=10*set.refine_stop; /* init to save value */
	    /* OPTIONAL: estimating the error */
	    /* err=lame_error_est_residual_t2(&msh, x, lambda, mu,
	       marker, &nrmarked, &globest,
	       &set, 2); 
	       FUNCTION_FAILURE_HANDLE( err, lame_error_est_residual_t2,
	       main);
	    */
	    /* err=gen_error_est_ZZ_tx(&msh, 1, x, marker, &nrmarked,
	       &globest, &set, msh.type); 
	       FUNCTION_FAILURE_HANDLE( err, gen_error_est_ZZ_tx, main);/* */
	    
	    if ((lvl>=lmax)||
		((globest<set.refine_stop)||(8*vx_nr>set.refine_max_vx)))
	      {
		stop=1;
		lmax=lvl;
	      }
	    else
	      {
		/* refine the mesh */
		err=mesh_refine_uniform_e1( &msh);
		FUNCTION_FAILURE_HANDLE( err, mesh_refine_uniform_e1, main);
	      }
	    break;
	    /* case 1: */
	    /* no residual estimator yet for this problem */
	    /* Adaptive mesh refinement , residual error estimator */
	    /* err=lame_error_est_residual_t2(&msh, x, lambda, mu,
	       marker, &nrmarked, &globest,
	       &set, 2); 
	       FUNCTION_FAILURE_HANDLE( err, lame_error_est_residual_t2, main);
	       
	       if ((lvl>=lmax)||
	       ((globest<set.refine_stop)||
	       (vx_nr+1*nrmarked>set.refine_max_vx)))
	       {
	       stop=1;
	       lmax=lvl;
	       }
	       else
	       {
	       /* refine the mesh */
	    /* err=mesh_refine_adaptive_e1( &msh, marker);
	       FUNCTION_FAILURE_HANDLE( err, mesh_refine_adaptive_e1, main);
	       }
	       break;*/
	  case 2:
	    /* Adaptive mesh refinement, ZZ error estimator */
	    err=gen_error_est_ZZ_tx(&msh, 1, x, marker, &nrmarked,
				    &globest, &set, msh.type); 
	    FUNCTION_FAILURE_HANDLE( err, gen_error_est_ZZ_tx, main);

	    /*#warning "for tests, mark first element"
	      for (i=0; i<msh.el_nr; i++)
	      marker[i]=0;
	      marker[0]=1;
	      nrmarked=1; /* */
	    
	    /* #warning "for tests, mark all elements"
	       for (i=0; i<msh.el_nr; i++)
	       marker[i]=1;
	       nrmarked=msh.el_nr; /* */
	    
	    if ((lvl>=lmax)||
		((globest<set.refine_stop)||
		 (vx_nr+1*nrmarked>set.refine_max_vx)))
	      {
		stop=1;
		lmax=lvl;
	      }
	    else
	      {
		/* refine the mesh */
		err=mesh_refine_adaptive_e1( &msh, marker);
		FUNCTION_FAILURE_HANDLE( err, mesh_refine_adaptive_e1, main);
	      }
	    break;
	  default:
	    printf("Error in main (test_assme3D.c): unknown refinement type!"
		   " type = %"dFIDX"\n",  set.refine_type);
	    return FAIL;
	  }
	free(marker);
	
	/* mesh_info(stdout, &msh); /* */
      }

      if (stop==1)
	{
	  /* err= mesh_write_solution_vtk_e1(&msh, NULL, 0, x, 1, 17+1,
					  "visual/solution" );
	  FUNCTION_FAILURE_HANDLE( err, mesh_write_solution_vtk_e1, main);/* */
	}

      /* keep current solution vector as old solution */
      if (old_x!=NULL)
	{
	  vector_free(old_x);
	  free(old_x);
	}
      old_x=x;
      vx_nr_old=vx_nr;
      x=NULL;
    } /* for lvl */


  /* free remaining memory */
  vector_free(old_x);
  free(old_x);
  
  coarse_mat_free(cmat);
  free(cmat);

  for (lvl=lmax; lvl>=0; lvl--)
    {
      sparse_free(&Ks[lvl]);
    }
  free(Ks);

  mesh_free(&msh);

  return 1;
}
