/* Copyright (c) 2003-2004 Ecole centrale de Lyon
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "fdtd3d.h"
#include "utils.h"
#include "output.h"
#include "block.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_MPI
#include <mpi.h>
#endif

// Note: we store HH=H/cdtdx instead of H, which saves one multiplication per point


// Linear index helpers used by the browse_block_* macros below.
// A cell (ix,iy,iz) of a block is stored at (ix*ny+iy)*nz+iz; `pos` is that
// index for the current cell of block `b`. All of these macros capture
// pos, ix, iy, iz, b and the neighbor block pointers from the expansion site.
//
// Next cell along +x, +y, +z inside the current block:
#define pos_xp (pos+b->ny*b->nz)
#define pos_yp (pos+b->nz)
#define pos_zp (pos+1)
// Cell on the first x / y / z plane of the +x / +y / +z neighbor block
// (bxp, byp, bzp) that faces the current cell:
#define pos_xpn (iy*bxp->nz+iz)
#define pos_ypn (ix*byp->ny*byp->nz+iz)
#define pos_zpn ((ix*bzp->ny+iy)*bzp->nz)

// Previous cell along -x, -y, -z inside the current block:
#define pos_xm (pos-b->ny*b->nz)
#define pos_ym (pos-b->nz)
#define pos_zm (pos-1)
// Cell on the last x / y / z plane of the -x / -y / -z neighbor block
// (bxm, bym, bzm) that faces the current cell:
#define pos_xmn (((bxm->nx-1)*bxm->ny+iy)*bxm->nz+iz)
#define pos_ymn (((ix+1)*bym->ny-1)*bym->nz+iz)
#define pos_zmn ((ix*bzm->ny+iy+1)*bzm->nz-1)


// Warning: rotE is in fact -rot(E)

// browse_block_with_rotE(INSTRUCTIONS): visit every cell of block `b` in
// increasing linear order (pos goes from 0 to b->nx*b->ny*b->nz-1) and run
// INSTRUCTIONS once per cell, after setting rotEx, rotEy, rotEz from the
// forward differences of Ex, Ey, Ez at that cell.
//
// The traversal is unrolled by hand: cells that are not on an upper face
// only read from `b`, while cells with ix==b->nx-1, iy==b->ny-1 or
// iz==b->nz-1 take the missing +x/+y/+z neighbor from the adjacent block
// (bxp, byp, bzp) through pos_xpn/pos_ypn/pos_zpn. The edge and corner
// statements placed after a loop rely on that loop's index having been left
// at its final value (n-1).
//
// Names that must be in scope at the expansion site: b, bxp, byp, bzp,
// Ex, Ey, Ez, pos, ix, iy, iz, rotEx, rotEy, rotEz.
// The expansion is a plain statement sequence (not wrapped in do/while) and
// ends with an assert that exactly b->nx*b->ny*b->nz cells were visited.
// Do not add // comments inside the macro body: with the line continuations
// they would swallow the following lines.
#define browse_block_with_rotE(INSTRUCTIONS)                       \
  pos=0;                                                           \
  for(ix=0;ix<b->nx-1;ix++)                                        \
    {                                                              \
      for(iy=0;iy<b->ny-1;iy++)                                    \
	{                                                          \
	  for(iz=0;iz<b->nz-1;iz++)                                \
	    {                                                      \
	      rotEx=Ey[pos_zp]-Ey[pos]+Ez[pos]-Ez[pos_yp];         \
	      rotEy=Ez[pos_xp]-Ez[pos]+Ex[pos]-Ex[pos_zp];         \
	      rotEz=Ex[pos_yp]-Ex[pos]+Ey[pos]-Ey[pos_xp];         \
	      INSTRUCTIONS                                         \
	      pos++;                                               \
	    }                                                      \
	  rotEx=bzp->Ey[pos_zpn]-Ey[pos]+Ez[pos]-     Ez[pos_yp] ; \
	  rotEy=     Ez[pos_xp] -Ez[pos]+Ex[pos]-bzp->Ex[pos_zpn]; \
	  rotEz=     Ex[pos_yp] -Ex[pos]+Ey[pos]-     Ey[pos_xp] ; \
	  INSTRUCTIONS                                             \
	  pos++;                                                   \
	}                                                          \
      for(iz=0;iz<b->nz-1;iz++)                                    \
	{                                                          \
	  rotEx=     Ey[pos_zp] -Ey[pos]+Ez[pos]-byp->Ez[pos_ypn]; \
	  rotEy=     Ez[pos_xp] -Ez[pos]+Ex[pos]-     Ex[pos_zp] ; \
	  rotEz=byp->Ex[pos_ypn]-Ex[pos]+Ey[pos]-     Ey[pos_xp] ; \
	  INSTRUCTIONS                                             \
	  pos++;                                                   \
	}                                                          \
      rotEx=bzp->Ey[pos_zpn]-Ey[pos]+Ez[pos]-byp->Ez[pos_ypn];     \
      rotEy=     Ez[pos_xp] -Ez[pos]+Ex[pos]-bzp->Ex[pos_zpn];     \
      rotEz=byp->Ex[pos_ypn]-Ex[pos]+Ey[pos]-     Ey[pos_xp] ;     \
      INSTRUCTIONS                                                 \
      pos++;                                                       \
    }                                                              \
  for(iy=0;iy<b->ny-1;iy++)                                        \
    {                                                              \
      for(iz=0;iz<b->nz-1;iz++)                                    \
	{                                                          \
	  rotEx=     Ey[pos_zp] -Ey[pos]+Ez[pos]-     Ez[pos_yp] ; \
	  rotEy=bxp->Ez[pos_xpn]-Ez[pos]+Ex[pos]-     Ex[pos_zp] ; \
	  rotEz=     Ex[pos_yp] -Ex[pos]+Ey[pos]-bxp->Ey[pos_xpn]; \
	  INSTRUCTIONS                                             \
	  pos++;                                                   \
	}                                                          \
      rotEx=bzp->Ey[pos_zpn]-Ey[pos]+Ez[pos]-     Ez[pos_yp] ;     \
      rotEy=bxp->Ez[pos_xpn]-Ez[pos]+Ex[pos]-bzp->Ex[pos_zpn];     \
      rotEz=     Ex[pos_yp] -Ex[pos]+Ey[pos]-bxp->Ey[pos_xpn];     \
      INSTRUCTIONS                                                 \
      pos++;                                                       \
    }                                                              \
  for(iz=0;iz<b->nz-1;iz++)                                        \
    {                                                              \
      rotEx=     Ey[pos_zp] -Ey[pos]+Ez[pos]-byp->Ez[pos_ypn];     \
      rotEy=bxp->Ez[pos_xpn]-Ez[pos]+Ex[pos]-     Ex[pos_zp] ;     \
      rotEz=byp->Ex[pos_ypn]-Ex[pos]+Ey[pos]-bxp->Ey[pos_xpn];     \
      INSTRUCTIONS                                                 \
      pos++;                                                       \
    }                                                              \
  rotEx=bzp->Ey[pos_zpn]-Ey[pos]+Ez[pos]-byp->Ez[pos_ypn];         \
  rotEy=bxp->Ez[pos_xpn]-Ez[pos]+Ex[pos]-bzp->Ex[pos_zpn];         \
  rotEz=byp->Ex[pos_ypn]-Ex[pos]+Ey[pos]-bxp->Ey[pos_xpn];         \
  INSTRUCTIONS                                                     \
  pos++;                                                           \
  assert(pos==b->nx*b->ny*b->nz)


// browse_block_with_rotH(INSTRUCTIONS): visit every cell of block `b` in
// increasing linear order (pos goes from 0 to b->nx*b->ny*b->nz-1) and run
// INSTRUCTIONS once per cell, after setting rotHx, rotHy, rotHz from the
// backward differences of HHx, HHy, HHz at that cell.
//
// The traversal is unrolled by hand: cells with ix==0, iy==0 or iz==0 take
// the missing -x/-y/-z neighbor from the adjacent block (bxm, bym, bzm)
// through pos_xmn/pos_ymn/pos_zmn; all other cells only read from `b`.
// ix, iy and iz are reset explicitly before each boundary statement because
// the pos_?mn macros read them.
//
// Names that must be in scope at the expansion site: b, bxm, bym, bzm,
// HHx, HHy, HHz, pos, ix, iy, iz, rotHx, rotHy, rotHz.
// The expansion is a plain statement sequence (not wrapped in do/while) and
// ends with an assert that exactly b->nx*b->ny*b->nz cells were visited.
// Do not add // comments inside the macro body: with the line continuations
// they would swallow the following lines.
#define browse_block_with_rotH(INSTRUCTIONS)                           \
  pos=0;                                                               \
  ix=0;iy=0;iz=0;                                                      \
  rotHx=HHz[pos]-bym->HHz[pos_ymn]+bzm->HHy[pos_zmn]-HHy[pos];         \
  rotHy=HHx[pos]-bzm->HHx[pos_zmn]+bxm->HHz[pos_xmn]-HHz[pos];         \
  rotHz=HHy[pos]-bxm->HHy[pos_xmn]+bym->HHx[pos_ymn]-HHx[pos];         \
  INSTRUCTIONS                                                         \
  pos++;                                                               \
  for(iz=1;iz<b->nz;iz++)                                              \
    {                                                                  \
      rotHx=HHz[pos]-bym->HHz[pos_ymn]+     HHy[pos_zm] -HHy[pos];     \
      rotHy=HHx[pos]-     HHx[pos_zm] +bxm->HHz[pos_xmn]-HHz[pos];     \
      rotHz=HHy[pos]-bxm->HHy[pos_xmn]+bym->HHx[pos_ymn]-HHx[pos];     \
      INSTRUCTIONS                                                     \
      pos++;                                                           \
    }                                                                  \
  for(iy=1;iy<b->ny;iy++)                                              \
    {                                                                  \
      iz=0;                                                            \
      rotHx=HHz[pos]-     HHz[pos_ym] +bzm->HHy[pos_zmn]-HHy[pos];     \
      rotHy=HHx[pos]-bzm->HHx[pos_zmn]+bxm->HHz[pos_xmn]-HHz[pos];     \
      rotHz=HHy[pos]-bxm->HHy[pos_xmn]+     HHx[pos_ym] -HHx[pos];     \
      INSTRUCTIONS                                                     \
      pos++;                                                           \
      for(iz=1;iz<b->nz;iz++)                                          \
	{                                                              \
	  rotHx=HHz[pos]-     HHz[pos_ym] +     HHy[pos_zm] -HHy[pos]; \
	  rotHy=HHx[pos]-     HHx[pos_zm] +bxm->HHz[pos_xmn]-HHz[pos]; \
	  rotHz=HHy[pos]-bxm->HHy[pos_xmn]+     HHx[pos_ym] -HHx[pos]; \
	  INSTRUCTIONS                                                 \
	  pos++;                                                       \
	}                                                              \
    }                                                                  \
  for(ix=1;ix<b->nx;ix++)                                              \
    {                                                                  \
      iy=0;iz=0;                                                       \
      rotHx=HHz[pos]-bym->HHz[pos_ymn]+bzm->HHy[pos_zmn]-HHy[pos];     \
      rotHy=HHx[pos]-bzm->HHx[pos_zmn]+     HHz[pos_xm] -HHz[pos];     \
      rotHz=HHy[pos]-     HHy[pos_xm] +bym->HHx[pos_ymn]-HHx[pos];     \
      INSTRUCTIONS                                                     \
      pos++;                                                           \
      for(iz=1;iz<b->nz;iz++)                                          \
	{                                                              \
	  rotHx=HHz[pos]-bym->HHz[pos_ymn]+     HHy[pos_zm] -HHy[pos]; \
	  rotHy=HHx[pos]-     HHx[pos_zm] +     HHz[pos_xm] -HHz[pos]; \
	  rotHz=HHy[pos]-     HHy[pos_xm] +bym->HHx[pos_ymn]-HHx[pos]; \
          INSTRUCTIONS                                                 \
	  pos++;                                                       \
	}                                                              \
      for(iy=1;iy<b->ny;iy++)                                          \
	{                                                              \
	  iz=0;                                                        \
	  rotHx=HHz[pos]-     HHz[pos_ym] +bzm->HHy[pos_zmn]-HHy[pos]; \
	  rotHy=HHx[pos]-bzm->HHx[pos_zmn]+     HHz[pos_xm] -HHz[pos]; \
	  rotHz=HHy[pos]-     HHy[pos_xm] +     HHx[pos_ym] -HHx[pos]; \
	  INSTRUCTIONS                                                 \
	  pos++;                                                       \
	  for(iz=1;iz<b->nz;iz++)                                      \
	    {                                                          \
	      rotHx=HHz[pos]-HHz[pos_ym]+HHy[pos_zm]-HHy[pos];         \
	      rotHy=HHx[pos]-HHx[pos_zm]+HHz[pos_xm]-HHz[pos];         \
	      rotHz=HHy[pos]-HHy[pos_xm]+HHx[pos_ym]-HHx[pos];         \
	      INSTRUCTIONS                                             \
	      pos++;                                                   \
	    }                                                          \
	}                                                              \
    }                                                                  \
  assert(pos==b->nx*b->ny*b->nz)


// Update the HH field of block `b` for one step, for a block without PML:
// at every cell, each HH component is incremented by the matching rotE
// component computed by browse_block_with_rotE.
// The local names below (pos, ix, iy, iz, bxp, byp, bzp, Ex, Ey, Ez,
// rotEx, rotEy, rotEz) are captured by the macro and must keep these names.
static void iter_H_plain_block(varBlock *b)
{
  int pos;
  // TODO: benchmark when we replace pos by a macro based on ix,iy,iz.
  int ix,iy,iz;
  varBlock *bxp=b->block_xp;
  varBlock *byp=b->block_yp;
  varBlock *bzp=b->block_zp;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double rotEx,rotEy,rotEz;

  browse_block_with_rotE(
			 HHx[pos]+=rotEx;
			 HHy[pos]+=rotEy;
			 HHz[pos]+=rotEz;
			 );
}

// Update the E field of block `b` for one step, for a block with neither
// PML nor absorption data: at every cell, each E component is incremented
// by cn[pos] times the matching rotH component computed by
// browse_block_with_rotH.
// The local names below (pos, ix, iy, iz, bxm, bym, bzm, HHx, HHy, HHz,
// rotHx, rotHy, rotHz) are captured by the macro and must keep these names.
static void iter_E_plain_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *cn=b->cn;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]+=cn[pos]*rotHx;
			 Ey[pos]+=cn[pos]*rotHy;
			 Ez[pos]+=cn[pos]*rotHz;
			 );
}

// Update the E field of block `b` for one step, for a block that carries
// absorption data (b->absor). For each component and each cell:
//   E_new = ( (1-dec)*E + cn*rotH - SQUARE(dec*dec)*sumE ) / (1+dec)
// and the new E value is then added to the running sum sumE, so the sum
// used in the formula is the one accumulated over previous steps.
// NOTE(review): the argument of SQUARE is already dec*dec; if SQUARE(x)
// expands to x*x this weights sumE by dec^4 -- confirm that is intended.
// The local names used by browse_block_with_rotH must keep their names.
static void iter_E_abs_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumEx=b->absor->sumEx;
  double *sumEy=b->absor->sumEy;
  double *sumEz=b->absor->sumEz;
  double *cn=b->cn;
  double *dec=b->absor->dec;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]=( (1-dec[pos])*Ex[pos] + cn[pos]*rotHx
				   - SQUARE(dec[pos]*dec[pos])*sumEx[pos] )/(1+dec[pos]);
			 sumEx[pos]+=Ex[pos];
			 Ey[pos]=( (1-dec[pos])*Ey[pos] + cn[pos]*rotHy
				   - SQUARE(dec[pos]*dec[pos])*sumEy[pos] )/(1+dec[pos]);
			 sumEy[pos]+=Ey[pos];
			 Ez[pos]=( (1-dec[pos])*Ez[pos] + cn[pos]*rotHz
				   - SQUARE(dec[pos]*dec[pos])*sumEz[pos] )/(1+dec[pos]);
			 sumEz[pos]+=Ez[pos];
			 );
}

// Update the HH field of a BLOCK_PML_FACE_X block for one step.
// Per cell:
//   HHx += x_rotcoef*rotEx + x_sumrotcoef*sumrotEx   (sum of previous steps)
//   sumrotEx += rotEx
//   HHy = HHy*x_decay + rotEy
//   HHz = HHz*x_decay + rotEz
// All coefficients come from b->PML. The local names used by
// browse_block_with_rotE must keep their names.
static void iter_H_PML_x_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxp=b->block_xp;
  varBlock *byp=b->block_yp;
  varBlock *bzp=b->block_zp;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotEx=b->PML->sumrotEx;
  double decay=b->PML->x_decay;
  double x_rotcoef=b->PML->x_rotcoef;
  double x_sumrotcoef=b->PML->x_sumrotcoef;
  double rotEx,rotEy,rotEz;

  browse_block_with_rotE(
			 HHx[pos]+=x_rotcoef*rotEx+x_sumrotcoef*sumrotEx[pos];
			 sumrotEx[pos]+=rotEx;
			 HHy[pos]=HHy[pos]*decay+rotEy;
			 HHz[pos]=HHz[pos]*decay+rotEz;
			 );
}

// Update the E field of a BLOCK_PML_FACE_X block for one step.
// Per cell:
//   Ex += cn*(x_rotcoef*rotHx + x_sumrotcoef*sumrotHx)  (sum of previous steps)
//   sumrotHx += rotHx
//   Ey = Ey*x_decay + cn*rotHy
//   Ez = Ez*x_decay + cn*rotHz
// sumrotHx aliases b->PML->sumrotHHx. The local names used by
// browse_block_with_rotH must keep their names.
static void iter_E_PML_x_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotHx=b->PML->sumrotHHx;
  double *cn=b->cn;
  double decay=b->PML->x_decay;
  double x_rotcoef=b->PML->x_rotcoef;
  double x_sumrotcoef=b->PML->x_sumrotcoef;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]+=cn[pos]*(x_rotcoef*rotHx+x_sumrotcoef*sumrotHx[pos]);
			 sumrotHx[pos]+=rotHx;
			 Ey[pos]=Ey[pos]*decay+cn[pos]*rotHy;
			 Ez[pos]=Ez[pos]*decay+cn[pos]*rotHz;
			 );
}

// Update the HH field of a BLOCK_PML_FACE_Y block for one step.
// Per cell:
//   HHx = HHx*y_decay + rotEx
//   HHy += y_rotcoef*rotEy + y_sumrotcoef*sumrotEy   (sum of previous steps)
//   sumrotEy += rotEy
//   HHz = HHz*y_decay + rotEz
// All coefficients come from b->PML. The local names used by
// browse_block_with_rotE must keep their names.
static void iter_H_PML_y_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxp=b->block_xp;
  varBlock *byp=b->block_yp;
  varBlock *bzp=b->block_zp;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotEy=b->PML->sumrotEy;
  double decay=b->PML->y_decay;
  double y_rotcoef=b->PML->y_rotcoef;
  double y_sumrotcoef=b->PML->y_sumrotcoef;
  double rotEx,rotEy,rotEz;

  browse_block_with_rotE(
			 HHx[pos]=HHx[pos]*decay+rotEx;
			 HHy[pos]+=y_rotcoef*rotEy+y_sumrotcoef*sumrotEy[pos];
			 sumrotEy[pos]+=rotEy;
			 HHz[pos]=HHz[pos]*decay+rotEz;
			 );
}

// Update the E field of a BLOCK_PML_FACE_Y block for one step.
// Per cell:
//   Ex = Ex*y_decay + cn*rotHx
//   Ey += cn*(y_rotcoef*rotHy + y_sumrotcoef*sumrotHy)  (sum of previous steps)
//   sumrotHy += rotHy
//   Ez = Ez*y_decay + cn*rotHz
// sumrotHy aliases b->PML->sumrotHHy. The local names used by
// browse_block_with_rotH must keep their names.
static void iter_E_PML_y_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotHy=b->PML->sumrotHHy;
  double *cn=b->cn;
  double decay=b->PML->y_decay;
  double y_rotcoef=b->PML->y_rotcoef;
  double y_sumrotcoef=b->PML->y_sumrotcoef;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]=Ex[pos]*decay+cn[pos]*rotHx;
			 Ey[pos]+=cn[pos]*(y_rotcoef*rotHy+y_sumrotcoef*sumrotHy[pos]);
			 sumrotHy[pos]+=rotHy;
			 Ez[pos]=Ez[pos]*decay+cn[pos]*rotHz;
			 );
}

// Update the HH field of a BLOCK_PML_FACE_Z block for one step.
// Per cell:
//   HHx = HHx*z_decay + rotEx
//   HHy = HHy*z_decay + rotEy
//   HHz += z_rotcoef*rotEz + z_sumrotcoef*sumrotEz   (sum of previous steps)
//   sumrotEz += rotEz
// All coefficients come from b->PML. The local names used by
// browse_block_with_rotE must keep their names.
static void iter_H_PML_z_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxp=b->block_xp;
  varBlock *byp=b->block_yp;
  varBlock *bzp=b->block_zp;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotEz=b->PML->sumrotEz;
  double decay=b->PML->z_decay;
  double z_rotcoef=b->PML->z_rotcoef;
  double z_sumrotcoef=b->PML->z_sumrotcoef;
  double rotEx,rotEy,rotEz;

  browse_block_with_rotE(
			 HHx[pos]=HHx[pos]*decay+rotEx;
			 HHy[pos]=HHy[pos]*decay+rotEy;
			 HHz[pos]+=z_rotcoef*rotEz+z_sumrotcoef*sumrotEz[pos];
			 sumrotEz[pos]+=rotEz;
			 );
}

// Update the E field of a BLOCK_PML_FACE_Z block for one step.
// Per cell:
//   Ex = Ex*z_decay + cn*rotHx
//   Ey = Ey*z_decay + cn*rotHy
//   Ez += cn*(z_rotcoef*rotHz + z_sumrotcoef*sumrotHz)  (sum of previous steps)
//   sumrotHz += rotHz
// sumrotHz aliases b->PML->sumrotHHz. The local names used by
// browse_block_with_rotH must keep their names.
static void iter_E_PML_z_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotHz=b->PML->sumrotHHz;
  double *cn=b->cn;
  double decay=b->PML->z_decay;
  double z_rotcoef=b->PML->z_rotcoef;
  double z_sumrotcoef=b->PML->z_sumrotcoef;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]=Ex[pos]*decay+cn[pos]*rotHx;
			 Ey[pos]=Ey[pos]*decay+cn[pos]*rotHy;
			 Ez[pos]+=cn[pos]*(z_rotcoef*rotHz+z_sumrotcoef*sumrotHz[pos]);
			 sumrotHz[pos]+=rotHz;
			 );
}

// Update the HH field of a PML block for one step, general case: this is
// the update the caller's dispatch uses for any PML block type other than
// the three single-face types. For each direction d in {x,y,z}, per cell:
//   HHd = HHd*d_decay + d_rotcoef*rotEd + d_sumrotcoef*sumrotEd
//         - d_sumcoef*sumHHd
//   sumHHd   += HHd      (the freshly updated value)
//   sumrotEd += rotEd
// so both sums used in the formula are those accumulated over previous
// steps. All coefficients and sum arrays come from b->PML. The local names
// used by browse_block_with_rotE must keep their names.
static void iter_H_PML_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxp=b->block_xp;
  varBlock *byp=b->block_yp;
  varBlock *bzp=b->block_zp;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotEx=b->PML->sumrotEx;
  double *sumrotEy=b->PML->sumrotEy;
  double *sumrotEz=b->PML->sumrotEz;
  double *sumHHx=b->PML->sumHHx;
  double *sumHHy=b->PML->sumHHy;
  double *sumHHz=b->PML->sumHHz;
  double x_decay=b->PML->x_decay;
  double y_decay=b->PML->y_decay;
  double z_decay=b->PML->z_decay;
  double x_rotcoef=b->PML->x_rotcoef;
  double y_rotcoef=b->PML->y_rotcoef;
  double z_rotcoef=b->PML->z_rotcoef;
  double x_sumrotcoef=b->PML->x_sumrotcoef;
  double y_sumrotcoef=b->PML->y_sumrotcoef;
  double z_sumrotcoef=b->PML->z_sumrotcoef;
  double x_sumcoef=b->PML->x_sumcoef;
  double y_sumcoef=b->PML->y_sumcoef;
  double z_sumcoef=b->PML->z_sumcoef;
  double rotEx,rotEy,rotEz;

  browse_block_with_rotE(
			 HHx[pos]=HHx[pos]*x_decay+x_rotcoef*rotEx+x_sumrotcoef*sumrotEx[pos]-x_sumcoef*sumHHx[pos];
			 sumHHx[pos]+=HHx[pos];
			 sumrotEx[pos]+=rotEx;
			 HHy[pos]=HHy[pos]*y_decay+y_rotcoef*rotEy+y_sumrotcoef*sumrotEy[pos]-y_sumcoef*sumHHy[pos];
			 sumHHy[pos]+=HHy[pos];
			 sumrotEy[pos]+=rotEy;
			 HHz[pos]=HHz[pos]*z_decay+z_rotcoef*rotEz+z_sumrotcoef*sumrotEz[pos]-z_sumcoef*sumHHz[pos];
			 sumHHz[pos]+=HHz[pos];
			 sumrotEz[pos]+=rotEz;
			 );
}

// Update the E field of a PML block for one step, general case: this is
// the update the caller's dispatch uses for any PML block type other than
// the three single-face types. For each direction d in {x,y,z}, per cell:
//   Ed = Ed*d_decay + cn*(d_rotcoef*rotHd + d_sumrotcoef*sumrotHd)
//        - d_sumcoef*sumEd
//   sumEd    += Ed       (the freshly updated value)
//   sumrotHd += rotHd
// so both sums used in the formula are those accumulated over previous
// steps. sumrotHd aliases b->PML->sumrotHHd. The local names used by
// browse_block_with_rotH must keep their names.
static void iter_E_PML_block(varBlock *b)
{
  int pos;
  int ix,iy,iz;
  varBlock *bxm=b->block_xm;
  varBlock *bym=b->block_ym;
  varBlock *bzm=b->block_zm;
  double *HHx=b->HHx;
  double *HHy=b->HHy;
  double *HHz=b->HHz;
  double *Ex=b->Ex;
  double *Ey=b->Ey;
  double *Ez=b->Ez;
  double *sumrotHx=b->PML->sumrotHHx;
  double *sumrotHy=b->PML->sumrotHHy;
  double *sumrotHz=b->PML->sumrotHHz;
  double *sumEx=b->PML->sumEx;
  double *sumEy=b->PML->sumEy;
  double *sumEz=b->PML->sumEz;
  double *cn=b->cn;
  double x_decay=b->PML->x_decay;
  double y_decay=b->PML->y_decay;
  double z_decay=b->PML->z_decay;
  double x_rotcoef=b->PML->x_rotcoef;
  double y_rotcoef=b->PML->y_rotcoef;
  double z_rotcoef=b->PML->z_rotcoef;
  double x_sumrotcoef=b->PML->x_sumrotcoef;
  double y_sumrotcoef=b->PML->y_sumrotcoef;
  double z_sumrotcoef=b->PML->z_sumrotcoef;
  double x_sumcoef=b->PML->x_sumcoef;
  double y_sumcoef=b->PML->y_sumcoef;
  double z_sumcoef=b->PML->z_sumcoef;
  double rotHx,rotHy,rotHz;

  browse_block_with_rotH(
			 Ex[pos]=Ex[pos]*x_decay+cn[pos]*(x_rotcoef*rotHx+x_sumrotcoef*sumrotHx[pos])-x_sumcoef*sumEx[pos];
			 sumEx[pos]+=Ex[pos];
			 sumrotHx[pos]+=rotHx;
			 Ey[pos]=Ey[pos]*y_decay+cn[pos]*(y_rotcoef*rotHy+y_sumrotcoef*sumrotHy[pos])-y_sumcoef*sumEy[pos];
			 sumEy[pos]+=Ey[pos];
			 sumrotHy[pos]+=rotHy;
			 Ez[pos]=Ez[pos]*z_decay+cn[pos]*(z_rotcoef*rotHz+z_sumrotcoef*sumrotHz[pos])-z_sumcoef*sumEz[pos];
			 sumEz[pos]+=Ez[pos];
			 sumrotHz[pos]+=rotHz;
			 );
}

// The index helpers and traversal macros capture local variable names, so
// they are only meaningful inside the iter_* kernels above; undefine them
// so they cannot leak into the rest of the file.
#undef pos_xp
#undef pos_yp
#undef pos_zp
#undef pos_xpn
#undef pos_ypn
#undef pos_zpn

#undef pos_xm
#undef pos_ym
#undef pos_zm
#undef pos_xmn
#undef pos_ymn
#undef pos_zmn

#undef browse_block_with_rotE
#undef browse_block_with_rotH




// Sum Ey*HHz - HHy*Ez over the cells of the plane x == p->x1 that lie in
// [p->y1,p->y2) x [p->z1,p->z2).
// The walk starts at start_block and follows block_yp (outer) and block_zp
// (inner) links while the block offset is still below p->y2 / p->z2. The
// x index inside every visited block is p->x1 minus start_block's x offset.
// With HAVE_MPI, only blocks whose mpi_node equals b->mpi_node contribute.
static double integ_poynt_x(const varOutputPoynt *p,varBlock *start_block,varBlockList *b)
{
  const int plane_x = p->x1 - start_block->off_x;
  double flux = 0;
  varBlock *row_head;
  varBlock *blk;
  int iy, iz;

  for(row_head = start_block; p->y2 > row_head->off_y; row_head = row_head->block_yp)
    {
      // The y range is taken from the first block of the row and reused
      // for every block reached through block_zp.
      const int y_lo = MAX(0, p->y1 - row_head->off_y);
      const int y_hi = MIN(row_head->ny, p->y2 - row_head->off_y);

      for(blk = row_head; p->z2 > blk->off_z; blk = blk->block_zp)
        {
#ifdef HAVE_MPI
          if(blk->mpi_node != b->mpi_node)
            continue;
#endif
          {
            const int z_lo = MAX(0, p->z1 - blk->off_z);
            const int z_hi = MIN(blk->nz, p->z2 - blk->off_z);
            const double *Ey = blk->Ey;
            const double *Ez = blk->Ez;
            const double *HHy = blk->HHy;
            const double *HHz = blk->HHz;

            for(iy = y_lo; iy < y_hi; iy++)
              {
                const int line = (plane_x * blk->ny + iy) * blk->nz;
                for(iz = z_lo; iz < z_hi; iz++)
                  {
                    const int pos = line + iz;
                    flux += Ey[pos]*HHz[pos]-HHy[pos]*Ez[pos];
                  }
              }
          }
        }
    }
  return flux;
}

// Sum Ez*HHx - HHz*Ex over the cells of the plane y == p->y1 that lie in
// [p->z1,p->z2) x [p->x1,p->x2).
// The walk starts at start_block and follows block_zp (outer) and block_xp
// (inner) links while the block offset is still below p->z2 / p->x2. The
// y index inside every visited block is p->y1 minus start_block's y offset.
// With HAVE_MPI, only blocks whose mpi_node equals b->mpi_node contribute.
static double integ_poynt_y(const varOutputPoynt *p,varBlock *start_block,varBlockList *b)
{
  const int plane_y = p->y1 - start_block->off_y;
  double flux = 0;
  varBlock *row_head;
  varBlock *blk;
  int iz, ix;

  for(row_head = start_block; p->z2 > row_head->off_z; row_head = row_head->block_zp)
    {
      // The z range is taken from the first block of the row and reused
      // for every block reached through block_xp.
      const int z_lo = MAX(0, p->z1 - row_head->off_z);
      const int z_hi = MIN(row_head->nz, p->z2 - row_head->off_z);

      for(blk = row_head; p->x2 > blk->off_x; blk = blk->block_xp)
        {
#ifdef HAVE_MPI
          if(blk->mpi_node != b->mpi_node)
            continue;
#endif
          {
            const int x_lo = MAX(0, p->x1 - blk->off_x);
            const int x_hi = MIN(blk->nx, p->x2 - blk->off_x);
            const double *Ex = blk->Ex;
            const double *Ez = blk->Ez;
            const double *HHx = blk->HHx;
            const double *HHz = blk->HHz;

            // z is the outer loop and x the inner one, as in the storage
            // order this is the strided direction; kept to preserve the
            // summation order.
            for(iz = z_lo; iz < z_hi; iz++)
              for(ix = x_lo; ix < x_hi; ix++)
                {
                  const int pos = (ix * blk->ny + plane_y) * blk->nz + iz;
                  flux += Ez[pos]*HHx[pos]-HHz[pos]*Ex[pos];
                }
          }
        }
    }
  return flux;
}

// Sum Ex*HHy - HHx*Ey over the cells of the plane z == p->z1 that lie in
// [p->x1,p->x2) x [p->y1,p->y2).
// The walk starts at start_block and follows block_xp (outer) and block_yp
// (inner) links while the block offset is still below p->x2 / p->y2. The
// z index inside every visited block is p->z1 minus start_block's z offset.
// With HAVE_MPI, only blocks whose mpi_node equals b->mpi_node contribute.
static double integ_poynt_z(const varOutputPoynt *p,varBlock *start_block,varBlockList *b)
{
  const int plane_z = p->z1 - start_block->off_z;
  double flux = 0;
  varBlock *row_head;
  varBlock *blk;
  int ix, iy;

  for(row_head = start_block; p->x2 > row_head->off_x; row_head = row_head->block_xp)
    {
      // The x range is taken from the first block of the row and reused
      // for every block reached through block_yp.
      const int x_lo = MAX(0, p->x1 - row_head->off_x);
      const int x_hi = MIN(row_head->nx, p->x2 - row_head->off_x);

      for(blk = row_head; p->y2 > blk->off_y; blk = blk->block_yp)
        {
#ifdef HAVE_MPI
          if(blk->mpi_node != b->mpi_node)
            continue;
#endif
          {
            const int y_lo = MAX(0, p->y1 - blk->off_y);
            const int y_hi = MIN(blk->ny, p->y2 - blk->off_y);
            const double *Ex = blk->Ex;
            const double *Ey = blk->Ey;
            const double *HHx = blk->HHx;
            const double *HHy = blk->HHy;

            for(ix = x_lo; ix < x_hi; ix++)
              {
                const int line = ix * blk->ny;
                for(iy = y_lo; iy < y_hi; iy++)
                  {
                    const int pos = (line + iy) * blk->nz + plane_z;
                    flux += Ex[pos]*HHy[pos]-HHx[pos]*Ey[pos];
                  }
              }
          }
        }
    }
  return flux;
}

#ifdef HAVE_MPI

// File-scope state for the MPI exchange of block faces.

// Element count (in doubles) passed to every MPI_Irecv posted by
// initiate_receive.
static int field_buflen;
// Index of the next free send slot; each block_propagate_* send takes one
// slot with cur_send_request++.
static int cur_send_request;
// Per-message buffers. before_buffer is unpacked by receive_H_msgs and
// filled by block_propagate_E; after_buffer is filled by block_propagate_H
// and unpacked by receive_E_msgs.
static double **before_buffer;
static double **after_buffer;
// Request handles, indexed like the matching buffer array.
static MPI_Request *before_reqs;
static MPI_Request *after_reqs;

// Copy one x plane (block->ny*block->nz contiguous doubles starting at
// `field`) into `dest`.
static inline void field_x_pack(double *dest, varBlock *block, double *field)
{
  const size_t plane_bytes=block->ny*block->nz*sizeof(double);

  memcpy(dest,field,plane_bytes);
}

// Copy block->ny*block->nz doubles from `src` into the contiguous x plane
// starting at `field`.
static inline void field_x_unpack(double *src, varBlock *block, double *field)
{
  const size_t plane_bytes=block->ny*block->nz*sizeof(double);

  memcpy(field,src,plane_bytes);
}

// Gather one y plane into `dest`: for each of the block->nx x indices, copy
// the run of block->nz doubles found at field + ix*ny*nz to dest + ix*nz.
// `field` must already point at the first cell of the wanted y row.
static inline void field_y_pack(double *dest, varBlock *block, double *field)
{
  const int run_len=block->nz;
  const int x_stride=block->ny*block->nz;
  int ix;

  for(ix=0;ix<block->nx;ix++)
    {
      const double *from=field+ix*x_stride;
      double *to=dest+ix*run_len;
      memcpy(to,from,run_len*sizeof(double));
    }
}

// Scatter a packed y plane from `src` back into the block: for each of the
// block->nx x indices, copy block->nz doubles from src + ix*nz to
// field + ix*ny*nz. `field` must already point at the first cell of the
// wanted y row.
static inline void field_y_unpack(double *src, varBlock *block, double *field)
{
  const int run_len=block->nz;
  const int x_stride=block->ny*block->nz;
  int ix;

  for(ix=0;ix<block->nx;ix++)
    {
      const double *from=src+ix*run_len;
      double *to=field+ix*x_stride;
      memcpy(to,from,run_len*sizeof(double));
    }
}

// Gather one z plane into `dest`: element (ix,iy) of the plane is read from
// field[(ix*ny+iy)*nz] and stored at dest[ix*ny+iy]. `field` must already
// point at the wanted z index of cell (0,0).
static inline void field_z_pack(double *dest, varBlock *block, double *field)
{
  int ix,iy;

  for(ix=0;ix<block->nx;ix++)
    {
      const int row=ix*block->ny;
      for(iy=0;iy<block->ny;iy++)
	{
	  const int cell=row+iy;
	  dest[cell]=field[cell*block->nz];
	}
    }
}

// Scatter a packed z plane from `src` back into the block: src[ix*ny+iy]
// is written to field[(ix*ny+iy)*nz]. `field` must already point at the
// wanted z index of cell (0,0).
static inline void field_z_unpack(double *src, varBlock *block, double *field)
{
  int ix,iy;

  for(ix=0;ix<block->nx;ix++)
    {
      const int row=ix*block->ny;
      for(iy=0;iy<block->ny;iy++)
	{
	  const int cell=row+iy;
	  field[cell*block->nz]=src[cell];
	}
    }
}

// Post `num` non-blocking receives, one per buffer in `buf`, each for up to
// field_buflen doubles from any source with the given tag on
// MPI_COMM_WORLD. The request handle of receive k is stored in reqs[k].
static void initiate_receive(int num, double **buf, MPI_Request *reqs, int tag)
{
  int slot=0;

  while(slot<num)
    {
      MPI_Irecv(buf[slot], field_buflen, MPI_DOUBLE, MPI_ANY_SOURCE,
		tag, MPI_COMM_WORLD, &reqs[slot]);
      slot++;
    }
}

// Unpack every message held in before_buffer[0 .. b->mpi_blocks_before-1]
// into the HH arrays of the block it names.
// Message layout: the first double slot holds two ints (block index in
// b->blocks, then direction); two packed face planes follow. The planes are
// written to the last x, y or z plane of the named block:
//   DIR_X -> HHy then HHz,  DIR_Y -> HHz then HHx,  DIR_Z -> HHx then HHy.
// The caller is expected to have completed the receives before calling.
static void receive_H_msgs(varBlockList *b)
{
  int msg;

  for(msg=0;msg<b->mpi_blocks_before;msg++)
    {
      double *raw=before_buffer[msg];
      const int *header=(const int *) raw;
      varBlock *bl=b->blocks+header[0];
      double *first=raw+1;

      switch(header[1])
	{
	case DIR_X:
	  {
	    const int last_plane=(bl->nx-1)*bl->ny*bl->nz;
	    field_x_unpack(first, bl, bl->HHy+last_plane);
	    field_x_unpack(first+bl->ny*bl->nz, bl, bl->HHz+last_plane);
	  }
	  break;
	case DIR_Y:
	  {
	    const int last_row=(bl->ny-1)*bl->nz;
	    field_y_unpack(first, bl, bl->HHz+last_row);
	    field_y_unpack(first+bl->nz*bl->nx, bl, bl->HHx+last_row);
	  }
	  break;
	case DIR_Z:
	  {
	    const int last_cell=bl->nz-1;
	    field_z_unpack(first, bl, bl->HHx+last_cell);
	    field_z_unpack(first+bl->nx*bl->ny, bl, bl->HHy+last_cell);
	  }
	  break;
	default:
	  panic("Cannot find field direction in MPI message.");
	}
    }
}

// Send the upper faces of block `bl`'s HH field to the +x, +y and +z
// neighbor blocks that live on a different MPI node.
// For each such neighbor, one slot of after_buffer / after_reqs is taken
// (cur_send_request++), the first double slot is overwritten with two ints
// (index of `bl` relative to first_block, then direction), two packed
// planes follow, and the message is posted with MPI_Isend, tag TAG_HFIELD:
//   DIR_X: last x plane of HHy, then HHz
//   DIR_Y: last y plane of HHz, then HHx
//   DIR_Z: last z plane of HHx, then HHy
static void block_propagate_H(varBlock *bl, varBlock *first_block)
{
  const int block_index=bl-first_block;
  double *msg;
  int *header;
  int slot;

  if(bl->block_xp->mpi_node != bl->mpi_node)
    {
      const int plane=bl->ny*bl->nz;
      const int last_plane=(bl->nx-1)*plane;

      slot=cur_send_request++;
      msg=after_buffer[slot];
      // The first 8 bytes of the buffer carry the block number and the
      // direction instead of a field value.
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_X;
      field_x_pack(msg+1, bl, bl->HHy+last_plane);
      field_x_pack(msg+1+plane, bl, bl->HHz+last_plane);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_xp->mpi_node, TAG_HFIELD, MPI_COMM_WORLD, after_reqs+slot);
    }
  if(bl->block_yp->mpi_node != bl->mpi_node)
    {
      const int plane=bl->nz*bl->nx;
      const int last_row=(bl->ny-1)*bl->nz;

      slot=cur_send_request++;
      msg=after_buffer[slot];
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_Y;
      field_y_pack(msg+1, bl, bl->HHz+last_row);
      field_y_pack(msg+1+plane, bl, bl->HHx+last_row);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_yp->mpi_node, TAG_HFIELD, MPI_COMM_WORLD, after_reqs+slot);
    }
  if(bl->block_zp->mpi_node != bl->mpi_node)
    {
      const int plane=bl->nx*bl->ny;
      const int last_cell=bl->nz-1;

      slot=cur_send_request++;
      msg=after_buffer[slot];
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_Z;
      field_z_pack(msg+1, bl, bl->HHx+last_cell);
      field_z_pack(msg+1+plane, bl, bl->HHy+last_cell);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_zp->mpi_node, TAG_HFIELD, MPI_COMM_WORLD, after_reqs+slot);
    }
}

// Unpack every message held in after_buffer[0 .. b->mpi_blocks_after-1]
// into the E arrays of the block it names.
// Message layout: the first double slot holds two ints (block index in
// b->blocks, then direction); two packed face planes follow. The planes are
// written to the first x, y or z plane of the named block:
//   DIR_X -> Ey then Ez,  DIR_Y -> Ez then Ex,  DIR_Z -> Ex then Ey.
// The caller is expected to have completed the receives before calling.
static void receive_E_msgs(varBlockList *b)
{
  int msg;

  for(msg=0;msg<b->mpi_blocks_after;msg++)
    {
      double *raw=after_buffer[msg];
      const int *header=(const int *) raw;
      varBlock *bl=b->blocks+header[0];
      double *first=raw+1;

      switch(header[1])
	{
	case DIR_X:
	  field_x_unpack(first, bl, bl->Ey);
	  field_x_unpack(first+bl->ny*bl->nz, bl, bl->Ez);
	  break;
	case DIR_Y:
	  field_y_unpack(first, bl, bl->Ez);
	  field_y_unpack(first+bl->nz*bl->nx, bl, bl->Ex);
	  break;
	case DIR_Z:
	  field_z_unpack(first, bl, bl->Ex);
	  field_z_unpack(first+bl->nx*bl->ny, bl, bl->Ey);
	  break;
	default:
	  panic("Cannot find field direction in MPI message.");
	}
    }
}

// Send the lower faces of block `bl`'s E field to the -x, -y and -z
// neighbor blocks that live on a different MPI node.
// For each such neighbor, one slot of before_buffer / before_reqs is taken
// (cur_send_request++), the first double slot is overwritten with two ints
// (index of `bl` relative to first_block, then direction), two packed
// planes follow, and the message is posted with MPI_Isend, tag TAG_EFIELD:
//   DIR_X: first x plane of Ey, then Ez
//   DIR_Y: first y plane of Ez, then Ex
//   DIR_Z: first z plane of Ex, then Ey
static void block_propagate_E(varBlock *bl, varBlock *first_block)
{
  const int block_index=bl-first_block;
  double *msg;
  int *header;
  int slot;

  if(bl->block_xm->mpi_node != bl->mpi_node)
    {
      const int plane=bl->ny*bl->nz;

      slot=cur_send_request++;
      msg=before_buffer[slot];
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_X;
      field_x_pack(msg+1, bl, bl->Ey);
      field_x_pack(msg+1+plane, bl, bl->Ez);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_xm->mpi_node, TAG_EFIELD, MPI_COMM_WORLD, before_reqs+slot);
    }
  if(bl->block_ym->mpi_node != bl->mpi_node)
    {
      const int plane=bl->nz*bl->nx;

      slot=cur_send_request++;
      msg=before_buffer[slot];
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_Y;
      field_y_pack(msg+1, bl, bl->Ez);
      field_y_pack(msg+1+plane, bl, bl->Ex);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_ym->mpi_node, TAG_EFIELD, MPI_COMM_WORLD, before_reqs+slot);
    }
  if(bl->block_zm->mpi_node != bl->mpi_node)
    {
      const int plane=bl->nx*bl->ny;

      slot=cur_send_request++;
      msg=before_buffer[slot];
      header=(int *) msg;
      header[0]=block_index;
      header[1]=DIR_Z;
      field_z_pack(msg+1, bl, bl->Ex);
      field_z_pack(msg+1+plane, bl, bl->Ey);
      MPI_Isend(msg, 1+2*plane, MPI_DOUBLE, bl->block_zm->mpi_node, TAG_EFIELD, MPI_COMM_WORLD, before_reqs+slot);
    }
}

#endif // HAVE_MPI

int RunSimul(SimulDesc *thesimul)
{
  int max_iteration=thesimul->simul->it_max;
  int iteration,i;
  float time_begin,time_end;
  varBlockList *b=index_array_to_blocks_with_pml(thesimul);
  varBlock *curBlock;
  double cdtdx=CELERITY*thesimul->simul->delta_t/thesimul->space->delta_x;
  int total_cells=thesimul->space->nx*thesimul->space->ny*thesimul->space->nz;

  varBlock *inj_block=search_block(b,thesimul->inj->x,thesimul->inj->y,thesimul->inj->z);
  int inj_pos_in_block=offset_in_block(inj_block,thesimul->inj->x,thesimul->inj->y,thesimul->inj->z);

  double *output_buffer=NULL;
  double *field_buf=NULL; // A temporary buffer to store the
                          // field in a block. Only for MPI.

  int nb_ponctoutput=thesimul->output->nb_poncts;
  varBlock **ponct_out_blocks=myalloc(nb_ponctoutput*sizeof(varBlock *));
  int *ponct_out_offsets=myalloc(nb_ponctoutput*sizeof(int));
  varOutputPonct *tmp_ponct;

  int nb_poyntoutput=thesimul->output->nb_poynts;
  varBlock **poynt_out_init_blocks=myalloc(nb_poyntoutput*sizeof(varBlock *));
  varOutputPoynt *tmp_poynt;

#ifdef HAVE_MPI
  double **mpi_buffers;
  MPI_Request *mpi_reqs;

  // The maximum send buffer we'll need consists in 2 fields for
  // each allocated block.
  field_buflen=2*SQUARE(MAX(thesimul->space->num_pml,thesimul->space->block_size))+1;
  // We allocate 8 extra bytes at the beginning for the 2 ints
  // (block number and direction)
  
  mpi_buffers=myalloc((b->mpi_blocks_before+b->mpi_blocks_after)*sizeof(double *));
  before_buffer=mpi_buffers;
  after_buffer=mpi_buffers+b->mpi_blocks_before;
  mpi_reqs=myalloc((b->mpi_blocks_before+b->mpi_blocks_after)*sizeof(MPI_Request));
  before_reqs=mpi_reqs;
  after_reqs=mpi_reqs+b->mpi_blocks_before;
  for(i=0;i<b->mpi_blocks_before;i++)
    before_buffer[i]=myalloc(field_buflen*sizeof(double));
  for(i=0;i<b->mpi_blocks_after;i++)
    after_buffer[i]=myalloc(field_buflen*sizeof(double));
#endif // HAVE_MPI

  // Only allocate output buffer on first node.
  if(thesimul->output->nb_cartos)
    {
#ifdef HAVE_MPI
      if(b->mpi_node==0)
	{
	  field_buf=myalloc(CUBE(MAX(thesimul->space->num_pml,thesimul->space->block_size))*sizeof(double));
	  output_buffer=myalloc(total_cells*sizeof(double));
	}
#else // HAVE_MPI
      output_buffer=myalloc(total_cells*sizeof(double));
#endif // HAVE_MPI
    }

  for(i=0;i<nb_ponctoutput;i++)
    {
      tmp_ponct = &(thesimul->output->poncts[i]);
      ponct_out_blocks[i]=search_block(b,tmp_ponct->x,tmp_ponct->y,tmp_ponct->z);
      ponct_out_offsets[i]=offset_in_block(ponct_out_blocks[i],tmp_ponct->x,tmp_ponct->y,tmp_ponct->z);
    }

  for(i=0;i<nb_poyntoutput;i++)
    {
      tmp_poynt = &(thesimul->output->poynts[i]);
      poynt_out_init_blocks[i] = search_block(b,tmp_poynt->x1,tmp_poynt->y1,tmp_poynt->z1);
    }

  time_begin=get_time();
  for(iteration=0;iteration<max_iteration;iteration++)
    {
      if(
#ifdef HAVE_MPI
	 b->mpi_node == 0 &&
#endif
	 iteration%10 == 0 )
	{
	  printf("\rIteration #%i --- %i %% completed",iteration,iteration*100/max_iteration);
	  fflush(stdout);
	}

      // Dipole injection
#ifdef HAVE_MPI
      if(b->mpi_node == inj_block->mpi_node)
#endif
	{
	  inj_block->Ex[inj_pos_in_block] += thesimul->inj->dir_x*thesimul->inj->injection[iteration];
	  inj_block->Ey[inj_pos_in_block] += thesimul->inj->dir_y*thesimul->inj->injection[iteration];
	  inj_block->Ez[inj_pos_in_block] += thesimul->inj->dir_z*thesimul->inj->injection[iteration];
	}

      // iterate H
#ifdef HAVE_MPI
      cur_send_request=0;
      initiate_receive(b->mpi_blocks_before, before_buffer, before_reqs, TAG_HFIELD);
      // Go in the reverse order so that the field to be sent
      // is computed first
      for(i=b->mpi_endblock;i-->b->mpi_startblock;)
#else
      for(i=0;i<b->n;i++)
#endif
	{
	  curBlock=b->blocks+i;
	  if(curBlock->PML==NULL)
	    iter_H_plain_block(curBlock);
	  else
	    {
	      switch(curBlock->PML->block_type)
		{
		case BLOCK_PML_FACE_X:
		  iter_H_PML_x_block(curBlock);
		  break;
		case BLOCK_PML_FACE_Y:
		  iter_H_PML_y_block(curBlock);
		  break;
		case BLOCK_PML_FACE_Z:
		  iter_H_PML_z_block(curBlock);
		  break;
		default:
		  iter_H_PML_block(curBlock);
		}
	    }
#ifdef HAVE_MPI
	  block_propagate_H(curBlock,b->blocks);
#endif
	}
#ifdef HAVE_MPI
      MPI_Waitall(b->mpi_blocks_after+b->mpi_blocks_before, mpi_reqs, MPI_STATUSES_IGNORE);
      receive_H_msgs(b);
      // MPI_Barrier(MPI_COMM_WORLD);
#endif
      
      // iterate E
#ifdef HAVE_MPI
      cur_send_request=0;
      initiate_receive(b->mpi_blocks_after, after_buffer, after_reqs, TAG_EFIELD);
      for(i=b->mpi_startblock;i<b->mpi_endblock;i++)
#else
      for(i=0;i<b->n;i++)
#endif
	{
	  curBlock=b->blocks+i;
	  if(curBlock->PML==NULL)
	    {
	      if(curBlock->absor==NULL)
		iter_E_plain_block(curBlock);
	      else
		iter_E_abs_block(curBlock);
	    }
	  else
	    {
	      switch(curBlock->PML->block_type)
		{
		case BLOCK_PML_FACE_X:
		  iter_E_PML_x_block(curBlock);
		  break;
		case BLOCK_PML_FACE_Y:
		  iter_E_PML_y_block(curBlock);
		  break;
		case BLOCK_PML_FACE_Z:
		  iter_E_PML_z_block(curBlock);
		  break;
		default:
		  iter_E_PML_block(curBlock);
		}
	    }
#ifdef HAVE_MPI
	  block_propagate_E(curBlock,b->blocks);
#endif
	}
#ifdef HAVE_MPI
      MPI_Waitall(b->mpi_blocks_before+b->mpi_blocks_after, mpi_reqs, MPI_STATUSES_IGNORE);
      receive_E_msgs(b);
      // MPI_Barrier(MPI_COMM_WORLD);
#endif

      // Cartography output
      for(i=0;i<thesimul->output->nb_cartos;i++)
	if(iteration>=thesimul->output->cartos[i].begin && iteration<thesimul->output->cartos[i].end && (iteration-thesimul->output->cartos[i].begin)%thesimul->output->cartos[i].interv==0)
	  switch(thesimul->output->cartos[i].field)
	    {
	    case FIELD_E:
	      output_carto(b, thesimul->space, iteration, max_iteration, "E", FIELD_E, 1, output_buffer, field_buf);
	      break;
	    case FIELD_H:
	      output_carto(b, thesimul->space, iteration, max_iteration, "H", FIELD_H, cdtdx, output_buffer, field_buf);
	      break;
	    default:
	      panic("unknown field");
	    }

      // Point-probe ("ponct") output: sample the field components at each probe for this iteration
      for(i=0;i<nb_ponctoutput;i++)
#ifdef HAVE_MPI
	if(ponct_out_blocks[i]->mpi_node==b->mpi_node)
#endif
	  {
	    tmp_ponct = &(thesimul->output->poncts[i]);
	    switch(tmp_ponct->field)
	      {
	      case FIELD_E:
		tmp_ponct->valx[iteration]=ponct_out_blocks[i]->Ex[ponct_out_offsets[i]];
		tmp_ponct->valy[iteration]=ponct_out_blocks[i]->Ey[ponct_out_offsets[i]];
		tmp_ponct->valz[iteration]=ponct_out_blocks[i]->Ez[ponct_out_offsets[i]];
		break;
	      case FIELD_H:
		tmp_ponct->valx[iteration]=ponct_out_blocks[i]->HHx[ponct_out_offsets[i]]*cdtdx;
		tmp_ponct->valy[iteration]=ponct_out_blocks[i]->HHy[ponct_out_offsets[i]]*cdtdx;
		tmp_ponct->valz[iteration]=ponct_out_blocks[i]->HHz[ponct_out_offsets[i]]*cdtdx;
		break;
	      }
	  }
      
      // Poynting output
      for(i=0;i<nb_poyntoutput;i++)
	{
	  tmp_poynt = &(thesimul->output->poynts[i]);
	  switch(tmp_poynt->dir)
	    {
	    case DIR_X:
	      tmp_poynt->val[iteration]=integ_poynt_x(tmp_poynt,poynt_out_init_blocks[i],b);
	      break;
	    case DIR_Y:
	      tmp_poynt->val[iteration]=integ_poynt_y(tmp_poynt,poynt_out_init_blocks[i],b);
	      break;
	    case DIR_Z:
	      tmp_poynt->val[iteration]=integ_poynt_z(tmp_poynt,poynt_out_init_blocks[i],b);
	      break;
	    }
	}
    }
  // End of the main loop
  time_end=get_time();
#ifdef HAVE_MPI
  if(b->mpi_node == 0)
#endif
    printf("\rSimulation took %.2f seconds.                   \nEach of the %i iterations took %.3f seconds.\n",time_end,max_iteration,(time_end-time_begin)/max_iteration);
  free(output_buffer);
  
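  // Point-probe ("ponct") output: collect each probe's time series (on node 0 under MPI) and hand it to the output routine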
  for(i=0;i<nb_ponctoutput;i++)
    {
      char filename[FILENAME_BUF];
      tmp_ponct = &(thesimul->output->poncts[i]);
      
#ifdef HAVE_MPI
      // MPI_Barrier(MPI_COMM_WORLD);
      if(b->mpi_node)
	{
	  if(ponct_out_blocks[i]->mpi_node==b->mpi_node)
	    {
	      MPI_Send(tmp_ponct->valx, max_iteration, MPI_DOUBLE, 0,
		       TAG_PONCTOUT, MPI_COMM_WORLD);
	      MPI_Send(tmp_ponct->valy, max_iteration, MPI_DOUBLE, 0,
		       TAG_PONCTOUT, MPI_COMM_WORLD);
	      MPI_Send(tmp_ponct->valz, max_iteration, MPI_DOUBLE, 0,
		       TAG_PONCTOUT, MPI_COMM_WORLD);
	    }
	  continue;
	}
      else if(ponct_out_blocks[i]->mpi_node)
	{
	  MPI_Recv(tmp_ponct->valx, max_iteration, MPI_DOUBLE,
		   ponct_out_blocks[i]->mpi_node, 
		   TAG_PONCTOUT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	  MPI_Recv(tmp_ponct->valy, max_iteration, MPI_DOUBLE,
		   ponct_out_blocks[i]->mpi_node, 
		   TAG_PONCTOUT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	  MPI_Recv(tmp_ponct->valz, max_iteration, MPI_DOUBLE,
		   ponct_out_blocks[i]->mpi_node, 
		   TAG_PONCTOUT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	}
#endif // HAVE_MPI
      if(snprintf(filename,FILENAME_BUF-1,"%c_%i_%i_%i",(tmp_ponct->field==FIELD_E)?'E':'H',tmp_ponct->x,tmp_ponct->y,tmp_ponct->z)<0)
	perror("ponct_output");
      else
	output_datfile3_fft(tmp_ponct->valx,tmp_ponct->valy,tmp_ponct->valz,max_iteration,filename,thesimul->simul->delta_t);
    }
  free(ponct_out_blocks);
  free(ponct_out_offsets);

  // Poynting output
  free(poynt_out_init_blocks);
  for(i=0;i<nb_poyntoutput;i++)
    {
      char filename[FILENAME_BUF];
      tmp_poynt = &(thesimul->output->poynts[i]);
      int res=-1;

#ifdef HAVE_MPI
      double *tmpdata=NULL;

      if(b->mpi_node==0)
	tmpdata=myalloc(max_iteration*sizeof(double));

      MPI_Reduce(tmp_poynt->val, tmpdata, max_iteration, MPI_DOUBLE,
		 MPI_SUM, 0, MPI_COMM_WORLD);

      if(b->mpi_node==0)
	{
	  memcpy(tmp_poynt->val, tmpdata, max_iteration*sizeof(double));
	  free(tmpdata);
	}
      else
	continue;
#endif // HAVE_MPI
      
      switch(tmp_poynt->dir)
	{
	case DIR_X:
	  res=snprintf(filename,FILENAME_BUF-1,"poynt%i_x=%i",i,tmp_poynt->x1);
	  break;
	case DIR_Y:
	  res=snprintf(filename,FILENAME_BUF-1,"poynt%i_y=%i",i,tmp_poynt->y1);
	  break;
	case DIR_Z:
	  res=snprintf(filename,FILENAME_BUF-1,"poynt%i_z=%i",i,tmp_poynt->z1);
	  break;
	}
      if(res<0)
	perror("poynt_output");
      else
	{
	  int cons=1;
	  if(thesimul->output->total_loss && i<6 && i%2==0)
	    cons=-1;
	  output_datfile_const(tmp_poynt->val,max_iteration,filename,thesimul->simul->delta_t,cons*cdtdx*SQUARE(thesimul->space->delta_x));
	}
    }
  if(thesimul->output->total_loss
#ifdef HAVE_MPI
     && b->mpi_node==0
#endif
     )
    {
      double *t=myalloc(max_iteration*sizeof(double));
      for(i=0;i<max_iteration;i++)
	t[i]=-thesimul->output->poynts[0].val[i]+thesimul->output->poynts[1].val[i]
	    -thesimul->output->poynts[2].val[i]+thesimul->output->poynts[3].val[i]
	  -thesimul->output->poynts[4].val[i]+thesimul->output->poynts[5].val[i];
      
      output_datfile_const(t,max_iteration,"poynt_total",thesimul->simul->delta_t,cdtdx*SQUARE(thesimul->space->delta_x));
      free(t);
    }

#ifdef HAVE_MPI
  for(i=0;i<b->mpi_blocks_before;i++)
    free(before_buffer[i]);
  for(i=0;i<b->mpi_blocks_after;i++)
    free(after_buffer[i]);
  free(mpi_buffers);
  free(mpi_reqs);
#endif
  destroy_blocklist(b);
  return 0;
}
