// Implementation of elliptic curves method (ecm) V1.34
// written by Thorsten Reinecke, 1999-06-23
// last change: 2004-10-28

/*! @file
 * @brief
 * contains implementation of elliptic curves
 */

#include "elliptic_curve.H"
#include "modulo.H"
#include <cmath>
#include "mpz_multi_invert.cc"

#include "PersistentData.H"

#if (CONTINUATION_METHOD > 0)
 extern void get_fft_parameter(unsigned int &pg_returnvalue, unsigned int &pms_returnvalue,
                               const double phase2, const mpz_t N);
#endif

#ifdef REACT_ON_SIGUSR
 #include "usr_signals.H"
 extern Cusr_signal_proxy USRSignalHandler;
#endif


// Bemerkung:
// Die ursprngliche Fassung der Weierstrass-Form y^2=x^2+a*x+b
// wurde in dieser Version zugunsten der Montgomery-Form B*y^2=x^3+A*x^2+x
// modifiziert.

// default: Phase 1 wird nun in der Form (x::z) durchlaufen,
// erst fr Phase 2 wird in die Form (x:y:1) konvertiert.
// Das alte Verhalten kann durch Setzen des Defines SLOW_WEIERSTRASS
// aktiviert werden.

// Phase 2 (in improved continuation) ist nun deutlich schneller.
// Hier ist die Form (x:y:1) ideal, da die Punkte mit mit nur
// einer Subtraktion+Multiplikation aufgesammelt werden knnen und
// nur eine aufwendige Berechnung (gcd zum Faktortest + Berechnung
// der nchsten Intervallgrenze) pro Intervall anfllt.
// Die Form (x::z) wre hier aufwendiger.

// Phase 2 (fft continuation) ist asymptotisch schneller
// als die improved continuation. Das liegt daran, dass die innerhalb
// der Suchintervalle liegenden Punkte als Nullstellen eines Polynoms
// interpretiert werden knnen. Wenn nun viele Suchintervalle berprft
// werden mssen, knnen asymptotisch schnelle Verfahren angewandt werden,
// die Polynome in mehreren Punkten zugleich auswerten.
// Die aktuelle Implementierung der fft-continuation ist noch nicht ausgereift,
// zeigt sich aber bereits jetzt fr grere Suchrume als berlegen.
// Nachteil: hoher Speicherbedarf



void elliptic_curves::check_curve(const mpz_t x, const mpz_t y) const
{
  //cerr << "------- validating elliptic curve... ------------" << endl;
  // checking, whether (x,y) is a valid point on the curve
  // b*y^2 == x^3 + a*x^2 + x^2 (Montgomery form)
  mpz_t x1,y1;
  mpz_init(x1); mpz_init(y1);
  mpz_add(x1,x,a); mpz_mul(x1,x1,x); mpz_add_ui(x1,x1,1);
  mpz_mul(x1,x1,x); mpz_mod(x1,x1,n);
  mpz_mul(y1,y,y); mpz_mul(y1,y1,b); mpz_mod(y1,y1,n);
  const int fl = mpz_cmp(x1,y1);
  mpz_clear(x1); mpz_clear(y1);
  if (fl!=0)
   {
     cerr << "tested point is invalid!" << endl;
     cerr << "(x:y:1)=(" << x << ":" << y << ":1)" << endl;
     exit(1);
   }
}


void elliptic_curves::XZ_mul2(mpz_t xr, mpz_t zr,
                              const mpz_t x1, const mpz_t z1)
{
  // to compute:
  // xr=(x^2-z^2)^2
  // zr=4*x*z*(x^2+A*x*z+z^2)

  // fast version:
  // xr=(x^2-z^2)^2=((x+z)*(x-z))^2=(x+z)^2*(x-z)^2
  // 4*x*z=(x+z)^2-(x-z)^2
  // zr/(4*x*z)=x^2+A*x*z+z^2 = (x-z)^2 + 2*x*z + a*x*z = (x-z)^2+(a+2)*x*z
  // und wenn wir b=(a+2)/4 setzen, gilt:
  // zr=4*x*z * ( (x-z)^2 + b*4*x*z )
  // IMPORTANT: b=(a+2)/4 must be set globally (beforehand) !!!
  
  mpz_add(h,x1,z1); mpz_mulmod(h,h,h,n); // h=(x1+z1)^2
  mpz_sub(k,x1,z1); mpz_mulmod(k,k,k,n); // k=(x1-z1)^2
  mpz_mulmod(xr,h,k,n); // xr=h*k
 
  mpz_sub(m,h,k); // m=h-k=4*x1*z1
  mpz_mulmod(h,b,m,n); // h=b*m
  mpz_add(k,k,h); // k=(x1-z1)^2+b*m
  mpz_mulmod(zr,k,m,n); // zr=4*x1*z1*k
}
const int COST_XZ_mul2 = 5; // cost of the above operation (measured in mpz_mul's)

void elliptic_curves::XZ_mul2plus1(mpz_t xr, mpz_t zr,
                                   const mpz_t Xp0, const mpz_t Zp0,
                                   const mpz_t Xp1, const mpz_t Zp1,
                                   const mpz_t x1, const mpz_t z1)
{
  // fast version
  mpz_sub(h,Xp1,Zp1); mpz_add(m,Xp0,Zp0);
  mpz_mulmod(h,h,m,n); // h=(Xp1-Zp1)*(Xp0+Zp0)
  mpz_add(k,Xp1,Zp1); mpz_sub(m,Xp0,Zp0);
  mpz_mulmod(k,k,m,n); // k=(Xp1+Zp1)*(Xp0-Zp0)

  // now compute: m=(h+k)^2, h=(h-k)^2
  //              xr=z1*m,   zr=x1*h
  mpz_add(m,h,k); mpz_sub(h,h,k);
  mpz_mulmod(m,m,m,n); mpz_mul(m,m,z1); // kein mulmod wg. Parameterkonsistenz!
  mpz_mulmod(h,h,h,n); mpz_mul(h,h,x1);
  modulo(xr,m,n); // xr=z1*m
  modulo(zr,h,n); // zr=x1*h
}
const int COST_XZ_mul2plus1 = 6; // cost of the above operation (measured in mpz_mul's)


#if 0 /* 1->binary (standard) algorithm for multiplication */
      /* 0->Montgomery's PRAC(tical) algorithm (which is faster!) */
void elliptic_curves::XZ_multiply(mpz_t xr, mpz_t zr,
                                  const mpz_t x1, const mpz_t z1,
                                  NATNUM K)
{
#ifdef DEBUG_ECM
  cout << "XZ_multiply " << K << " -> " << endl;
#endif
  
  bool s[100];
  signed int i=0;

  K>>=1;
  while (K>1)
   {
     s[i++]=(K&1);
     K>>=1;
   }
#ifdef DEBUG_ECM  
  int value=1, valuep1=2;
#endif
  mpz_t Xp0,Zp0,Xp1,Zp1;
  mpz_init_set(Xp0,x1); mpz_init_set(Zp0,z1);
  mpz_init(Xp1); mpz_init(Zp1);
  XZ_mul2(Xp1,Zp1,x1,z1);

  while (--i>=0)
   {
     if (s[i])
      {
#ifdef DEBUG_ECM
        value=value+valuep1; valuep1=valuep1+valuep1;
#endif
        XZ_mul2plus1(Xp0,Zp0,Xp0,Zp0,Xp1,Zp1,x1,z1);
        XZ_mul2(Xp1,Zp1,Xp1,Zp1);
      }
     else
      {
#ifdef DEBUG_ECM
        valuep1=value+valuep1; value=value+value;
#endif
        XZ_mul2plus1(Xp1,Zp1,Xp0,Zp0,Xp1,Zp1,x1,z1);
        XZ_mul2(Xp0,Zp0,Xp0,Zp0);
      }
     //cout << i << ". " << s[i] << " " << value << "," << valuep1 << endl;
   }
  
  // let's assume that K was an odd value, then we have:
  // result = value+valuep1
#ifdef DEBUG_ECM
  cout << "value=" << value+valuep1 << endl;
#endif
  XZ_mul2plus1(xr,zr,Xp0,Zp0,Xp1,Zp1,x1,z1);

  mpz_clear(Xp0); mpz_clear(Zp0);
  mpz_clear(Xp1); mpz_clear(Zp1);
}

#else

// two simple procedures for swapping values:
inline void swap(PmpzPoint &A, PmpzPoint &B)
{
  PmpzPoint H;
  H=A; A=B; B=H;
}

inline void swap(PmpzPoint &A, PmpzPoint &B, PmpzPoint &C)
{
  PmpzPoint H;
  H=A; A=B; B=C; C=H;
}


const unsigned int elliptic_curves::cost_of_evaluating_lucas_chain(const NATNUM K, const double alpha)
{
  // like PRAC, but this one only counts the addition steps for a
  // particular alpha

  const NATNUM r=static_cast<NATNUM>(rint(K/alpha));
  register unsigned int COST=0;

  //cout << "PRAC-init (COST)" << endl;
     
     COST+=COST_XZ_mul2;
     NATNUM d=K-r;
     NATNUM e=2*r-K;

     while (d!=e)
      {
        //cout << "inner loop: " << d << "," << e << endl;
        if (d<e)
         { 
           std::swap(e,d);
         }
        // now: d>=e

        if ( ((d+e)%3==0) && (d*4<=e*5) )
         {
           // condition 1
           const NATNUM d_new=(2*d-e)/3;
           e=(2*e-d)/3; d=d_new;
           COST+=3*COST_XZ_mul2plus1;
           continue;
         }
        if ( ((d-e)%6==0) && (d*4<=e*5) )
         {
           // condition 2
           d=(d-e)>>1;
           COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
           continue;
         }
        if (d<=4*e)
         {
           // condition 3
           d=d-e;
           COST+=COST_XZ_mul2plus1;
           continue;
         }
        if ( ((d-e)&1)==0 )
         {
           // condition 4
           d=(d-e)>>1;
           COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
           continue;
         }
        if ((d&1)==0)
         {
           // condition 5
           d>>=1;
           COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
           continue;
         }
        if (d%3==0)
         {
           // condition 6
           d=(d/3)-e;
           COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
           continue;
         }
        if ((d+e)%3==0)
         {
           // condition 7
           d=(d-2*e)/3;
           COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
           continue;
         }
        if ((d-e)%3==0)
         {
           // condition 8
           d=(d-e)/3;
           COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
           continue;
         }
        if ((e&1)==0)
         {
           // condition 9
           e>>=1;
           COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
           continue;
         }
        cerr << "Error in PRAC-Algorithm" << endl;
        exit(1);
      }
     COST+=COST_XZ_mul2plus1;

  //cout << "PRAC-exit (COST)" << endl;
  return COST;
}
 
/* Peter L. Montgomery's PRAC-Algorithm */
void elliptic_curves::XZ_multiply(mpz_t xr, mpz_t zr,
                                  const mpz_t x1, const mpz_t z1,
                                  NATNUM K)
{
#ifdef DEBUG_ECM
  cout << "XZ_multiply (PRAC)" << K << " -> " << endl;
#endif
  
  static const double alpha_continued_fraction [25] = 
   {
     /* decimal value          ,   continued fraction form                  */
     1.618033988749894848204587, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.723606797749978969640917, /* 1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.381966011250105151795413, /* 1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.580178728295464104708674, /* 1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.632839806088706285425954, /* 1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.612429949509495001921461, /* 1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.617214616534403862663845, /* 1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.620181980807415764823945, /* 1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618347119656228057976350, /* 1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.617914406528817893862475, /* 1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618079668469895811954673, /* 1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618016541142025285987741, /* 1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618040653214942998209496, /* 1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618031443161248228921464, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618034961079766208915596, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618033617353155449748868, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,... */
     1.618034130610858549698459, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,... */
     1.618033934563833141806412, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,... */
     1.618034009447129396675689, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,... */
     1.618033980844254824942950, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,... */
     1.618033991769580649023070, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,... */
     1.618033987596477509789958, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,... */
     1.618033989190461068579593, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,... */
     1.618033988581613526362231, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,... */
     1.618033988814172593483292, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,... */
   };

  static unsigned int first_best [25] =
   {          1,        11,        23,       103,       173,
            367,       491,       673,      1433,      4871,
          11311,     25873,     67057,    173669,    401393,
        1039387,   2774557,   7121347,  18732257,  48650851,
      126814879, 333390199, 867971879,         0,         0 };

  unsigned int best_i = 0; // (initial) Index of alpha (with minimum cost)

//----------------------------------------------------------------------------
#if 0 /* only needed to get first_best values during implementation phase */
  {
    cout << "PRAC-implementation: searching for optimal lucas chains..." << endl;
    for (unsigned K=3; K<4000000000; K+=2) if (probab_prime(K))
     {
       // searching for an alpha giving a short lucas chain...
       unsigned int best_i = 0; // (initial) Index of alpha (with minimum cost)
       unsigned int best_cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[0]);
       for (unsigned int i=1; i<25; ++i)
        {
          const unsigned int cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[i]);
          if (cost<best_cost) { best_cost=cost; best_i=i; }
        }
      
       static unsigned int index = 0;
       if (best_i>index)
        {
          index=best_i;
          cout << endl << "K[" << best_i << "]=" << K << endl;
        }
     }
    exit(0);
    /* note:
      if you find a better order or better values for
      "alpha_continued_fraction" or "first_best" let me know...
      I have not spend much time to find these ones ;)
    */  
  }
#endif
//----------------------------------------------------------------------------


#if 1 /* following loop tries to optimize lucas chain, (may be left out) */
  {
    // searching for an alpha giving a short lucas chain...
    unsigned int best_cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[0]);
    for (unsigned int i=1; K>=first_best[i] && i<23; ++i)
     {
       const unsigned int cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[i]);
       if (cost<best_cost) { best_cost=cost; best_i=i; }
     }
  }
#endif

 
  mpz_set(A->x,x1); mpz_set(A->z,z1);
  //cout << "PRAC-init" << endl;

  const NATNUM r=static_cast<NATNUM>(rint(K/alpha_continued_fraction[best_i]));
  // if a better value for r(p) is known, use it instead
     
     mpz_set(B->x,A->x); mpz_set(B->z,A->z);
     mpz_set(C->x,A->x); mpz_set(C->z,A->z);
     XZ_mul2(A,A);
     NATNUM d=K-r;
     NATNUM e=2*r-K;

     while (d!=e)
      {
        //cout << "inner loop: " << d << "," << e << endl;
        if (d<e)
         { 
           std::swap(e,d);
           swap(A,B);
         }
        // now: d>=e

        if ( ((d+e)%3==0) && (d*4<=e*5) )
         {
           // condition 1
           const NATNUM d_new=(2*d-e)/3;
           e=(2*e-d)/3; d=d_new;
           XZ_mul2plus1(T1,A,B,C);
           XZ_mul2plus1(T2,T1,A,B);
           XZ_mul2plus1(B,B,T1,A);
           swap(A,T2);
           continue;
         }
        if ( ((d-e)%6==0) && (d*4<=e*5) )
         {
           // condition 2
           d=(d-e)>>1;
           XZ_mul2plus1(B,A,B,C);
           XZ_mul2(A,A);
           continue;
         }
        if (d<=4*e)
         {
           // condition 3
           d=d-e;
           XZ_mul2plus1(T1,B,A,C);
           swap(B,T1,C);
           continue;
         }
        if ( ((d-e)&1)==0 )
         {
           // condition 4
           d=(d-e)>>1;
           XZ_mul2plus1(B,B,A,C);
           XZ_mul2(A,A);
           continue;
         }
        if ((d&1)==0)
         {
           // condition 5
           d>>=1;
           XZ_mul2plus1(C,C,A,B);
           XZ_mul2(A,A);
           continue;
         }
        if (d%3==0)
         {
           // condition 6
           d=(d/3)-e;
           XZ_mul2(T1,A);
           XZ_mul2plus1(T2,A,B,C);
           XZ_mul2plus1(A,T1,A,A);
           XZ_mul2plus1(T1,T1,T2,C);
           swap(C,B,T1);
           continue;
         }
        if ((d+e)%3==0)
         {
           // condition 7
           d=(d-2*e)/3;
           XZ_mul2plus1(T1,A,B,C);
           XZ_mul2plus1(B,T1,A,B);
           XZ_mul2(T1,A); XZ_mul2plus1(A,A,T1,A);
           continue;
         }
        if ((d-e)%3==0)
         {
           // condition 8
           d=(d-e)/3;
           XZ_mul2plus1(T1,A,B,C);
           XZ_mul2plus1(C,C,A,B);
           swap(B,T1);
           XZ_mul2(T1,A); XZ_mul2plus1(A,A,T1,A);
           continue;
         }
        if ((e&1)==0)
         {
           // condition 9
           e>>=1;
           XZ_mul2plus1(C,C,B,A);
           XZ_mul2(B,B);
           continue;
         }
        cerr << "Error in PRAC-Algorithm" << endl;
        exit(1);
      }
     XZ_mul2plus1(A,A,B,C);

  //cout << "PRAC-exit" << endl;
  mpz_set(xr,A->x); mpz_set(zr,A->z);
}
#endif


bool elliptic_curves::mul2(mpz_t xr, mpz_t yr, const mpz_t x1, const mpz_t y1)
{
  // !!! IMPORTANT: Don't use NResidues in this function !!!
  mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
  if (!mpz_invert(k,k,n))
    {
      mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
      factor_found(k);
      return true; // we're finished with this curve
    }
  mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
  mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h); mpz_add_ui(m,m,1);
  mpz_mul(m,m,k); mpz_mod(m,m,n);
  mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x1); mpz_mod(x3,x3,n);
  mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(yr,y3,n);
  mpz_set(xr,x3);
#ifdef DEBUG_ECM
  cout << "mul2 " << xr << "," << yr << endl;
  check_curve(xr,yr);
#endif
  return false; // no factor found
}

bool elliptic_curves::sub(mpz_t xr, mpz_t yr,
                          const mpz_t x1, const mpz_t y1,
                          const mpz_t x2, const mpz_t y2)
{
  bool flag;
  mpz_t y2_inv;

  mpz_init(y2_inv);
  mpz_neg(y2_inv, y2);
  mpz_mod(y2_inv, y2_inv, n);
  flag = add(xr,yr,x1,y1,x2,y2_inv);
  mpz_clear(y2_inv);
  return flag;
}


bool elliptic_curves::add(mpz_t xr, mpz_t yr,
                          const mpz_t x1, const mpz_t y1,
                          const mpz_t x2, const mpz_t y2)
{
  // !!! IMPORTANT: Don't use NResidues in this function !!!
  if (mpz_cmp(x1,x2)) // x1!=x2
   {
     mpz_sub(k,x2,x1); mpz_mod(k,k,n);
     if (!mpz_invert(k,k,n))
      {
        mpz_sub(k,x2,x1); mpz_mod(k,k,n); mpz_gcd(k,k,n);
        factor_found(k);
        return true; // we're finished with this curve
      }
     mpz_sub(m,y2,y1); mpz_mul(m,m,k); mpz_mod(m,m,n);
     mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
     mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
   }
  else // x1==x2
   {
     if (mpz_cmp(y1,y2)==0) // y1==y2
       {
         mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
         if (!mpz_invert(k,k,n))
          {
            mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
            factor_found(k);
            return true; // we're finished with this curve
          }
	 mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
         mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h);
         mpz_add_ui(m,m,1);
         mpz_mul(m,m,k); mpz_mod(m,m,n);
         mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
         mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
       }
      else // y1!=y2
       {
         cout << "Elliptic curves: feature not implemented." << endl;
         return true;
       } 
   }

  mpz_set(xr,x3); mpz_set(yr,y3);
#ifdef DEBUG_ECM
  cout << "add" << endl;
  check_curve(xr,yr);
#endif
  return false; // no factor found
}

bool elliptic_curves::add(mpz_t xr, mpz_t yr,
                          const mpz_t x1, const mpz_t y1,
                          const mpz_t x2, const mpz_t y2,
                          mpz_t k)
{
  // remark: this "add" is analogous to the original function,
  // but we have an additional "k" as last argument (which is already inverted),
  // so that we do not need to compute "k".

  // !!! IMPORTANT: Don't use NResidues in this function !!!
  if (mpz_cmp(x1,x2)) // x1!=x2
   {
     mpz_sub(m,y2,y1); mpz_mul(m,m,k); mpz_mod(m,m,n);
     mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
     mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
   }
  else // x1==x2
   {
     if (mpz_cmp(y1,y2)==0) // y1==y2
       {
         mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
         if (!mpz_invert(k,k,n))
          {
            mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
            factor_found(k);
            return true; // we're finished with this curve
          }
	 mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
         mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h);
         mpz_add_ui(m,m,1);
         mpz_mul(m,m,k); mpz_mod(m,m,n);
         mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
         mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
       }
      else // y1!=y2
       {
         cout << "Elliptic curves: feature not implemented." << endl;
         return true;
       } 
   }

  mpz_set(xr,x3); mpz_set(yr,y3);
#ifdef DEBUG_ECM
  cout << "add" << endl;
  check_curve(xr,yr);
#endif
  return false; // no factor found
}

bool elliptic_curves::init_arithmetic_progression(
  mpz_t* const x, mpz_t* const y, const mpz_t startx, const mpz_t starty,
  const NATNUM startpos, const unsigned int delta, const unsigned int grad)
{
  for (unsigned int i=0; i<=grad; i++)
   {
     NATNUM e = startpos + (i*delta);
     mpz_set(x[i], startx); mpz_set(y[i], starty);
     for (unsigned int j=0; j<grad; j++)
      if (mul(x[i],y[i],x[i],y[i],e)) return true;
   }
  for (unsigned int i=1; i<=grad; i++)
   {
     for (unsigned int j=grad; j>=i; j--)
      if (sub(x[j],y[j],x[j],y[j],x[j-1],y[j-1])) return true;
   }
  return false;
}

bool elliptic_curves::arithmetic_progression(mpz_t* const x, mpz_t* const y, const int anz)
{
 // this function "adds" a vector of points in the following manner:
 // for i=0 to anz-1 do (x[i],y[i]:1):=(x[i],y[i]:1)+(x[i+1]:y[i+1]:1); od
 // This is needed for successive evaluation of polynomial points in arithmetic progression

#if 0
 // slow version
 bool flag=false;
 for (int i=0; i<anz && !flag; ++i)
   flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1]);
 return flag;
#else
 // fast version
 // Trick: use multi-invert
 mpz_t* const h = new mpz_t[anz];
 mpz_t* const r = new mpz_t[anz];
 bool flag=false;
 for (int i=0; i<anz; ++i)
  {
    mpz_init(h[i]); mpz_init(r[i]);
    mpz_sub(h[i],x[i+1],x[i]);
    if (mpz_cmp_ui(h[i],0)==0)
     {
       mpz_mul_ui(h[i],y[i],2); mpz_mul(h[i],h[i],b); mpz_mod(h[i],h[i],n);
     }
  }
 // compute efficiently the inverse for each element
 if (mpz_multi_invert(r,h,anz,n))
  {
    // the inverse do exist -> hand them over to the add-routine
    for (int i=0; i<anz && !flag; ++i)
      flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1],r[i]);
  }
 else
  {
    // there is at least one element, that is not invertible -> call the original add-routine
    for (int i=0; i<anz && !flag; ++i)
      flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1]);
  }
 for (int i=0; i<anz; ++i)
  {
    mpz_clear(h[i]); mpz_clear(r[i]);
  }
 delete [] h; delete [] r;
 return flag;
#endif 
}

bool elliptic_curves::mul(mpz_t xr, mpz_t yr,
                          const mpz_t x1, const mpz_t y1,
                          NATNUM K)
{
  // !!! IMPORTANT: Don't use NResidues in this function !!!

  mpz_set(xh_mul,x1); mpz_set(yh_mul,y1);
#ifdef DEBUG_ECM
  if (K==0) { cerr << "multiplication by 0 is forbidden!"; exit(1); }
#endif
  while ((K&1)==0)
   {
     K>>=1;
     if (mul2(xh_mul,yh_mul,xh_mul,yh_mul)) return true; // factor found
   }
  // now we have (K&1)==1
  mpz_set(xr,xh_mul); mpz_set(yr,yh_mul);
  while (K>>=1)
   {
     if (mul2(xh_mul,yh_mul,xh_mul,yh_mul)) return true; // factor found
     if (K&1) if (add(xr,yr,xr,yr,xh_mul,yh_mul)) return true; // factor found
   }
#ifdef DEBUG_ECM
  cout << "mul" << endl;
  check_curve(xr,yr);
#endif
  return false; // no factor found
}


void elliptic_curves::go(const int ecm_sigma, const NATNUM phase1, const NATNUM phase2)
{
  using numtheory::is_prime;
  using numtheory::gcd;

  CPersistentDataCollection RecoveryData("ecm-recover.dat");

  mpz_t x,y,z, saved_x,saved_y, xh,yh;
  mpz_init(x); mpz_init(y); mpz_init(z);
  mpz_init(saved_x); mpz_init(saved_y);
  mpz_init(xh); mpz_init(yh);
  sigma=ecm_sigma;

  RecoveryData.RegisterVar("x",x);
  RecoveryData.RegisterVar("y",y);
  RecoveryData.RegisterVar("z",z);
  RecoveryData.RegisterVar("saved_x",saved_x);
  RecoveryData.RegisterVar("saved_y",saved_y);
  RecoveryData.RegisterVar("xh",xh);
  RecoveryData.RegisterVar("yh",yh);
  RecoveryData.RegisterVar("sigma",sigma);

  RecoveryData.RegisterVar("phase",phase);
  RecoveryData.RegisterVar("a",a);
  RecoveryData.RegisterVar("b",b);

  NATNUM i,d;
  RecoveryData.RegisterVar("i",i);
  RecoveryData.RegisterVar("d",d);

  if (sigma==-1)
   {
     // Recovery-Mode
     RecoveryData.Load();
     if (phase==1)
      {
        cout << "continue recovered elliptic curve..." << endl;
        goto StartPhase1;
      }
     MARK;
     cerr << "recovery of elliptic curve failed!" << endl;
     goto done;
   }

  cout << "--------------------------------------------------" << endl;

#if 1

  /*
     Activate the following line to enter Sigma manually.
     Most probably only sensible to check a given ecm-factorization.
     Or maybe you want to set a fixed Sigma for profiling purpose
     to check the timing...
  */
  //cout << "Sigma= "; std::cin >> sigma;
  // sigma=251975622; // for testing P8152

  mpz_set_ui(x,sigma); mpz_mul(x,x,x); mpz_sub_ui(x,x,5); mpz_mod(x,x,n);
  mpz_set_ui(y,sigma); mpz_mul_ui(y,y,4); mpz_mod(y,y,n);
  mpz_powm_ui(b,x,3,n); mpz_mul(b,b,y); mpz_mul_ui(b,b,4); mpz_mod(b,b,n);
  mpz_invert(a,b,n);
  mpz_sub(xh,y,x); mpz_powm_ui(xh,xh,3,n);
  mpz_mul_ui(yh,x,3); mpz_add(yh,yh,y); mpz_mul(xh,xh,yh);
  mpz_mul(a,a,xh); mpz_sub_ui(a,a,2); mpz_mod(a,a,n);

  mpz_powm_ui(x,x,3,n); mpz_powm_ui(z,y,3,n);

//#ifdef SLOW_WEIERSTRASS
  // we need to convert the curve, if we want to use the form (x:y:1);
  // and doing this is a good idea, anyway.
  mpz_invert(z,z,n); mpz_mul(x,x,z); mpz_mod(x,x,n);
  mpz_set_ui(y,1);
  mpz_powm_ui(xh,x,3,n); mpz_powm_ui(yh,x,2,n); mpz_mul(yh,yh,a);
  mpz_add(xh,xh,yh); mpz_add(b,xh,x); mpz_mod(b,b,n);
  mpz_set_ui(z,1); // (x:y:z)=(x:1:1)
//#endif

#else
  cout << "Alternative Sigma-Methode" << endl;
  //cout << "Sigma= "; cin >> sigma;
  if (sigma==2) { cerr << "wrong sigma (must be <>2)!" << endl; return; }
  mpz_set_ui(a,sigma); mpz_set_ui(y,1);
  mpz_set_ui(x,2); 
  mpz_mul_ui(b,a,4); mpz_add_ui(b,b,10);
#endif

  cout << "Trying Elliptic Curve Method with sigma=" << sigma << endl;
#ifdef VERBOSE
  cout << "x=" << x << endl;
  cout << "y=" << y << endl;
  cout << "a=" << a << endl;
  cout << "b=" << b << endl;
#endif

#ifdef DEBUG_ECM
  check_curve(x,y);
#endif

#ifndef SLOW_WEIERSTRASS
  // we expect b=(a+2)/4 for the fast version of XZ_mal2-function!!
  mpz_set_ui(h,4); mpz_invert(h,h,n);
  mpz_add_ui(b,a,2); mpz_mul(b,b,h); mpz_mod(b,b,n);
#endif

#if defined(NRESIDUE) && !defined(SLOW_WEIERSTRASS)
 // convert the essential parameters to NResidue
 NResidue.convert(a); NResidue.convert(b);
 NResidue.convert(x); NResidue.convert(y); NResidue.convert(z);
#endif


  // Phase 0: small prime numbers (and high powers)
  phase=0;
#ifdef VERBOSE_NOTICE
  cout << "Elliptic Curve Phase 0" << endl;
#endif
#ifdef SLOW_WEIERSTRASS
  for (int j=1; j<=50; j++) if (mul2(x,y,x,y)) goto done; // powers of 2
  for (int j=1; j<=25; j++) if (mul(x,y,x,y,3)) goto done; // powers of 3
#else
  for (int j=1; j<=50; j++) XZ_mul2(x,z,x,z); // powers of 2
  for (int j=1; j<=25; j++) 
    { 
      XZ_mul2(xh,yh,x,z);
      XZ_mul2plus1(x,z,xh,yh,x,z,x,z); // powers of 3
    }
#endif
  d=4; i=1; // using +4+2 to avoid divisions by 2,3
  while (i<500)
   {
     do { i+=d; d=6-d; } while (!is_prime(i));
     NATNUM j=phase2;
     NATNUM k=1;
     do { k*=i; j/=i; } while (j>=i);
#ifdef SLOW_WEIERSTRASS
     if (mul(x,y,x,y,k)) goto done;
     if (mul(x,y,x,y,k)) goto done; // do it twice...
#else
     XZ_multiply(x,z,x,z,k);
     XZ_multiply(x,z,x,z,k); // do it twice...
#endif
   }

#ifndef SLOW_WEIERSTRASS
  mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
#endif

StartPhase1:

#ifdef DEBUG_ECM
  check_curve(x,y);
#endif

  {
   // Phase 1: prime numbers (plus minor powers of them)
   phase=1;
   const unsigned int D=2*3*5*7*11*13*2; // *17
   i=i-(i%D); // i with i=5 (mod 6) for speeding up computation of primes
#ifdef VERBOSE_NOTICE
   cout << "Elliptic Curve Phase 1" << endl;
#endif
   while (i<=phase1)
    {
      RecoveryData.Save(); // make state persistent for recovery
#ifdef REACT_ON_SIGUSR
      if (USRSignalHandler.got_SIGUSR1()) goto done;
      if (USRSignalHandler.got_SIGUSR2()) break;
#endif
      // Primzahlen aussieben
      bool sieve[D];
      sieve[1]=(i!=0); // silly special case: 1 is not a prime!
      for (unsigned int j=3; j<=D; j+=2) sieve[j]=true; // initialize sieve
      for (unsigned int p=5, dp=2; p*p<i+D; p+=dp, dp=6-dp)
       for (unsigned int j=p-(i%p); j<D; j+=p) sieve[j]=false;
       /* 
         Bemerkung:
          a) constraint p*p<D+i is to strong for the interval,
             but this doesn't matter because of Phase 0.
             (-> prime numbers below sqrt(D) remain undetected) 
          b) p-(i%p) equals p, if i is divisible by p;
             but: we don't need the sieve entry for index 0!
       */
            
      for (unsigned int j=1, dj=4; j<D; j+=dj, dj=6-dj)
       {
         //if (sieve[j]!=is_prime(i+j))
         //  { cerr << "inconsistency: " << i+j << endl; }
         if (sieve[j])
           {
             NATNUM i2 = i+j;
             NATNUM j2=phase2, k=1;
             do { k*=i2; j2/=i2; } while (j2>=i2);
#ifdef SLOW_WEIERSTRASS
             if (mul(x,y,x,y,k)) goto done;
#else
             XZ_multiply(x,z,x,z,k);
#endif
             if (i2>=phase1) { i=i2; d=dj; break; }
           }
       }
      // high D values decrease the frequency of gcd computations.
      // in seldom cases factors remain unsplit.
      // -> if ECM fails for this reason, you may decrease D.

#ifndef SLOW_WEIERSTRASS
      mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
#endif

      if (i<=phase1) i+=D;
#ifdef VERBOSE_NOTICE
      cout << "Phase1: " << i << "/" << phase1 << "\r" << flush;
#endif
    }

   if (i%D==0)
    {
      i--; d=4; while (!is_prime(i)) { i-=d; d=6-d; } // adjust i to previous prime number
    }
#ifdef VERBOSE_NOTICE
   cout << "Elliptic Curve Phase 1, up to " << i << endl;
#endif
   if (i>=phase2) goto done;
  }


#ifndef SLOW_WEIERSTRASS
  // once again check for factors...
  mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
  mpz_gcd(h,x,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }

#if defined(NRESIDUE) && !defined(SLOW_WEIERSTRASS)
 // convert the essential parameters from NResidue to normal form
 NResidue.convert_back(a); NResidue.convert_back(b);
 NResidue.convert_back(x); NResidue.convert_back(y); NResidue.convert_back(z);
#endif

  // converting back to (x:y:1)-form
  mpz_set_ui(y,1);
  mpz_invert(h,z,n); mpz_mul(x,x,h); mpz_mod(x,x,n); // x=x/z

  mpz_powm_ui(xh,x,3,n); mpz_powm_ui(yh,x,2,n); mpz_mul(yh,yh,a);
  mpz_add(xh,xh,yh); mpz_add(b,xh,x); mpz_mod(b,b,n);
  mpz_set_ui(z,1); // (x:y:z)=(x:1:1)

  check_curve(x,y);
#endif

  phase=2; // for statistical reasons: we are now in continuation phase (phase 2)

#if (CONTINUATION_METHOD==0)

/* IMPORTANT NOTE:
   In phase 2 never use NResidues for calculating points on the curve.
   But it should be worthwhile - if NRESIDUE is defined - to work with
   "bfact" (and a transformed copy of "x" named "saved_x") as NResidues
   to speed up the modular multiplications with "collector".
*/

  {
   // Phase 2: isolated (prime) number:
   // ecm will succeed in this phase, if
   // curve is decomposable into small factors except for
   // one larger factor (which has to be found in this phase).
   cout << "Elliptic Curve Phase 2 (improved)" << endl;
   mpz_set(saved_x,x); mpz_set(saved_y,y); // Basis fr Phase2

   // precomputing most common used differences:
   // pre_mul_size ist die Intervallgre, in der auch gesiebt wird
   unsigned int pre_mul_size = 2*3*5; // 2*3 ist mindestens erforderlich!
   {
     // the more different small primefactors are inside the set, the
     // less coprime numbers we get for our interval.
     // -> the less memory needs to be allocated.
     // Neglecting the cost for computing the prime numbers, we would prefer
     // an interval size of sqrt(phase2).
     unsigned int p=5;
     do
      { 
        do p+=2; while (!is_prime(p));
        pre_mul_size*=p;
      } while (pre_mul_size<=phase2/pre_mul_size);

     if (pre_mul_size>3*sqrt(phase2)) // heuristics for threshold
      {
        pre_mul_size/=p; // biggest prime factor was too big -> divide
        // fill up with additional factors to get optimalen size:
        pre_mul_size*=static_cast<unsigned int>(ceil(sqrt(phase2)/pre_mul_size));
      }
#ifdef VERBOSE_INFO
     cout << "pre_mul_size=" << pre_mul_size << endl;
#endif
   }

   mpz_t* const bfact = new mpz_t[pre_mul_size];
    // hier wird der zu einer Differenz gehrige x-Kurvenwert abgelegt

   /* Im folgenden wird die Tabelle bfact aufgebaut, die von dem erreichten
      Startpunkt P=(saved_x:saved_y) die Punkte k*P fr 0<=k<pre_mul_size
      enthlt. Genauer: bfact[k]=X-Wert von k*P (den Y-Wert bentigen wir
      fr sptere Zugriffe nicht mehr).
      Die Tabelle wir spter bentigt, um in pre_mul_size-Schritten auf der
      elliptischen Kurve voranspringen zu knnen. Einen Punkt m*P knnen
      wir dann erreichen, in dem wir m1=floor(m/pre_mul_size), m2=m%premulsize
      setzen und m*P=(m1*premulsize)*P+m2*P berechnen.
      Fr einzelne Wertberechnungen bringt uns diese Form nichts, aber fr
      viele sukzessive Werte reicht es dann, den Punkt Q=premulsize*P
      zu berechnen, womit m*P=m1*Q+m2*P gilt.
      Wir haben nun erreicht, dass
        1. m2*P =:R[m2] fr 0<=m2<pre_mul_size ber Tabellenzugriffe geholt
           werden kann. -> "kostenlos"
        2. m1*premulsize*P=m1*Q =:S ist fr Werte innerhalb eines Intervalls
           der Gre premulsize ebenfalls konstant, solange m1 sich nicht
           ndert!
       -> Die Kosten reduzieren sich innerhalb eines Intervalls auf
          die Kosten der Addition von S+R[m2].
     
      Da wir in der improved-standard-continuation fr k lediglich alle
      aufeinanderfolgenden Primzahlen (die grer sind als der letzte Wert
      aus Phase 1) durchlaufen wollen, knnen wir fr premulsize das
      Produkt kleiner Primzahlen whlen und brauchen fr m2 nur die zu
      premulsize teilerfremden Zahlen zu nehmen.
      Um diese Punkte R[m2] an diesen Punkten zu berechnen, empfiehlt es sich
      fr groes premulsize, die Punkte hnlich der standart-continuation
      zu ermitteln; d.h. in Differenzen voranzuspringen und die Punkte
      ber Additionen der (vorberechneten) "Differenzpunkte" zu bestimmen.

      Der vorangegangene Kommentar beschreibt zwar nicht exakt unsere
      tatschliche Vorgehensweise, aber er umschreibt die dahinterliegende
      "Idee", die umgesetzt wird...
   */
      
   const int table_dxy_size = 23;
   mpz_t table_dxy[table_dxy_size][2];

   if (mul2(x,y,saved_x,saved_y)) goto done; // differences between prime numbers are even (for p>3)
   mpz_set(xh,x); mpz_set(yh,y); // save (x,y) to (xh,yh)
 
   mpz_init_set(table_dxy[2][0],x);
   mpz_init_set(table_dxy[2][1],y);
   for (int j=4; j<table_dxy_size; j+=2) 
    {
      if (add(x,y,x,y,xh,yh)) goto done;
      mpz_init_set(table_dxy[j][0],x);
      mpz_init_set(table_dxy[j][1],y);
    }

   mpz_set(x,xh); mpz_set(y,yh); // fetch back (x,y)
   for (int j=pre_mul_size-1, jlast=pre_mul_size-1; j>=0; j-=2)
    {
      if (gcd(j,pre_mul_size)==1) // j coprime?
       {
         /* 
            compute current point (premulsize-1-j)*(xh,yh)
            and store the X-ordinate in bfact[j]
         */
         int dj=jlast-j;
         if (dj>=table_dxy_size)
          { 
            cerr << "increase table_dxy_size to " << dj+1 << "!" << endl;
            exit(1);
          }
         if (dj>0) // special case: don't add, if dj=0
          if (add(x,y,x,y,table_dxy[dj][0],table_dxy[dj][1])) goto done;
         jlast=j;
         mpz_init_set(bfact[j],x);
#ifdef NRESIDUE
         NResidue.convert(bfact[j]);
#endif
       }
    }

   for (int j=2; j<table_dxy_size; j+=2) // delete table of points of differences
    {
      mpz_clear(table_dxy[j][0]);
      mpz_clear(table_dxy[j][1]);
    }


   NATNUM next_i;
   mpz_t collector;
   mpz_init(collector); mpz_set_ui(collector,1);

   // vielleicht etwas umstndlich, aber dafr verstndlich:
   if (mul(xh,yh,saved_x,saved_y,pre_mul_size)) goto done; // Intervall-Summand
   // (xh,yh)=premulsize*(saved_x,saved_y) ist nun der Intervallsummand

   if (i<pre_mul_size) 
     { 
       /* phase1<pre_mul_size,
          d.h. in der nachfolgenden Routine treten negative Werte auf,
          und das ergibt keinen Sinn.
          Normalerweise sollte ein kleiner Teil des berlappenden
          Restes von Phase 1 und dem Start von Phase 2 nicht stren.
          Wie auch immer, durch Erzeugung der bfact-Tabelle sollten
          alle Primzahlen von 1 bis pre_mul_size bereits ebenfalls
          abgetestet sein...
          Nun wird jedenfalls der Start der Phase 2 ein Intervall hher
          angesetzt.
        */
#ifdef VERBOSE_INFO
       cout << "need to increase start value for phase 2..." << endl;
#endif
       i+=pre_mul_size;
     }
   i=((i/pre_mul_size)*pre_mul_size)-1; d=2; // constraint i=5 (mod 6) for computing prime numbers efficiently
   if (mul(x,y,saved_x,saved_y,i)) goto done; // initial value

   next_i = i+1+pre_mul_size; // Intervallgrenze setzen

   bool* const sieve = new bool[pre_mul_size];
   for (int i1=pre_mul_size-1; i1>=0; i1-=2) sieve[i1]=false;

   for (unsigned int p=5, dp=2; p<pre_mul_size && p*p<next_i; p+=dp, dp=6-dp )
    for (unsigned int i1=p-((next_i-pre_mul_size)%p); i1<pre_mul_size; i1+=p) sieve[i1]=true;

   while (i<=phase2) 
    { 
      int runden=25000; // check for gcd (to detect factors) at regular intervals
      while (runden>0)
	{
          // compute next prime number...
          do { i+=d; d=6-d; } while (i<next_i && sieve[i%pre_mul_size]);

         /* falls pre_mul_size gro genug gewhlt wurde, wre statt
            "while"-Schleife auch eine "if"-Bedingung ausreichend... */
         while (i>next_i) // do we need to compute a new interval?
          { 
            next_i += pre_mul_size;
            if (add(x,y,x,y,xh,yh)) goto done_phase2;
#ifdef NRESIDUE
            mpz_set(saved_x,x);
            NResidue.convert(saved_x);
#endif
            for (int i1=pre_mul_size-1; i1>=0; i1-=2) sieve[i1]=false;
            for (unsigned int p=5, dp=2; p<pre_mul_size && p*p<next_i; p+=dp, dp=6-dp)
             for (unsigned int i1=p-((next_i-pre_mul_size)%p); i1<pre_mul_size; i1+=p) sieve[i1]=true;
          }

          // collect possible factor(s) inside "collector"... 
#ifdef NRESIDUE
          mpz_sub(h,saved_x,bfact[next_i-i]);
#else
          mpz_sub(h,x,bfact[next_i-i]);
#endif        
          mpz_mul(collector,collector,h); modulo(collector,collector,n);
	  --runden;
	}

      /* Bemerkung:
         Wenn wir das in "collector" gebildete Produkt als Auswertung eines
         Polynoms (x-c1)*(x-c2)*...*(x-ck) auffassen, dann bietet es sich
         an, eine Polynomauswertung zu implementieren, die schneller
         arbeitet als die k-fache Multiplikation.
         (-> Siehe nachfolgend implementierte fft-continuation,
             die alternativ aktiviert werden kann.)
      */

      // und erst nach "runden"-vielen Durchlufen die aufgesammelten Zahlen
      // auf grten gemeinsamen Teiler testen...
      mpz_gcd(collector,collector,n);
      if (mpz_cmp_ui(collector,1)) { factor_found(collector); goto done_phase2; }
      mpz_set_ui(collector,1); // sptere Multiplikation erleichtern...
#ifdef VERBOSE_NOTICE
      cout << "Phase2: " << i << "/" << phase2 << "\r" << flush;
#endif
    }

 done_phase2:
  mpz_clear(collector);
#ifdef VERBOSE_NOTICE
  cout << "Elliptic Curve Phase 2, up to " << i << endl;
#endif
   for (int j=pre_mul_size-1; j>=0; j-=2)
    {
      // freeing memory for precomputed differences
      // !! refer also allocation of these variables!
      if (gcd(j,pre_mul_size)==1) mpz_clear(bfact[j]);
    }
   delete [] sieve;
   delete [] bfact;
  }

// end of ecm improved standard continuation

#else
 //---------------------------------------------------------------
 // ecm fft-continuation
 //---------------------------------------------------------------

/* Dies ist eine fr fft-continuation umgemodelte Version der vorangegangenen
   improved standard continuation. fft bedeutet in diesem Zusammenhang, dass
   multipoint polynomial evaluation verwendet wird. Fr die konkrete
   Implementierung siehe das Modul "polynomial.cc".  Die eigentliche
   Optimierungsarbeit mu in dem oben genannten Modul vorgenommen werden.
*/


/* IMPORTANT NOTE:
   In phase 2 (fft) never use NResidues for calculating points on the curve.
*/

  {
   // Phase 2: hope to find an isolated (prime-)number:
   // we have success, if the curve is completely composed of many small
   // numbers and one isolated number.
#ifdef VERBOSE_NOTICE
   cout << "Elliptic Curve Phase 2 (improved with pairing & fft)" << endl;
#endif
   mpz_set(saved_x,x); mpz_set(saved_y,y); // base for Phase2

   // precomputing most common used differences:
   // pre_mul_size is the size of the interval per polynomial
   // -> see fft_param.cc
   unsigned int pg=256, pms=2*1050;
   get_fft_parameter(pg,pms,phase2,n); // get parameter (pg,pms) for "phase2"

   /* Am Rande:
      Die Schleifen wurden von "int" auf "NATNUM" umgestellt, so dass
      der Datentyp jetzt in Grenzen whlbar ist.
      Mit "long long" kann man dann auch bei 32-Bit-Rechnern bis zur
      Speichergrenze gehen. Bei 64-Bit-Rechnern drfte "int" statt "long long"
      dann wieder gengend gro sein.

      Bei 1 GB main memory und pg=262144 geht dem Rechner bereits der
      Speicher aus. (Der Speicherbedarf hngt natrlich auch von der Gre
      der zu faktorisierenden Zahl ab.) Wenn der Rechner nur im Hintergrund
      faktorisieren soll und Memory geswapt wird, ist das kontraproduktiv.
      -> Die Werte sind also individuell zu tunen.
   */

   const unsigned int pre_mul_size = pms;
   const unsigned int Polynom_Index = pg;

   //cout << "size of polynomial: " << Polynom_Index << endl;
   //cout << "single interval size:" << pre_mul_size << endl;

   mpz_t* const  collector = new mpz_t[Polynom_Index];
   for (unsigned int j=0; j<Polynom_Index; ++j) mpz_init(collector[j]);

   /*
      Fr den Aufbau des Polynoms:
      Nullstellen des Polynoms entsprechen in etwa der bfact-Tabelle der
      improved standard continuation, siehe auch dortige Kommentare.

      (k+d)^2 = k^2 + r mit r=d^2+2*k*d 

      setze d=2 und starte mit k=1, d=2, r=4+2*1*2=8
      wir erhalten:
       ... (k+2)^2=k^2+r=1+8=9, r=r+2*d^2=8+8=16,
       ... (k+2)^2=k^2+r=9+16=25, r=r+2*d^2=16+8=24,
       ... (k+2)^2=k^2+r=25+24=49, r=r+2*d^2=24+8=32,
       usw.

       Es wird nun eine Tabelle von Punkten der Form (k+d)^2 * P gebildet.
       Die Punkte werden in arithmetischer Progression sukzessive ermittelt.
       (xd[0]:yd[0]:0) ist dabei der jeweils gewnschte Punkt,
       (xd[i]:yd[i]:0), i=1..2 sind Hilfspunkte fr die arithmetische
       Progression zu berechnen.
       (vgl. Kapitel 5.9 in der Dissertation von Peter L. Montgomery)
   */

   const int d_exp = 12; // appropriate values are 2, 6, 12, 24, recommended: 12
   mpz_t xd[d_exp+1], yd[d_exp+1];
   for (int j=0; j<=d_exp; ++j) { mpz_init(xd[j]); mpz_init(yd[j]); }
   if (init_arithmetic_progression(xd, yd, saved_x, saved_y, 1, 2, d_exp)) goto done;

   unsigned int Polynom_Index_i = 0;
   for (unsigned int j=1; j<pre_mul_size/2; j+=2)
    {
      if (gcd(j,pre_mul_size)==1) // j coprime?
       {
         /* 
            aktueller Punkt x,y=j^2*(saved_x,saved_y),
            die X-Ordinate fr Polynomnullstelle ablegen,
         */
         //cout << j << "/" << pre_mul_size/2 << endl;
         mpz_set(collector[Polynom_Index_i++],xd[0]);
       }
      if (arithmetic_progression(xd,yd,d_exp)) goto done;
    }

#ifdef VERBOSE_INFO
   cout << "original size of polynomial: " << Polynom_Index_i << endl;
#endif

   polynomial::TPolynom Polynom = NULL; // reference for the polynomial (which will be constructed now)
 
   // fill up the leavings (up to the next binary power) with zeros
   /* Bemerkung: Besser wre es, Punkte auerhalb des Suchbereichs
                 zu whlen (Motto: doppeltes, aber auerhalb des Suchbereichs
                 lchriges Netz)
    */
   { 
     unsigned int anz_Punkte=Polynom_Index_i; 
     while (anz_Punkte<Polynom_Index) mpz_set_ui(collector[anz_Punkte++],0);
     Polynom_Index_i=polynomial::construct_polynomial_from_roots(Polynom,collector,anz_Punkte,n);
#ifdef VERBOSE_INFO
     cout << "polynomial created. size of polynomial = " << Polynom_Index_i << endl;
#endif
   }
   

   i=i-(i%pre_mul_size); // i is now a multiple of pre_mul_size
   // we use "i" as follows:
   //  "i" points (except for in the inner loop) to the start of
   // the interval [i,i+pre_mul_size].
   // Whereas in the inner loop "i" points to the middle of the interval.

   // Intervallhochzhler beim Pairing initialisieren:
   if (init_arithmetic_progression(xd, yd, saved_x, saved_y, i+pre_mul_size/2, pre_mul_size, d_exp)) goto done_phase2;

   while (i<=phase2)
    { 

#ifdef VERBOSE_NOTICE
      cout << "Phase2: " << i << "/" << phase2 << ", preparing new interval" << endl;
#endif
#ifdef VERBOSE_INFO
      cout << "Computing and collecting values..." << endl;
#endif

      i+=pre_mul_size/2; // set i to middle of interval
      // collecting values
      unsigned int collector_index=0;
      while (collector_index<(Polynom_Index_i-1)/2)
       {
         // now: (xd[0]:yd[0]:1)=i^2*(saved_x:saved_y:1)
         mpz_set(collector[collector_index++],xd[0]);
         // determine the next point by now...
         if (arithmetic_progression(xd,yd,d_exp)) goto done_phase2;
         i += pre_mul_size; // and set up a new interval
       }
      i-=pre_mul_size/2; // set i to start of interval

#ifdef REACT_ON_SIGUSR
      if (USRSignalHandler.got_SIGUSR1()) goto done_phase2;
      if (USRSignalHandler.got_SIGUSR2()) continue;
#endif

      // starting evaluation of polynomial
#ifdef VERBOSE_NOTICE
      cout << "Phase2: " << i << "/" << phase2 << ", evaluating" << endl;
#endif
#ifdef VERBOSE_INFO
      cout << "starting multipoint polynomial evaluation" << endl;
      cout << "Parameter: polynomial size: " << Polynom_Index_i
           << ", points to evaluate: " << collector_index << endl;
#endif
      polynomial::multipoint_eval(collector,Polynom,Polynom_Index_i,collector,collector_index,n);
#ifdef VERBOSE_INFO
      cout << "polynomial evaluation finished, computing gcd." << endl;
#endif
      mpz_set(h,collector[0]);
      for (unsigned int j=1; j<collector_index; ++j) { mpz_mul(h,h,collector[j]); mpz_mod(h,h,n); }
      mpz_gcd(h,h,n);
      /* Hinweis: Backtracking ist mglich, falls
                  Mehrfachfaktor entdeckt werden sollte:
                  a) gcd(collector[j],n) getrennt berechnen (geschieht nunmehr)
                  b) Polynomnullstellen einzeln untersuchen, falls auch dies scheitert.
                     (dies geschieht nicht)
      */
      if (mpz_cmp_ui(h,1)) // factor found?
        {
          if (mpz_probab_prime_p(h,probab_prime_checks))
            { factor_found(h); goto done_phase2; }
          else // try to split composite factors
            {
              for (unsigned int j=0; j<collector_index; ++j)
               {
                 mpz_gcd(h,collector[j],n);
                 if (mpz_cmp_ui(h,1)) factor_found(h);
               }
              goto done_phase2; // the curve is done...
            }
        }

      /* Bemerkung:
         Wenn wir das in "collector" gebildete Produkt als Auswertung
         eines Polynoms (x-c1)*(x-c2)*...*(x-ck) auffassen, dann
         bietet es sich an, eine Polynomauswertung zu implementieren, die
         schneller arbeitet als die k-fache Multiplikation.

         Folgende Vorgehensweise ist hier (in etwa) implementiert
          1. Nullstellenform des Polynoms ermitteln
             -> P(x)=(x-c1)*(x-c2)*...*(x-ck)
          2. P(x) ausmultiplizieren
             -> P(x)=a0 + a1*x + a2*x^2 + ... + a[k-1]*x^(k-1) + x^k
          3. k Werte (x1 ... x[k]) aufsammeln, fr die P(x) ausgewertet
             werden soll (-> k Intervallwerte von x bestimmen) 
          4. r[i]=P(x[i]) fr i=1..k bestimmen,
             z.B. mit schneller Auswertung ber
             r[i]=P(x) modulo (x-x[i]) fr i=1...k simultan auswerten.
             (-> divide and conquer)
          5. Produkt R=r1*r2*...r[k] modulo N berechnen
             und gcd(R,N) bestimmen. 
          6. Falls Schritt 5 keinen Teiler geliefert hat und die
             Obergrenze nicht erreicht ist, ab Schritt 3 fortfahren.

         Bemerkung: Um gengend viele gleiche Polynome auswerten zu knnen,
           sollte man hierbei auf Primzahltests verzichten und immer
           alle Nullstellen auswerten. (-> Nur ein Polynom bestimmen.)
           Mit asymptotisch schnellen Algorithmen sollte dies dann in etwa
           der FFT-continuation entsprechen.
      */

    }

 done_phase2:
  for (int j=0; j<=d_exp; ++j) { mpz_clear(xd[j]); mpz_clear(yd[j]); }
  for (unsigned int j=0; j<Polynom_Index; ++j) mpz_clear(collector[j]);
  delete [] collector;
  for (unsigned int j=0; j<Polynom_Index_i; ++j) mpz_clear(Polynom[j]);
  delete [] Polynom;
#ifdef VERBOSE_NOTICE
  cout << "Elliptic Curve Phase 2, up to " << i << endl;
#endif
  }

// end of ecm fft continuation


#endif

done:
  RecoveryData.ClearStream(); // no recovery necessary anymore for this curve
  mpz_clear(x); mpz_clear(y); mpz_clear(z);
  mpz_clear(saved_x); mpz_clear(saved_y);
  mpz_clear(xh); mpz_clear(yh);
}
