/*! @file
 * @brief
 * mpqs sieving stuff that needs to be included by Sieving.cc
 */


#include "mpqsPolynom.H"
extern CmpqsPolynom Polynom; // object to manage polynomial computations for multipolynomial sieve (MPQS)


#ifdef USE_FIBHEAP
 #include "fibheap.H"
 FibHeap <TSieve_Delta> Sieve_Delta_Heap, Sieve_Delta_HeapOfSquares;
 #include <queue>
#elif defined(USE_FAKEHEAP)
 #include "fakeheap.H"
 FakeHeap <TSieve_Delta> Sieve_Delta_Heap;
 #include <queue>
 std::priority_queue<TSieve_Delta> Sieve_Delta_HeapOfSquares;    
#else
 #include <queue>
 std::priority_queue<TSieve_Delta> Sieve_Delta_Heap, Sieve_Delta_HeapOfSquares;
#endif


extern bool is_dynamic_factor(const int number); 

//! contains data structures for accessing dynamic factors
namespace DynamicFactorArrays
{
  // this is for much faster access to the data
  // (in streaming manner, possibly using SSE instructions!)
  const unsigned int MaxDynamicFactors = 2500000;
  static int DynFactors[MaxDynamicFactors]; // write once (per entry) - read many access
  static int DynFactorReciprocals[MaxDynamicFactors]; // write once (per entry) - read many access
  static int SQRT_kNs[MaxDynamicFactors]; // write once (per entry) - read many access
  static signed int PolyDs[MaxDynamicFactors]; // read/write many access

  unsigned int DynamicFactorsInUse = 0; // already declared in Sieving.H
  double DYNFB_threshold = 0.0; // already declared in Sieving.H

  void compute_Deltas_for_DynamicFactors(const int offset)
  {
#if defined(VERBOSE)
    cout << "FYI: we have " << DynamicFactorsInUse << " dynamic factors in use for sieving." << endl;
#endif
    for (unsigned int i=0; i<DynamicFactorsInUse; ++i)
     {
       register const int DynFac=DynFactors[i];
       const int &DynFacReciprocal = DynFactorReciprocals[i];

       TSieve_Delta sieb_delta;
       int d1,d2;
       
       sieb_delta.factor=DynFac;
       const unsigned int PolyB = mpz_remainder_ui(Polynom.get_B(),DynFac);

       if (PolyDs[i]<0 || !CDeltaComputations::D_difference_to_last)
        {
           //cout << "get MPQS-PolyD for dynamic factor " << DynFac << endl;
           PolyDs[i]=mpz_remainder_ui(Polynom.get_D(),DynFac);
        }
       else
        {
           //cout << "updating MPQS-PolyD for dynamic factor " << DynFac << endl;
           PolyDs[i]+=CDeltaComputations::D_difference_to_last;
        }
       while (PolyDs[i]>=DynFac) PolyDs[i]-=DynFac;
       unsigned int inv_A2 = numtheory::squaremod(PolyDs[i],DynFac)<<1;
       if (inv_A2>=static_cast<unsigned int>(DynFac)) inv_A2-=DynFac;
       inv_A2 = fastinvmod(inv_A2,DynFac);

       d1=SQRT_kNs[i]-PolyB; if (d1<0) d1+=DynFac;
       d1=mulmod(d1,inv_A2,DynFac);
       sieb_delta.delta=normalized_signed_mod(d1-offset,DynFac,DynFacReciprocal);
       sieb_delta.delta+=offset;
     #ifdef USE_FAKEHEAP
       while (sieb_delta.delta<=LogicalSieveSize) // shortcut: only re-insert, if still inside the sieve interval!
        {
          Sieve_Delta_Heap.push(sieb_delta);
          sieb_delta.delta+=sieb_delta.factor;
        }
     #else
       if (sieb_delta.delta<=LogicalSieveSize) // shortcut: only re-insert, if still inside the sieve interval!
         Sieve_Delta_Heap.push(sieb_delta);
     #endif
       
       d2=DynFac-SQRT_kNs[i]-PolyB; if (d2<0) d2+=DynFac;
       d2=mulmod(d2,inv_A2,DynFac);
       sieb_delta.delta=normalized_signed_mod(d2-offset,DynFac,DynFacReciprocal);
       sieb_delta.delta+=offset;
       if ( (sieb_delta.delta<=LogicalSieveSize)  && (d1!=d2) ) // shortcut: only re-insert, if still inside the sieve interval!
        {
          Sieve_Delta_Heap.push(sieb_delta);
     #ifdef USE_FAKEHEAP
          sieb_delta.delta+=sieb_delta.factor;
          while (sieb_delta.delta<=LogicalSieveSize)
           {
             Sieve_Delta_Heap.push(sieb_delta);
             sieb_delta.delta+=sieb_delta.factor;
           }
     #endif
        }
     }
  }

} // end of namespace DynamicFactorArrays

void TDynamicFactorRelation::append_DynamicFactor_for_sieving(const int DynFac)
{
  using namespace DynamicFactorArrays;
  if (DynamicFactorsInUse>=MaxDynamicFactors)
   {
     MARK; cerr << "you may want to increase MaxDynamicFactors and recompile..." << endl;
     return; //exit(19);
   }

  // if we use partial sieving, then we cannot be sure, that a
  // already known dynamic factor gets redetected: redetected dynamic factors
  // *must not* appended again!!
  // we could avoid this check for the "complete interval sieving", but I doubt that
  // this saves us much time...
  if (is_dynamic_factor(DynFac))
   {
#ifdef VERBOSE_INFO
     cout << "append dynamic factor: " << DynFac << " is already appended!" << endl;
#endif
     return; // don't append this dynamic factor twice...
   }
  DynFactors[DynamicFactorsInUse] = DynFac;
  DynFactorReciprocals[DynamicFactorsInUse] = numtheory::reciprocal(DynFac);
  SQRT_kNs[DynamicFactorsInUse]=SQRT_kN_mod_PrimeNumber(DynFac);
  PolyDs[DynamicFactorsInUse]=-1; // tripper update at first access
  ++DynamicFactorsInUse; // now we have one more in use

  DYNFB_threshold-=(4.0*LogicalSieveSize)/DynFac;
  //if ((DynamicFactorsInUse&255)==255) cout << "#DYNFB: " << DynamicFactorsInUse << ", countdown now: " << DYNFB_threshold << endl;
}



void do_sieving_Squares()
{
  // Sieve squares of static (and dynamic) factors.
  // Important: Only subtract the single logarithm of the factors
  //            (because if a square divides, then the factor has divided already!)
  // Remark: To be more efficient, at most one hit in the sieve interval
  //         is considered.

  //cout << "Start sieving squares" << endl;
  if (Sieve_Delta_HeapOfSquares.empty()) return; // there's nothing to sieve...
  TSieve_Delta sieb_delta = Sieve_Delta_HeapOfSquares.top();
  register int delta = sieb_delta.delta-SieveOffset;

#if 0
  while (delta<0) // do we need to skip any factors? (for partial sieving!)
    {
      cout << "sieving squares: " << sieb_delta.factor << " skipped!" << endl;
      Sieve_Delta_HeapOfSquares.pop(); // pop element out of priority queue
      if (Sieve_Delta_HeapOfSquares.empty()) return;
      sieb_delta = Sieve_Delta_HeapOfSquares.top();
      delta = sieb_delta.delta-SieveOffset;
    }
#endif
  
  while (delta<PhysicalSieveSize)
    {
      Sieve_Delta_HeapOfSquares.pop(); // pop element out of priority queue
      SieveArray[delta]-=log_DynamicLargePrime(sieb_delta.factor); // mark factor at sieve position
      if (Sieve_Delta_HeapOfSquares.empty()) break;
      sieb_delta = Sieve_Delta_HeapOfSquares.top();
      delta = sieb_delta.delta-SieveOffset;
    }
  //cout << "Finished sieving squares" << endl;
}


// new version, patch contributed by Alexis Michon
void do_sieving_DynamicFactors()
{
  if (Sieve_Delta_Heap.empty()) return; // there's nothing to sieve...

  short int HitCount=0;
  TSieve_Delta sieb_delta = Sieve_Delta_Heap.top();
  register int delta = sieb_delta.delta-SieveOffset;

#if 0
  while (delta<0) // do we need to skip any factors? (for partial sieving!)
    {
      cout << "sieving dynamic factors: " << sieb_delta.factor << " skipped!" << endl;
      Sieve_Delta_Heap.pop(); // pop element out of priority queue
      if (Sieve_Delta_Heap.empty()) return;
      sieb_delta = Sieve_Delta_Heap.top();
      delta = sieb_delta.delta-SieveOffset;
    }
#endif

  while (delta < PhysicalSieveSize) // still in the current (physical) interval?
  {
	Sieve_Delta_Heap.pop(); // pop element out of priority queue
	SieveArray[delta]-=log_DynamicLargePrime(sieb_delta.factor); // mark factor at sieve position
  
        // to speedup refactorization of relations, a dynamic factor hit is memorized here
	Hits[HitCount].Faktor=sieb_delta.factor;

#ifdef USE_FAKEHEAP
	if (Sieve_Delta_Heap.empty()) 
	{
		if (SieveArray[delta]<0) // found a (possible) relation?
		if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
		{
			SieveArray[delta]=1; // stall further hits at this position...
			StaticRelations::insert(new CRelation(delta+SieveOffset,HitCount+1)); // and insert as a hit
		}
		return;
	}
#else
	sieb_delta.delta += sieb_delta.factor; // next hit-position
	if (sieb_delta.delta<=LogicalSieveSize) // shortcut: push only, if it will be inside the sieve interval
	{
		Sieve_Delta_Heap.push(sieb_delta); // pushing to priority queue again
	}
	else
	if (Sieve_Delta_Heap.empty()) 
	{
		if (SieveArray[delta]<0) // found a (possible) relation?
		 if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
		 {
			SieveArray[delta]=1; // stall further hits at this position...
			StaticRelations::insert(new CRelation(delta+SieveOffset,HitCount+1)); // and insert as a hit
		 }
		return;
	}
#endif
  
	sieb_delta = Sieve_Delta_Heap.top(); // get next entry out of the priority queue
	register int delta2; delta2 = sieb_delta.delta-SieveOffset;
	if (delta==delta2)
  	{
		++HitCount;
#ifdef DEBUG
                if (HitCount>=CHits::MaxHits) { cerr << "increase MaxHits!"; exit(1); }
#endif
		continue;
	}
	if (SieveArray[delta]<0) // found a (possible) relation?
	 if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
	 {
		SieveArray[delta]=1; // stall further hits at this position...
		StaticRelations::insert(new CRelation(delta+SieveOffset, HitCount+1)); // and insert as a hit
	 }
	delta=delta2;
	HitCount=0; // reset HitCount for next hit
  }
  return; // done with this interval
}



#if 0
#include <map>
void Werteverteilung_im_Sieb_ausgeben(void)
{
  // if you want to view more stats, you can use this function
  // to get the distribution of hits in the sieve
  // (don't activate this for "production use", as this is very slow!)
  typedef map<TSieveElement,unsigned int> TStats;
  static TStats S;
  for (int i=0; i<PhysicalSieveSize; ++i) S[SieveArray[i]]++;

  static int z = 1000;
  if (--z==0)
   {
     z=1000;

     for (TStats::const_iterator pos=S.begin(); pos!=S.end(); ++pos)
      cout << static_cast<signed long int>(pos->first) << " : " << pos->second << endl;

     cout << "press <Enter> to continue" << endl;
     string s; cin >> s;
   }
}
#endif


void CDeltaComputations::recompute_all_Deltas(const bool DoReallyAll /* =true */)
{
  // While changing polynomials, the old Deltavalues get void.
  // They must be recomputed.

  //cout << "recomputing deltas" << endl;

  // at first, remove any (possible remaining) dynamic factors from the queue:
#ifdef USE_FAKEHEAP
//  cout << "clearing Fakeheap with " << Sieve_Delta_Heap.size() << " elements..." << endl;
  Sieve_Delta_Heap.clear();
#else
  while (!Sieve_Delta_Heap.empty()) Sieve_Delta_Heap.pop(); // for priority_queue (in case that no clear-method is defined)
#endif

  // do the same with squares:
  //Sieve_Delta_HeapOfSquares.clear();
  while (!Sieve_Delta_HeapOfSquares.empty()) Sieve_Delta_HeapOfSquares.pop(); // for priority_queue (in case that no clear-method is defined)
 
  
  // first, compute the Deltas in/of the static factorbase
  unsigned int inv_A2_modp, PolyB;
  mpz_t inv_A2, P, wuq, x, y;

  mpz_init(inv_A2); mpz_init(P); mpz_init(wuq); mpz_init(x); mpz_init(y);

  // for computing Deltas, we need to query the mpqs coefficient "A2" and
  // compute "A2 mod p", if we want to sieve with p. This is slow. 
  // For gods sake we can do it much faster:
  mpqsDmodPUpdater.update();
   // .. and now the new A2's are available via "CDeltaComputations::get_A2_mod_FBPrime(int Nr)"
   // instead of the more slowly version "Polynom.get_A2_mod(const unsigned int m)"


  // factorbase: compute first sieve hit; ignore primenumber 2, because computation of root for this value is too complicated...
  // ignore PrimeNumbers[0]==-1 because of sign
  if (PrimeNumbers[1]!=2) 
   {
     inv_A2_modp=fastinvmod_23bit(Polynom.get_A2_mod(PrimeNumbers[1]),PrimeNumbers[1]);
     PolyB=mpz_remainder_ui(Polynom.get_B(),PrimeNumbers[1]);
     register signed int d = PolyB%PrimeNumbers[1];
     register signed int h = inv_A2_modp%PrimeNumbers[1];
     d=mulmod(d,h,PrimeNumbers[1]);
     h=mulmod(SQRT_kN_of_PrimeNumbers[1],h,PrimeNumbers[1]);
     d=-d; d+=h; if (d<0) d+=PrimeNumbers[1];
     Delta_of_PrimeNumbers[1][0]=d;
     d-=h; if (d<0) d+=PrimeNumbers[1];
     d-=h; if (d<0) d+=PrimeNumbers[1];
     Delta_of_PrimeNumbers[1][1]=d;
   }

  int i=2;
  while(i<StaticFactorbase::Size() && PrimeNumbers[i+1]<46340) // do the paired part (magic 46340 is sqrt(2^31))
    {
#if 0 || defined(DEBUG)
      if (D_mod_FBPrime[i]!=PrimeNumbers[i]*PrimeNumbers[i+1])
       {
         MARK; cerr << "implementation error."
                    << "The D_mod_FBPrime[] structure is a bit tricky..." << endl
                    << "review the source code." << endl;
         exit(2);
       }
      if (get_A2_mod_FBPrimePair(i)!=Polynom.get_A2_mod(D_mod_FBPrime[i]))
       {
         MARK;
         cout << "StaticFactorbase::Size(): " << StaticFactorbase::Size() << endl;
         cout << "PrimeNumbers[i], PrimeNumbers[i+1]: " << PrimeNumbers[i] << ", " << PrimeNumbers[i+1] << endl;
         cout << i << ": " << get_A2_mod_FBPrimePair(i) << " ?? " << Polynom.get_A2_mod(D_mod_FBPrime[i]) << endl;
         exit(2);
       }
#endif
      const int N=D_mod_FBPrime[i]; // = PrimeNumbers[i]*PrimeNumbers[i+1];
      inv_A2_modp=fastinvmod(get_A2_mod_FBPrimePair(i),N); // fast version
      //inv_A2_modp=fastinvmod(Polynom.get_A2_mod(N),N); // slow version
      PolyB=mpz_remainder_ui(Polynom.get_B(),N);
      //cout << "paired: i=" << i << " N=" << N << endl;

      for (signed int j=2; --j>=0; ++i) // this is no typo, it's really ++i!
       {
#if 1 && defined(ASM_CMOV)
         asm volatile(\
          "mov %[inv_A2_modp],%%edx \n\t" \
          "mov %[inv_A2_modp],%%eax \n\t" \
          "shr $16,%%edx \n\t" \
          "divw %%si \n\t" \
          "movzxw %%dx,%%ebx \n\t" \
          "mov %[PolyB],%%edx \n\t" \
          "movzxw %%dx,%%eax \n\t" \
          "shr $16,%%edx \n\t" \
          "divw %%si \n\t" \
          "movw %%dx,%%ax \n\t" \
          "mulw %%bx \n\t" \
          "divw %%si \n\t" \
          "mov %%ebx,%%eax \n\t" \
          "movzx %%dx,%%ebx \n\t" \
          "mulw %[SQRTkN](,%[i],4) \n\t" \
          "divw %%si \n\t" \
          "# %%edx and %%ebx are now swapped! \n\t" \
          "xorl %%eax,%%eax \n\t" \
          "negl %%ebx \n\t" \
          "addl %%edx,%%ebx \n\t" \
          "cmovsl %%esi,%%eax \n\t" \
          "addl %%eax,%%ebx \n\t" \
          "xorl %%eax,%%eax \n\t" \
          "movl %%ebx,%[Deltas](,%[i],8) \n\t" \
          "subl %%edx,%%ebx \n\t" \
          "cmovsl %%esi,%%eax \n\t" \
          "addl %%eax,%%ebx \n\t" \
          "xorl %%eax,%%eax \n\t" \
          "subl %%edx,%%ebx \n\t" \
          "cmovsl %%esi,%%eax \n\t" \
          "addl %%eax,%%ebx \n\t" \
          "movl %%ebx,4+%[Deltas](,%[i],8) \n"
          : /* empty output list, we're writing to memory */
          : "S" (PrimeNumbers[i]), [inv_A2_modp] "g" (inv_A2_modp),
            [PolyB] "g" (PolyB), [SQRTkN] "o" (SQRT_kN_of_PrimeNumbers[0]),
            [Deltas] "o" (Delta_of_PrimeNumbers[0][0]), [i] "r" (i)
          : "cc", "memory", "eax","ebx","edx");
#else
         register signed int d = PolyB%PrimeNumbers[i];
         register signed int h = inv_A2_modp%PrimeNumbers[i];
         d=mulmod(d,h,PrimeNumbers[i]);
         h=mulmod(SQRT_kN_of_PrimeNumbers[i],h,PrimeNumbers[i]);
         d=-d; d+=h; if (d<0) d+=PrimeNumbers[i];
         Delta_of_PrimeNumbers[i][0]=d;
         d-=h; if (d<0) d+=PrimeNumbers[i];
         d-=h; if (d<0) d+=PrimeNumbers[i];
         Delta_of_PrimeNumbers[i][1]=d;
#endif

#if 0 || defined(DEBUG)
         // we may want to check, whether above computations are valid:
         Polynom.get_values(Delta_of_PrimeNumbers[i][0],x,y);
         if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0) 
	  { 
	    cerr << "Delta0 " << Delta_of_PrimeNumbers[i][0] << " doesn't divide for "
	         << PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
	    Delta_of_PrimeNumbers[i][0]-=mpz_get_ui(y);
	    if (Delta_of_PrimeNumbers[i][0]<0) Delta_of_PrimeNumbers[i][0]+=PrimeNumbers[i];
          }
         Polynom.get_values(Delta_of_PrimeNumbers[i][1],x,y);
         if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
	  { 
	    cerr << "Delta1 " << Delta_of_PrimeNumbers[i][1] << " doesn't divide for "
	         << PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
	    Delta_of_PrimeNumbers[i][1]-=mpz_get_ui(y);
	    if (Delta_of_PrimeNumbers[i][1]<0) Delta_of_PrimeNumbers[i][1]+=PrimeNumbers[i];
	  }
#endif
       }
    }
  // it guaranteed here that i%2=0

  // factorbase: compute first sieve hit (unpaired part)
  for ( ; i<StaticFactorbase::Size(); ++i) // do the unpaired part
    {
      //inv_A2_modp=fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i]); // slow version
      inv_A2_modp=fastinvmod_23bit(get_A2_mod_FBPrime(i),PrimeNumbers[i]); // fast version

#ifdef DEBUG
      if ( (inv_A2_modp != fastinvmod(get_A2_mod_FBPrime(i),PrimeNumbers[i]))
         || (inv_A2_modp != fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i])) )
       {
         MARK; cerr << "Inconsistency detected! please review source code!" << endl;
         cerr << "i=" << i << endl;
         cerr << "inv_A2_modp=" << inv_A2_modp << endl;
         cerr << "but using method 1 we get: " << fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i]) << endl;
         cerr << "and using method 2 we get: " << fastinvmod(get_A2_mod_FBPrime(i),PrimeNumbers[i]) << endl;
         exit(2);
       }
#endif

      PolyB=mpz_remainder_ui(Polynom.get_B(),PrimeNumbers[i]);
      
      register int d;
      d=SQRT_kN_of_PrimeNumbers[i]-PolyB; if (d<0) d+=PrimeNumbers[i];
      Delta_of_PrimeNumbers[i][0]=mulmod(d,inv_A2_modp,PrimeNumbers[i]);
      
      d=PrimeNumbers[i]-SQRT_kN_of_PrimeNumbers[i]-PolyB; if (d<0) d+=PrimeNumbers[i];
      Delta_of_PrimeNumbers[i][1]=mulmod(d,inv_A2_modp,PrimeNumbers[i]);

#if 0 || defined(DEBUG)
      // we may want to check, whether above computations are valid:
      Polynom.get_values(Delta_of_PrimeNumbers[i][0],x,y);
      if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0) 
	{ 
	  cerr << "Delta0 " << Delta_of_PrimeNumbers[i][0] << " doesn't divide for "
	       << PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
	  Delta_of_PrimeNumbers[i][0]-=mpz_get_ui(y);
	  if (Delta_of_PrimeNumbers[i][0]<0) Delta_of_PrimeNumbers[i][0]+=PrimeNumbers[i];
	}
      Polynom.get_values(Delta_of_PrimeNumbers[i][1],x,y);
      if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
	{ 
	  cerr << "Delta1 " << Delta_of_PrimeNumbers[i][1] << " doesn't divide for "
	       << PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
	  Delta_of_PrimeNumbers[i][1]-=mpz_get_ui(y);
	  if (Delta_of_PrimeNumbers[i][1]<0) Delta_of_PrimeNumbers[i][1]+=PrimeNumbers[i];
	}
#endif
    }


  for (int i=0; i<NumberOf_more_PrimePowers; ++i)
    {
      // compute Deltas for these numbers (which are powers of prime numbers of the factorbase)
      // (and sieve them together with the static factors)

      inv_A2_modp=fastinvmod(Polynom.get_A2_mod(PrimePowers[i]),PrimePowers[i]);
      PolyB=mpz_remainder_ui(Polynom.get_B(),PrimePowers[i]);

      register int d;
      d=SQRT_kN_of_PrimePowers[i]-PolyB; if (d<0) d+=PrimePowers[i];
      Delta_of_PrimePowers[i][0]=mulmod(d,inv_A2_modp,PrimePowers[i]);
      
      d=PrimePowers[i]-SQRT_kN_of_PrimePowers[i]-PolyB; if (d<0) d+=PrimePowers[i];
      Delta_of_PrimePowers[i][1]=mulmod(d,inv_A2_modp,PrimePowers[i]);
    }

  if (!DoReallyAll) goto done;


#if 1 /* Test!!!! 2004-03-09 */
  for (int i=FB_maxQuadrate+1; i<StaticFactorbase::Size() && PrimeNumbers[i]<25000; ++i)
    {
      if (MPQS_Multiplier%PrimeNumbers[i]!=0)
	{
          // For all/some "large" squares of the factorbase we determine the Deltas,
          // but these squares are sieved in conjunction with the dynamic factors.
          const unsigned int P = PrimeNumbers[i]*PrimeNumbers[i];
          const unsigned int wuq = SQRT_kN_of_PrimeSquares[i];
          inv_A2_modp=fastinvmod(Polynom.get_A2_mod(P),P);
          PolyB=mpz_remainder_ui(Polynom.get_B(),P);
      
          // Delta1
          signed int d = wuq>=PolyB ? wuq-PolyB : wuq-PolyB+P; // overflow?
          d=mulmod(d,inv_A2_modp,P);
          d+=LogicalSieveSize; d%=P; d-=LogicalSieveSize;
	  if (d<LogicalSieveSize) // value in interval?
	    {
	      TSieve_Delta sieb_delta;
	      sieb_delta.factor=PrimeNumbers[i];
	      sieb_delta.delta=d;
	      Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
	      //cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#if defined(DEBUG)
              Polynom.get_values(sieb_delta.delta,x,y);
              if (!mpz_divisible_ui_p(y,P))
               {
                 MARK; cerr << "wrong delta position for square!" << endl; exit(1);
               } //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta1)" << endl; };
#endif
	    }

	  // Delta2
          unsigned int h = wuq+PolyB; d=P-mulmod(h,inv_A2_modp,P); d+=LogicalSieveSize; d%=P; d-=LogicalSieveSize;
	  if (d<LogicalSieveSize) // value in interval?
	    {
	      TSieve_Delta sieb_delta;
	      sieb_delta.factor=PrimeNumbers[i];
	      sieb_delta.delta=d;
	      Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
	      //cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#if defined(DEBUG)
              Polynom.get_values(sieb_delta.delta,x,y);
              if (!mpz_divisible_ui_p(y,P))
               {
                 MARK; cerr << "wrong delta position for square!" << endl; exit(1);
               } //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta2)" << endl; };
#endif
	    }
	  
	}
    }
#endif



#ifdef SIEVING_LARGE_SQUARES
  for (int i=StaticFactorbase::Size()-1; i>FB_maxQuadrate && PrimeNumbers[i]>25000; --i)
    {
      if (MPQS_Multiplier%PrimeNumbers[i]!=0)
	{
          // For all "large" squares of the factorbase we determine the Deltas,
          // but these squares are sieved in conjunction with the dynamic factors.
	  mpz_set_ui(P,PrimeNumbers[i]); mpz_mul(P,P,P); // get square of the prime number
	  mpz_invert(inv_A2,Polynom.get_A2(),P);

          // now compute square roots modulo P (these values are not memorized; this would take too much space)
          // compute square roots of kN modulo Primzahl^2
	  const int Wu = SQRT_kN_of_PrimeNumbers[i];
	  mpz_set_ui(y,2*Wu);
	  if (mpz_invert(y,y,P)==0)
	    {
	      cerr << "PQ-root for " << PrimeNumbers[i] << ": inverse doesn't exist!" << endl;
	      exit(1);
	    }
	  
	  mpz_mod(x,kN,P); mpz_set_ui(wuq,Wu); mpz_mul_ui(wuq,wuq,Wu); mpz_sub(x,x,wuq);
	  mpz_mul(x,x,y); mpz_add_ui(x,x,Wu); mpz_mod(wuq,x,P);

#ifdef DEBUG
          // we may want to check, whether above computations are valid:
          mpz_mul(x,wuq,wuq); mpz_sub(x,kN,x); mpz_mod(x,x,P);
          if (mpz_cmp_ui(x,0)!=0)
           {
             cerr << "PQ-root incorrect!" << endl;
             cerr << "Primzahl=" << PrimeNumbers[i] << endl;
             cerr << "kN= " << kN << endl;
             cerr << "SQRT(kN) mod p=" << SQRT_kN_of_PrimeNumbers[i] << endl;
             cerr << "SQRT(kN) mod p^2=" << wuq << endl;
             cerr << "but we got (wrong value): " << x << endl;
             exit(1); 
           }
#endif
	  // and now the Deltas:

	  // Delta1
	  mpz_sub(x,wuq,Polynom.get_B()); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
	  mpz_sub_ui(x,x,LogicalSieveSize);
	  if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // does the value lie in the interval?
	    {
	      TSieve_Delta sieb_delta;
	      sieb_delta.factor=PrimeNumbers[i];
	      sieb_delta.delta=mpz_get_si(x);
	      Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
	      //cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#ifdef DEBUG
              Polynom.get_values(sieb_delta.delta,x,y);
              if (!mpz_divisible_p(y,P))
               {
                 MARK; cerr << "wrong delta position for square!" << endl; exit(1);
               } //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta1)" << endl; };
#endif
	    }
	  
	  // Delta2
	  mpz_add(x,wuq,Polynom.get_B()); mpz_neg(x,x); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
	  mpz_sub_ui(x,x,LogicalSieveSize);
	  if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
	    {
	      TSieve_Delta sieb_delta;
	      sieb_delta.factor=PrimeNumbers[i];
	      sieb_delta.delta=mpz_get_si(x);
	      Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
	      //cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#ifdef DEBUG
              Polynom.get_values(sieb_delta.delta,x,y);
              if (!mpz_divisible_p(y,P))
               {
                 MARK; cerr << "wrong delta position for square!" << endl; exit(1);
               } //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta2)" << endl; };
#endif
	    }
	  
	}
    }
#endif

  

#ifdef SIEVING_MORE_LARGE_SQUARES
  // and push more squares outside of the static factorbase:
  {
    unsigned int Primzahl = biggest_Prime_in_Factorbase;
    const unsigned int UpperLimit = 1000000; // or maybe 4*biggest_Prime_in_Factorbase;
    cout << "pushing more prime squares up to " << UpperLimit << endl; 
    while (Primzahl<UpperLimit)
     {
       do // compute next prime number of the static factorbase
	{
	  do Primzahl+=2; 
           while( Primzahl%3==0 || Primzahl%5==0 || Primzahl%7==0
                  || Primzahl%11==0 || Primzahl%13==0 || Primzahl%17==0
                  || !probab_prime(Primzahl)); // next prime number
	  mpz_set_ui(P,Primzahl);
	}
       while (mpz_legendre(kN,P)<0); // prime number valid for factorbase?

       mpz_mul(P,P,P); // compute square
       mpz_invert(inv_A2,Polynom.get_A2(),P);

       // now compute square roots modulo P (these values are not memorized; this would take too much space)
       // compute square roots of kN modulo Primzahl^2
       const int Wu = SQRT_kN_mod_PrimeNumber(Primzahl);
       mpz_set_ui(y,2*Wu);
       if (mpz_invert(y,y,P)==0)
        {
          cerr << "PQ-root for " << Primzahl << ": inverse doesn't exist!" << endl;
          exit(1);
	}
	  
       mpz_mod(x,kN,P); mpz_set_ui(wuq,Wu); mpz_mul_ui(wuq,wuq,Wu); mpz_sub(x,x,wuq);
       mpz_mul(x,x,y); mpz_add_ui(x,x,Wu); mpz_mod(wuq,x,P);

#ifdef DEBUG
       // we may want to check, whether above computations are valid:
       mpz_mul(x,wuq,wuq); mpz_sub(x,kN,x); mpz_mod(x,x,P);
       if (mpz_cmp_ui(x,0)!=0)
        {
          cerr << "PQ-root incorrect!" << endl;
          cerr << "Primzahl=" << Primzahl << endl;
          cerr << "kN= " << kN << endl;
          cerr << "SQRT(kN) mod p=" << Wu << endl;
          cerr << "SQRT(kN) mod p^2=" << wuq << endl;
          cerr << "but we got (wrong value): " << x << endl;
          exit(1); 
        }
#endif
       // and now compute the Deltas:

       // Delta1
       mpz_sub(x,wuq,Polynom.get_B()); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
       mpz_sub_ui(x,x,LogicalSieveSize);
       if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
        {
          TSieve_Delta sieb_delta;
          sieb_delta.factor=Primzahl;
          sieb_delta.delta=mpz_get_si(x);
          Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store

          // IMPORTANT: The prime number will be placed into Sieve_Delta_HeapOfSquares *and*
          // additionally into Sieve_Delta_Heap (if it is a square of a unknown/unresolved dynamic factor).
          // Please remember: Known dynamic factors will be sieved anyway, so sieving two times again
          // for squares should be avoided.
          // Doing this, we can assure, that
          //  - the value (factor instead of factor^2) fits into integer range,
          //  - the value is hitting twice (as square) inside the sieve.
          // The (possible) effect, that a square is counted with 3 hits remains still possible (whenever
          // a new dynamic factor is detected during sieving and deactivated FAKEHEAP), but this case
          // is unlikely and harmless. (In the worst case a non-relation will falsely be considered as
          // a relation and rejected afterwards.)
          if (!is_dynamic_factor(Primzahl)) Sieve_Delta_Heap.push(sieb_delta); // and store
#ifdef VERBOSE
          cout << "Pushed square of " << sieb_delta.factor << " (Delta1)!" << endl;
#endif
#ifdef DEBUG
          Polynom.get_values(sieb_delta.delta,x,y);
          if (!mpz_divisible_p(y,P))
           {
             MARK; cerr << "wrong delta position for square!" << endl; exit(1);
           } else { cout << "large square of " << Primzahl << " pushed and okay (delta1)" << endl; };
#endif
        }

       // Delta2
       mpz_add(x,wuq,Polynom.get_B()); mpz_neg(x,x); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
       mpz_sub_ui(x,x,LogicalSieveSize);
       if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
        {
          TSieve_Delta sieb_delta;
          sieb_delta.factor=Primzahl;
          sieb_delta.delta=mpz_get_si(x);
          Sieve_Delta_HeapOfSquares.push(sieb_delta); // and push into priority queue
          // see remark for Delta1
          if (!is_dynamic_factor(Primzahl)) Sieve_Delta_Heap.push(sieb_delta); // push into priority queue
#ifdef VERBOSE
          cout << "Pushed square of " << sieb_delta.factor << " (Delta2)!" << endl;
#endif
#ifdef DEBUG
          Polynom.get_values(sieb_delta.delta,x,y);
          if (!mpz_divisible_p(y,P))
           {
             MARK; cerr << "wrong delta position for square!" << endl; exit(1);
           } else { cout << "large square of " << Primzahl << " pushed and okay (delta2)" << endl; };
#endif
        }

     }
  }
#endif


#if defined(VERBOSE) || defined(DEBUG)
  if (!Sieve_Delta_HeapOfSquares.empty())
   cout << Sieve_Delta_HeapOfSquares.size() << " squares have been pushed." << endl;
#endif
  
  // and now do the same computations for dynamic factorbase:
  DynamicFactorArrays::compute_Deltas_for_DynamicFactors(-LogicalSieveSize);

done:
  mpz_clear(inv_A2); mpz_clear(P); mpz_clear(wuq); mpz_clear(x); mpz_clear(y);
 // cout << "Finished computation of Deltas." << endl;
}

namespace statistical_data
{
  extern bool StatusReport(const bool force_now = false);
}

void select_sieve_method();
void (*do_sieving)() = select_sieve_method;

void do_sieving_partial_intervals()
{
  const int ScanningRange = (5*LogicalSieveSize)/(16*PhysicalSieveSize);
  static signed int StartPSI[2]; // contains starting indices
  static unsigned int StopPSI[2]; // contains stopping indices
  static int* HelperTables[2][2] = { {NULL,NULL}, {NULL,NULL} };
  static int* SieveImages[1024] = { 0 };

  {
    // this blocks triggers recalibration of sieving thresholds
    // from time to time.
    static unsigned int counter = 1;
    if (--counter==0)
     { 
       counter=0x20000;
       SieveControl::RecalibrateThresholds();
       cout << "Sieving partial intervals." << endl;
       for (int i=0; i<2; ++i)
        {
          StartPSI[i]=StopPSI[i]=SieveControl::BestPSI[i];
          StartPSI[i]-=ScanningRange; StopPSI[i]+=ScanningRange;
          if (StartPSI[i]<0) StartPSI[i]=0;
          if (StopPSI[i]>=2*static_cast<unsigned int>(LogicalSieveSize)/PhysicalSieveSize)
           {
             cout << "CUTTING RANGE!" << endl;
             StopPSI[i]=2*LogicalSieveSize/PhysicalSieveSize-1;
           }
          CSieveStaticFactors::create_Tables_for_init_LocalDeltas(HelperTables[i][0],HelperTables[i][1],-LogicalSieveSize+PhysicalSieveSize*StartPSI[i]);
          cout << "Physical intervals " << StartPSI[i] << " to " << StopPSI[i] << " marked for sieving." << endl;
#if 0
          cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i] << endl;
          SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]],-LogicalSieveSize+PhysicalSieveSize*SieveControl::BestPSI[i]);
          if (static_cast<unsigned int>(StartPSI[i]+1)<=SieveControl::BestPSI[i])
           {
             cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i]-1 << endl;
             SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]-1],-LogicalSieveSize+PhysicalSieveSize*(SieveControl::BestPSI[i]-1));
           }
          if (SieveControl::BestPSI[i]+1<=StopPSI[i])
           {
             cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i]+1 << endl;
             SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]+1],-LogicalSieveSize+PhysicalSieveSize*(SieveControl::BestPSI[i]+1));
           }
#elif 1
          cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i] << endl;
          SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]],-LogicalSieveSize+PhysicalSieveSize*SieveControl::BestPSI[i]);
#endif

        }
     }
  }

  CDeltaComputations::recompute_all_Deltas(false);

  for (int i=0; i<2; ++i)
   {
     SieveOffset=-LogicalSieveSize+PhysicalSieveSize*StartPSI[i];
     SieveControl::PhysicalSieveIntervalIndex=StartPSI[i];
     CSieveStaticFactors::init_LocalDeltas(HelperTables[i][0],HelperTables[i][1]);
     while (SieveControl::PhysicalSieveIntervalIndex<=StopPSI[i])
      {
        //cout << "Sieving Interval: " << SieveOffset << endl;
        if (SieveImages[SieveControl::PhysicalSieveIntervalIndex<=StopPSI[i]])
          initialize_Sieve(SieveImages[SieveControl::PhysicalSieveIntervalIndex]);
        else
          initialize_Sieve();
        CSieveStaticFactors::do_sieving_StaticFactors(); // with primes out of the static factorbase
        do_scanning_Sieve();
        SieveOffset+=PhysicalSieveSize;
        SieveControl::PhysicalSieveIntervalIndex++;
      }
   }
  statistical_data::StatusReport(); // issue information to screen (don't remove this, it triggers some other stats, too!)

  // decide when to switch to complete interval sieving mode:
  if (DynamicFactorArrays::DYNFB_threshold<0.0) do_sieving=do_sieving_full_intervals;
  //if ( DynamicFactorRelations.size() > 8*static_cast<unsigned int>(StaticFactorbase::Size()) ) do_sieving=do_sieving_full_intervals;
}

void do_sieving_full_intervals()
{
  SieveOffset=-LogicalSieveSize;
  SieveControl::PhysicalSieveIntervalIndex=0;

#if 1
  {
    // this blocks triggers recalibration of sieving thresholds
    // from time to time.
    static unsigned int counter = 1;
    if (--counter==0)
     { 
       counter=0x20000;
       statistical_data::StatusReport(true); // issue information to screen (don't remove this, it triggers some other stats, too!)
       SieveControl::RecalibrateThresholds();
       cout << "Sieving complete intervals." << endl;
     }
  }
#endif
  
  CDeltaComputations::recompute_all_Deltas();
#ifdef USE_FAKEHEAP
  Sieve_Delta_Heap.sort();
#endif
  CSieveStaticFactors::init_LocalDeltas(CSieveStaticFactors::DefaultHelperTable[0],CSieveStaticFactors::DefaultHelperTable[1]);
  //CSieveStaticFactors::init_LocalDeltas(); // or use this deprecated function, if memory access is slower (due to cache pollution) than computing thousands of mulmods!
  while (SieveOffset<LogicalSieveSize)
   {
     initialize_Sieve();
     CSieveStaticFactors::do_sieving_StaticFactors(); // with primes out of the static factorbase
     do_sieving_Squares(); // (bigger) squares of primes of the static factorbase
     do_sieving_DynamicFactors(); // sieve with active single large primes (=dynamic factors)
     do_scanning_Sieve();
     SieveOffset+=PhysicalSieveSize;
     SieveControl::PhysicalSieveIntervalIndex++;
   }
  statistical_data::StatusReport(); // issue information to screen (don't remove this, it triggers some other stats, too!)
}


void select_sieve_method()
{
  cout << "QSieve sieve method selector called." << endl;

  // try calculate a good threshold for switching from partial sieving to complete sieving:
  // - the logical sieve interval is [-LogicalSieveSize ... LogicalSieveSize] -> *2
  // - two hits per prime number -> *2
  // - the larger the logical sieve interval, the less costs for switching polynomials -> 2^21 as reference interval size
  // - we want that the dynamic Factorbase has roughly the same quality as the static factorbase
  DynamicFactorArrays::DYNFB_threshold=4.0*2097152.0*StaticFactorbaseSettings::Size()/StaticFactorbaseSettings::BiggestPrime();

  if (mpz_sizeinbase(n,10)<60) // for small numbers we use full intervals!
   {
     do_sieving=do_sieving_full_intervals;
   }
  else // for large numbers we start with sieving partial intervals
   {
     do_sieving=do_sieving_partial_intervals;
     if (mpz_sizeinbase(n,10)<100) DynamicFactorArrays::DYNFB_threshold*=5.0; // high threshold avoids sieving complete intervals
     if (mpz_sizeinbase(n,10)>105) DynamicFactorArrays::DYNFB_threshold/=5.0; // low threshold (use partial intervals only to collect dynamic factors for starting the chain reaction)
   }
  do_sieving();
}
