/*! @file
 * @brief
 * contains implementation for static factorbase
 */

#include "StaticFactorbase.H"
#include "Sieving.H"
#include <cmath>

// Number of entries of the static factorbase that are actually used
// (compute_StaticFactorbase() fills the slots 0..Size_StaticFactorbase-1).
StaticFactorbaseSettings::FBsizetype StaticFactorbaseSettings::Size_StaticFactorbase = 5000; // default size for factorbase
/*
    Constraint for the SIMD-optimized builds (ASM_MMX, ASM_3DNOW, ASM_ATHLON, ASM_SSE, ASM_SSE2):
    (Size_StaticFactorbase&3) == 0; compute_StaticFactorbase() aborts otherwise.
    Useful values are between 1000 and 50000 (depending on available memory and the number to factorize).
    Normally this value will be overwritten by a value given in the config file (see "tune_parameters").
*/

// Members of the static factorbase. compute_StaticFactorbase() stores -1 in
// slot 0 (sign of a relation), 2 in slot 1 and the remaining primes from slot 2 on.
int StaticFactorbaseSettings::PrimeNumbers[StaticFactorbaseSettings::MaxSize] __attribute__ ((aligned (64)));
// reciprocal(p) for the prime p stored at the same index (written for indices >= 2 only)
int StaticFactorbaseSettings::PrimeNumberReciprocals[StaticFactorbaseSettings::MaxSize] __attribute__ ((aligned (64)));
// 1.0f/p for the prime p stored at the same index (written for indices >= 2 only)
float StaticFactorbaseSettings::PrimeNumberFloatReciprocals[StaticFactorbaseSettings::MaxSize] __attribute__ ((aligned (64)));
// Last prime found by compute_StaticFactorbase(); 0 until that function has run.
int StaticFactorbaseSettings::biggest_Prime_in_Factorbase = 0; // will be initialized at runtime


// Number of valid entries in PrimePowers / PrimePowerReciprocals / SQRT_kN_of_PrimePowers.
int StaticFactorbase::NumberOf_more_PrimePowers = 0; // actual number (will be evaluated in runtime)
// Factorbase index of the last prime for which prime powers were considered
// (set inside compute_StaticFactorbase()).
int StaticFactorbase::FB_maxQuadrate = 0; // actual number of squares to sieve with (will be evaluated in runtime)
// Additional prime powers p^2, p^3, ... that are used for sieving.
int StaticFactorbase::PrimePowers[StaticFactorbase::max_additional_Powers];
// numtheory::reciprocal() of the prime power stored at the same index.
int StaticFactorbase::PrimePowerReciprocals[StaticFactorbase::max_additional_Powers];
// Square root of kN modulo the prime stored at the same factorbase index (indices >= 2).
int StaticFactorbase::SQRT_kN_of_PrimeNumbers[MaxSize];
// Square root of kN modulo the prime power stored at the same index
// (normalized to the lower of the two roots).
int StaticFactorbase::SQRT_kN_of_PrimePowers[StaticFactorbase::max_additional_Powers];
// Square root of kN modulo p^2 for the prime p at the same factorbase index,
// or -1 if compute_StaticFactorbase() did not compute one for that prime.
int StaticFactorbase::SQRT_kN_of_PrimeSquares[StaticFactorbase::MaxSize];



/*!
   Constraint for the SIMD-optimized builds: LogicalSieveSize<=2^23 (because
   of float precision); compute_StaticFactorbase() aborts if this is violated.

   In contrast to "PhysicalSieveSize", LogicalSieveSize is a value that is
   less dependent on the hardware. You should choose the (logical) sieve size
   according to the size of the number to factorize. The default value should
   be modified before a factorization is started.

   The relative rate of hits in the multiple polynomial quadratic sieve
   algorithm (MPQS) is higher for smaller intervals. On the other hand, the
   cost of changing polynomials, initialization, and calculation of roots
   grows when polynomials are switched more often.
*/
int LogicalSieveSize = 1000000; // sieving interval will be [-LogicalSieveSize,LogicalSieveSize] for each MPQS polynomial


// Pre-multiplier k of the MPQS: the sieve works on kN = MPQS_Multiplier*n.
// No initializer here; the value is determined later (presumably by the
// code in "mpqsMultiplier.cc" -- confirm).
int MPQS_Multiplier; // multiplier for n (kN=MPQS_Multiplier*n), will be determined later!

using std::cin;
using std::cout;
using std::cerr;
using std::endl;
using std::flush;
using std::sqrt;
using namespace numtheory;

int check_SQRT_kN_mod_PrimeNumber(const int Primzahl)
{
  // can be called for debugging
  // (and to check, whether "SQRT_kN_mod_PrimeNumber" works correctly)
  int w1 = SQRT_kN_mod_PrimeNumber(Primzahl);
  if (w1>Primzahl/2) w1=Primzahl-w1;

  mpz_t x;
  mpz_init_set_ui(x,w1); mpz_mul(x,x,x); mpz_sub(x,x,kN);
  if (mpz_mod_ui(x,x,Primzahl)!=0)
    {
      MARK;
      cerr << "wrong result! " << w1 << " mod " << Primzahl << endl;
      exit(1);
    }
  mpz_clear(x);
  return w1;
}

// ------ pre-multiplier for MPQS -----------
#include "mpqsMultiplier.cc"
// ------------------------------------------



void StaticFactorbase::compute_StaticFactorbase()
{
#if defined(ASM_MMX) || defined(ASM_3DNOW) || defined(ASM_ATHLON) || defined(ASM_SSE) || defined(ASM_SSE2)
  // ATHLON specific sanity checks
  if ( LogicalSieveSize>(1<<23) || (StaticFactorbase::Size()&3) )
   {
     MARK;
     cerr << "Sorry! Processor specific constraints not met." << endl;
     cerr << "For using AMD-3DNow! or SSE optimized version we need:" << endl;
     cerr << "Constraint 1 (SIMD): (Size_StaticFactorbase&3)==0" << endl;
     cerr << "Constraint 2 (float precision): LogicalSieveSize<=(1<<23) [=8388608]" << endl;
     cerr << "Please edit ConfigFile according to these constraints" << endl;
     cerr << "OR use a version without these optimizations." << endl;
     exit(1);
   }
#endif

#ifdef VERBOSE_NOTICE
  cout << "Calculating static factorbase." << endl;
#endif
  PrimeNumbers[0]=-1; // for handling signess of the relation
  SieveControl::set_logVal_for_Primenumber(0,0,0); // sign has no logarithmic value

  int Primzahl=2;
  // in this implementation of MPQS the prime number 2 is a member of any
  // static factorbase (without exception!)
  PrimeNumbers[1]=2;
  SieveControl::set_logVal_for_Primenumber(1,static_cast<TSieveElement>(ceil(log_SieveEntryMultiplier*log(Primzahl))),static_cast<TSieveElement>(ceil(log_SieveEntryMultiplier*log(Primzahl))));

  Primzahl=1; // initial value (for the loop, expected step: +2)
  mpz_t x,y,wuqu;
  mpz_init(x); mpz_init(y); mpz_init(wuqu); // for computing qsuare root of prime powers
  for (int Primzahl_nr=2; Primzahl_nr<StaticFactorbase::Size(); Primzahl_nr++)
    {
      do // compute next prime number in static factorbase
	{
	  do Primzahl+=2; while(!is_prime(Primzahl)); // next prime number
	  mpz_set_ui(x,Primzahl);
	}
      while (mpz_legendre(kN,x)<0); // until prime number is member of factorbase
      
      PrimeNumbers[Primzahl_nr]=Primzahl;
      PrimeNumberReciprocals[Primzahl_nr]=reciprocal(Primzahl);
      PrimeNumberFloatReciprocals[Primzahl_nr]=static_cast<float>(1.0)/static_cast<float>(Primzahl);
      
      // needed for efficient sieving (spares repeated recomputing)
      SieveControl::set_logVal_for_Primenumber(Primzahl_nr,
              static_cast<TSieveElement>(ceil(log_SieveEntryMultiplier*log(Primzahl))),
              static_cast<TSieveElement>(ceil(log_SieveEntryMultiplier*log(Primzahl)*SieveControl::DirtyFactor(Primzahl_nr))));
      
      // needed to compute the Delta values for sieving
      SQRT_kN_of_PrimeNumbers[Primzahl_nr]=SQRT_kN_mod_PrimeNumber(Primzahl);
      
      // check, whether computations of square roots were correct:
      if ( squaremod(SQRT_kN_of_PrimeNumbers[Primzahl_nr],Primzahl) != mpz_remainder_ui(kN,Primzahl) )
       {
         cerr << "square root not correct!" << endl;
         cerr << SQRT_kN_of_PrimeNumbers[Primzahl_nr] << "^2 <> "
              << mpz_remainder_ui(kN,Primzahl) << " (mod " << Primzahl << ")" << endl;
         exit(1);
       }


      if ( Primzahl < sqrt(std::numeric_limits<int>::max()) && MPQS_Multiplier%Primzahl!=0 )
       {
          // *** compute square roots of kN mod Primzahl^2 ***
          const unsigned int P = Primzahl*Primzahl;
	  const unsigned int Wu = SQRT_kN_of_PrimeNumbers[Primzahl_nr];
          unsigned int iy = invmod(2*Wu,P);
          unsigned int ix = mpz_remainder_ui(kN,P);
          
	  unsigned int wuq=Wu*Wu;
          if (ix>=wuq) ix-=wuq; else ix=ix-wuq+P; // ix-=wuq; avoid overflow; we compute values modulo P
          ix=mulmod(ix,iy,P)+Wu;
          wuq = (ix<P) ? ix : ix-P; // this is a faster version for: wuq=ix%P

#if 1 || defined(DEBUG)
          if (mulmod(wuq,wuq,P)!=mpz_remainder_ui(kN,P))
           {
             cerr << "PQ-root incorrect!" << endl;
             cerr << "Primzahl=" << PrimeNumbers[Primzahl_nr] << endl;
             cerr << "kN= " << kN << endl;
	     cerr << mulmod(Wu,Wu,P) << " != " << mpz_remainder_ui(kN,P) << endl;
             exit(1); 
           }
#endif
          SQRT_kN_of_PrimeSquares[Primzahl_nr]=wuq;
       } else SQRT_kN_of_PrimeSquares[Primzahl_nr]=-1;


      const int Threshold = LogicalSieveSize < std::numeric_limits<int>::max()/8 ? 8*LogicalSieveSize/Primzahl : LogicalSieveSize/Primzahl;
      if (Primzahl<=Threshold && Primzahl>2 && mpz_remainder_ui(kN,Primzahl)!=0)
	// constraint for Primzahl to assure correct sieving with prime-powers: 
        //  - not too big, bigger than 2, and no divisior of kN
	{
	  FB_maxQuadrate=Primzahl_nr; // at the end of the loop this will be the limit, up to which we can sieve squares the normal way...

	  // compute square roots of kN modulo Primzahl^pot
	  // initial values:
	  int P_Potenz = Primzahl*Primzahl;
	  int Wu       = SQRT_kN_of_PrimeNumbers[Primzahl_nr];

          double Weight = Primzahl_nr<SieveControl::FBLowestStartIndex ? 2.0 : 1.0;
           // We do not sieve with the first couple of primes of the
           // factorbase (because these primes are very small and sieving
           // with them takes too long); as a minor correction of this
           // "dirty behaviour" the value of the smallest power of each dirty prime
           // can be adapted for sieving: log(P_Potenz)=Potenz*log(Primzahl).
           // All other powers are handled the usual way: since it is a recurrence of a known factor,
           // only log(Primzahl) has to be subtracted.

	  for (int pot=2; ;pot++)
	    {
	      //cout << "computing square root of " << Primzahl << "^" << pot << endl;

	      mpz_set_ui(y,2*Wu); mpz_set_ui(x,P_Potenz);
	      if (mpz_invert(y,y,x)==0)
		{ 
		  cerr << "PPot-root for " << Primzahl  << ": inverse doesn't exist!" << endl;
		  break;
		}

              if (NumberOf_more_PrimePowers>=StaticFactorbase::max_additional_Powers)
               { 
                 static char weitermachen = 'n';
                 if (weitermachen!='y')
                  {
	           cerr << "Overflow for " << Primzahl << "^" << pot << endl;
                   cerr << "Es kann nicht optimal mit Primzahlpotenzen gesiebt werden," << endl
                        << "da der hierfr reservierte Speicherplatz nicht ausreicht." << endl;
                   cerr << "Sie sollten \"StaticFactorbase::max_additional_Powers\" vergroessern!" << endl;
                   cerr << "Reserved memory for storing prime powers is too small..." << endl;
                   cerr << "sieving will be less efficient..." << endl;
                   cerr << "continue? (y/n) " << flush;
                   cin >> weitermachen;
                   if (weitermachen=='n') exit(1);
                  }
                 break; // avaoid overflow by breaking the loop
               }

	      mpz_mod_ui(x,kN,P_Potenz); mpz_set_ui(wuqu,Wu); mpz_mul_ui(wuqu,wuqu,Wu); mpz_sub(x,x,wuqu);
	      mpz_mul(x,x,y); mpz_add_ui(x,x,Wu); mpz_mod_ui(x,x,P_Potenz);
	      Wu = mpz_get_ui(x); if (Wu>P_Potenz-Wu) Wu=P_Potenz-Wu; // normalize to the "lower" of the two square roots
              // "Wu" is now the square root of kN (mod P_Potenz)

	      // check, whether computation was correct:
	      mpz_set_ui(x,Wu); mpz_mul(x,x,x); mpz_sub(x,kN,x); 
	      if (mpz_mod_ui(x,x,P_Potenz)!=0)
		{
		  cerr << "PPot-root incorrect!" << endl;
		  cerr << "Primzahl=" << PrimeNumbers[Primzahl_nr] << " Nr. " << Primzahl_nr << endl;
		  cerr << "kN= " << kN << endl;
		  cerr << "SQRT(kN) mod p=" << SQRT_kN_of_PrimeNumbers[Primzahl_nr] << endl;
		  cerr << "SQRT(kN) mod p^i=" << SQRT_kN_of_PrimePowers[NumberOf_more_PrimePowers] << endl;
		  cerr << "but we got a wrong value: " << x << endl;
		  exit(1);
		}

              // heuristics: Increase the weight of the last power a little bit,
              // since we have a slight chance, that this hit is a hit of higher powers, too.
              if (P_Potenz>Threshold) Weight+=4.0/Primzahl;

#if 1
              // heuristics: (determined by trial&error):
              // For factorizing "small" numbers: Sieving is significantly
              // faster, if sieving is skipped for smaller values;
              // the sieve gets more wide-meshed, but this doen't matter, since relations come plentiful...
              // For factorizing "bigger" numbers: If we sieve more carefully, we detect more DLP in the same time,
              // this effect is by far more important!
              if ( P_Potenz<250 && mpz_sizeinbase(kN,10)<70 ) Weight+=1.0;
              else
#endif
               {
	         PrimePowers[NumberOf_more_PrimePowers]=P_Potenz;
	         PrimePowerReciprocals[NumberOf_more_PrimePowers]=numtheory::reciprocal(P_Potenz);
	         SQRT_kN_of_PrimePowers[NumberOf_more_PrimePowers]=Wu;

		 SieveControl::set_logVal_for_Primepower(NumberOf_more_PrimePowers,
                   static_cast<TSieveElement>(ceil(log_SieveEntryMultiplier*Weight*log(Primzahl))));
                 Weight=1.0;
                 ++NumberOf_more_PrimePowers; // we got one more...
               }
	      
	      if (P_Potenz>Threshold) break; // next one would be too big...
	      P_Potenz*=Primzahl;
	    }
	}
    }
  mpz_clear(x); mpz_clear(y); mpz_clear(wuqu);

  biggest_Prime_in_Factorbase=Primzahl;
#ifdef VERBOSE_INFO
  cout << "prime numbers in static factorbase have been computed." << endl;
  if (Primzahl>=PhysicalSieveSize)
    cout << "Remark: Some members of the static factorbase are bigger than the sieve size!" << endl;

  cout << "The first 20 members of the static factorbase are" << endl;
  for (int i=0; i<20; i++) cout << StaticFactorbase::PrimeNumbers[i] << " "; cout << endl;
  cout << "Biggest prime in static factorbase: " << StaticFactorbase::BiggestPrime() << endl;
  cout << "#squares (of primes) in FB: " << StaticFactorbase::FB_maxQuadrate << endl;
  cout << "#powers (of primes) in FB: " << StaticFactorbase::NumberOf_more_PrimePowers << endl;
#endif
}
