#ifndef _fft_param_file
#define _fft_param_file

/*! @file
 * @brief
 * contains parameter sets and memory estimation routines for fft
 */


#if HAVE_LINUX_SYSINFO

// only guaranteed to work under Linux, since we read kernel-specific data
#include <sys/sysinfo.h>

unsigned long int allowed_memory_usage_KB(void)
{
  // returns the allowed memory that fft may comsume during operation.
  // only guaranteed to work under Linux, since we read kernel-specific data.

  // problem: between different calls of this function there may still exist
  // allocated space (up to several hundred megabytes!) for invariant
  // polynomials. For this reason we must add the amount of memory allocated by
  // the current process. This info can be obtained via "/proc/<pid>/mstat".

  const pid_t mypid = getpid();
  ostringstream ostr;
  ostr << "/proc/" << mypid << "/statm"; // filename to retrieve info: "/proc/<pid>/mstat"
  std::ifstream proc_file(ostr.str().c_str());

  unsigned long long int owned_ram = 0;
  proc_file >> owned_ram; // read in the memory pages currently allocated by this process
  owned_ram*=getpagesize(); // multplied with the pagesize to get the actual value in bytes

  struct sysinfo info;
  if (sysinfo(&info)!=0)
   {
     cerr << "call sysinfo failed; unable to get memory info" << endl;
     exit(1);
   }

  owned_ram/=info.mem_unit; // actual value in "memory units"

  static unsigned long long int initially_owned_ram = 0;
  if (initially_owned_ram==0) initially_owned_ram = owned_ram;

#if defined(VERBOSE_INFO)
  cout << "memory status info:" << endl; 
  cout << "memory unit: " << info.mem_unit << endl;
  cout << "total ram  : " << info.totalram << endl;
  cout << "free ram   : " << info.freeram << endl;
  cout << "free swap  : " << info.freeswap << endl;
  cout << "totalhigh  : " << info.totalhigh << endl;
  cout << "freehigh   : " << info.freehigh << endl;
  cout << "buffer ram : " << info.bufferram << endl;
  cout << "shared ram : " << info.sharedram << endl;
  cout << "owned ram  : " << owned_ram << endl;
#endif

  unsigned long int ram = info.freeram;
  if (ram<info.totalram/2)
    ram=info.totalram/2; // sorry, but we cannot detect cached mem that will
                         // eventually be freed if needed. But anyone who
                         // starts this program expects some usability.
                         // We have to be a little greedy...
  if (info.freeswap>ram-ram/4) ram+=ram/2; // a little bit of swapping should be allowed ;-)

  ram=ram+owned_ram-initially_owned_ram; // treat ram allocated by ourselves as free ram

  unsigned long int m = static_cast<unsigned long int>(static_cast<double>(ram) /1024.0 * static_cast<double>(info.mem_unit));
  return min(m,FFT_MAX_MEM_USAGE);
}
#else

#warning "linux free memory detection disabled!"
unsigned long int allowed_memory_usage_KB(void)
{
  return FFT_MAX_MEM_USAGE;
}
#endif


//! get parameters for phase 2 of Phi, Fibonacci, elliptic curves...
void get_fft_parameter(unsigned int &pg_returnvalue, unsigned int &pms_returnvalue,
                       const double phase2, const mpz_t N)
{
   // estimated memory usage depends on
   //  - given number N:
   //    for polynomial calculations we need to reduce results mod N;
   //    intermediate results are about N^2;
   //    -> memory usage per coefficient is approximately ld(N^2)=2*ld(N) bits
   //  - size of polynomial:
   //    each polynomial stores pg coefficients
   //    -> 2*ld(N)*pg bits
   //  - temporary memory for polynomial calculations:
   //    since we are operating with more than one polynomial
   //    and since most data structures produce some memory overhead
   //    we estimate the temporary space by using a fixed multiplier,
   //    -> 2*ld(N)*pg*fixed_multiplier bits
   //  - storage requested for recursive calls
   //     if intermediate results need to be stored in temporary memory
   //     during recursion, the storage requirements are typically
   //     proportial to the size of initial given data.
   //     However, if intermediate results are generated and
   //     need to be stored for later use (cf. fast multipoint polynomial
   //     evaluation schemes), the amount of storage needed can exceed
   //     a constant factor. We assume a logarithmic growth of the multiplier:
   //       recursion_depth * size_of_initial_data
   //
   // memory usage in Kilobytes (1KB = 1024 bytes= 8*1024 bits = 2^13 bits)
   //  -> 2*ld(N)*pg*fixed_multiplier *2^(-13) KB
   //   = (fixed_multiplier*ld(N)/4096)*pg KB

   const double fixed_multiplier = 2.3;
   const double mem_multiplier = fixed_multiplier/4096*mpz_sizeinbase(N,2); 


   // precomputing most common used differences:
   // pre_mul_size ist die Intervallgre mit der pro Polynom gesucht wird

     // Je mehr verschiedene, kleine Primfaktoren enthalten sind, desto
     // weniger teilerfremde Zahlen gibt es im Intervall -> desto geringer
     // sind also die Speicherkosten.
     // Bei unserer fft-Version gilt in etwa, dass phi*pre_mul_size ungefhr phase2
     // entsprechen sollte; d.h. das Polynom hat die Grenordnung phi und
     // greift ein Intervall der Gre pre_mul_size ab. Und #phi Funktionswerte lassen
     // sich dann schnell ber multipoint_eval ermitteln.
     // Nun ist die Polynomauswertung allerdings besonders effizient, wenn die
     // Polynome und die Anzahl der Punkte Zweierpotenzen sind.
     // Meiner Ansicht nach drfte es daher vorteilhaft sein, pre_mul_size so zu
     // whlen, dass es maximal wird fr ein phi, welches nahe unterhalb der
     // gewnschten Zweierpotenz liegt.
     // Da es nun nicht allzu viele praktikable Zweierpotenzen gibt, mit denen
     // wir arbeiten knnen (bei kleinen Zweierpotenzen ist die improved standard
     // continuation berlegen, bei zu groen Zweierpotenzen ist der Speicherbedarf
     // zu hoch), knnen wir geeignete Werte ber eine Tabelle vorgeben.
     // (Fr die Berechnung der Tabellenwerte siehe "fft_param.txt")
     /* Vorschlge wren 
        Polynomgrad    phi(pre_mul_size)     pre_mul_size             		       abgedeckter Suchraum
	2^8 =     256,        240,	  1050=(2)*(3)*(5)^2*(7),			            134.400
	2^9 =     512,        480,	  2310=(2)*(3)*(5)*(7)*(11),		                    591.360
	2^10=    1024,        960,	  4620=(2)^2*(3)*(5)*(7)*(11),		                  2.365.440
	2^11=    2048,	     1920,	  9240=(2)^3*(3)*(5)*(7)*(11),		                  9.461.760
	2^12=    4096,	     4032,       19110=(2)*(3)*(5)*(7)^2*(13),		                 39.137.280
	2^13=    8192,	     7680,       39270=(2)*(3)*(5)*(7)*(11)*(17),                       160.849.920
	2^14=   16384,      16128,       79170=(2)*(3)*(5)*(7)*(13)*(29),                       648.560.640
	2^15=   32768,      31680,      159390=(2)*(3)^2*(5)*(7)*(11)*(23),                   2.611.445.760
	2^16=   65536,      63360,      330330=(2)*(3)*(5)*(7)*(11)^2*(13),                  10.824.253.440
	2^17=  131072,	   126720,      690690=(2)*(3)*(5)*(7)*(11)*(13)*(23),               45.265.059.840
	2^18=  262144,     253440,     1381380=(2)^2*(3)*(5)*(7)*(11)*(13)*(23),            181.060.239.360
	2^19=  524288,     518400,     2852850=(2)*(3)*(5)^2*(7)*(11)*(13)*(19),            747.857.510.400
	2^20= 1048576,    1036800,     5705700=(2)^2*(3)*(5)^2*(7)*(11)*(13)*(19),        2.991.430.041.600
	2^21= 2097152,    2027520,    11741730=(2)*(3)*(5)*(7)*(11)*(13)*(17)*(23),      12.312.096.276.480
	2^22= 4194304,    4055040,    23483460=(2)^2*(3)*(5)*(7)*(11)*(13)*(17)*(23),    49.248.385.105.920
	2^23= 8388608,    8294400,    48498450=(2)*(3)*(5)^2*(7)*(11)*(13)*(17)*(19),   203.417.242.828.800
	2^24=16777216,   16588800,    96996900=(2)^2*(3)*(5)^2*(7)*(11)*(13)*(17)*(19), 813.668.971.315.200
     */

   const unsigned int pg_start = 256;
   static const unsigned int pms[]
    = { 1050, 2310, 4620, 9240, 19110, 39270, 79170, 159390, 330330, 690690,
        1381380, 2852850, 5705700, 11741730, 23483460, 48498450, 96996900,0 };


   // since we do pairing, our values vary a little bit...
   pg_returnvalue=pg_start; pms_returnvalue=2*pms[0]; // initial values (default)
   unsigned int estimated_mem_usage = 0;
   const unsigned int allowed_mem_usage = allowed_memory_usage_KB();
   for (unsigned int pg=pg_start,i=0; pms[i]!=0; pg*=2, ++i)
    {
      const unsigned int mem_usage = static_cast<unsigned int>(mem_multiplier*(8+3+i)*pg);
      const double grenze = static_cast<double>((pg/4)*3)*static_cast<double>(pms[i]);

#if defined(VERBOSE)
      cout << "pg=" << pg
           << " -> pms: " << pms[i]
           << ", estimated memory usage: " << mem_usage << "KB  (" << mem_usage/1024 << "MB)"
	   << endl;
#endif

      if ( phase2 > grenze && allowed_mem_usage>=mem_usage )
       {
         estimated_mem_usage=mem_usage;
         pg_returnvalue = pg; pms_returnvalue = 2*pms[i];
       }
    }

#if defined (VERBOSE_NOTICE)
   cout << "using pg=" << pg_returnvalue
        << " -> pms: " << pms_returnvalue << endl
        << "estimated memory usage: " << estimated_mem_usage << "KB (" << estimated_mem_usage/1024 << "MB)"
        << endl;
   cout << "allowed memory usage : " << allowed_mem_usage << "KB (" << allowed_mem_usage/1024 << "MB)"
        << endl;
#endif

   /* Am Rande:
      Bei 1 GB main memory und pg=262144 geht dem Rechner bereits der
      Speicher aus. (Der Speicherbedarf hngt natrlich auch von der Gre
      der zu faktorisierenden Zahl ab.) Wenn der Rechner nur im Hintergrund
      faktorisieren soll und Memory geswapt wird, ist das kontraproduktiv.
      -> Die Werte sind also individuell zu tunen.
         Die Funktion allowed_memory_usage_KB(void) kann z.B. entsprechend
         verfeinert werden.
      -> Die Speicherbedarfsabschtzung ist sehr grob; sie kann aber ohne eine
         genaue Analyse der unterliegenden Datenstrukturen und verwendeten
         Algorithmen nicht sehr viel besser sein.
         Wenn speicherbedarfsrelevante nderungen an den Algorithmen vorgenommen
         werden, kann eine Validierung und ggf. nderung der Abschtzungsroutine
         erforderlich werden.
   */

}

#endif
