// Modulo-Operations for unsigned int
// written by Thorsten Reinecke (1999-03-12)
// last change: 2003-12-29

// Some of the following functions are necessary because a*b%m will
// return wrong results if a*b overflows the unsigned int range.
// these routines are okay for unsigned-int-range/2

/*! @file
 * @brief
 * This file contains some number theoretic functions.
 *
 * The functions implemented in this file provide support for fast modulo
 * operations (multiplication, exponentiation,
 * computation of the modular inverse, quadratic residues)
 * and some integer functions (greatest common divisor, prime numbers).
 *
 * Most of these functions contain also optimized assembler code for
 * Intel Pentium and AMD Athlon processors.
 */


#ifndef MODULO_CC_INCLUDED
#define MODULO_CC_INCLUDED

#include <iostream>

//! number theoretic concepts
namespace numtheory
{

#ifndef ASM386

/*!
 * Nonnegative remainder of a signed division.
 *
 * @param x signed dividend
 * @param m modulus (declared as int; expected to be positive)
 * @result the representative of @p x mod @p m, converted to unsigned int;
 *         for positive @p m it lies in the range 0 .. @p m - 1
 * @remark Plain x%m keeps the sign of @p x (e.g. -1%2 == -1 while 1%2 == 1),
 *         although -1 and 1 are congruent modulo 2. This function adds
 *         @p m once to a negative remainder, so that congruent inputs
 *         yield equal results.
 */
inline unsigned int normalized_signed_mod(const signed int x, const int m)
{
  const int remainder = x%m;

  // a negative remainder is shifted up by one modulus
  if (remainder<0) return remainder+m;
  return remainder;
}

/*!
 * Modular multiplication without intermediate overflow.
 *
 * @param a first factor, an unsigned integer value
 * @param b second factor, an unsigned integer value
 * @param m modulus, a nonzero unsigned integer value
 * @result @p a * @p b mod @p m
 * @remark A plain a*b%m yields wrong results as soon as the product
 *         exceeds the unsigned int range. Here the product is formed in
 *         a wider type, so every nonzero modulus of the full unsigned
 *         int range is handled and the factors need not be reduced
 *         beforehand (callers that pass @p a < @p m and @p b < @p m get
 *         the same results as before).
 */
unsigned int mulmod(const unsigned int a, const unsigned int b, const unsigned int m)
{
  // Same idea as the i386 assembler variant (64 bit product, then one
  // division). Assumes that unsigned long long is at least twice as
  // wide as unsigned int, which holds for a 32 bit unsigned int.
  const unsigned long long product = static_cast<unsigned long long>(a) * b;
  return static_cast<unsigned int>(product % m);
}

/*!
 * Modular squaring without intermediate overflow.
 *
 * @param a an unsigned integer value
 * @param m modulus, a nonzero unsigned integer value
 * @result @p a * @p a mod @p m
 * @remark A plain a*a%m yields wrong results as soon as the square
 *         exceeds the unsigned int range. Here the square is formed in
 *         a wider type, so every nonzero modulus of the full unsigned
 *         int range is handled and @p a need not be reduced beforehand
 *         (callers that pass @p a < @p m get the same results as before).
 */
unsigned int squaremod(const unsigned int a, const unsigned int m)
{
  // Same idea as the i386 assembler variant (64 bit square, then one
  // division). Assumes that unsigned long long is at least twice as
  // wide as unsigned int, which holds for a 32 bit unsigned int.
  const unsigned long long square = static_cast<unsigned long long>(a) * a;
  return static_cast<unsigned int>(square % m);
}

#else

#warning "i386-assembler code optimizations enabled"
// i386 specific assembler code
// runs much faster :)


#ifdef ASM386_CMOV
#warning "using cmov-operation for normalized_signed_mod(const signed int,int)"
/*!
 * i386 inline assembler variant of normalized_signed_mod that selects
 * the result with a conditional move (cmovns) instead of a branch.
 *
 * @param x signed dividend (passed in eax)
 * @param m modulus
 * @result the remainder of @p x / @p m, with @p m added once if the
 *         remainder is negative
 */
inline unsigned int normalized_signed_mod(const signed int x, const int m)
{
  // The sign is the problem:
  // x%m is not normalized by default, but callers often depend on it!
  // -1%2 -> -1 , 1%2 -> 1,
  // therefore (-1%2 != 1%2), although one surely expects -1=1 (mod 2).
  //
  // Instruction sequence: cdq sign-extends eax into edx:eax, idivl leaves
  // the remainder in edx; the remainder is copied to eax, the modulus is
  // added to edx, and cmovns restores the plain remainder from eax when
  // it was not negative.
  //
  // Operands: eax is overwritten by the division, hence the dummy output
  // "clobbered" bound to the same register as the input x.
  // NOTE(review): the "g" constraint also admits an immediate operand,
  // which idivl cannot encode - presumably "rm" was intended; confirm
  // against the GCC constraint documentation.
  // NOTE(review): the register storage class is no longer valid in C++17.
  register unsigned int result;
  register unsigned int clobbered;
  asm ( \
   "cdq # prepare signed division \n\t"
   "idivl %[mod] # remainder -> edx \n\t" \
   "mov %%edx,%%eax \n\t" \
   "addl %[mod],%%edx # this is a positive value \n\t" \
   "test %%eax,%%eax \n\t" \
   "cmovns %%eax,%%edx # and now it is normalized (minimal) \n\t"
   : "=a" (clobbered), "=&d" (result) : "a" (x), [mod] "g" (m) : "cc");
  return result;
}
#else
/*!
 * i386 inline assembler variant of normalized_signed_mod that uses a
 * conditional jump (for processors without cmov support).
 *
 * @param x signed dividend (passed in eax)
 * @param m modulus
 * @result the remainder of @p x / @p m, with @p m added once if the
 *         remainder is negative
 */
inline unsigned int normalized_signed_mod(const signed int x, const int m)
{
  // The sign is the problem:
  // x%m is not normalized by default, but callers often depend on it!
  // -1%2 -> -1 , 1%2 -> 1,
  // therefore (-1%2 != 1%2), although one surely expects -1=1 (mod 2).
  //
  // Instruction sequence: cdq sign-extends eax into edx:eax, idivl leaves
  // the remainder in edx; if its sign bit is set, the modulus is added,
  // otherwise the addition is skipped by jumping to the local label 1.
  //
  // Operands: eax is overwritten by the division, hence the dummy output
  // "clobbered" bound to the same register as the input x.
  // NOTE(review): the "g" constraint also admits an immediate operand,
  // which idivl cannot encode - presumably "rm" was intended; confirm
  // against the GCC constraint documentation.
  // NOTE(review): the register storage class is no longer valid in C++17.
  register unsigned int result;
  register unsigned int clobbered;
  asm ( \
   "cdq # prepare signed division \n\t"
   "idivl %[mod] # remainder -> edx \n\t" \
   "testl %%edx,%%edx \n\t" \
   "jns 1f # signed? \n\t"
   "addl %[mod],%%edx # normalize to positive value \n\t"
   "1:"
   : "=a" (clobbered), "=&d" (result) : "a" (x), [mod] "g" (m) : "cc");
  return result;
}
#endif

/*!
 * i386 inline assembler variant of mulmod.
 *
 * @param a first factor (passed in eax)
 * @param b second factor
 * @param m modulus
 * @result @p a * @p b mod @p m, taken from edx after the division
 */
inline unsigned int mulmod(const unsigned int a, const unsigned int b, const unsigned int m)
{
  // returns (a*b) mod m
  //
  // mull forms the product of eax and the second factor in edx:eax,
  // divl then divides edx:eax by the modulus and leaves the remainder
  // in edx.
  // NOTE(review): divl raises a divide error when the quotient does not
  // fit into 32 bits; this presumably cannot happen as long as a<m or
  // b<m - confirm that all callers pass reduced factors.
  // NOTE(review): the "g" constraint also admits an immediate operand,
  // which mull/divl cannot encode - presumably "rm" was intended; confirm
  // against the GCC constraint documentation.
  unsigned int x;
  register unsigned int clobbered;
  asm ( \
   "mull %[faktor2] # multiply the factors \n\t" \
   "divl %[mod] # remainder -> edx \n\t"
   : "=a" (clobbered), "=&d" (x) : "a" (a), [faktor2] "g" (b), [mod] "g" (m) : "cc");

  // IMPORTANT REMARK
  // If a register is requested for an input operand and the assembler
  // code modifies that register, GCC must be told that the register no
  // longer holds the operand afterwards.
  // The register cannot be put on the clobber list (GCC forbids operands
  // that use clobbered registers); instead a dummy output operand bound
  // to the same register has to be declared (here: "clobbered" for eax).
  // Be careful: if all output operands are dummies, the assembler code
  // has to be declared volatile!

  return x;
}

/*!
 * i386 inline assembler variant of squaremod.
 *
 * @param a value to be squared (passed in eax)
 * @param m modulus
 * @result @p a * @p a mod @p m, taken from edx after the division
 */
inline unsigned int squaremod(const unsigned int a, const unsigned int m)
{
  // returns (a^2) mod m
  //
  // mull multiplies eax by itself (result in edx:eax), divl then divides
  // edx:eax by the modulus and leaves the remainder in edx.
  // eax is overwritten, hence the dummy output "clobbered" bound to the
  // same register as the input a.
  // NOTE(review): divl raises a divide error when the quotient does not
  // fit into 32 bits; this presumably cannot happen as long as a<m -
  // confirm that all callers pass a reduced value.
  // NOTE(review): the "g" constraint also admits an immediate operand,
  // which divl cannot encode - presumably "rm" was intended; confirm
  // against the GCC constraint documentation.
  unsigned int x;
  register unsigned int clobbered;
  asm ( \
   "mull %%eax # square the factor \n\t" \
   "divl %[mod] # remainder -> edx \n\t"
   : "=a" (clobbered), "=&d" (x) : "a" (a), [mod] "g" (m) : "cc");
  return x;
}

#endif

/*!
 * Modular exponentiation by repeated squaring.
 *
 * @param a base, an unsigned integer value with @p a < @p m
 * @param pow exponent, an unsigned integer value
 * @param m modulus, a nonzero unsigned integer value
 * @result @p a ^ @p pow mod @p m
 * @remark The exponent is processed bit by bit from the least
 *         significant end: the base is squared once per bit and
 *         multiplied into the result for every set bit.
 *         An exponent of 0 yields 1 mod @p m (that is 0 for @p m == 1).
 */
unsigned int powmod(unsigned int a, unsigned int pow, const unsigned int m)
{
  // assumes a<m
  // the empty product is 1, which reduces to 0 modulo 1
  const unsigned int one = (m==1) ? 0 : 1;
  unsigned int x = (pow&1) ? a : one;
  while (pow>>=1)
   {
     a=squaremod(a,m);              // a^(2^k) for the k-th exponent bit
     if (pow&1) x=mulmod(x,a,m);
   }
  return x;
}

/*!
 * Strong probable prime test (a single Miller-Rabin round).
 *
 * @param p unsigned integer to test for primality
 * @param base base for the strong probable prime test,
 *             with 1 < @p base < @p p
 * @returns whether @p p is a strong probable prime with respect to @p base.
 *          Even values of @p p and values below 3 are always rejected.
 */
bool strong_probab_prime(const unsigned int p, const unsigned int base)
{
  // assumes p>base>1 !!
  // Reject p<3 and even p right away. Without this guard p==1 would
  // never leave the loop below, because p-1==0 has no odd part.
  if (p<3 || !(p&1U)) return false;

  // write p-1 = d * 2^s with d odd
  unsigned int d=p-1;
  unsigned int s=0;
  while (!(d&1U)) { d>>=1; ++s; }

  // p passes if base^d == 1 or base^(d*2^r) == -1 (mod p) for some r<s
  const unsigned int start=powmod(base,d,p);
  unsigned int x = start;
  while (s && x!=p-1) { x=squaremod(x,p); --s; }

  // If start==1, every square stays 1 and can never equal p-1 (p>=3),
  // so this is the original two-case expression in shorter form.
  return start==1 || x==p-1;
}

/*!
 * Probable prime test with the fixed bases 2, 7 and 61.
 *
 * @param p unsigned integer to check for primality, with @p p > 61
 * @result whether @p p is a strong probable prime to each of the bases
 *         2, 7 and 61.
 *
 * @remark According to the original author's note, this combination of
 *         bases has been verified by someone beforehand to be exact for
 *         61 < @p p < 4.759.123.141, so within that range the result
 *         states whether @p p is prime, provided that the modular
 *         multiplication helpers do not overflow for @p p.
 */
bool probab_prime(const unsigned int p)
{
  // assumes p>61 !!
  const unsigned int bases[] = { 2, 7, 61 };
  const unsigned int count = sizeof(bases)/sizeof(bases[0]);

  // the first failing base settles the answer; later bases are not tried
  for (unsigned int i=0; i<count; ++i)
   {
     if (!strong_probab_prime(p,bases[i])) return false;
   }
  return true;
}


/*!
 * Primality test by trial division.
 *
 * @param p integer to check for primality
 * @result whether @p p is a prime number; values below 2 (including all
 *         negative values) are not prime
 *
 * @remark This function is slow for large numbers, but safe for all
 *         numbers, since it is based on trial division.
 */
bool is_prime(const int p)
{
  if (p<2) return false;                  // 0, 1 and negative values
  if (p==2 || p==3) return true;
  if ((p%2==0) || (p%3==0)) return false;

  // All remaining candidates for a divisor are of the form 6k-1 or 6k+1.
  // The bound is written as t<=p/t instead of t*t<=p, because t*t
  // overflows the int range for p close to INT_MAX.
  for (int t=5; t<=p/t; t+=6)
   {
     if ((p%t)==0 || (p%(t+2))==0) return false;
   }
  return true;
}


/*!
 * Greatest common divisor by the Euclidean algorithm.
 *
 * @param a an unsigned integer
 * @param b an unsigned integer
 * @result greatest common divisor of @p a and @p b; if one argument is
 *         zero the other one is returned (so gcd(0,0) is 0)
 */
unsigned int gcd (unsigned int a, unsigned int b)
{
  // replace (a,b) by (b, a mod b) until the remainder vanishes
  while (b!=0)
   {
     const unsigned int remainder = a%b;
     a=b;
     b=remainder;
   }
  return a;
}

/*!
 * Greatest common divisor based on shifts and subtractions.
 *
 * @param a must be an odd unsigned integer
 * @param b must be an odd unsigned integer
 * @result greatest common divisor of @p a and @p b
 * @remark This function should be somewhat faster than the normal gcd
 *         computation since divisions are replaced by shift operations.
 *         Factors of two are discarded from both operands separately;
 *         for two nonzero operands the result is therefore always odd.
 *         If one argument is zero, the other one is returned unchanged.
 */
unsigned int oddgcd(unsigned int a, unsigned int b)
{
  if (a==0) return b;
  if (b==0) return a;
  do
   {
     // strip all factors of two
     while ((a&1U)==0) a>>=1;
     while ((b&1U)==0) b>>=1;
     // subtract the smaller from the larger operand; both tests are
     // evaluated in sequence, so both operands may shrink in one pass
     if (a>b) a-=b;
     if (b>a) b-=a;
   } while (a!=b);
  return a;
}

/*!
 * Coprimality test.
 *
 * @param a an unsigned integer
 * @param b an unsigned integer
 * @result whether @p a and @p b are coprime (that is gcd(a,b)==1 )
 */
bool coprime(unsigned int a, unsigned int b)
{
  // two even numbers share the factor 2
  if (((a|b)&1U)==0) return false;

  // at least one operand is odd, so the gcd is odd as well and the
  // shift based oddgcd yields the full gcd
  return oddgcd(a,b)==1;
}

/*!
 * Jacobi symbol.
 *
 * @param a an integer value (negative values are allowed)
 * @param b an odd unsigned integer value
 * @result Jacobi symbol (a/b) (which is undefined for even @p b):
 *          - 0, if @p a and @p b share a common factor
 *          - 1 or -1 otherwise
 * @remark For @p b == 1 the function returns 0, because every @p a is
 *         divisible by 1.
 */
signed int jacobi (int a, unsigned int b)
{
  // returns jacobi(a/b), assumes b odd!

  signed int x = 1; // accumulated sign of the symbol

  // Continue with |a| as an unsigned value. Mixing the signed a with the
  // unsigned b in a%b would silently convert a negative a to a huge
  // unsigned number and give wrong remainders.
  unsigned int n;
  if (a<0)
   {
     n = 0U - static_cast<unsigned int>(a); // |a|, well defined even for INT_MIN
     if ((b-1)&2) x=-x; // (-1/b)=(-1)^((b-1)/2)
   }
  else n = static_cast<unsigned int>(a);

  n%=b;
  if (n==0) return 0; // b divides a

  unsigned int d = b;
  while (n>1 && d>1)
   {
     n%=d; if (n==0) return 0; // common factor found
     unsigned int twos = 0;
     while ((n&1U)==0) { n>>=1; ++twos; }
     if (twos&1U) // only an odd number of twos in n can change the sign!
      {
        // (2/d)=(-1)^((d^2-1)/8); bit 3 of (d mod 16)^2 tells the parity
        const unsigned int low = d&15U;
        if ((low*low)&8U) x=-x;
      }
     // n and d odd natural numbers -> (n/d)=(-1)^( (n-1)/2 * (d-1)/2 ) * (d/n)
     if (2U&n&d) x=-x;
     const unsigned int swap=n; n=d; d=swap; // swap and loop again...
   }
  return x;
}

/*!
 * Legendre symbol.
 *
 * @param a an integer value
 * @param p an odd prime number (as unsigned integer)
 * @result Legendre symbol (a/p), which is
 *          - 1, if @p a is a quadratic residue modulo @p p
 *          - -1, if @p a is not a quadratic residue modulo @p p
 *          - 0, if @p a and @p p are not coprime
 * @remark
 *         - The result is only defined, if @p p is an odd prime number.
 *           You cannot rely on any specific behaviour of this function,
 *           if this condition is not met!
 *         - For valid values the result is identical to that of the
 *           jacobi function, to which the computation is delegated.
 */
signed int legendre (signed int a, const unsigned int p)
{
  // The Legendre symbol is only defined for an odd prime p. No check is
  // made here: the call is simply forwarded to jacobi, so for any other
  // odd p the Jacobi symbol comes back instead. "Undefined" means that
  // callers must not rely on any particular behaviour in that case.

  // For debugging purposes the argument check below can be enabled.
  // (As written it uses cerr, endl and exit unqualified, so it would
  // presumably need adjusting before it compiles inside this namespace.)
#if 0
  if ( (p%2==0) || !is_prime(p) )
   {
     cerr << "error in legendre: " << p << " is not an odd prime!" << endl;
     exit(1);
   }
#endif
  const signed int symbol = jacobi(a,p);
  return symbol;
}

/*!
 * Modular inverse, computed with the extended Euclidean algorithm.
 *
 * @param x an unsigned integer value
 * @param m an unsigned integer value (the modulus)
 * @result the modular inverse of @p x mod @p m
 * @remark The result is only defined, if @p x and @p m are coprime.
 *         If the computed gcd differs from 1, a message is written to
 *         std::cerr and the program is terminated with exit(1).
 * @remark Depending on ASM386 / ASM386_CMOV one of three implementations
 *         is compiled: portable C++, i386 assembler using cmov, or
 *         "standard" i386 assembler.
 */
unsigned int invmod(const unsigned int x, const unsigned int m)
{
  // returns 1/x mod m (or error if no inverse exists)
  unsigned int a; // receives the gcd of x and m; checked against 1 below
  signed int inv; // signed coefficient that is normalized to the result

#ifndef ASM386
  // Portable implementation.
  // Euclid's algorithm runs on the pair (a,b), starting with (m,x).
  // Alongside, inv and sw are kept such that a == inv*x and b == sw*x
  // (mod m); when b reaches 0, a is the gcd and inv the coefficient.
  // NOTE(review): inv-sw*q mixes signed and unsigned operands and
  // presumably relies on wrap-around conversion back to signed - confirm.
  unsigned int b = x; //(x>=m) ? x%m : x;
  signed int sw=1;
  a=m; inv=0;
  while (b)
   {
//     std::cout << "b=" <<  b << ", a=" << a; 
     unsigned int q=a/b; unsigned int r=a%b;
     a=b; b=r;
     signed h=inv-sw*q; inv=sw; sw=h;
//     std::cout << ", q=" << q << ", r=" << r << ", inv=" << inv << std::endl;
   }
  // shift a negative coefficient up by one modulus
  inv = (inv>=0) ? inv : m+inv;

#else

#ifdef ASM386_CMOV
// inline assembler code for AMD Athlon
// Three loops are visible below (labels 1, 2 and 3), working with 32 bit,
// 16 bit and 8 bit divisions respectively; the code moves on to the
// narrower loop once the operands are small enough - presumably because
// narrow divisions are cheaper on the target processors.
// Label 9 normalizes the result: the modulus is added if inv is not
// positive.

unsigned int temp; // scratch operand used to save ebx inside the loops

#warning "using cmov for invmod-function"
  asm volatile ( \
   "movl %[M],%%eax \n\t" \
   "movl %[X],%%ebx \n\t" \
   "movl $0,%[inv] # initialize inv \n\t" \
   "movl $1,%%ecx \n\t" \
   "1: # calc modular inverse\n\t" \
   "xorl %%edx,%%edx # prepare long div \n\t" \
   "divl %%ebx \n\t" \
   "movl %%ebx,%[tmp] \n\t" \
   "movl %%edx,%%ebx \n\t" \
   "imull %%ecx \n\t" \
   "subl %[inv],%%eax \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "movl %%eax,%%ecx \n\t" \
   "movl %[tmp],%%eax \n\t" \
   "negl %%ecx \n\t" \
   "testl $0xffff0000,%%ebx \n\t" \
   "jnz 1b \n\t" \
   "movl %%eax,%%edx \n\t" \
   "testl %%ebx,%%ebx \n\t" \
   "jz 9f \n\t" \
   "shrl $16,%%edx \n\t" \
   "cmpl %%ebx,%%edx \n\t" \
   "jae 1b # ext gcd loop (big operands 0:32/32) \n\t" \
   "movzx %%ax,%%eax \n\t" \
   "2: # calc modular inverse (small operands 16:16/16) \n\t" \
   "divw %%bx \n\t" \
   "movl %%ebx,%[tmp] \n\t" \
   "movl %%edx,%%ebx \n\t" \
   "imull %%ecx \n\t" \
   "subl %[inv],%%eax \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "xorl %%edx,%%edx \n\t" \
   "movl %%eax,%%ecx \n\t" \
   "movl %[tmp],%%eax \n\t" \
   "negl %%ecx \n\t" \
   "testb $0xff,%%bh \n\t" \
   "jnz 2b # ext gcd loop (small ops) \n\t" \
   "testb %%bl,%%bl \n\t" \
   "jz 9f \n\t" \
   "cmpb %%bl,%%ah \n\t" \
   "jae 2b \n\t" \
   "3: # calc modular inverse (byte operands 8:8/8 \n\t" \
   "divb %%bl \n\t" \
   "movb %%bl,%%bh \n\t" \
   "movb %%ah,%%bl \n\t" \
   "movzx %%al,%%eax \n\t" \
   "imull %%ecx \n\t" \
   "subl %[inv],%%eax \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "movl %%eax,%%ecx \n\t" \
   "movzx %%bh,%%eax \n\t" \
   "negl %%ecx \n\t" \
   "cmpb $1,%%bl \n\t" \
   "ja 3b # ext gcd loop (byte ops) \n\t" \
   "movzx %%bl,%%ebx \n\t" \
   "cmovel %%ecx,%[inv] \n\t" \
   "cmovel %%ebx,%%eax \n\t" \
   "9: # normalize result \n\t" \
   "movl %[M],%%edx \n\t" \
   "addl %[inv],%%edx \n\t" \
   "cmpl $0,%[inv] \n\t" \
   "cmovng %%edx,%[inv] \n"
   : "=&a" (a), [inv] "=&D" (inv), [tmp] "=&g" (temp) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");

#else /* "standard" ASM386 handoptimized inline assembler */
  // Two loops are visible below (labels 1 and 2), working with 32 bit
  // and 16 bit divisions respectively; the divisor is saved on the stack
  // (pushl/popl) during each step. Label 9 normalizes the result: the
  // modulus is added if inv is not positive.
  asm volatile ( \
   "movl %[M],%%eax \n\t" \
   "movl %[X],%%ebx \n\t" \
   "movl $0,%[inv] # initialize inv \n\t" \
   "movl $1,%%ecx \n\t" \
   "1: # calc modular inverse\n\t" \
   "xorl %%edx,%%edx # prepare long div \n\t" \
   "divl %%ebx \n\t" \
   "pushl %%ebx \n\t" \
   "movl %%edx,%%ebx \n\t" \
   "imull %%ecx \n\t" \
   "movl %[inv],%%edx \n\t" \
   "subl %%eax,%%edx \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "movl %%edx,%%ecx \n\t" \
   "popl %%eax \n\t" \
   "testl %%ebx,%%ebx \n\t" \
   "jz 9f # fertig \n\t" \
   "testl $0xffff0000,%%eax \n\t" \
   "jnz 1b # ext gcd loop (big operands) \n\t" \
   "2: # calc modular inverse (small operands) \n\t" \
   "xorl %%edx,%%edx # prepare (word) div \n\t" \
   "divw %%bx \n\t" \
   "pushl %%ebx \n\t" \
   "movl %%edx,%%ebx \n\t" \
   "imull %%ecx \n\t" \
   "movl %[inv],%%edx \n\t" \
   "subl %%eax,%%edx \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "movl %%edx,%%ecx \n\t" \
   "popl %%eax \n\t" \
   "cmpl $1,%%ebx \n\t" \
   "ja 2b # ext gcd loop (small ops) \n\t" \
   "jb 9f \n\t" \
   "movl %%ecx,%[inv] \n\t" \
   "movl %%ebx,%%eax \n\t" \
   "9: # normalize result \n\t" \
   "cmpl $0,%[inv] \n\t" \
   "jg 2f \n\t" \
   "addl %[M],%[inv] \n\t" \
   "2: \n" \
   : "=&a" (a), [inv] "=&D" (inv) : [M] "g" (m), [X] "g" (x) : "ebx", "edx", "ecx", "cc");
#endif
#endif

  // a holds the gcd; an inverse exists only if it equals 1
  // NOTE(review): exit() is called although only <iostream> is included
  // by this file - presumably its declaration arrives transitively; confirm.
  if (a!=1)
   {
     std::cerr << "WARNING! invmod: 1/" << x << " (mod " << m << ") does not exist" << std::endl;
     exit(1);
   }

  // optional self check: the product of x and its inverse must be 1 (mod m)
#if 0 /* for debugging */
  if   (mulmod(x%m,inv,m)!=1)
   {
     std::cerr << "error in invmod: buggy?" << std::endl;
//     std::cout << "a= " << a << " inv= " << inv <<  std::endl;
//     std::cout << "x= " << x << " m= " << m <<  std::endl;
     exit(1);
   }
#endif

  return inv;
}


} // namespace numtheory

#endif /* MODULO_CC_INCLUDED */


#if 0

using namespace std;
using namespace numtheory;

// Interactive test driver (only compiled when the surrounding #if is enabled).
int main()
{
#if 1
 // Endless loop: read x and m, report whether they are coprime and print
 // the modular inverse of x mod m (invmod terminates the program itself
 // if no inverse exists).
 for (;;)
  {
    unsigned int x,m;
    cout << "x= " << flush;
    cin >> x;
    cout << "m= " << flush;
    cin >> m;
    cout << "coprime: " << coprime(x,m) << endl;
    const unsigned int inverse = invmod(x,m);
    cout << inverse << " = 1/" << x << " (mod " << m << ")" << endl;
  }
#endif

#if 0
 // alternative check for normalized_signed_mod
 signed int a,m;
  cout << "a= " << flush; cin >> a;
  cout << "m= " << flush; cin >> m;
//  cout << a%m << " <-> " << normalized_signed_mod(a,m) << endl;
  a=normalized_signed_mod(a,m);
  cout << a << endl;
#endif
  return 0;
}

#endif
