// Congruency-Bitmatch (& Fermat Factorization)
// Version 1.0
// (C) 2005 by Thorsten Reinecke (thre@thorstenreinecke.de)

// this is *not* optimized for factorization, it's just an example
// for evaluating congruency problems...

// to compile it: gcc -Wall -O3 bitmatch.c -lgmp
// (you need the GNU multiple precision arithmetic library, see: http://www.swox.com/gmp/)


// be welcome to modify this program!
// if you have any nice ideas how to speedup the scan-function,
// or if you have significantly improved the function, please tell me...


#include <stdlib.h>
#include <stdio.h>
#include <gmp.h>

// do we want to count all wheel steps explicitly?
//#define VERIFY_STEPS

#define MAXcount 50
// prime table size ( = number of available wheels)

unsigned int count = 30;
// number of wheels to test

#define max 256
// maximum wheel size may not exceed this limit


// the more bits fit into "Bits", the faster is the scan...
// (but it must be an unsigned integer type that fits into a register!)
typedef unsigned int Bits;
const unsigned int K = sizeof(Bits)*8;

const unsigned int s[MAXcount]
 = { 3,5,7,11,13,         17,19,23,29,31,
     37,41,43,47,53,      59,61,67,71,73,
     79,83,89,97,101,     103,107,109,113,127,
     131,137,139,149,151, 157,163,167,173,179,
     181,191,193,197,199, 211,223,227,229,233 };


// you may define the wheels here,
// (but calling "initialize_fermat" clobbers this input...)
// however, it's much simpler to see what the algorithm is actually doing,
// if you can define the wheels by example ;-)
// the wheel n has s[n] positions that are marked (0 or 1);
// all wheels are turned simultaneously;
// the goal is to find the first position, where all wheel positions
// are marked with 1.
char Input[MAXcount][max] =
 { 
  { 1,0,0 },
  { 0,1,1,0,0 },
  { 0,0,1,0,1,0,0 },
  { 0,1,0,1,0,0,1,1,1,0,0 },
  { 0,0,1,1,1,0,0,1,0,0,1,1,0 },
  { 0,1,0,0,0,1,0,0,0,1,1,0,0,1,1,0,0 },
  { 0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0 },  
  { 1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 },
  { 1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,1 },
  { 1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1 }
 };

 
int pos[MAXcount] = { 0 };
Bits* v[MAXcount] = { NULL };
mpz_t N;

#ifdef VERIFY_STEPS
 mpz_t Steps;
#endif 


// number to factor using this method...
const char *N_str = "2264398678369051900618122549728617779130841750243238532254691036960782019423669379563375285008684958835635674673925450070505271283344382543843542224895386356360815190603619139250934963007414845206417644123726419587283103181869548764357702949545417973712846395177";

void fermat_test(const mpz_t D)
{
  mpz_t x,y;
  mpz_init(x); mpz_init(y);
  mpz_mul_ui(x,N,4); mpz_sqrt(x,x); mpz_add(x,x,D); mpz_div_ui(x,x,2);
  mpz_mul(y,x,x); mpz_sub(y,y,N);
  if (mpz_perfect_square_p(y))
   {
     mpz_sqrt(y,y); mpz_add(x,x,y);
     printf("Factor found: "); mpz_out_str(0,10,x); printf("\n");
     exit(0);
   }
  else printf("No factor found.\n");
 mpz_clear(x); mpz_clear(y);
}


void ChineseRemaindering(mpz_t r1, const unsigned int wheelpos[], const unsigned int count)
{  
  // instead of using an explicit counter, we can get the number of
  // processed steps via Chinese remaindering:
  // (we assume that m1 and m2 are coprime!)
  //  x = r1 (mod m1), x = r2 (mod m2)
  //  => x = k*m1 + r1 --> k=(r2-r1)*m1^(-1) (mod m2)
  //  => x = ((r2-r1)*m1^(-1) mod m2) *m1 + r1  (mod m1*m2)
  mpz_t x,r2,m1,m2;
  mpz_init(x); mpz_init(r2); mpz_init(m1); mpz_init(m2);
  mpz_set_ui(r1,wheelpos[0]); mpz_set_ui(m1,s[0]);
  unsigned int i;
  for (i=1; i<count; ++i)
   {
     mpz_set_ui(r2,wheelpos[i]); mpz_set_ui(m2,s[i]);
     mpz_sub(r2,r2,r1);
     mpz_invert(x,m1,m2); mpz_mul(x,x,r2); mpz_mod(x,x,m2); mpz_mul(x,x,m1);
     mpz_add(r1,r1,x); mpz_mul(m1,m1,m2);
     //printf("ChinRem, #%i: ",i); mpz_out_str(0,10,r1); printf("\n");
   }
  mpz_clear(x); mpz_clear(r2); mpz_clear(m1); mpz_clear(m2);
}


void scan()
{
  register int i;
  register Bits x;
  
  do
   {
     x = v[count-1][pos[count-1]];
     if (--pos[count-1]<0) pos[count-1]=s[count-1]-1;
     
     for (i=count-2; i>=0; --i)
      {
        x &= v[i][pos[i]];
        if (--pos[i]<0) pos[i]=s[i]-1;
      }

#ifdef VERIFY_STEPS    
    mpz_add_ui(Steps,Steps,K);
#endif
    
   } while (x==0);
   
  unsigned int bitoffset = 0;
  do
   {
      while ((x&1)==0) { bitoffset+=1; x>>=1; }
      x^=1;
      if (x) printf("Multiple positions found.\n"); 
      printf("Bitoffset: %i\n",bitoffset);
      
      printf("Wheel positions:\n");
      unsigned int wheelpos[count];
      for (i=0; i<count; ++i)
       {
         wheelpos[i]=s[i]-1-(K*((pos[i]+1)%s[i]) + K*s[i] - bitoffset)%s[i];
         printf("%i ", wheelpos[i]);
       }
      printf("\n");
      
      // instead of using an explicit counter, we can get the number of
      // processed steps via Chinese remaindering:
      mpz_t S;
      mpz_init(S);
      ChineseRemaindering(S,wheelpos,count);
      printf("Number of steps (ChinRem): "); mpz_out_str(0,10,S); printf("\n");
      fermat_test(S);
      
    #ifdef VERIFY_STEPS  
      { 
        mpz_t d;
        mpz_init_set(d,Steps); 
        mpz_sub_ui(d,d,K); mpz_sub_ui(d,d,1);
        mpz_add_ui(d,d,bitoffset);
        printf("Number of steps (verify): "); mpz_out_str(0,10,d); printf("\n");
        printf("Wheel positions (verify):\n");
        for (i=0; i<count; ++i) printf("%li ", mpz_fdiv_ui(d,s[i]));
        printf("\n");
        
        if (mpz_cmp(d,S)!=0)
         {
           printf("Mismatch! Please use more wheels...\n");
           // most probably the mismatch is caused by an overflow;
           // Chinese remaindering works for 0 <= S < s[0]*s[1]*...*s[count-1]
           exit(1);
         }
        mpz_clear(d);
      }
    #endif
    
    mpz_clear(S); 
  } while(x);
}


void prepare_scan()
{
  unsigned int i,j,k;
  
  // allocate memory for the wheels
  k=0;
  for (i=0; i<count; ++i) k+=s[i];
  v[0] = calloc(k,sizeof(Bits));
  // the wheels are concatenated
  // v[0][0..s[0]-1],v[1][0..s[1]-1],...,v[count-1][0..s[count-1]-1]
  //  -> v[i+1]=&v[i+1][0]=&v[i][s[i]] for i=0..count-2
  for (i=0; i<count-1; ++i) v[i+1]=&v[i][s[i]];
  
  // convert upwards -> downwards
  // and put K bits together for processing them in one step
  for (i=0; i<count; ++i)
   for (j=0; j<s[i]; ++j)
    {
      register Bits x = 0;
      for (k=0; k<K; ++k) x |= Input[i][(K*(s[i]-j)-1+k)%s[i]] ? 1<<k : 0;
      v[i][j]=x;
    }
#ifdef VERIFY_STEPS    
  mpz_init_set_ui(Steps,0);
#endif  
}

void initialize_fermat()
{
  mpz_init(N);

  int i;
  do
   {
     printf("please input an odd number to seed the wheels...\n");
     printf("... or 0 for default number\n");
     i=mpz_inp_str(N,0,0);
     if (mpz_cmp_ui(N,0)==0) mpz_set_str(N,N_str,10);
   } while (i==0 || mpz_even_p(N));
  
  printf("trying to factorize "); mpz_out_str(0,10,N);
  printf(" by finding a bitmatch congruency.\n");
  
  mpz_t t,D,Start;
  mpz_init(t); mpz_init(D); mpz_init(Start);
  
  mpz_mul_ui(Start,N,4); mpz_sqrt(Start,Start);
  
  for (i=0; i<count; ++i)
   {
     mpz_set_ui(t,s[i]);
     unsigned int j;
     const unsigned int Nt = mpz_fdiv_ui(N,s[i]);
     const unsigned int St = mpz_fdiv_ui(Start,s[i]);
     for (j=0; j<s[i]; ++j)
      {
        unsigned int h = (St+j)%s[i];
        if (h&1) h+=s[i];
        h/=2;
        mpz_set_ui(D,h*h);
        mpz_sub_ui(D,D,Nt);
        // now D = (h/2)^2 - N  (mod t)
        Input[i][j] = (mpz_jacobi(D,t)==-1) ? 0 : 1;
      }
   }

  mpz_clear(t); mpz_clear(D); mpz_clear(Start);
}


int main()
{
  printf("Congruency Bitmatch\n");
  do
   {
     printf("How many wheels should be used? 2-%i\n",MAXcount);
     scanf("%u",&count);
   } while (count<2 || count>MAXcount);
  printf("okay, turning %u wheels...\n",count);
  
  initialize_fermat();
  prepare_scan();

#if 1
  while(1) scan();
#else
  scan();  
#endif
  
  return 0;
}

