/* genefer.c -- 1.3 A program for finding large probable generalized Fermat primes. Copyright (C) 2001-2002, Yves GALLOT, galloty@wanadoo.fr GENEFER is free source code. You can redistribute, use and/or modify it. Thank you to give a feedback to the author if improvement is realized. It is distributed in the hope that it will be useful. */ #include #include #include #include #include #if defined(_MSC_VER) # include #endif #if (defined(_MSC_VER) && defined(_M_IX86)) # include #endif #if !defined(M_PI) # define M_PI 3.1415926535897932364626 #endif #if !defined(CONST) #define CONST const #endif typedef struct { double x, y; } Complex; #if defined(_MSC_VER) typedef __int32 Int32; typedef unsigned __int32 UInt32; typedef unsigned __int64 UInt64; #else typedef int Int32; typedef unsigned int UInt32; typedef unsigned long long UInt64; #endif /* depend on cache size */ #if !defined(FOUR_STEP_LIMIT) # define FOUR_STEP_LIMIT 2048 #endif /* depend on length and number of cache lines */ #if !defined(GAP) # define GAP 4 #endif #define BLK_SIZE 16 static Complex e0[FOUR_STEP_LIMIT], e1[2*FOUR_STEP_LIMIT], e2[FOUR_STEP_LIMIT]; static Complex * e3; static float maxErr; #define C5152 6755399441055744.0 #define C6263 13835058055282163712.0 /* optimize this code for your compiler and processor */ #if (defined(_MSC_VER) && defined(_M_IX86)) # define VERSION "x86" # define round(x) ((x + C6263) - C6263) # define B_MAX 35000000 #elif (defined(__sparc__) || defined(sparc) || defined(__sparc)) # define VERSION "Sparc" # define round(x) ((x + C5152) - C5152) # define B_MAX 30000000 #elif (defined(__alpha__) || defined(alpha) || defined(__alpha)) # define VERSION "Alpha" # define round(x) ((x + C5152) - C5152) # define B_MAX 30000000 #elif (defined(__POWERPC__) || defined(POWERPC) || defined(__POWERPC)) # define VERSION "PowerPC" # define round(x) ((x + C5152) - C5152) # define B_MAX 30000000 #elif (defined(__mips) || defined(mips) || defined(__mips)) # define VERSION "MIPS" # define round(x) ((x + C5152) - C5152) # define B_MAX 30000000 #else # define VERSION "standard" # define round(x) ((x >= 0) ? floor(x + 0.5) : ceil(x - 0.5)) # define B_MAX 30000000 #endif #if (defined(_MSC_VER) && defined(_M_IX86)) # define myAlloc(n) _aligned_malloc(n, 64) # define myFree(x) _aligned_free(x) #else # define myAlloc(n) malloc(n) # define myFree(x) free(x) #endif static UInt32 lpt(CONST UInt32 n) /* least power of two greater than n */ { UInt32 i; for (i = 1; i < n; i *= 2); return i; } static UInt32 ilog(CONST UInt32 n) /* floor log base 2 */ { UInt32 r, i; for (r = 0, i = n; i != 1; i /= 2, ++r); return r; } static unsigned int lg(CONST UInt32 * CONST a, CONST unsigned int len) { UInt32 x; unsigned int l = (len - 1) * 8 * sizeof(UInt32) - 1; for (x = a[len-1]; x != 0; x /= 2) ++l; return l; } static UInt32 mul_1_add_n(UInt32 * CONST a, CONST UInt32 n, CONST UInt32 * CONST b, CONST unsigned int len) { unsigned int i; UInt64 l = 0; for (i = 0; i != len; ++i) { l += b[i] * (UInt64)n + a[i]; a[i] = (UInt32)l; l >>= 32; } return (UInt32)l; } static void initExp(CONST unsigned int n1, CONST unsigned int n2) { unsigned int i, j, k, l; Complex * ePtr; CONST double twoPi_n = 2 * M_PI / (n1 * n2); e3 = (Complex *)myAlloc(n2 * (BLK_SIZE + n1 / BLK_SIZE) * sizeof(Complex)); for (j = 1; j != n2; ++j) { /* compute BitRev */ for (i = n2 >> 1, k = 0, l = j; i != 0; i >>= 1, l >>= 1) k += k + (l & 1); ePtr = &e3[k * (BLK_SIZE + n1 / BLK_SIZE)]; for (i = 0; i != BLK_SIZE; ++i) { ePtr->x = cos(twoPi_n * j * i); ePtr->y = sin(twoPi_n * j * i); ++ePtr; } for (i = 0; i != n1; i += BLK_SIZE) { ePtr->x = cos(twoPi_n * j * i); ePtr->y = sin(twoPi_n * j * i); ++ePtr; } } } static void FFTinit(CONST unsigned int n, CONST unsigned int n2) { unsigned int i; CONST double twoPi_n = 2 * M_PI / n; double theta; e0[0].x = 0.5 * sqrt(2.0); for (i = 1; i != n / 4; ++i) { theta = twoPi_n * i; e0[3*i + 0].x = cos(1*theta); e0[3*i + 0].y = sin(1*theta); e0[3*i + 1].x = cos(2*theta); e0[3*i + 1].y = sin(2*theta); e0[3*i + 2].x = cos(3*theta); e0[3*i + 2].y = sin(3*theta); } if (n2 != 0) initExp(n, n2); } static void mulExp(Complex * CONST z, CONST unsigned int n, CONST unsigned int nsc) { unsigned int i, j; double c, s, x, y; Complex * zPtr = &z[1]; CONST Complex * ebPtr = &e3[nsc + 1]; CONST Complex * eaPtr = &ebPtr[BLK_SIZE]; for (j = BLK_SIZE - 1; j != 0; --j) { x = ebPtr->x * zPtr->x + ebPtr->y * zPtr->y; y = ebPtr->x * zPtr->y - ebPtr->y * zPtr->x; ++ebPtr; zPtr->x = x; zPtr->y = y; ++zPtr; } ebPtr -= BLK_SIZE - 1; for (i = n / BLK_SIZE - 1; i != 0; --i) { x = eaPtr->x * zPtr->x + eaPtr->y * zPtr->y; y = eaPtr->x * zPtr->y - eaPtr->y * zPtr->x; zPtr->x = x; zPtr->y = y; ++zPtr; for (j = BLK_SIZE - 1; j != 0; --j) { c = eaPtr->x * ebPtr->x - eaPtr->y * ebPtr->y; s = eaPtr->x * ebPtr->y + eaPtr->y * ebPtr->x; ++ebPtr; x = c * zPtr->x + s * zPtr->y; y = c * zPtr->y - s * zPtr->x; zPtr->x = x; zPtr->y = y; ++zPtr; } ebPtr -= BLK_SIZE - 1; ++eaPtr; } } static void FFTsquareFFT(Complex * CONST z, CONST unsigned int n, CONST unsigned int nsc) { unsigned int i, j, m, l; Complex * zPtr, * z2Ptr; CONST Complex * ePtr; double x000, y000, x001, y001, x002, y002, x003, y003, x010, y010; double x100, y100, x101, y101, x102, y102, x103, y103, x110, y110; if (nsc != 0) mulExp(z, n, nsc); m = n / 4, l = 3; for (; m > 2; m /= 4, l *= 4) { zPtr = &z[0]; z2Ptr = &z[2*m]; for (i = l; i != 0; i -= 3) { x000 = zPtr[0].x + z2Ptr[0].x; x002 = zPtr[m].x + z2Ptr[m].x; y000 = zPtr[0].y + z2Ptr[0].y; y002 = zPtr[m].y + z2Ptr[m].y; x001 = zPtr[0].x - z2Ptr[0].x; y003 = zPtr[m].y - z2Ptr[m].y; y001 = zPtr[0].y - z2Ptr[0].y; x003 = zPtr[m].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; zPtr[m].x = x000 - x002; zPtr[m].y = y000 - y002; z2Ptr[0].x = x001 + y003; z2Ptr[0].y = y001 - x003; z2Ptr[m].x = x001 - y003; z2Ptr[m].y = y001 + x003; ePtr = &e0[l]; x100 = zPtr[1].x + z2Ptr[1].x; x102 = zPtr[m + 1].x + z2Ptr[m + 1].x; y100 = zPtr[1].y + z2Ptr[1].y; y102 = zPtr[m + 1].y + z2Ptr[m + 1].y; x101 = zPtr[1].x - z2Ptr[1].x; y103 = zPtr[m + 1].y - z2Ptr[m + 1].y; y101 = zPtr[1].y - z2Ptr[1].y; x103 = zPtr[m + 1].x - z2Ptr[m + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; x100 -= x102; y100 -= y102; x110 = x101 + y103; y110 = y101 - x103; x101 -= y103; y101 += x103; zPtr[m + 1].x = ePtr[1].x * x100 + ePtr[1].y * y100; zPtr[m + 1].y = ePtr[1].x * y100 - ePtr[1].y * x100; z2Ptr[1].x = ePtr[0].x * x110 + ePtr[0].y * y110; z2Ptr[1].y = ePtr[0].x * y110 - ePtr[0].y * x110; z2Ptr[m + 1].x = ePtr[2].x * x101 + ePtr[2].y * y101; z2Ptr[m + 1].y = ePtr[2].x * y101 - ePtr[2].y * x101; zPtr += 2; z2Ptr += 2; ePtr += l; for (j = m - 2; j != 0; j -= 2) { x000 = zPtr[0].x + z2Ptr[0].x; x002 = zPtr[m].x + z2Ptr[m].x; y000 = zPtr[0].y + z2Ptr[0].y; y002 = zPtr[m].y + z2Ptr[m].y; x001 = zPtr[0].x - z2Ptr[0].x; y003 = zPtr[m].y - z2Ptr[m].y; y001 = zPtr[0].y - z2Ptr[0].y; x003 = zPtr[m].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; x000 -= x002; y000 -= y002; x010 = x001 + y003; y010 = y001 - x003; x001 -= y003; y001 += x003; zPtr[m].x = ePtr[1].x * x000 + ePtr[1].y * y000; zPtr[m].y = ePtr[1].x * y000 - ePtr[1].y * x000; z2Ptr[0].x = ePtr[0].x * x010 + ePtr[0].y * y010; z2Ptr[0].y = ePtr[0].x * y010 - ePtr[0].y * x010; z2Ptr[m].x = ePtr[2].x * x001 + ePtr[2].y * y001; z2Ptr[m].y = ePtr[2].x * y001 - ePtr[2].y * x001; x100 = zPtr[1].x + z2Ptr[1].x; x102 = zPtr[m + 1].x + z2Ptr[m + 1].x; y100 = zPtr[1].y + z2Ptr[1].y; y102 = zPtr[m + 1].y + z2Ptr[m + 1].y; x101 = zPtr[1].x - z2Ptr[1].x; y103 = zPtr[m + 1].y - z2Ptr[m + 1].y; y101 = zPtr[1].y - z2Ptr[1].y; x103 = zPtr[m + 1].x - z2Ptr[m + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; x100 -= x102; y100 -= y102; x110 = x101 + y103; y110 = y101 - x103; x101 -= y103; y101 += x103; zPtr[m + 1].x = ePtr[l + 1].x * x100 + ePtr[l + 1].y * y100; zPtr[m + 1].y = ePtr[l + 1].x * y100 - ePtr[l + 1].y * x100; z2Ptr[1].x = ePtr[l + 0].x * x110 + ePtr[l + 0].y * y110; z2Ptr[1].y = ePtr[l + 0].x * y110 - ePtr[l + 0].y * x110; z2Ptr[m + 1].x = ePtr[l + 2].x * x101 + ePtr[l + 2].y * y101; z2Ptr[m + 1].y = ePtr[l + 2].x * y101 - ePtr[l + 2].y * x101; ePtr += 2*l; zPtr += 2; z2Ptr += 2; } zPtr += 3*m; z2Ptr += 3*m; } } if (m == 2) { zPtr = &z[0]; x010 = e0[0].x; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x - zPtr[4].x; y000 = zPtr[0].y - zPtr[4].y; zPtr[0].x += zPtr[4].x; zPtr[0].y += zPtr[4].y; zPtr[4].x = x000; zPtr[4].y = y000; x001 = x010 * (zPtr[1].x - zPtr[5].x); y001 = x010 * (zPtr[1].y - zPtr[5].y); zPtr[1].x += zPtr[5].x; zPtr[1].y += zPtr[5].y; zPtr[5].x = x001 + y001; zPtr[5].y = y001 - x001; x002 = zPtr[6].x - zPtr[2].x; y002 = zPtr[2].y - zPtr[6].y; zPtr[2].x += zPtr[6].x; zPtr[2].y += zPtr[6].y; zPtr[6].x = y002; zPtr[6].y = x002; x003 = x010 * (zPtr[7].x - zPtr[3].x); y003 = x010 * (zPtr[7].y - zPtr[3].y); zPtr[3].x += zPtr[7].x; zPtr[3].y += zPtr[7].y; zPtr[7].x = x003 - y003; zPtr[7].y = y003 + x003; zPtr += 8; } l *= 2; } zPtr = &z[0]; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x + zPtr[2].x; x002 = zPtr[1].x + zPtr[3].x; y000 = zPtr[0].y + zPtr[2].y; y002 = zPtr[1].y + zPtr[3].y; x001 = zPtr[0].x - zPtr[2].x; y003 = zPtr[1].y - zPtr[3].y; y001 = zPtr[0].y - zPtr[2].y; x003 = zPtr[1].x - zPtr[3].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; zPtr[1].x = x000 - x002; zPtr[1].y = y000 - y002; zPtr[2].x = x001 + y003; zPtr[2].y = y001 - x003; zPtr[3].x = x001 - y003; zPtr[3].y = y001 + x003; x100 = zPtr[4].x + zPtr[6].x; x102 = zPtr[5].x + zPtr[7].x; y100 = zPtr[4].y + zPtr[6].y; y102 = zPtr[5].y + zPtr[7].y; x101 = zPtr[4].x - zPtr[6].x; y103 = zPtr[5].y - zPtr[7].y; y101 = zPtr[4].y - zPtr[6].y; x103 = zPtr[5].x - zPtr[7].x; zPtr[4].x = x100 + x102; zPtr[4].y = y100 + y102; zPtr[5].x = x100 - x102; zPtr[5].y = y100 - y102; zPtr[6].x = x101 + y103; zPtr[6].y = y101 - x103; zPtr[7].x = x101 - y103; zPtr[7].y = y101 + x103; zPtr += 8; } zPtr = &z[0]; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x + zPtr[0].y; y000 = zPtr[0].x - zPtr[0].y; x001 = zPtr[1].x + zPtr[1].y; y001 = zPtr[1].x - zPtr[1].y; x002 = zPtr[2].x + zPtr[2].y; y002 = zPtr[2].x - zPtr[2].y; x003 = zPtr[3].x + zPtr[3].y; y003 = zPtr[3].x - zPtr[3].y; zPtr[0].y *= -2 * zPtr[0].x; zPtr[0].x = x000 * y000; zPtr[1].y *= 2 * zPtr[1].x; zPtr[1].x = x001 * y001; zPtr[2].y *= 2 * zPtr[2].x; zPtr[2].x = x002 * y002; zPtr[3].y *= 2 * zPtr[3].x; zPtr[3].x = x003 * y003; x100 = zPtr[4].x + zPtr[4].y; y100 = zPtr[4].x - zPtr[4].y; x101 = zPtr[5].x + zPtr[5].y; y101 = zPtr[5].x - zPtr[5].y; x102 = zPtr[6].x + zPtr[6].y; y102 = zPtr[6].x - zPtr[6].y; x103 = zPtr[7].x + zPtr[7].y; y103 = zPtr[7].x - zPtr[7].y; zPtr[4].y *= -2 * zPtr[4].x; zPtr[4].x = x100 * y100; zPtr[5].y *= 2 * zPtr[5].x; zPtr[5].x = x101 * y101; zPtr[6].y *= 2 * zPtr[6].x; zPtr[6].x = x102 * y102; zPtr[7].y *= 2 * zPtr[7].x; zPtr[7].x = x103 * y103; zPtr += 8; } zPtr = &z[0]; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x + zPtr[1].x; x001 = zPtr[0].x - zPtr[1].x; y000 = zPtr[0].y - zPtr[1].y; y001 = zPtr[0].y + zPtr[1].y; x002 = zPtr[2].x + zPtr[3].x; x003 = zPtr[2].x - zPtr[3].x; y002 = zPtr[2].y + zPtr[3].y; y003 = zPtr[2].y - zPtr[3].y; zPtr[0].x = x000 + x002; zPtr[2].x = x000 - x002; zPtr[1].y = y001 - x003; zPtr[3].y = y001 + x003; zPtr[0].y = y000 - y002; zPtr[2].y = y000 + y002; zPtr[1].x = x001 - y003; zPtr[3].x = x001 + y003; x100 = zPtr[4].x + zPtr[5].x; x101 = zPtr[4].x - zPtr[5].x; y100 = zPtr[4].y - zPtr[5].y; y101 = zPtr[4].y + zPtr[5].y; x102 = zPtr[6].x + zPtr[7].x; x103 = zPtr[6].x - zPtr[7].x; y102 = zPtr[6].y + zPtr[7].y; y103 = zPtr[6].y - zPtr[7].y; zPtr[4].x = x100 + x102; zPtr[6].x = x100 - x102; zPtr[5].y = y101 - x103; zPtr[7].y = y101 + x103; zPtr[4].y = y100 - y102; zPtr[6].y = y100 + y102; zPtr[5].x = x101 - y103; zPtr[7].x = x101 + y103; zPtr += 8; } if (m == 2) { l /= 2; zPtr = &z[0]; x010 = e0[0].x; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x - zPtr[4].x; y000 = zPtr[0].y - zPtr[4].y; zPtr[0].x += zPtr[4].x; zPtr[0].y += zPtr[4].y; zPtr[4].x = x000; zPtr[4].y = y000; x001 = x010 * (zPtr[5].x + zPtr[5].y); y001 = x010 * (zPtr[5].y - zPtr[5].x); zPtr[5].x = zPtr[1].x - x001; zPtr[5].y = zPtr[1].y - y001; zPtr[1].x += x001; zPtr[1].y += y001; x002 = zPtr[2].x - zPtr[6].y; y002 = zPtr[2].y + zPtr[6].x; zPtr[2].x += zPtr[6].y; zPtr[2].y -= zPtr[6].x; zPtr[6].x = x002; zPtr[6].y = y002; x003 = x010 * (zPtr[7].x - zPtr[7].y); y003 = x010 * (zPtr[7].y + zPtr[7].x); zPtr[7].x = zPtr[3].x + x003; zPtr[7].y = zPtr[3].y + y003; zPtr[3].x -= x003; zPtr[3].y -= y003; zPtr += 8; } } m *= 4, l /= 4; for (; m != n; m *= 4, l /= 4) { zPtr = &z[0]; z2Ptr = &z[2*m]; for (i = l; i != 0; i -= 3) { x000 = zPtr[0].x + zPtr[m].x; x002 = z2Ptr[0].x + z2Ptr[m].x; y000 = zPtr[0].y + zPtr[m].y; y002 = z2Ptr[0].y + z2Ptr[m].y; x001 = zPtr[0].x - zPtr[m].x; y003 = z2Ptr[0].y - z2Ptr[m].y; y001 = zPtr[0].y - zPtr[m].y; x003 = z2Ptr[0].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; z2Ptr[0].x = x000 - x002; z2Ptr[0].y = y000 - y002; zPtr[m].x = x001 + y003; zPtr[m].y = y001 - x003; z2Ptr[m].x = x001 - y003; z2Ptr[m].y = y001 + x003; ePtr = &e0[l]; x101 = ePtr[1].x * zPtr[m + 1].x + ePtr[1].y * zPtr[m + 1].y; y101 = ePtr[1].x * zPtr[m + 1].y - ePtr[1].y * zPtr[m + 1].x; x102 = ePtr[0].x * z2Ptr[1].x + ePtr[0].y * z2Ptr[1].y; y102 = ePtr[0].x * z2Ptr[1].y - ePtr[0].y * z2Ptr[1].x; x103 = ePtr[2].x * z2Ptr[m + 1].x + ePtr[2].y * z2Ptr[m + 1].y; y103 = ePtr[2].x * z2Ptr[m + 1].y - ePtr[2].y * z2Ptr[m + 1].x; x100 = zPtr[1].x + x101; x101 = zPtr[1].x - x101; y100 = zPtr[1].y + y101; y101 = zPtr[1].y - y101; x110 = x102 + x103; x102 -= x103; y110 = y102 + y103; y102 -= y103; zPtr[1].x = x100 + x110; z2Ptr[1].x = x100 - x110; zPtr[1].y = y100 + y110; z2Ptr[1].y = y100 - y110; zPtr[m + 1].x = x101 + y102; z2Ptr[m + 1].x = x101 - y102; zPtr[m + 1].y = y101 - x102; z2Ptr[m + 1].y = y101 + x102; ePtr += l; zPtr += 2; z2Ptr += 2; for (j = m - 2; j != 0; j -= 2) { x001 = ePtr[1].x * zPtr[m].x + ePtr[1].y * zPtr[m].y; y001 = ePtr[1].x * zPtr[m].y - ePtr[1].y * zPtr[m].x; x002 = ePtr[0].x * z2Ptr[0].x + ePtr[0].y * z2Ptr[0].y; y002 = ePtr[0].x * z2Ptr[0].y - ePtr[0].y * z2Ptr[0].x; x003 = ePtr[2].x * z2Ptr[m].x + ePtr[2].y * z2Ptr[m].y; y003 = ePtr[2].x * z2Ptr[m].y - ePtr[2].y * z2Ptr[m].x; x000 = zPtr[0].x + x001; x001 = zPtr[0].x - x001; y000 = zPtr[0].y + y001; y001 = zPtr[0].y - y001; x010 = x002 + x003; x002 -= x003; y010 = y002 + y003; y002 -= y003; zPtr[0].x = x000 + x010; z2Ptr[0].x = x000 - x010; zPtr[0].y = y000 + y010; z2Ptr[0].y = y000 - y010; zPtr[m].x = x001 + y002; z2Ptr[m].x = x001 - y002; zPtr[m].y = y001 - x002; z2Ptr[m].y = y001 + x002; x101 = ePtr[l + 1].x * zPtr[m + 1].x + ePtr[l + 1].y * zPtr[m + 1].y; y101 = ePtr[l + 1].x * zPtr[m + 1].y - ePtr[l + 1].y * zPtr[m + 1].x; x102 = ePtr[l + 0].x * z2Ptr[1].x + ePtr[l + 0].y * z2Ptr[1].y; y102 = ePtr[l + 0].x * z2Ptr[1].y - ePtr[l + 0].y * z2Ptr[1].x; x103 = ePtr[l + 2].x * z2Ptr[m + 1].x + ePtr[l + 2].y * z2Ptr[m + 1].y; y103 = ePtr[l + 2].x * z2Ptr[m + 1].y - ePtr[l + 2].y * z2Ptr[m + 1].x; x100 = zPtr[1].x + x101; x101 = zPtr[1].x - x101; y100 = zPtr[1].y + y101; y101 = zPtr[1].y - y101; x110 = x102 + x103; x102 -= x103; y110 = y102 + y103; y102 -= y103; zPtr[1].x = x100 + x110; z2Ptr[1].x = x100 - x110; zPtr[1].y = y100 + y110; z2Ptr[1].y = y100 - y110; zPtr[m + 1].x = x101 + y102; z2Ptr[m + 1].x = x101 - y102; zPtr[m + 1].y = y101 - x102; z2Ptr[m + 1].y = y101 + x102; ePtr += 2*l; zPtr += 2; z2Ptr += 2; } zPtr += 3*m; z2Ptr += 3*m; } } if (nsc != 0) mulExp(z, n, nsc); } static void FFTSinit(CONST unsigned int n) { unsigned int i; CONST double twoPi_n = 2 * M_PI / n; double theta; e2[0].x = 0.5 * sqrt(2.0); for (i = 1; i != n / 4; ++i) { theta = twoPi_n * i; e2[3*i + 0].x = cos(1*theta); e2[3*i + 0].y = sin(1*theta); e2[3*i + 1].x = cos(2*theta); e2[3*i + 1].y = sin(2*theta); e2[3*i + 2].x = cos(3*theta); e2[3*i + 2].y = sin(3*theta); } } static void FFTStime(Complex * CONST z, CONST unsigned int n, CONST unsigned int s) { unsigned int i, j, m, l; Complex * zPtr, * z2Ptr; CONST Complex * ePtr; double x000, y000, x001, y001, x002, y002, x003, y003, x010, y010; double x100, y100, x101, y101, x102, y102, x103, y103, x110, y110; double x200, y200, x201, y201, x202, y202, x203, y203, x210, y210; double x300, y300, x301, y301, x302, y302, x303, y303, x310, y310; m = s, l = 3 * n / 4; zPtr = &z[0]; z2Ptr = &z[2*s]; for (i = n; i != 0; i -= 4) { x000 = zPtr[0].x + zPtr[s].x; x002 = z2Ptr[0].x + z2Ptr[s].x; y000 = zPtr[0].y + zPtr[s].y; y002 = z2Ptr[0].y + z2Ptr[s].y; x001 = zPtr[0].x - zPtr[s].x; y003 = z2Ptr[0].y - z2Ptr[s].y; y001 = zPtr[0].y - zPtr[s].y; x003 = z2Ptr[0].x - z2Ptr[s].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; z2Ptr[0].x = x000 - x002; z2Ptr[0].y = y000 - y002; zPtr[s].x = x001 + y003; zPtr[s].y = y001 - x003; z2Ptr[s].x = x001 - y003; z2Ptr[s].y = y001 + x003; x100 = zPtr[1].x + zPtr[s + 1].x; x102 = z2Ptr[1].x + z2Ptr[s + 1].x; y100 = zPtr[1].y + zPtr[s + 1].y; y102 = z2Ptr[1].y + z2Ptr[s + 1].y; x101 = zPtr[1].x - zPtr[s + 1].x; y103 = z2Ptr[1].y - z2Ptr[s + 1].y; y101 = zPtr[1].y - zPtr[s + 1].y; x103 = z2Ptr[1].x - z2Ptr[s + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; z2Ptr[1].x = x100 - x102; z2Ptr[1].y = y100 - y102; zPtr[s + 1].x = x101 + y103; zPtr[s + 1].y = y101 - x103; z2Ptr[s + 1].x = x101 - y103; z2Ptr[s + 1].y = y101 + x103; x200 = zPtr[2].x + zPtr[s + 2].x; x202 = z2Ptr[2].x + z2Ptr[s + 2].x; y200 = zPtr[2].y + zPtr[s + 2].y; y202 = z2Ptr[2].y + z2Ptr[s + 2].y; x201 = zPtr[2].x - zPtr[s + 2].x; y203 = z2Ptr[2].y - z2Ptr[s + 2].y; y201 = zPtr[2].y - zPtr[s + 2].y; x203 = z2Ptr[2].x - z2Ptr[s + 2].x; zPtr[2].x = x200 + x202; zPtr[2].y = y200 + y202; z2Ptr[2].x = x200 - x202; z2Ptr[2].y = y200 - y202; zPtr[s + 2].x = x201 + y203; zPtr[s + 2].y = y201 - x203; z2Ptr[s + 2].x = x201 - y203; z2Ptr[s + 2].y = y201 + x203; x300 = zPtr[3].x + zPtr[s + 3].x; x302 = z2Ptr[3].x + z2Ptr[s + 3].x; y300 = zPtr[3].y + zPtr[s + 3].y; y302 = z2Ptr[3].y + z2Ptr[s + 3].y; x301 = zPtr[3].x - zPtr[s + 3].x; y303 = z2Ptr[3].y - z2Ptr[s + 3].y; y301 = zPtr[3].y - zPtr[s + 3].y; x303 = z2Ptr[3].x - z2Ptr[s + 3].x; zPtr[3].x = x300 + x302; zPtr[3].y = y300 + y302; z2Ptr[3].x = x300 - x302; z2Ptr[3].y = y300 - y302; zPtr[s + 3].x = x301 + y303; zPtr[s + 3].y = y301 - x303; z2Ptr[s + 3].x = x301 - y303; z2Ptr[s + 3].y = y301 + x303; zPtr += 4*s; z2Ptr += 4*s; } if ((ilog(n) & 1) != 0) { m *= 2, l /= 2; zPtr = &z[0]; z2Ptr = &z[4*s]; x010 = e2[0].x; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x - z2Ptr[0].x; y000 = zPtr[0].y - z2Ptr[0].y; zPtr[0].x += z2Ptr[0].x; zPtr[0].y += z2Ptr[0].y; z2Ptr[0].x = x000; z2Ptr[0].y = y000; x001 = x010 * (z2Ptr[s].x + z2Ptr[s].y); y001 = x010 * (z2Ptr[s].y - z2Ptr[s].x); z2Ptr[s].x = zPtr[s].x - x001; z2Ptr[s].y = zPtr[s].y - y001; zPtr[s].x += x001; zPtr[s].y += y001; x002 = zPtr[2*s].x - z2Ptr[2*s].y; y002 = zPtr[2*s].y + z2Ptr[2*s].x; zPtr[2*s].x += z2Ptr[2*s].y; zPtr[2*s].y -= z2Ptr[2*s].x; z2Ptr[2*s].x = x002; z2Ptr[2*s].y = y002; x003 = x010 * (z2Ptr[3*s].x - z2Ptr[3*s].y); y003 = x010 * (z2Ptr[3*s].y + z2Ptr[3*s].x); z2Ptr[3*s].x = zPtr[3*s].x + x003; z2Ptr[3*s].y = zPtr[3*s].y + y003; zPtr[3*s].x -= x003; zPtr[3*s].y -= y003; x100 = zPtr[1].x - z2Ptr[1].x; y100 = zPtr[1].y - z2Ptr[1].y; zPtr[1].x += z2Ptr[1].x; zPtr[1].y += z2Ptr[1].y; z2Ptr[1].x = x100; z2Ptr[1].y = y100; x101 = x010 * (z2Ptr[s + 1].x + z2Ptr[s + 1].y); y101 = x010 * (z2Ptr[s + 1].y - z2Ptr[s + 1].x); z2Ptr[s + 1].x = zPtr[s + 1].x - x101; z2Ptr[s + 1].y = zPtr[s + 1].y - y101; zPtr[s + 1].x += x101; zPtr[s + 1].y += y101; x102 = zPtr[2*s + 1].x - z2Ptr[2*s + 1].y; y102 = zPtr[2*s + 1].y + z2Ptr[2*s + 1].x; zPtr[2*s + 1].x += z2Ptr[2*s + 1].y; zPtr[2*s + 1].y -= z2Ptr[2*s + 1].x; z2Ptr[2*s + 1].x = x102; z2Ptr[2*s + 1].y = y102; x103 = x010 * (z2Ptr[3*s + 1].x - z2Ptr[3*s + 1].y); y103 = x010 * (z2Ptr[3*s + 1].y + z2Ptr[3*s + 1].x); z2Ptr[3*s + 1].x = zPtr[3*s + 1].x + x103; z2Ptr[3*s + 1].y = zPtr[3*s + 1].y + y103; zPtr[3*s + 1].x -= x103; zPtr[3*s + 1].y -= y103; x200 = zPtr[2].x - z2Ptr[2].x; y200 = zPtr[2].y - z2Ptr[2].y; zPtr[2].x += z2Ptr[2].x; zPtr[2].y += z2Ptr[2].y; z2Ptr[2].x = x200; z2Ptr[2].y = y200; x201 = x010 * (z2Ptr[s + 2].x + z2Ptr[s + 2].y); y201 = x010 * (z2Ptr[s + 2].y - z2Ptr[s + 2].x); z2Ptr[s + 2].x = zPtr[s + 2].x - x201; z2Ptr[s + 2].y = zPtr[s + 2].y - y201; zPtr[s + 2].x += x201; zPtr[s + 2].y += y201; x202 = zPtr[2*s + 2].x - z2Ptr[2*s + 2].y; y202 = zPtr[2*s + 2].y + z2Ptr[2*s + 2].x; zPtr[2*s + 2].x += z2Ptr[2*s + 2].y; zPtr[2*s + 2].y -= z2Ptr[2*s + 2].x; z2Ptr[2*s + 2].x = x202; z2Ptr[2*s + 2].y = y202; x203 = x010 * (z2Ptr[3*s + 2].x - z2Ptr[3*s + 2].y); y203 = x010 * (z2Ptr[3*s + 2].y + z2Ptr[3*s + 2].x); z2Ptr[3*s + 2].x = zPtr[3*s + 2].x + x203; z2Ptr[3*s + 2].y = zPtr[3*s + 2].y + y203; zPtr[3*s + 2].x -= x203; zPtr[3*s + 2].y -= y203; x300 = zPtr[3].x - z2Ptr[3].x; y300 = zPtr[3].y - z2Ptr[3].y; zPtr[3].x += z2Ptr[3].x; zPtr[3].y += z2Ptr[3].y; z2Ptr[3].x = x300; z2Ptr[3].y = y300; x301 = x010 * (z2Ptr[s + 3].x + z2Ptr[s + 3].y); y301 = x010 * (z2Ptr[s + 3].y - z2Ptr[s + 3].x); z2Ptr[s + 3].x = zPtr[s + 3].x - x301; z2Ptr[s + 3].y = zPtr[s + 3].y - y301; zPtr[s + 3].x += x301; zPtr[s + 3].y += y301; x302 = zPtr[2*s + 3].x - z2Ptr[2*s + 3].y; y302 = zPtr[2*s + 3].y + z2Ptr[2*s + 3].x; zPtr[2*s + 3].x += z2Ptr[2*s + 3].y; zPtr[2*s + 3].y -= z2Ptr[2*s + 3].x; z2Ptr[2*s + 3].x = x302; z2Ptr[2*s + 3].y = y302; x303 = x010 * (z2Ptr[3*s + 3].x - z2Ptr[3*s + 3].y); y303 = x010 * (z2Ptr[3*s + 3].y + z2Ptr[3*s + 3].x); z2Ptr[3*s + 3].x = zPtr[3*s + 3].x + x303; z2Ptr[3*s + 3].y = zPtr[3*s + 3].y + y303; zPtr[3*s + 3].x -= x303; zPtr[3*s + 3].y -= y303; zPtr += 8*s; z2Ptr += 8*s; } } m *= 4, l /= 4; for (; l != 0; m *= 4, l /= 4) { zPtr = &z[0]; z2Ptr = &z[2*m]; for (i = l; i != 0; i -= 3) { x000 = zPtr[0].x + zPtr[m].x; x002 = z2Ptr[0].x + z2Ptr[m].x; y000 = zPtr[0].y + zPtr[m].y; y002 = z2Ptr[0].y + z2Ptr[m].y; x001 = zPtr[0].x - zPtr[m].x; y003 = z2Ptr[0].y - z2Ptr[m].y; y001 = zPtr[0].y - zPtr[m].y; x003 = z2Ptr[0].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; z2Ptr[0].x = x000 - x002; z2Ptr[0].y = y000 - y002; zPtr[m].x = x001 + y003; zPtr[m].y = y001 - x003; z2Ptr[m].x = x001 - y003; z2Ptr[m].y = y001 + x003; x100 = zPtr[1].x + zPtr[m + 1].x; x102 = z2Ptr[1].x + z2Ptr[m + 1].x; y100 = zPtr[1].y + zPtr[m + 1].y; y102 = z2Ptr[1].y + z2Ptr[m + 1].y; x101 = zPtr[1].x - zPtr[m + 1].x; y103 = z2Ptr[1].y - z2Ptr[m + 1].y; y101 = zPtr[1].y - zPtr[m + 1].y; x103 = z2Ptr[1].x - z2Ptr[m + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; z2Ptr[1].x = x100 - x102; z2Ptr[1].y = y100 - y102; zPtr[m + 1].x = x101 + y103; zPtr[m + 1].y = y101 - x103; z2Ptr[m + 1].x = x101 - y103; z2Ptr[m + 1].y = y101 + x103; x200 = zPtr[2].x + zPtr[m + 2].x; x202 = z2Ptr[2].x + z2Ptr[m + 2].x; y200 = zPtr[2].y + zPtr[m + 2].y; y202 = z2Ptr[2].y + z2Ptr[m + 2].y; x201 = zPtr[2].x - zPtr[m + 2].x; y203 = z2Ptr[2].y - z2Ptr[m + 2].y; y201 = zPtr[2].y - zPtr[m + 2].y; x203 = z2Ptr[2].x - z2Ptr[m + 2].x; zPtr[2].x = x200 + x202; zPtr[2].y = y200 + y202; z2Ptr[2].x = x200 - x202; z2Ptr[2].y = y200 - y202; zPtr[m + 2].x = x201 + y203; zPtr[m + 2].y = y201 - x203; z2Ptr[m + 2].x = x201 - y203; z2Ptr[m + 2].y = y201 + x203; x300 = zPtr[3].x + zPtr[m + 3].x; x302 = z2Ptr[3].x + z2Ptr[m + 3].x; y300 = zPtr[3].y + zPtr[m + 3].y; y302 = z2Ptr[3].y + z2Ptr[m + 3].y; x301 = zPtr[3].x - zPtr[m + 3].x; y303 = z2Ptr[3].y - z2Ptr[m + 3].y; y301 = zPtr[3].y - zPtr[m + 3].y; x303 = z2Ptr[3].x - z2Ptr[m + 3].x; zPtr[3].x = x300 + x302; zPtr[3].y = y300 + y302; z2Ptr[3].x = x300 - x302; z2Ptr[3].y = y300 - y302; zPtr[m + 3].x = x301 + y303; zPtr[m + 3].y = y301 - x303; z2Ptr[m + 3].x = x301 - y303; z2Ptr[m + 3].y = y301 + x303; ePtr = &e2[l]; zPtr += s; z2Ptr += s; for (j = m - s; j != 0; j -= s) { x001 = ePtr[1].x * zPtr[m].x + ePtr[1].y * zPtr[m].y; y001 = ePtr[1].x * zPtr[m].y - ePtr[1].y * zPtr[m].x; x002 = ePtr[0].x * z2Ptr[0].x + ePtr[0].y * z2Ptr[0].y; y002 = ePtr[0].x * z2Ptr[0].y - ePtr[0].y * z2Ptr[0].x; x003 = ePtr[2].x * z2Ptr[m].x + ePtr[2].y * z2Ptr[m].y; y003 = ePtr[2].x * z2Ptr[m].y - ePtr[2].y * z2Ptr[m].x; x000 = zPtr[0].x + x001; x001 = zPtr[0].x - x001; y000 = zPtr[0].y + y001; y001 = zPtr[0].y - y001; x010 = x002 + x003; x002 -= x003; y010 = y002 + y003; y002 -= y003; zPtr[0].x = x000 + x010; z2Ptr[0].x = x000 - x010; zPtr[0].y = y000 + y010; z2Ptr[0].y = y000 - y010; zPtr[m].x = x001 + y002; z2Ptr[m].x = x001 - y002; zPtr[m].y = y001 - x002; z2Ptr[m].y = y001 + x002; x101 = ePtr[1].x * zPtr[m + 1].x + ePtr[1].y * zPtr[m + 1].y; y101 = ePtr[1].x * zPtr[m + 1].y - ePtr[1].y * zPtr[m + 1].x; x102 = ePtr[0].x * z2Ptr[1].x + ePtr[0].y * z2Ptr[1].y; y102 = ePtr[0].x * z2Ptr[1].y - ePtr[0].y * z2Ptr[1].x; x103 = ePtr[2].x * z2Ptr[m + 1].x + ePtr[2].y * z2Ptr[m + 1].y; y103 = ePtr[2].x * z2Ptr[m + 1].y - ePtr[2].y * z2Ptr[m + 1].x; x100 = zPtr[1].x + x101; x101 = zPtr[1].x - x101; y100 = zPtr[1].y + y101; y101 = zPtr[1].y - y101; x110 = x102 + x103; x102 -= x103; y110 = y102 + y103; y102 -= y103; zPtr[1].x = x100 + x110; z2Ptr[1].x = x100 - x110; zPtr[1].y = y100 + y110; z2Ptr[1].y = y100 - y110; zPtr[m + 1].x = x101 + y102; z2Ptr[m + 1].x = x101 - y102; zPtr[m + 1].y = y101 - x102; z2Ptr[m + 1].y = y101 + x102; x201 = ePtr[1].x * zPtr[m + 2].x + ePtr[1].y * zPtr[m + 2].y; y201 = ePtr[1].x * zPtr[m + 2].y - ePtr[1].y * zPtr[m + 2].x; x202 = ePtr[0].x * z2Ptr[2].x + ePtr[0].y * z2Ptr[2].y; y202 = ePtr[0].x * z2Ptr[2].y - ePtr[0].y * z2Ptr[2].x; x203 = ePtr[2].x * z2Ptr[m + 2].x + ePtr[2].y * z2Ptr[m + 2].y; y203 = ePtr[2].x * z2Ptr[m + 2].y - ePtr[2].y * z2Ptr[m + 2].x; x200 = zPtr[2].x + x201; x201 = zPtr[2].x - x201; y200 = zPtr[2].y + y201; y201 = zPtr[2].y - y201; x210 = x202 + x203; x202 -= x203; y210 = y202 + y203; y202 -= y203; zPtr[2].x = x200 + x210; z2Ptr[2].x = x200 - x210; zPtr[2].y = y200 + y210; z2Ptr[2].y = y200 - y210; zPtr[m + 2].x = x201 + y202; z2Ptr[m + 2].x = x201 - y202; zPtr[m + 2].y = y201 - x202; z2Ptr[m + 2].y = y201 + x202; x301 = ePtr[1].x * zPtr[m + 3].x + ePtr[1].y * zPtr[m + 3].y; y301 = ePtr[1].x * zPtr[m + 3].y - ePtr[1].y * zPtr[m + 3].x; x302 = ePtr[0].x * z2Ptr[3].x + ePtr[0].y * z2Ptr[3].y; y302 = ePtr[0].x * z2Ptr[3].y - ePtr[0].y * z2Ptr[3].x; x303 = ePtr[2].x * z2Ptr[m + 3].x + ePtr[2].y * z2Ptr[m + 3].y; y303 = ePtr[2].x * z2Ptr[m + 3].y - ePtr[2].y * z2Ptr[m + 3].x; x300 = zPtr[3].x + x301; x301 = zPtr[3].x - x301; y300 = zPtr[3].y + y301; y301 = zPtr[3].y - y301; x310 = x302 + x303; x302 -= x303; y310 = y302 + y303; y302 -= y303; zPtr[3].x = x300 + x310; z2Ptr[3].x = x300 - x310; zPtr[3].y = y300 + y310; z2Ptr[3].y = y300 - y310; zPtr[m + 3].x = x301 + y302; z2Ptr[m + 3].x = x301 - y302; zPtr[m + 3].y = y301 - x302; z2Ptr[m + 3].y = y301 + x302; ePtr += l; zPtr += s; z2Ptr += s; } zPtr += 3*m; z2Ptr += 3*m; } } } static void FFTSfreq(Complex * CONST z, CONST unsigned int n, CONST unsigned int s) { unsigned int i, j, m, l; Complex * zPtr, * z2Ptr; CONST Complex * ePtr; double x000, y000, x001, y001, x002, y002, x003, y003, x010, y010; double x100, y100, x101, y101, x102, y102, x103, y103, x110, y110; double x200, y200, x201, y201, x202, y202, x203, y203, x210, y210; double x300, y300, x301, y301, x302, y302, x303, y303, x310, y310; m = s * n / 4, l = 3; for (; m > 2*s; m /= 4, l *= 4) { zPtr = &z[0]; z2Ptr = &z[2*m]; for (i = l; i != 0; i -= 3) { x000 = zPtr[0].x + z2Ptr[0].x; x002 = zPtr[m].x + z2Ptr[m].x; y000 = zPtr[0].y + z2Ptr[0].y; y002 = zPtr[m].y + z2Ptr[m].y; x001 = zPtr[0].x - z2Ptr[0].x; y003 = zPtr[m].y - z2Ptr[m].y; y001 = zPtr[0].y - z2Ptr[0].y; x003 = zPtr[m].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; zPtr[m].x = x000 - x002; zPtr[m].y = y000 - y002; z2Ptr[0].x = x001 + y003; z2Ptr[0].y = y001 - x003; z2Ptr[m].x = x001 - y003; z2Ptr[m].y = y001 + x003; x100 = zPtr[1].x + z2Ptr[1].x; x102 = zPtr[m + 1].x + z2Ptr[m + 1].x; y100 = zPtr[1].y + z2Ptr[1].y; y102 = zPtr[m + 1].y + z2Ptr[m + 1].y; x101 = zPtr[1].x - z2Ptr[1].x; y103 = zPtr[m + 1].y - z2Ptr[m + 1].y; y101 = zPtr[1].y - z2Ptr[1].y; x103 = zPtr[m + 1].x - z2Ptr[m + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; zPtr[m + 1].x = x100 - x102; zPtr[m + 1].y = y100 - y102; z2Ptr[1].x = x101 + y103; z2Ptr[1].y = y101 - x103; z2Ptr[m + 1].x = x101 - y103; z2Ptr[m + 1].y = y101 + x103; x200 = zPtr[2].x + z2Ptr[2].x; x202 = zPtr[m + 2].x + z2Ptr[m + 2].x; y200 = zPtr[2].y + z2Ptr[2].y; y202 = zPtr[m + 2].y + z2Ptr[m + 2].y; x201 = zPtr[2].x - z2Ptr[2].x; y203 = zPtr[m + 2].y - z2Ptr[m + 2].y; y201 = zPtr[2].y - z2Ptr[2].y; x203 = zPtr[m + 2].x - z2Ptr[m + 2].x; zPtr[2].x = x200 + x202; zPtr[2].y = y200 + y202; zPtr[m + 2].x = x200 - x202; zPtr[m + 2].y = y200 - y202; z2Ptr[2].x = x201 + y203; z2Ptr[2].y = y201 - x203; z2Ptr[m + 2].x = x201 - y203; z2Ptr[m + 2].y = y201 + x203; x300 = zPtr[3].x + z2Ptr[3].x; x302 = zPtr[m + 3].x + z2Ptr[m + 3].x; y300 = zPtr[3].y + z2Ptr[3].y; y302 = zPtr[m + 3].y + z2Ptr[m + 3].y; x301 = zPtr[3].x - z2Ptr[3].x; y303 = zPtr[m + 3].y - z2Ptr[m + 3].y; y301 = zPtr[3].y - z2Ptr[3].y; x303 = zPtr[m + 3].x - z2Ptr[m + 3].x; zPtr[3].x = x300 + x302; zPtr[3].y = y300 + y302; zPtr[m + 3].x = x300 - x302; zPtr[m + 3].y = y300 - y302; z2Ptr[3].x = x301 + y303; z2Ptr[3].y = y301 - x303; z2Ptr[m + 3].x = x301 - y303; z2Ptr[m + 3].y = y301 + x303; ePtr = &e2[l]; zPtr += s; z2Ptr += s; for (j = m - s; j != 0; j -= s) { x000 = zPtr[0].x + z2Ptr[0].x; x002 = zPtr[m].x + z2Ptr[m].x; y000 = zPtr[0].y + z2Ptr[0].y; y002 = zPtr[m].y + z2Ptr[m].y; x001 = zPtr[0].x - z2Ptr[0].x; y003 = zPtr[m].y - z2Ptr[m].y; y001 = zPtr[0].y - z2Ptr[0].y; x003 = zPtr[m].x - z2Ptr[m].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; x000 -= x002; y000 -= y002; x010 = x001 + y003; y010 = y001 - x003; x001 -= y003; y001 += x003; zPtr[m].x = ePtr[1].x * x000 + ePtr[1].y * y000; zPtr[m].y = ePtr[1].x * y000 - ePtr[1].y * x000; z2Ptr[0].x = ePtr[0].x * x010 + ePtr[0].y * y010; z2Ptr[0].y = ePtr[0].x * y010 - ePtr[0].y * x010; z2Ptr[m].x = ePtr[2].x * x001 + ePtr[2].y * y001; z2Ptr[m].y = ePtr[2].x * y001 - ePtr[2].y * x001; x100 = zPtr[1].x + z2Ptr[1].x; x102 = zPtr[m + 1].x + z2Ptr[m + 1].x; y100 = zPtr[1].y + z2Ptr[1].y; y102 = zPtr[m + 1].y + z2Ptr[m + 1].y; x101 = zPtr[1].x - z2Ptr[1].x; y103 = zPtr[m + 1].y - z2Ptr[m + 1].y; y101 = zPtr[1].y - z2Ptr[1].y; x103 = zPtr[m + 1].x - z2Ptr[m + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; x100 -= x102; y100 -= y102; x110 = x101 + y103; y110 = y101 - x103; x101 -= y103; y101 += x103; zPtr[m + 1].x = ePtr[1].x * x100 + ePtr[1].y * y100; zPtr[m + 1].y = ePtr[1].x * y100 - ePtr[1].y * x100; z2Ptr[1].x = ePtr[0].x * x110 + ePtr[0].y * y110; z2Ptr[1].y = ePtr[0].x * y110 - ePtr[0].y * x110; z2Ptr[m + 1].x = ePtr[2].x * x101 + ePtr[2].y * y101; z2Ptr[m + 1].y = ePtr[2].x * y101 - ePtr[2].y * x101; x200 = zPtr[2].x + z2Ptr[2].x; x202 = zPtr[m + 2].x + z2Ptr[m + 2].x; y200 = zPtr[2].y + z2Ptr[2].y; y202 = zPtr[m + 2].y + z2Ptr[m + 2].y; x201 = zPtr[2].x - z2Ptr[2].x; y203 = zPtr[m + 2].y - z2Ptr[m + 2].y; y201 = zPtr[2].y - z2Ptr[2].y; x203 = zPtr[m + 2].x - z2Ptr[m + 2].x; zPtr[2].x = x200 + x202; zPtr[2].y = y200 + y202; x200 -= x202; y200 -= y202; x210 = x201 + y203; y210 = y201 - x203; x201 -= y203; y201 += x203; zPtr[m + 2].x = ePtr[1].x * x200 + ePtr[1].y * y200; zPtr[m + 2].y = ePtr[1].x * y200 - ePtr[1].y * x200; z2Ptr[2].x = ePtr[0].x * x210 + ePtr[0].y * y210; z2Ptr[2].y = ePtr[0].x * y210 - ePtr[0].y * x210; z2Ptr[m + 2].x = ePtr[2].x * x201 + ePtr[2].y * y201; z2Ptr[m + 2].y = ePtr[2].x * y201 - ePtr[2].y * x201; x300 = zPtr[3].x + z2Ptr[3].x; x302 = zPtr[m + 3].x + z2Ptr[m + 3].x; y300 = zPtr[3].y + z2Ptr[3].y; y302 = zPtr[m + 3].y + z2Ptr[m + 3].y; x301 = zPtr[3].x - z2Ptr[3].x; y303 = zPtr[m + 3].y - z2Ptr[m + 3].y; y301 = zPtr[3].y - z2Ptr[3].y; x303 = zPtr[m + 3].x - z2Ptr[m + 3].x; zPtr[3].x = x300 + x302; zPtr[3].y = y300 + y302; x300 -= x302; y300 -= y302; x310 = x301 + y303; y310 = y301 - x303; x301 -= y303; y301 += x303; zPtr[m + 3].x = ePtr[1].x * x300 + ePtr[1].y * y300; zPtr[m + 3].y = ePtr[1].x * y300 - ePtr[1].y * x300; z2Ptr[3].x = ePtr[0].x * x310 + ePtr[0].y * y310; z2Ptr[3].y = ePtr[0].x * y310 - ePtr[0].y * x310; z2Ptr[m + 3].x = ePtr[2].x * x301 + ePtr[2].y * y301; z2Ptr[m + 3].y = ePtr[2].x * y301 - ePtr[2].y * x301; ePtr += l; zPtr += s; z2Ptr += s; } zPtr += 3*m; z2Ptr += 3*m; } } if (m == 2*s) { zPtr = &z[0]; z2Ptr = &z[4*s]; x010 = e0[0].x; for (i = n; i != 0; i -= 8) { x000 = zPtr[0].x - z2Ptr[0].x; y000 = zPtr[0].y - z2Ptr[0].y; zPtr[0].x += z2Ptr[0].x; zPtr[0].y += z2Ptr[0].y; z2Ptr[0].x = x000; z2Ptr[0].y = y000; x001 = x010 * (zPtr[s].x - z2Ptr[s].x); y001 = x010 * (zPtr[s].y - z2Ptr[s].y); zPtr[s].x += z2Ptr[s].x; zPtr[s].y += z2Ptr[s].y; z2Ptr[s].x = x001 + y001; z2Ptr[s].y = y001 - x001; x002 = z2Ptr[2*s].x - zPtr[2*s].x; y002 = zPtr[2*s].y - z2Ptr[2*s].y; zPtr[2*s].x += z2Ptr[2*s].x; zPtr[2*s].y += z2Ptr[2*s].y; z2Ptr[2*s].x = y002; z2Ptr[2*s].y = x002; x003 = x010 * (z2Ptr[3*s].x - zPtr[3*s].x); y003 = x010 * (z2Ptr[3*s].y - zPtr[3*s].y); zPtr[3*s].x += z2Ptr[3*s].x; zPtr[3*s].y += z2Ptr[3*s].y; z2Ptr[3*s].x = x003 - y003; z2Ptr[3*s].y = y003 + x003; x100 = zPtr[1].x - z2Ptr[1].x; y100 = zPtr[1].y - z2Ptr[1].y; zPtr[1].x += z2Ptr[1].x; zPtr[1].y += z2Ptr[1].y; z2Ptr[1].x = x100; z2Ptr[1].y = y100; x101 = x010 * (zPtr[s + 1].x - z2Ptr[s + 1].x); y101 = x010 * (zPtr[s + 1].y - z2Ptr[s + 1].y); zPtr[s + 1].x += z2Ptr[s + 1].x; zPtr[s + 1].y += z2Ptr[s + 1].y; z2Ptr[s + 1].x = x101 + y101; z2Ptr[s + 1].y = y101 - x101; x102 = z2Ptr[2*s + 1].x - zPtr[2*s + 1].x; y102 = zPtr[2*s + 1].y - z2Ptr[2*s + 1].y; zPtr[2*s + 1].x += z2Ptr[2*s + 1].x; zPtr[2*s + 1].y += z2Ptr[2*s + 1].y; z2Ptr[2*s + 1].x = y102; z2Ptr[2*s + 1].y = x102; x103 = x010 * (z2Ptr[3*s + 1].x - zPtr[3*s + 1].x); y103 = x010 * (z2Ptr[3*s + 1].y - zPtr[3*s + 1].y); zPtr[3*s + 1].x += z2Ptr[3*s + 1].x; zPtr[3*s + 1].y += z2Ptr[3*s + 1].y; z2Ptr[3*s + 1].x = x103 - y103; z2Ptr[3*s + 1].y = y103 + x103; x200 = zPtr[2].x - z2Ptr[2].x; y200 = zPtr[2].y - z2Ptr[2].y; zPtr[2].x += z2Ptr[2].x; zPtr[2].y += z2Ptr[2].y; z2Ptr[2].x = x200; z2Ptr[2].y = y200; x201 = x010 * (zPtr[s + 2].x - z2Ptr[s + 2].x); y201 = x010 * (zPtr[s + 2].y - z2Ptr[s + 2].y); zPtr[s + 2].x += z2Ptr[s + 2].x; zPtr[s + 2].y += z2Ptr[s + 2].y; z2Ptr[s + 2].x = x201 + y201; z2Ptr[s + 2].y = y201 - x201; x202 = z2Ptr[2*s + 2].x - zPtr[2*s + 2].x; y202 = zPtr[2*s + 2].y - z2Ptr[2*s + 2].y; zPtr[2*s + 2].x += z2Ptr[2*s + 2].x; zPtr[2*s + 2].y += z2Ptr[2*s + 2].y; z2Ptr[2*s + 2].x = y202; z2Ptr[2*s + 2].y = x202; x203 = x010 * (z2Ptr[3*s + 2].x - zPtr[3*s + 2].x); y203 = x010 * (z2Ptr[3*s + 2].y - zPtr[3*s + 2].y); zPtr[3*s + 2].x += z2Ptr[3*s + 2].x; zPtr[3*s + 2].y += z2Ptr[3*s + 2].y; z2Ptr[3*s + 2].x = x203 - y203; z2Ptr[3*s + 2].y = y203 + x203; x300 = zPtr[3].x - z2Ptr[3].x; y300 = zPtr[3].y - z2Ptr[3].y; zPtr[3].x += z2Ptr[3].x; zPtr[3].y += z2Ptr[3].y; z2Ptr[3].x = x300; z2Ptr[3].y = y300; x301 = x010 * (zPtr[s + 3].x - z2Ptr[s + 3].x); y301 = x010 * (zPtr[s + 3].y - z2Ptr[s + 3].y); zPtr[s + 3].x += z2Ptr[s + 3].x; zPtr[s + 3].y += z2Ptr[s + 3].y; z2Ptr[s + 3].x = x301 + y301; z2Ptr[s + 3].y = y301 - x301; x302 = z2Ptr[2*s + 3].x - zPtr[2*s + 3].x; y302 = zPtr[2*s + 3].y - z2Ptr[2*s + 3].y; zPtr[2*s + 3].x += z2Ptr[2*s + 3].x; zPtr[2*s + 3].y += z2Ptr[2*s + 3].y; z2Ptr[2*s + 3].x = y302; z2Ptr[2*s + 3].y = x302; x303 = x010 * (z2Ptr[3*s + 3].x - zPtr[3*s + 3].x); y303 = x010 * (z2Ptr[3*s + 3].y - zPtr[3*s + 3].y); zPtr[3*s + 3].x += z2Ptr[3*s + 3].x; zPtr[3*s + 3].y += z2Ptr[3*s + 3].y; z2Ptr[3*s + 3].x = x303 - y303; z2Ptr[3*s + 3].y = y303 + x303; zPtr += 8*s; z2Ptr += 8*s; } } zPtr = &z[0]; z2Ptr = &z[2*s]; for (i = n; i != 0; i -= 4) { x000 = zPtr[0].x + z2Ptr[0].x; x002 = zPtr[s].x + z2Ptr[s].x; y000 = zPtr[0].y + z2Ptr[0].y; y002 = zPtr[s].y + z2Ptr[s].y; x001 = zPtr[0].x - z2Ptr[0].x; y003 = zPtr[s].y - z2Ptr[s].y; y001 = zPtr[0].y - z2Ptr[0].y; x003 = zPtr[s].x - z2Ptr[s].x; zPtr[0].x = x000 + x002; zPtr[0].y = y000 + y002; zPtr[s].x = x000 - x002; zPtr[s].y = y000 - y002; z2Ptr[0].x = x001 + y003; z2Ptr[0].y = y001 - x003; z2Ptr[s].x = x001 - y003; z2Ptr[s].y = y001 + x003; x100 = zPtr[1].x + z2Ptr[1].x; x102 = zPtr[s + 1].x + z2Ptr[s + 1].x; y100 = zPtr[1].y + z2Ptr[1].y; y102 = zPtr[s + 1].y + z2Ptr[s + 1].y; x101 = zPtr[1].x - z2Ptr[1].x; y103 = zPtr[s + 1].y - z2Ptr[s + 1].y; y101 = zPtr[1].y - z2Ptr[1].y; x103 = zPtr[s + 1].x - z2Ptr[s + 1].x; zPtr[1].x = x100 + x102; zPtr[1].y = y100 + y102; zPtr[s + 1].x = x100 - x102; zPtr[s + 1].y = y100 - y102; z2Ptr[1].x = x101 + y103; z2Ptr[1].y = y101 - x103; z2Ptr[s + 1].x = x101 - y103; z2Ptr[s + 1].y = y101 + x103; x200 = zPtr[2].x + z2Ptr[2].x; x202 = zPtr[s + 2].x + z2Ptr[s + 2].x; y200 = zPtr[2].y + z2Ptr[2].y; y202 = zPtr[s + 2].y + z2Ptr[s + 2].y; x201 = zPtr[2].x - z2Ptr[2].x; y203 = zPtr[s + 2].y - z2Ptr[s + 2].y; y201 = zPtr[2].y - z2Ptr[2].y; x203 = zPtr[s + 2].x - z2Ptr[s + 2].x; zPtr[2].x = x200 + x202; zPtr[2].y = y200 + y202; zPtr[s + 2].x = x200 - x202; zPtr[s + 2].y = y200 - y202; z2Ptr[2].x = x201 + y203; z2Ptr[2].y = y201 - x203; z2Ptr[s + 2].x = x201 - y203; z2Ptr[s + 2].y = y201 + x203; x300 = zPtr[3].x + z2Ptr[3].x; x302 = zPtr[s + 3].x + z2Ptr[s + 3].x; y300 = zPtr[3].y + z2Ptr[3].y; y302 = zPtr[s + 3].y + z2Ptr[s + 3].y; x301 = zPtr[3].x - z2Ptr[3].x; y303 = zPtr[s + 3].y - z2Ptr[s + 3].y; y301 = zPtr[3].y - z2Ptr[3].y; x303 = zPtr[s + 3].x - z2Ptr[s + 3].x; zPtr[3].x = x300 + x302; zPtr[3].y = y300 + y302; zPtr[s + 3].x = x300 - x302; zPtr[s + 3].y = y300 - y302; z2Ptr[3].x = x301 + y303; z2Ptr[3].y = y301 - x303; z2Ptr[s + 3].x = x301 - y303; z2Ptr[s + 3].y = y301 + x303; zPtr += 4*s; z2Ptr += 4*s; } } static void fourStepSquare(Complex * CONST z, CONST unsigned int n1, CONST unsigned int n2) { unsigned int i, j; Complex * zPtr; zPtr = z; for (i = n1 / 4; i != 0; --i) { FFTSfreq(zPtr, n2, n1 + GAP); zPtr += 4; } zPtr = z; for (i = n2, j = 0; i != 0; --i, j += BLK_SIZE + n1 / BLK_SIZE) { FFTsquareFFT(zPtr, n1, j); zPtr += n1 + GAP; } zPtr = z; for (i = n1 / 4; i != 0; --i) { FFTStime(zPtr, n2, n1 + GAP); zPtr += 4; } } static void convertInitGFN(CONST unsigned int n1, CONST unsigned int n2) { unsigned int i; CONST double Pi_Twon = -M_PI / (2 * n1 * n2); CONST double Pi_Twon2 = -M_PI / (2 * n2); for (i = 0; i != n1; ++i) { e1[i].x = cos(Pi_Twon * i); e1[i].y = sin(Pi_Twon * i); } for (i = 0; i != n2; ++i) { e1[n1 + i].x = cos(Pi_Twon2 * i); e1[n1 + i].y = sin(Pi_Twon2 * i); } } static Complex * FFTinitGFN(CONST UInt32 m, CONST double x, unsigned int * CONST rn1, unsigned int * CONST rn2) { unsigned int n1, n2, l; Complex * z; if (m / 2 <= FOUR_STEP_LIMIT) { n1 = m / 2; n2 = 1; FFTinit(n1, 0); } else { l = ilog(m / 2); n1 = 1 << (l - (l - 1) / 2); n2 = 1 << ((l - 1) / 2); FFTinit(n1, n2); FFTSinit(n2); } convertInitGFN(n1, n2); z = (Complex *)myAlloc((n1 + GAP) * n2 * sizeof(Complex)); z[0].x = x; z[0].y = 0; for (l = 1; l < (n1 + GAP) * n2; ++l) z[l].x = z[l].y = 0; maxErr = 0; *rn1 = n1; *rn2 = n2; return z; } static void FFTfree(CONST unsigned int n2) { if (n2 > 1) { myFree(e3); } } static void FFTsquareGFN(Complex * CONST z, CONST unsigned int n1, CONST unsigned int n2) { if (n2 == 1) FFTsquareFFT(z, n1, 0); else fourStepSquare(z, n1, n2); } static void FFTnextStepGFN(Complex * CONST z, CONST Int32 base, CONST double t, CONST unsigned int n1, CONST unsigned int n2) { unsigned int i, j; Complex * zPtr; CONST Complex * eiPtr, * ejPtr; double c, s, x, y, xi, yi, f1, f2; CONST double invBase = 1.0 / base; float errx, erry; f1 = f2 = 0; zPtr = z; eiPtr = e1; for (i = n1; i != 0; --i) { x = f1 + t * (eiPtr->x * zPtr->x + eiPtr->y * zPtr->y); y = f2 + t * (eiPtr->y * zPtr->x - eiPtr->x * zPtr->y); xi = round(x); yi = round(y); errx = (float)fabs(x - xi); if (errx > maxErr) maxErr = errx; erry = (float)fabs(y - yi); if (erry > maxErr) maxErr = erry; x = xi * invBase; y = yi * invBase; f1 = round(x); f2 = round(y); xi -= f1 * base; yi -= f2 * base; zPtr->x = eiPtr->x * xi + eiPtr->y * yi; zPtr->y = eiPtr->x * yi - eiPtr->y * xi; ++eiPtr; ++zPtr; } zPtr += GAP; ejPtr = &e1[n1 + 1]; for (j = n2 - 1; j != 0; --j) { c = ejPtr->x; s = ejPtr->y; eiPtr = &e1[1]; for (i = n1; i != 0; --i) { x = f1 + t * (c * zPtr->x + s * zPtr->y); y = f2 + t * (s * zPtr->x - c * zPtr->y); xi = round(x); yi = round(y); errx = (float)fabs(x - xi); if (errx > maxErr) maxErr = errx; erry = (float)fabs(y - yi); if (erry > maxErr) maxErr = erry; x = xi * invBase; y = yi * invBase; f1 = round(x); f2 = round(y); xi -= f1 * base; yi -= f2 * base; zPtr->x = c * xi + s * yi; zPtr->y = c * yi - s * xi; ++zPtr; c = ejPtr->x * eiPtr->x - ejPtr->y * eiPtr->y; s = ejPtr->x * eiPtr->y + ejPtr->y * eiPtr->x; ++eiPtr; } zPtr += GAP; ++ejPtr; } f2 = -f2; /* a[n] = -a[0] */ zPtr = z; eiPtr = e1; for (i = 2; i != 0; --i) { x = f2 + eiPtr->x * zPtr->x - eiPtr->y * zPtr->y; y = f1 + eiPtr->x * zPtr->y + eiPtr->y * zPtr->x; xi = round(x); yi = round(y); errx = (float)fabs(x - xi); if (errx > maxErr) maxErr = errx; erry = (float)fabs(y - yi); if (erry > maxErr) maxErr = erry; x = xi * invBase; y = yi * invBase; f2 = round(x); f1 = round(y); xi -= f2 * base; yi -= f1 * base; zPtr->x = eiPtr->x * xi + eiPtr->y * yi; zPtr->y = eiPtr->x * yi - eiPtr->y * xi; ++eiPtr; ++zPtr; } x = eiPtr->x * zPtr->x - eiPtr->y * zPtr->y; y = eiPtr->x * zPtr->y + eiPtr->y * zPtr->x; xi = round(x); yi = round(y); errx = (float)fabs(x - xi); if (errx > maxErr) maxErr = errx; erry = (float)fabs(y - yi); if (erry > maxErr) maxErr = erry; f2 += xi; f1 += yi; zPtr->x = eiPtr->x * f2 + eiPtr->y * f1; zPtr->y = eiPtr->x * f1 - eiPtr->y * f2; } static void FFTgetResult(CONST Complex * CONST z, CONST Int32 base, Int32 * CONST a, CONST unsigned int n1, CONST unsigned int n2, float * CONST maxFFTerror) { unsigned int i, j, k; CONST Complex * zPtr, * eiPtr, * ejPtr; CONST unsigned int n = n1 * n2; double c, s, x, y, xi, yi, f1, f2; CONST double t = 1.0 / n, invBase = 1.0 / base; float errx, erry; k = 0; f1 = f2 = 0; zPtr = z; ejPtr = &e1[n1]; for (j = n2; j != 0; --j) { eiPtr = e1; for (i = n1; i != 0; --i) { c = ejPtr->x * eiPtr->x - ejPtr->y * eiPtr->y; s = ejPtr->x * eiPtr->y + ejPtr->y * eiPtr->x; ++eiPtr; x = f1 + t * (c * zPtr->x + s * zPtr->y); y = f2 + t * (s * zPtr->x - c * zPtr->y); ++zPtr; xi = round(x); yi = round(y); errx = (float)fabs(x - xi); if (errx > maxErr) maxErr = errx; erry = (float)fabs(y - yi); if (erry > maxErr) maxErr = erry; x = xi * invBase; y = yi * invBase; f1 = round(x); f2 = round(y); x = xi - f1 * base; y = yi - f2 * base; xi = round(x); yi = round(y); a[k] = (Int32)xi; a[k + n] = (Int32)yi; ++k; } zPtr += GAP; ++ejPtr; } f2 = -f2; /* a[n] = -a[0] */ for (k = 0; k != 9; ++k) { yi = f2 + a[k]; xi = f1 + a[k + n]; y = yi * invBase; x = xi * invBase; f2 = round(y); f1 = round(x); a[k] = (Int32)(yi - f2 * base); a[k + n] = (Int32)(xi - f1 * base); } a[9] += (Int32)f2; a[9 + n] += (Int32)f1; *maxFFTerror = maxErr; } static int isOne(Int32 * CONST a, CONST unsigned int len, CONST Int32 base) { unsigned int i; Int32 f, r; int rt; do { f = 0; for (i = 0; i != len; ++i) { f += a[i]; r = f % base; if (r < 0) r += base; a[i] = r; f -= r; f /= base; } a[0] -= f; /* a[n] = -a[0] */ } while (f != 0); rt = 1; for (i = 1; i != len; ++i) { if (a[i] != 0) { rt = 0; break; } } if (a[0] != 1) rt = 0; return rt; } static int check(CONST Int32 b, CONST UInt32 m) { UInt32 j; unsigned int i, Nlen, bt, n1, n2; UInt32 * Na, * a; Int32 * Ra; Complex * z; float maxError; int r; CONST double t1 = 2.0 / m, t2 = 2 * t1; FILE * fp; char str[132]; printf("Testing %d^%u+1...\r", b, m); Nlen = 1; Na = (UInt32 *)myAlloc(1 * sizeof(UInt32)); Na[0] = (UInt32)b; for (j = m; j != 1; j /= 2) { a = (UInt32 *)myAlloc(2 * Nlen * sizeof(UInt32)); for (i = 0; i != Nlen; ++i) a[i] = 0; for (i = 0; i != Nlen; ++i) a[i + Nlen] = mul_1_add_n(&a[i], Na[i], Na, Nlen); myFree(Na); Na = a; Nlen *= 2; if (Na[Nlen - 1] == 0) --Nlen; } z = FFTinitGFN(m, 2.0, &n1, &n2); for (i = lg(Na, Nlen) - 1; i != 0; --i) { FFTsquareGFN(z, n1, n2); bt = Na[i / (8 * sizeof(UInt32))] >> (i % (8 * sizeof(UInt32))) & 1; FFTnextStepGFN(z, b, (bt == 0) ? t1 : t2, n1, n2); } FFTsquareGFN(z, n1, n2); myFree(Na); Ra = (Int32 *)myAlloc(m * sizeof(UInt32)); FFTgetResult(z, b, Ra, n1, n2, &maxError); myFree(z); FFTfree(n2); r = isOne(Ra, m, b); myFree(Ra); if (r == 1) { sprintf(str, "%d^%u+1 is a probable prime. (err = %.2e)\n", b, m, maxError); } else { if (2*log((double)b)/log(2.0) + ilog(m) + log((double)ilog(m))/log(2.0) < 53) sprintf(str, "%d^%u+1 is composite (err = %.2e).\n", b, m, maxError); else sprintf(str, "%d^%u+1 is a probable composite (err = %.2e).\n", b, m, maxError); } printf(str); fp = fopen("genefer.log", "a"); if (fp == NULL) { fprintf(stderr, "Cannot write results to genefer.log\n"); exit(1); } fprintf(fp, str); fclose(fp); return r; } static void test() { check(100050, 16); check(100014, 32); check(100234, 64); check(100032, 128); #if (B_MAX == 35000000) check(6631340, 256); check(5386302, 512); check(4376378, 1024); check(3563456, 2048); check(2891508, 4096); check(2409664, 8192); check(1915122, 16384); check(1519380, 32768); check(1266062, 65536); #else check(5684328, 256); check(4619000, 512); check(3752220, 1024); check(3066672, 2048); check(2485064, 4096); check(2030234, 8192); check(1651902, 16384); check(1277444, 32768); check(857678, 65536); #endif } static double benchGFN(CONST Int32 b, CONST UInt32 n, CONST unsigned int m, float * CONST maxFFTerror) { unsigned int i, n1, n2; clock_t clock0; double et; Complex * z; Int32 * Ra; CONST double t1 = 2.0 / n, t2 = 2 * t1; z = FFTinitGFN(n, (double)(b - 2), &n1, &n2); for (i = 0; i <= ilog(n); ++i) { FFTsquareGFN(z, n1, n2); FFTnextStepGFN(z, b, t1, n1, n2); } clock0 = clock(); for (i = 0; i < m; ++i) { FFTsquareGFN(z, n1, n2); FFTnextStepGFN(z, b, (i % 2 == 0) ? t1 : t2, n1, n2); } et = (clock() - clock0) / (double)CLOCKS_PER_SEC; FFTsquareGFN(z, n1, n2); Ra = (Int32 *)myAlloc(n * sizeof(UInt32)); FFTgetResult(z, b, Ra, n1, n2, maxFFTerror); myFree(z); FFTfree(n2); myFree(Ra); return et; } static void runBench() { unsigned int i, m, dgts; Int32 b; UInt32 n; double et, etn; float maxError; FILE * fp; fp = fopen("genefer.bench", "w"); printf("Generalized Fermat Number Bench\n"); if (fp != NULL) fprintf(fp, "Generalized Fermat Number Bench\n"); for (i = 0; i < 14; ++i) { n = 1 << (i + 8); m = 1 << (16 - i); b = (Int32)(B_MAX / pow(n, 0.3)) & 0xfffffffe; et = benchGFN(b, n, m, &maxError); etn = et / m; dgts = (unsigned int)(n * log((double)b) / log(10.0)) + 1; if (etn >= 1) { printf("%d^%u+1\tTime: %.03g s/mul.\tErr: %.2e\t%u digits\n", b, n, etn, maxError, dgts); if (fp != NULL) fprintf(fp, "%d^%u+1\tTime: %.03g s/mul.\tErr: %.2e\t%u digits\n", b, n, etn, maxError, dgts); } else if (etn >= 1e-3) { printf("%d^%u+1\tTime: %.03g ms/mul.\tErr: %.2e\t%u digits\n", b, n, etn * 1e3, maxError, dgts); if (fp != NULL) fprintf(fp, "%d^%u+1\tTime: %.03g ms/mul.\tErr: %.2e\t%u digits\n", b, n, etn * 1e3, maxError, dgts); } else { printf("%d^%u+1\tTime: %.03g us/mul.\tErr: %.2e\t%u digits\n", b, n, etn * 1e6, maxError, dgts); if (fp != NULL) fprintf(fp, "%d^%u+1\tTime: %.03g us/mul.\tErr: %.2e\t%u digits\n", b, n, etn * 1e6, maxError, dgts); } } if (fp != NULL) fclose(fp); } int main(int argc, char * argv[]) { int r, i, currentLine; UInt32 N; Int32 b; FILE * fp, * fpi; char str[132]; #if (defined(_MSC_VER) && defined(_M_IX86)) _control87(_PC_64, _MCW_PC); _control87(_RC_NEAR, _MCW_RC); #endif printf("GeneFer 1.3 (%s) Copyright (C) 2001-2002, Yves GALLOT\n", VERSION); printf("A program for finding large probable generalized Fermat primes.\n\n"); printf("Usage: GeneFer -b run bench\n"); printf(" GeneFer -t run test\n"); printf(" GeneFer test \n"); printf(" GeneFer use interactive mode\n\n"); if (argc == 1) { printf(" 1. bench\n 2. test\n 3. normal\n"); fgets(str, 80, stdin); r = atoi(str); if (r == 1) { runBench(); return 0; } if (r == 2) { test(); return 0; } printf("N: "); fgets(str, 80, stdin); N = lpt(atoi(str)); if (N < 16) N = 16; printf("b: "); fgets(str, 80, stdin); b = atoi(str) & 0xfffffffe; check(b, N); return 0; } if (strcmp(argv[1], "-b") == 0) { runBench(); return 0; } if (strcmp(argv[1], "-t") == 0) { test(); return 0; } fp = fopen(argv[1], "r"); if (fp == NULL) { fprintf(stderr, "Cannot open '%s'\n", argv[1]); return 1; } fpi = fopen("genefer.ini", "r"); if (fpi != NULL) { fgets(str, 132, fpi); currentLine = atoi(str); fclose(fpi); printf("Continue test of file '%s' at line %d.\n", argv[1], currentLine); } else { currentLine = 0; printf("Start test of file '%s'.\n", argv[1]); } for (i = 0; i < currentLine; ++i) fgets(str, 132, fp); while (fgets(str, 132, fp) != NULL) { if (sscanf(str, "%u%u", &b, &N) == 2) { if ((b != 0) && (b <= 0x7fffffff) && (N >= 16)) { check(b, N); } } ++currentLine; fpi = fopen("genefer.ini", "w"); if (fpi != NULL) { fprintf(fpi, "%d\n", currentLine); fclose(fpi); } } fclose(fp); return 0; }