main.c Ver 0.7

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long U64;
typedef unsigned short gtype;

typedef struct filter_data
{
    unsigned int val;
    U64 amin;
    U64 bmin;
    gtype gmin;
} filter_data;

typedef struct leap_data
{
    // Transform x = (x>>LEAP_BITS)*a+b
    U64 a, b;
    // Threshold to switch 64+48 bit op.
    U64 threshold;
    gtype g;
} leap_data;

extern void init_cutoff_tab(int maxbits, int *tabsize, filter_data **co_tab);

extern void init_leap_table(int bits, leap_data *leap_tab);

// For Leap calculation
#define LEAP_BITS 10
#define LEAP_N (1<<LEAP_BITS)
#define LEAP_MASK (LEAP_N-1)
leap_data leap_tab[LEAP_N];

// Cutoff
#define CUTOFF_BITS 22
#define CUTOFF_BLOCK ((1<<CUTOFF_BITS)*3)
filter_data *cutoff_tab;
int cutoff_tabsize;

// For cache
#define GTABLE_MAX ((1<<20)*16)
gtype *gtable;

// Global
gtype gmax = 1;
U64 kmax = 1;
gtype gmax_n4=9999;
gtype gmax_n2=1;
U64 kmax_n4;

U64 totalskip=0;
U64 allnum=0;
U64 astep=0;
U64 loongstep=0;

// For 48+64 bit operation
#define LOONG_MASK ((1LL<<48)-1)
#define LOONG_BITS 48
/*   64  48  32  16  01
 *XH ******************
 *               64  48  32  16  01
 *XL             MARGIN************
 */
// x += b, x *=b operations
// b < 1^16 is assumed
#define LOONG_ADD(xh,xl,b) {xl+=b;xh += xl>>LOONG_BITS;xl&=LOONG_MASK;}
#define LOONG_MUL(xh,xl,b) {xl*=b;xh = xh*b + (xl>>LOONG_BITS);xl&=LOONG_MASK;}

void handle_loong(U64 *x, gtype *g)
{
    int g0 = *g;
    U64 xh, xl;
    U64 x64 = *x;
    xl = x64&LOONG_MASK;
    xh = x64 >>LOONG_BITS;

    while ( xh != 0)
    {
	leap_data *l = &(leap_tab[xl & LEAP_MASK]);
	U64 a = l->a;
	U64 b = l->b;
	xl >>=LEAP_BITS;
	xl |= (xh&LEAP_MASK) << (LOONG_BITS-LEAP_BITS);
	xh >>= LEAP_BITS;
        LOONG_MUL(xh, xl, a);
        LOONG_ADD(xh, xl, b);
	g0 += l->g;
	loongstep++;
    }
    *x = xl;
    *g = g0;
}

void do_check(U64 k, filter_data *f)
{
    int i;
    #define H_BUFSIZE (10000)
    static U64 x_log[H_BUFSIZE];
    static gtype g_log[H_BUFSIZE];

    static gtype g=0;
    static int n_log = 0;
    U64 x = k;

    allnum++;
    g=0;
#if 1
    {
        U64 m = x>>CUTOFF_BITS;
        x = m*(f->amin)+(f->bmin);
        g = f->gmin;

	if (x<kmax_n4 && g+gmax_n4<gmax)
	{
	    totalskip++;
	    return;
	}
    }
#endif

   n_log = 0;

   while(1)
    {
        if (x <= GTABLE_MAX)
        {
    	    if (gtable[x-1] != 0) break;
	    x_log[n_log] = x;
	    g_log[n_log] = g;
	    n_log++;
            if (x & 1) x = x*3+1;
	    else x >>=1;
	    g++;
	}
	else
	{
	    //Hot Spot
	    U64 m = x>>LEAP_BITS;
	    leap_data *l = &(leap_tab[x & LEAP_MASK]);
            if (m >= l->threshold) handle_loong(&x, &g);
	    else
	    {
	        x = m*(l->a)+(l->b);
	        g += l->g;
	        astep++;
	    }
        }
    }
    g += gtable[x-1];

    for(i=0;i<n_log;i++)
    {
        gtable[x_log[i]-1] = g-g_log[i];
    }

    if (g>gmax)
    {
        gmax = g;
        kmax = k;
        printf("g(%lld)=%d\n",kmax,gmax);
    }
    else
    if(g==gmax && k<kmax)
    {
        kmax = k;
        printf("g(%lld)=%d\n",kmax,gmax);
    }
}

int  main(int argc, char **argv)
{
    int i;
    U64 k, n0;
    U64 n = atoll(argv[1]);
    U64 n2;

    if(argv[1][0]=='2' && argv[1][1]=='^')
        n = 1LL<<atoll(&argv[1][2]);

    if (argc>=3)
    {
	gmax = gmax_n2 = (gtype)atoi(argv[2]);
	gmax_n4 = (gtype)atoi(argv[3]);
	kmax_n4 = n/4;
    }

    printf("Memory Usage:\n");
    i=sizeof(leap_tab);
    printf("Leap Table: %f Kb\n",i*1e-3);
    i=GTABLE_MAX*sizeof(gtype);
    printf("Cache Table: %f Mb\n",i*1e-6);

    init_leap_table(LEAP_BITS, leap_tab);

    init_cutoff_tab(CUTOFF_BITS, &cutoff_tabsize, &cutoff_tab);

    gtable = (gtype *)malloc(sizeof(gtype)*GTABLE_MAX);
    gtable[0] = 1;
    for(i=1;i<GTABLE_MAX;i++)
	gtable[i] = 0;


    n2= n-n%CUTOFF_BLOCK;

    for (i=0;i<cutoff_tabsize;i++)
    {
	filter_data *f = &(cutoff_tab[i]);
	k = n2 + f->val;
	if (k>n) break;
        do_check(k, f);
    }
    n2 -= CUTOFF_BLOCK;

    n0=n/2;
    n0 -= n0%CUTOFF_BLOCK;
    // Main
    for(k=n2;k>=n0;k-=CUTOFF_BLOCK)
    {
	for (i=0;i<cutoff_tabsize;i++)
	{
	    filter_data *f = &(cutoff_tab[i]);
            do_check(k + (f->val), f);
	}

	if(k%(n>>8)==0)
	{
	    // Print stats
            printf("#%f Skip Rt %f ", k*1.0/n, totalskip*1.0/allnum);
            printf("Ave.Step %f Long %f\n",
		    astep*1.0/(allnum-totalskip),
		    loongstep*1.0/(allnum-totalskip));
	    totalskip = astep = allnum = loongstep = 0;
	}
    }

    printf("Final Result: ");
    if (gmax==gmax_n2+1)
    {
        printf("g(*)=%d (Double)\n",gmax);
    }
    else
    printf("g(%lld)=%d\n",kmax,gmax);

    return 0;
}