Codebase list librandom123 / d562df95-02c3-4c2f-be76-2e27e51e08d4/upstream/1.14.0+git20220117.2.9545ff6+dfsg tests / time_boxmuller.cpp
d562df95-02c3-4c2f-be76-2e27e51e08d4/upstream/1.14.0+git20220117.2.9545ff6+dfsg

Tree @d562df95-02c3-4c2f-be76-2e27e51e08d4/upstream/1.14.0+git20220117.2.9545ff6+dfsg (Download .tar.gz)

time_boxmuller.cpp @d562df95-02c3-4c2f-be76-2e27e51e08d4/upstream/1.14.0+git20220117.2.9545ff6+dfsgraw · history · blame

// Test for boxmuller.h on CPU
#include <Random123/philox.h>
#include <Random123/threefry.h>
#include <Random123/boxmuller.hpp>
#include "util.h"   // for timer()

#if __GNUC__>=7
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif

typedef r123::Philox4x32 CBRNGF;
#if R123_USE_64BIT
typedef r123::Threefry2x64 CBRNGD;
#endif

const char *progname = "time_boxmuller";

// Each call to boxmuller() returns a pair of values in the .x and .y
// members, which we add up into sum just to avoid being optimized away.
template <typename CBRNG, typename F, typename F2>
F timedloop(typename CBRNG::ukey_type k, size_t Ntry){
    F sum = 0.f;
    typename CBRNG::ctr_type ctr = {{}};
    const size_t csize = sizeof(ctr)/sizeof(ctr[0]);
    CBRNG rng;

    for(size_t i=0; i<Ntry; i+=csize){
        ctr.incr();
        typename CBRNG::ctr_type u = rng(ctr, k);
	F2 f2;
	switch(csize) {
	case 8: f2 = r123::boxmuller(u[6], u[7]); sum += f2.x + f2.y;
                f2 = r123::boxmuller(u[4], u[5]); sum += f2.x + f2.y;
	case 4: f2 = r123::boxmuller(u[2], u[3]); sum += f2.x + f2.y;
	case 2: f2 = r123::boxmuller(u[0], u[1]); sum += f2.x + f2.y;
                break;
	default:
	        R123_ASSERT(0);
	}
    }
    return sum;
}

template <typename CBRNG, typename F2>
void dumploop(FILE *fp, typename CBRNG::ukey_type k, size_t Ntry){
    typename CBRNG::ctr_type ctr = {{}};
    const size_t csize = sizeof(ctr)/sizeof(ctr[0]);
    CBRNG rng;

    for(size_t i=0; i<Ntry; i+=csize){
        ctr.incr();
        typename CBRNG::ctr_type u = rng(ctr, k);
	F2 f2;
	switch(csize) {
	case 8: f2 = r123::boxmuller(u[6], u[7]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
                f2 = r123::boxmuller(u[4], u[5]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
	case 4: f2 = r123::boxmuller(u[2], u[3]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
	case 2: f2 = r123::boxmuller(u[0], u[1]); fprintf(fp, "%g\n%g\n", f2.x, f2.y); break;
	default:
	    R123_ASSERT(0);
	}
    }
}

#define NREPEAT 20

template <typename CBRNG, typename F, typename F2>
void timedcall(const char *tname, typename CBRNG::ukey_type k, size_t Ntry, char *out_fname) {
    double cur_time, dt;
    F sums[NREPEAT];
    int i;
    FILE *fp;
    char *fname;
    if (out_fname) {
	fname = (char *) malloc(strlen(out_fname) + strlen(tname) + 2);
	CHECKNOTZERO(fname);
	sprintf(fname, "%s-%s", out_fname, tname);
	fp = fopen(fname, "w");
	CHECKNOTZERO(fp);
    } else {
	fname = NULL;
	fp = NULL;
    }
    (void) timer(&cur_time);
    /*
     * we call timedloop NREPEAT times so that it is easy to keep
     * Ntry the same for boxmuller.cu and boxmuller.cpp, so sum[0]
     * can be checked.
     */
    for (i = 0; i < NREPEAT; i++) {
	k.v[sizeof(k)/sizeof(k.v[0])-1] = i;
	if (fp)
	    dumploop<CBRNG, F2>(fp, k, Ntry);
	else
	    sums[i] = timedloop<CBRNG, F, F2>(k, Ntry);
    }
    dt = timer(&cur_time);
    if (fp) {
	printf("%s %lu written to %s in %g sec: %gM/sec\n", tname, (unsigned long)(Ntry*NREPEAT), fname, dt, Ntry*NREPEAT*1.e-6/dt);
	fclose(fp);
	free(fname);
    } else {
	printf("%s %lu in %g sec: %gM/sec, sum = %g\n", tname, (unsigned long)(Ntry*NREPEAT), dt, Ntry*NREPEAT*1.e-6/dt, sums[0]);
	for (i = 1; i < NREPEAT; i++) {
	    printf(" %g", sums[i]);
	}
	printf("\n");
    }
}

const size_t DEF_N = 200000;

int main(int argc, char **argv){
    CBRNGF::ukey_type keyf = {{}};
#if R123_USE_64BIT
    CBRNGD::ukey_type keyd = {{}};
#endif
    size_t Ntry = DEF_N;
    char *dumpfname;
    
    dumpfname = getenv("BOXMULLER_DUMPFILE");
    if(argc>1) {
	if (argv[1][0] == '-') {
	    fprintf(stderr, "Usage: %s [iterations_per_thread [key0 [key1]]]\n", argv[0]);
	    exit(1);
	}
        Ntry = atol(argv[1]);
    }
    for (int i = 0; i < (int)(sizeof(keyf)/sizeof(keyf[0])-1) && 2+i < argc; i++) {
	keyf.v[i] = atol(argv[2+i]);
    }
    timedcall<CBRNGF,float,r123::float2>("float", keyf, Ntry, dumpfname);

#if R123_USE_64BIT
    for (int i = 0; i < (int)(sizeof(keyd)/sizeof(keyd[0])-1) && 2+i < argc; i++) {
	keyd.v[i] = atol(argv[2+i]);
    }
    timedcall<CBRNGD,double,r123::double2>("double", keyd, Ntry, dumpfname);
#endif
    return 0;
}