/*** gcc -O3 direct4_fourmis.c On my 1733 MHz Pentium M laptop, this runs at ~ 250'000'000 ant steps per second. Langton's Ant 100 or RLL (right-left-left). See introduction e.g. at http://www.fortunecity.com/emachines/e11/86/langton.html . Adjust the HEIGHT macro below to match the RAM you are willing to spend. This actually limits the address space; the actual RAM usage will grow until the ant reaches a border of the checkerboard, at which point the program segfaults. The checkerboard is memory-mapped from the file "Areas" so even after a segfault you can examine its state. You can also cleanly interrupt the program with a Ctrl-C or a SIGTERM signal and restart it later; it will resume in the saved state. To view the state, first send a SIGHUP signal if the program is currently running to force an msync(); then use summary3.c which scales the checkerboard down and dumps a PGM-formatted image file to stdout. The checkerboard is represented as follows in memory: | ... | +-------------+ ... | address P-1 | ... --+---------------+-------------+---------------+-- ... | addr P-HEIGHT | address P | addr P+HEIGHT | ... --+---------------+-------------+---------------+-- ... | address P+1 | ... ... Each box is one byte, which packs 5 adjacent pixels, each of which can be in three possible states: +-------------+ | 81 27 9 3 1 | +-------------+ ***/ #include #include #include #include #include #include #include #include #include #include #include #define FILENAME "Areas" #define LOG_FILENAME "areas.log" //#define COUNT_STEPS typedef int int32; #define PAGESIZE 4096 /* if mprotect() can't mark a large number of individual pages as PROT_NONE, turn the square area into a rectangle and hope that there is no top-to-bottom wrap-around. */ #define RECT_WIDTH_FACTOR 0.8 #define HEIGHT 299197 /* prime number, ~13.3GB of RAM */ /* expected max 8GB used */ //#define HEIGHT 9700 /* smaller value for tests */ #ifdef RECT_WIDTH_FACTOR # define WIDTH (int)(HEIGHT * RECT_WIDTH_FACTOR / 5) #else # define WIDTH (HEIGHT/5) #endif #define TOTALSIZE (((long) WIDTH) * ((long) HEIGHT)) /* TOTALSIZE computes how large the address space will be. Note that on 32-bit machines this is not allowed to be more than 2 or 3 GB. */ typedef struct { int32 width, height; int32 next_x, next_y; } header_t; void Log(char *msg,...) { va_list ap; time_t now; FILE *logfile; va_start(ap, msg); vprintf(msg, ap); printf("\n"); va_end(ap); logfile = fopen(LOG_FILENAME, "a"); if (logfile == NULL) { fprintf(stderr, "warning: cannot open log file\n"); } else { now = time(NULL); va_start(ap, msg); vfprintf(logfile, msg, ap); fprintf(logfile, "\t\t%.2f\t%s", ((double) clock()) / CLOCKS_PER_SEC, asctime(gmtime(&now))); va_end(ap); fclose(logfile); } } void Error(char *msg) { Log("ERROR: %s", msg); exit(1); } void PError(char *msg) { perror("langton-ant"); Error(msg); } void check_assertions(void) { printf("source code: %s\n", __FILE__); printf("board width: %d pixels (%d columns)\n", WIDTH*5, WIDTH); printf("board height: %d pixels\n", HEIGHT); printf("total size: %ld MB\n", (long)(TOTALSIZE/(1024*1024))); #ifdef COUNT_STEPS printf("COUNTING STEPS\n"); #endif } /************************************************************/ unsigned char *Data; int Data_fd; long next_x, next_y; void load(void) { off_t Data_length; Data_fd = open(FILENAME, O_RDWR | O_CREAT, 0644); if (Data_fd == -1) PError("open failed"); Data_length = lseek(Data_fd, 0, SEEK_END); if (Data_length == 0) { header_t header; Log("*** STARTING ***"); header.width = WIDTH; header.height = HEIGHT; header.next_x = WIDTH/2; header.next_y = HEIGHT/2; if (write(Data_fd, &header, sizeof(header)) != sizeof(header)) PError("write failed"); if (ftruncate(Data_fd, TOTALSIZE) < 0) PError("ftruncate failed"); Data_length = lseek(Data_fd, 0, SEEK_END); } else Log("*** RESUMING ***"); if ((long long) Data_length != (long long) TOTALSIZE) Error("bogus file length"); lseek(Data_fd, 0, SEEK_SET); Data = mmap(NULL, TOTALSIZE, PROT_READ | PROT_WRITE, MAP_SHARED, Data_fd, 0); if (Data == MAP_FAILED) PError("mmap failed"); if (madvise(Data, TOTALSIZE, MADV_RANDOM) < 0) perror("madvise warning"); { long offset; if (mprotect(Data + PAGESIZE, HEIGHT - PAGESIZE, PROT_NONE) < 0) PError("mprotect[1] failed"); offset = TOTALSIZE - HEIGHT; offset &= ~(PAGESIZE-1); if (mprotect(Data + offset, TOTALSIZE - offset, PROT_NONE) < 0) PError("mprotect[2] failed"); #ifndef RECT_WIDTH_FACTOR long x; for (x=1; xwidth != WIDTH) Error("bogus header->width"); if (headerp->height != HEIGHT) Error("bogus header->height"); next_x = headerp->next_x; next_y = headerp->next_y; if (next_x < 0 || next_y < 0) Error("No valid next position"); headerp->next_x = -1; headerp->next_y = -1; } void save(void) { header_t *headerp = (header_t*) Data; headerp->next_x = next_x; headerp->next_y = next_y; msync(Data, TOTALSIZE, MS_SYNC); Log("*** SHUTTING DOWN ***"); } /************************************************************/ volatile int stopflag = 0; unsigned char table1[12*512]; #define P_OFFSET(dir) (*(long*)(table1 + (dir) + 2*244)) void maketables(void) { int i, dir; static int p_offset[12] = { -HEIGHT, /* enter bit 0 from the left */ +HEIGHT, /* enter bit 4 from the right */ +1, +1, +1, +1, +1, /* enter from above */ -1, -1, -1, -1, -1 }; /* enter from below */ static int p_newbit[12] = { 0, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 }; static int p_pow3[5] = { 1, 3, 9, 27, 81 }; for (dir=0; dir<12*512; dir+=512) { P_OFFSET(dir) = p_offset[dir/512]; for (i=0; i<243; i++) { int byte = i; int d = p_offset[dir/512]; int n = p_newbit[dir/512]; int newdir; while (1) { /* turn left by default */ switch (d) { case -HEIGHT: d = +1; break; case +1: d = +HEIGHT; break; case +HEIGHT: d = -1; break; case -1: d = -HEIGHT; break; } if ((byte / p_pow3[n]) % 3 == 0) { /* should have turned right */ d = -d; byte += 2 * p_pow3[n]; } else { byte -= p_pow3[n]; } if (d == 1 || d == -1) break; if (d < 0) n += 1; else n -= 1; if (n < 0 || n >= 5) break; } if (n < 0) newdir = 1; else if (n >= 5) newdir = 0; else if (d > 0) newdir = 2 + n; else newdir = 7 + n; *(short*)(table1 + dir + 2*i) = newdir * 512 + byte; } } #ifdef DUMPTABLES for (dir=0; dir<12*512; dir+=512) { printf("P_OFFSET(0x%x) = %d\n", dir, P_OFFSET(dir)); for (i=0; i<243; i++) printf("[%03x]", (int) *(short*)(table1 + dir + 2*i)); printf("\n"); } #endif } #define LOOPBODY \ p += P_OFFSET(dir); \ dir = *(short*)(table1 + dir + 2*(long)(*p)); \ *p = (unsigned char) dir; \ dir &= -256; /* the next bit of assembler is not used, because GCC produces almost the same thing from the C code of LOOPBODY anyway */ #define LOOPBODY2 \ asm volatile (" \n\ addl table1+488(%%ebx), %%edx \n\ movzbl (%%edx), %%eax \n\ movzwl table1(%%ebx,%%eax,2), %%ebx \n\ movb %%bl, (%%edx) \n\ andl $-256, %%ebx" \ : "=d" (p), "=b" (dir) \ : "0" (p), "1" (dir) \ : "eax" ); void run(void) { unsigned char *p = Data + next_x * HEIGHT + next_y; long dir = 0; do { LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY LOOPBODY } while (!stopflag); while (dir != 0) { LOOPBODY } next_x = (p - Data) / HEIGHT; next_y = (p - Data) % HEIGHT; } /************************************************************/ static void myhandler(int ignored) { stopflag = 1; } static void mysynchandler(int ignored) { stopflag = 2; } static void mysegvhandler(int ignored) { #ifdef COUNT_STEPS fprintf(stderr, "Number of steps: %lld\n", steps); #endif Error("SIGSEGV"); } void install_sighandler(void) { if (signal(SIGINT, &myhandler) == SIG_ERR || signal(SIGTERM, &myhandler) == SIG_ERR || signal(SIGHUP, &mysynchandler) == SIG_ERR || signal(SIGSEGV, &mysegvhandler) == SIG_ERR || signal(SIGPIPE, SIG_IGN) == SIG_ERR) PError("cannot install signal handlers"); } /************************************************************/ int main(int argc, char** argv) { check_assertions(); maketables(); load(); install_sighandler(); starting(); while (1) { run(); #ifdef COUNT_STEPS fprintf(stderr, "Number of steps: %lld\n", steps); #endif if (stopflag == 1) break; stopflag = 0; msync(Data, TOTALSIZE, MS_ASYNC | MS_INVALIDATE); Log("*** Sync *** "); } save(); return 0; }