''' Peter Chinetti peter@chinetti.me ''' from multiprocessing.sharedctypes import RawArray import os import sys import argparse import time import itertools import math SIEVELENGTH = 100000 #This needs to be reset for multiple runs sieve = RawArray('c',SIEVELENGTH) def main(poolsize,chunksize): ''' Fork off poolsize number of processes (so with the parent process poolsize +1 total processes), each with their own queue of chunks to process. They will run a processing function, then exit and be waited on by the parent process. ''' global sieve sieve = RawArray('c',SIEVELENGTH,) mark = time.time() poolqueue = create_poolqueues(poolsize,chunksize) pidlist = list() for i in range(0,poolsize): pid = os.fork() if pid == 0: process_chunks(poolqueue[i]) pidlist.append(pid) for pid in pidlist: os.waitpid(pid,0) print("Last run with poolsize %d and chunksize %d took %f milliseconds" % (poolsize, chunksize, 1000*(time.time()-mark))) return get_primes() def create_poolqueues(poolsize,chunksize): ''' Create poolsize number of lists, then divide the sieve into chunks, dividing the chunks between pools by giving the 1st chunk to the first pool the 2nd to the 2nd, and looping back to the first at the end of the poollist. This method of divison is important. If we were instead to evenly divide the list among the pool, the process that gets the first chunk has to do a lot more work than the process that gets the last chunk ''' poolqueue = list() #Build queues for each process for i in range(0,poolsize): poolqueue.append(list()) chunkstart = 0 index = 0 #Distribute chunks to each process while chunkstart < SIEVELENGTH: if poolsize > 1: poolqueue[index % (poolsize)].append((chunkstart,chunkstart + chunksize)) index += 1 else: poolqueue[0].append((chunkstart,chunkstart + chunksize)) chunkstart += chunksize return poolqueue def process_chunks(lin): '''direct the process to start work on the chunk list, one at a time''' for chunk in lin: run_sieve(chunk[0],chunk[1]) sys.exit(0) def run_sieve(start,end): '''for all values in the chunk, mark multiples of the primes''' if start == 0: start = 2 while start <= end and start < math.sqrt(SIEVELENGTH): if sieve[start] == b'\x00': mark_multiples(start) start += 1 def mark_multiples(start): ''' mark 2x, 3x ...''' global sieve multiple = 2 while start*multiple < SIEVELENGTH: sieve[start*multiple] = b'\x01' multiple += 1 def timerun(poolsize, chunksize): ''' make a csv with poolsize, chunksize, and time required to run the sieve. don't forget to reset the sieve array before each run ''' results = open('results.txt','a') results.write(str(poolsize)+','+str(chunksize)) results.flush() global sieve sieve = RawArray('c',SIEVELENGTH,) mark = time.time() main(poolsize,chunksize) results.write(','+str(time.time()-mark)) results.write('\n') results.close() def take_data(): ''' create csv entries for every combination of poolsize and chunksize ''' poolsizes = list(range(1,21)) chunksizes = list([1,10,100,1000,10000,100000]) #cartisian product! for poolsize,chunksize in itertools.product(poolsizes,chunksizes): timerun(poolsize,chunksize) check_results() def check_results(): ''' verify my results against a list of primes I found online. the algo works. ''' primes = open('Primes.txt').readlines() primes = [int(x.strip()) for x in primes] sievelist = get_primes() for item in list(zip(sievelist,primes)): if item[0] != item[1]: print("I found a mismatching item!") sys.exit(1) def get_primes(): sievelist=list() index = 0 for item in sieve: if item == b'\x00': sievelist.append(index) index+=1 #0 and 1 are not primes sievelist = sievelist[2:] return sievelist if __name__ == "__main__": #Command line argument parsing parser = argparse.ArgumentParser(description='Split a range into subdivisions, then run the sieve on the subdivisions') parser.add_argument("--poolsize","-p",required=True,type=int) parser.add_argument("--chunksize","-c",required=True,type=int) parser.add_argument("--debug","-d",required=True,type=int,choices=(0,1)) args = parser.parse_args() main(args.poolsize,args.chunksize) check_results() if args.debug == 1: print("primes:") index = 0 for item in sieve: if item == b'\x00': print(index) index+=1