#[ This Nim source file is a multiple threaded implementation to perform an extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. Output is the number of twin primes <= N, or in range N1 to N2; the last twin prime value for the range; and the total time of execution. This code was developed on a System76 laptop with an Intel I7 6700HQ cpu, 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning would be needed to optimize for other hardware systems (ARM, PowerPC, etc). To compile for nim versions <= 0.19.x (can use --cc:gcc or --cc:clang) do: $ nim c --cc:gcc --d:release --threads:on twinprimes_ssoz.nim For Nim >= 0.20.0|1.0-rc1 (1.0.0 released 2019/9/23) compile as: $ nim c --cc:gcc --d:danger --threads:on twinprimes_ssoz.nim Then run executable: $ ./twinprimes_ssoz , and enter range values. Or alternatively: $ echo | ./twinprimes_ssoz Range values can be provided in either order (larger or smaller first). This source file, and updates, will be available here: https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e Mathematical and technical basis for implementation are explained here: https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ for_the_Riemann_Hypotheses https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK This code is provided free and subject to copyright and terms of the GNU General Public License Version 3, GPLv3, or greater. License copy/terms are here: http://www.gnu.org/licenses/ Copyright (c) 2017-2020 Jabari Zakiya -- jzakiya at gmail dot com Version Date: 2020/8/15 ]# import math # for sqrt, gcd, mod functions import strutils, typetraits # for number input import times, os # for timing code execution import osproc # for getting threads count import threadpool # for parallel processing import algorithm # for sort import bitops # for countSetBits {.experimental.} # required to use 'parallel' (<= 0.20.x) proc modinv(a0, b0: int): int = ## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1. var (a, b, x0) = (a0, b0, 0) result = 1 if b == 1: return while a > 1: result -= (a div b) * x0 a = a mod b swap a, b swap x0, result if result < 0: result += b0 proc genPGparameters(prime: int): (int, int, seq[int], seq[int]) = ## Create prime generator parameters for given Pn echo("using Prime Generator parameters for P", prime) let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] var (modpg, res_0) = (1, 0) # compute Pn's modulus and res_0 value for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm) var restwins: seq[int] = @[] # save upper twin pair residues here var inverses = newSeq[int](modpg+2) # save Pn's residues inverses here var (pc, inc, res) = (5, 2, 0) # use P3's PGS to generate pcs while pc < modpg div 2: # find a residue, then complement|inverse if gcd(modpg, pc) == 1: # if pc a residue let pc_mc = modpg - pc # create its modular complement var inv_r = modinv(pc, modpg) # compute residues's inverse inverses[pc] = inv_r # save its inverse inverses[inv_r] = pc # save its inverse's inverse inv_r = modinv(pc_mc, modpg) # compute complement's inverse inverses[pc_mc] = inv_r # save its inverse inverses[inv_r] = pc_mc # save its inverse's inverse if res + 2 == pc: restwins.add(pc); restwins.add(pc_mc + 2) # save hi_tp residues res = pc pc += inc; inc = inc xor 0b110 restwins.sort(system.cmp[int]); restwins.add(modpg + 1) # last residue is last hi_tp inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 resdiues are self inverses (modpg, res_0, restwins, inverses) # Global parameters var KB = 0'u # segment size for each seg restrack primes: seq[int] # list of primes r1..sqrt(N) cnts: seq[uint] # hold twin primes counts for seg bytes lastwins: seq[uint] # holds largest twin prime <= num in each thread modpg: int # PG's modulus value res_0: int # PG's first residue value restwins: seq[int] # PG's list of twinpair residues resinvrs: seq[int] # PG's list of residues inverses proc setSieveParameters(start_num, end_num: uint): (uint, int) = ## Select at runtime best PG and segment size factor to use for input value. ## These are good estimates derived from PG data profiling. Can be improved. let range = end_num - start_num var (Bn, pg) = (0, 3) if end_num < 49'u: Bn = 1; pg = 3 elif range < 24_000_000: Bn = 16; pg = 5 elif range < 1_100_000_000'u: Bn = 32; pg = 7 elif range < 35_500_000_000'u: Bn = 64; pg = 11 elif range < 15_000_000_000_000'u: pg = 13 if range > 7_000_000_000_000'u: Bn = 384 elif range > 2_500_000_000_000'u: Bn = 320 elif range > 250_000_000_000'u: Bn = 196 else: Bn = 128 else: Bn = 384; pg = 17 (modpg, res_0, restwins, resinvrs) = genPGparameters(pg) let Kmin = (start_num - 2) div modpg.uint + 1 # number of resgroups to start_num let Kmax = (end_num - 2) div modpg.uint + 1 # number of resgroups to end_num let krange = Kmax - Kmin + 1 # number of range resgroups, at least 1 let n = if krange < 37_500_000_000_000'u: 4 elif krange < 975_000_000_000_000'u: 6 else: 8 let B = uint(Bn * 1024 * n) # set seg size to optimize for selected PG KB = if krange < B: krange else: B # segments resgroups size echo("segment size = ",KB," resgroups; seg array is [1 x ",((KB-1) shr 6) + 1,"] 64-bits") let maxpairs = krange.int * restwins.len; # maximum number of twinprime pcs echo("twinprime candidates = ", maxpairs, "; resgroups = ", krange); (krange, restwins.len) proc sozpg(val: int | int64) = ## Compute the primes r0..sqrt(input_num) and store in global 'primes' array. ## Any algorithm (fast|small) is usable. Here the SoZ for P5 is used. let md = 30 # P5's modulus value let rscnt = 8 # P5's residue count let res =[7,11,13,17,19,23,29,31] # P5's residues list let posn = [0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,3,0,4,0,0,0,5,0,0,0,0,0,6,0,7] let kmax = (val - 7) div md + 1 # number of resgroups to input value var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0' let sqrtN =int(sqrt float64(val)) # integer sqrt of sqrt(input value) var (modk, r, k) = (0, -1, 0) # init residue parameters while true: # for r0..sqrtN primes mark their multiples if (r.inc; r) == rscnt: (r = 0; modk += md; k.inc) if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime let prm_r = res[r] # if prime save its residue value let prime = modk + prm_r # numerate the prime value if prime > sqrtN: break # we're finished when it's > sqrtN for ri in res: # mark prime's multiples in prms let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair let bit_r = uint8(1 shl posn[prod mod md]) # bit mask for prod's residue var kpm = k * (prime + ri) + prod div md # 1st resgroup for prime mult while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime # prms now contains the nonprime positions for the prime candidates r0..N # extract primes into global var 'primes' primes = @[] # create empty dynamic array for primes for k in 0..kmax - 1: # for each resgroup for r, res_r in res: # extract the primes in numerical order if (prms[k] and uint8(1 shl r)) == 0: let prime = md * k + res_r if prime in res_0..val: primes.add(prime) #[ proc printprms(Kn: int, Ki: uint64, indx: int, seg: seq[uint8]) = ## Print twinprimes for given twinpair for given segment slice. ## Primes will not be displayed in sorted order, collect|sort later for that. var modk = Ki * modpg.uint # base value of 1st resgroup in slice let r_hi = restwins[indx].uint # for upper twinpair residue value for k in 0..(Kn - 1) shr 3: # for each byte of resgroups in slice for r in 0..7: # extract the primes for each resgroup if (seg[k] and uint8(1 shl r)) == 0 and (modk + r_hi) <= end_num: echo(modk + r_hi - 1) # print twinprime mid val on a line modk += modpg.uint # set base value for next resgroup ]# proc nextp_init(hi_r: int, kmin: uint): seq[uint64] = ## Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. ## Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and ## store consecutively as lo_tp|hi_tp pairs for their restracks. var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for twinpair let (r_hi, r_lo) = (hi_r, hi_r - 2) # upper|lower twinpair residues vals for j, prime in primes: # for each prime r0..sqrt(N) let k = (prime - 2) div modpg # find the resgroup it's in let r = (prime - 2) mod modpg + 2 # and its residue value let r_inv = resinvrs[r] # and its residue inverse var ri = (r_lo * r_inv - 2) mod modpg + 2 # compute r's ri for r_lo var ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult if ki < kmin: # if 1st mult index < start_num's ki = (kmin - ki) mod prime.uint # how many indices short is it if ki > 0'u: ki = prime.uint - ki # adjust index value into range else: ki = ki - kmin # else here, adjust index if it was > nextp[2 * j] = ki # lo_tp index val >= start of range ri = (r_hi * r_inv - 2) mod modpg + 2 # compute r's ri for r_hi ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult if ki < kmin: # if 1st mult index < start_num's ki = (kmin - ki) mod prime.uint # how many indices short is it if ki > 0'u: ki = prime.uint - ki # adjust index value into range else: ki = ki - kmin # else here, adjust index if it was > nextp[2 * j or 1] = ki # hi_tp index val >= start of range result = nextp proc twins_sieve(indx: int, r_hi, start_num, end_num: uint) {.gcsafe.} = ## Perform in thread the ssoz for given twinpair residues for Kmax resgroups. ## First create|init 'nextp' array of 1st prime mults for given twinpair, ## stored consequtively in 'nextp', and init seg array for KB resgroups. ## For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime ## for primes mults resgroups, and update 'nextp' restrack slices acccordingly. ## Find last twin prime|sum for range, store at array[indx] for this twinpair. ## Can optionally compile to print mid twinprime values generated by twinpair. {.gcsafe.}: const S = 6 # shift value for 64 bits const BMASK = (1 shl S) - 1 # bitmask val for 64 bits var (Ki, Kmax) = ((start_num-2) div modpg.uint, (end_num-2) div modpg.uint + 1) var (sum, hi_tp, Kn) =(0'u, 0'u, KB.int) # init these parameters var seg = newSeq[uint](((KB-1) shr S) + 1) # seg array for Kb resgroups if Ki * modpg.uint + (r_hi - 2) < start_num: Ki.inc # ensure lo tps in range if (Kmax - 1) * modpg.uint + r_hi > end_num: Kmax.dec # ensure hi tps in range var nextp = nextp_init(r_hi.int, Ki) # 1st prime mults for twinpair restracks while Ki < Kmax: # for Kn resgroup size slices upto Kmax if KB > (Kmax - Ki): Kn = int(Kmax - Ki) # adjust KB size for last seg for j, prime in primes: # for each prime r0..sqrt(N) # for lower twinpair residue track var k = nextp[j * 2].int # starting from this resgroup in seg while k < Kn: # mark primenth resgroup bits prime mults seg[k shr S] = seg[k shr S] or (1 shl (k and BMASK)).uint k += prime # set next prime multiple resgroup nextp[j * 2] = uint(k - Kn) # save 1st resgroup in next eligible seg # for upper twinpair residue track k = nextp[j * 2 or 1].int # starting from this resgroup in seg while k < Kn: # mark primenth resgroup bits prime mults seg[k shr S] = seg[k shr S] or (1 shl (k and BMASK)).uint k += prime # set next prime multiple resgroup nextp[j * 2 or 1] = uint(k - Kn) # save 1st resgroup in next eligible seg # need to set as nonprime unused bits in last # mem of last seg; so fast, do for every seg seg[(Kn-1) shr S] = seg[(Kn-1) shr S] or (not 0 shl (((Kn-1) and BMASK))).uint shl 1 var cnt = 0 # initialize segment twinprimes count # then count the twinprimes in segment for m in seg[0..(Kn-1) shr S]: cnt += (1 shl S) - countSetBits(m) if cnt > 0: # if segment has twin primes sum += cnt.uint # add segment count to total range count var upk = Kn - 1 # from end of seg, count down to largest tp while (seg[upk shr S] and (1 shl (upk and BMASK)).uint) != 0: upk.dec hi_tp = Ki + upk.uint # numerate its full resgroup value #printprms(Kn, Ki, indx, seg) # optional: display twin primes in seg Ki += KB # set 1st resgroup val of next seg slice if Ki < Kmax: (for b in 0..(Kn - 1) shr S: seg[b] = 0) # set seg to all primes # when sieve done for full range # numerate largest twinprime in segs hi_tp = if r_hi > end_num: 0'u else: hi_tp * modpg.uint + r_hi lastwins[indx] = if sum == 0: 1'u else: hi_tp # store final seg tp value cnts[indx] = sum # sum for twin pair proc twinprimes_ssoz() = ## Main routine to setup, time, and display results for twin primes sieve. let x = stdin.readline.split # Inputs are 1 or 2 range values < 2**63 var end_num = max(x[0].parseUInt, 3'u) var start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u if start_num > end_num: swap start_num, end_num echo("threads = ", countProcessors()) let ts = epochTime() # start timing sieve setup execution start_num = start_num or 1 # if start_num even add 1 end_num = (end_num - 1) or 1 # if end_num even subtract 1 # select Pn, set sieving params for inputs let (range, pairscnt) = setSieveParameters(start_num, end_num) cnts = newSeq[uint](pairscnt) # array to hold count of tps for each thread lastwins = newSeq[uint](pairscnt)# array to hold largest tp for each thread if end_num < 49'u: primes.add(5) # gen sieve primes else: sozpg(int(sqrt float64(end_num))) # <= sqrt(end_num) echo("each of ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array") var twinscnt = 0'u64 # init twinprimes range count let lo_range = uint(restwins[0] - 3) # lo_range = lo_tp - 1 for tp in [3, 5, 11, 17]: # excluded low tp values for PGs used if end_num == 3'u: break # if 3 end of range, no twin primes if tp.uint in start_num..lo_range: twinscnt += 1 # cnt small tps if any let te = epochTime() - ts # sieve setup time echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs") echo("perform twinprimes ssoz sieve") let t1 = epochTime() # start timing ssoz sieve execution parallel: # perform in parallel for indx, r_hi in restwins: # for each twin pair row index spawn twins_sieve(indx, r_hi.uint, start_num, end_num) # sieve selected twin pair restracks stdout.write("\r", (indx + 1), " of ", pairscnt, " twinpairs done") sync() # when all the threads finish var last_twin = 0'u # find largest twin prime in range for i in 0..pairscnt - 1: # and determine total twin primes count twinscnt += cnts[i] if last_twin < lastwins[i]: last_twin = lastwins[i] if end_num == 5'u and twinscnt == 1: last_twin = 5'u var Kn = range mod KB # set number of resgroups in last slice if Kn == 0: Kn = KB # if multiple of seg size set to seg size let t2 = epochTime() - t1 # sieve execution time echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs") echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs") echo("last segment = ", Kn, " resgroups; segment slices = ", (range - 1) div KB + 1) echo("total twins = ", twinscnt, "; last twin = ", last_twin - 1, "+/-1") twinprimes_ssoz()