> > // convert lhr_mults vals > n/2 to their lhr complements n-r,
// store them, those < n/2, in lhr_del; it now holds non-pcp lhr vals
auto lhr_del = lhr_mults.map!((r_del) => r_del > ndiv2 ? n - r_del : r_del).array;
lhr_del.sort!("a < b");
lhr = setDifference(lhr, lhr_del).array;
I'm bringing attention to this issue as it might improve D.
It has to do with the fastest way to remove elements in one array|list from another.
It concerns the code in this thread to generate the prime pairs of n.
In the code I have two arrays, lhr
and lhr_del
, and want to remove|delete the elements in lhr_del
from lhr
.
In D it's:
lhr_del.sort!("a < b");
lhr = setDifference(lhr, lhr_del).array;
In the original Rust version it was done as: lhr.retain(|&m| !lhr_del.contains(&m));
This is conceptually keeping the members in lhr
not in lhr_del
.
This is slow in Rust, and was greatly improved doing:
lhr_del.sort_unstable();
lhr.retain(|&m| !lhr_del.binary_search(&m).is_ok());
Then Someone did a Rust equivalent of D
's setDifference
allowing it to be coded as:
lhr_del.sort_unstable();
let lhr: Vec<u32> = SetDiff::new(&lhr, &lhr_del).collect();
This is even faster. Here's the Rust code implementation for the u32
version.
Change the u32
references to usize
for u64
size numbers (which use more memory).
struct SetDiff<'a> {
r1: &'a [u32],
r2: &'a [u32],
}
impl<'a> SetDiff<'a> {
fn adjust_pos(&mut self) {
while !self.r1.is_empty() {
if self.r2.is_empty() || self.r1[0] < self.r2[0] {
break;
} else if self.r2[0] < self.r1 [0] {
self.r2 = &self.r2[1..];
} else {
self.r1 = &self.r1[1..];
self.r2 = &self.r2[1..];
} } }
fn new(r1: &'a [u32], r2: &'a [u32]) -> Self {
let mut s = SetDiff{ r1, r2 };
s.adjust_pos();
s
} }
impl<'a> Iterator for SetDiff<'a> {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
let val = self.r1.get(0).copied();
if val.is_some() {
self.r1 = &self.r1[1..];
}
self.adjust_pos();
return val
} }
Here are u32
time comparisons for the last two Rust versions and D.
------------------------------------------
n | b-srch | setdif | D |
------------_-|--------|--------|--------|
100,000,000 | 3.35 | 2.96 | 7.21 |
--------------|--------|------- |--------|
200,000,000 | 7.77 | 6.19 | 15.86 |
------- ------|--------|--------|--------|
300,000,000 | 6.40 | 5.73 | 10.89 |
--------------|--------|--------|--------|
400,000,000 | 14.44 | 12.80 | 33.13 |
---- ---------|--------|--------|--------|
500,000,000 | 18.44 | 16.32 | 42.58 |
--------------|--------|--------|--------|
600,000,000 | 13.47 | 12.05 | 22.22 |
---- ---------|--------|--------|--------|
700,000,000 | 21.72 | 19.51 | 46.19 |
--------------|--------|--------|--------|
800,000,000 | 30.97 | 27.51 | 71.44 |
---- ---------|--------|--------|--------|
900,000,000 | 22.95 | 18.30 | 35.66 |
--------------|--------|--------|--------|
1,000,000,000 | 38.99 | 34.81 | 88.74 |
The only substantive difference between the versions seems to be how each does its SetDif
.
At least that's the only thing I can think of that causes this much performance difference.
I thought I'd let you know.
Here are the source files, and compiling settings.
/*
D LDC2 1.40
Compile with ldc2: $ ldc2 --release -O3 -mcpu native prime_pairs_lohi.d
Run as: $ ./prime_pairs_lohi_u32 123_456
*/
module prime_pairs;
import std;
import std.datetime.stopwatch : StopWatch;
void prime_pairs_lohi(uint n) { // inputs can be of size u32
if ((n&1) == 1 || n < 4) { return writeln("Input not even n > 2"); }
if (n <= 6) { writeln([n, 1]); writeln([n/2, n/2]); writeln([n/2, n/2]); return; }
// generate the low-half-residues (lhr) r < n/2
auto ndiv2 = n/2; // llr:hhr midpoint
auto rhi = n-2; // max residue limit
uint[] lhr = iota(3, ndiv2, 2).filter!(e => gcd(e, n) == 1).array;
// identify and store all powers and cross-products of the lhr members < n-2
uint[] lhr_del; // lhr multiples, not part of a pcp
foreach(i, r; lhr) { // iterate thru lhr to find prime multiples
auto rmax = rhi / r; // ri can't multiply r with values > this
if (r < rmax) lhr_del ~= (r*r < ndiv2) ? r*r : n - r*r; // for r^2 multiples
if (lhr[i+1] > rmax) break ; // exit if product of consecutive r’s > n-2
foreach(ri; lhr[i+1..$]) { // for each residue in reduced list
if (ri > rmax) break; // exit for r if cross-product with ri > n-2
lhr_del ~= (r*ri < ndiv2) ? r*ri : n - r*ri; // store value if < n-2
} }
// remove from lhr its lhr mulitples, the pcp remain
lhr_del.sort!("a < b");
lhr = setDifference(lhr, lhr_del).array;
writeln([n, lhr.length]); // show n and pcp prime pairs count
writeln([lhr[0], n-lhr[0]]); // show first pcp prime pair of n
writeln([lhr[$-1],n-lhr[$-1]]); // show last pcp prime pair of n
}
void main(string[] args) { // directly recieve input from terminal
string[] inputs = args[1..$]; // can include '_': 123_456
auto nums = inputs.map!(i => i.filter!(n => n != '_'));
auto n = nums.map!(f => f.to!uint())[0];
auto timer = StopWatch(); // create execution timer
timer.start(); // start it
prime_pairs_lohi(n); // run routine
writeln(timer.peek()); // show timer results
}
------------------------------------------------------------------------------------
/*
Rust 1.84.1
Can compile as: $ cargo build --release
or: $ RUSTFLAGS="-C opt-level=3 -C debuginfo=0 -C target-cpu=native" cargo build --release
Run as: $ ./prime_pairs_lohi 123_456
*/
use std::time::Instant;
fn coprime(mut m: u32, mut n: u32) -> bool {
while m|1 != 1 { let t = m; m = n % m; n = t }
m > 0
}
fn prime_pairs_lohi(n : u32) { // for u32 input values
if n&1 == 1 || n < 4 { return println!("Input not even n > 2"); }
if n <= 6 { return println!("[{}, {}]\n[{}, {}]\n[{}, {}]",n,1,n/2,n/2,n/2,n/2); };
// generate the low-half-residues (lhr) r < n/2
let (ndiv2, rhi) = (n/2, n-2); // lhr:hhr midpoint, max residue limit
let mut lhr: Vec<u32> = (3..ndiv2).step_by(2).filter(|&r| coprime(r, n)).collect();
// identify and store all powers and cross-products of the lhr members < n-2
let mut lhr_del = Vec::with_capacity(lhr.len() as usize);
for i in 1..lhr.len()-1 { // iterate thru lhr to find prime multiples
let (mut j, r) = (i, lhr[i-1]); // for current residue
let rmax = rhi / r; // ri can't multiply r with values > this
if r < rmax { lhr_del.push(if r*r < ndiv2 {r*r} else {n - r*r} ); }
if lhr[i] > rmax { break } // exit if product of consecutive r’s > n-2
while lhr[j] <= rmax { // stop for r if product with ri > n-2
lhr_del.push(if r*lhr[j] < ndiv2 {r*lhr[j]} else {n - r*lhr[j]});
j += 1; // get next lhr value
} }
lhr_del.sort_unstable(); // remove from lhr its lhr mults
lhr.retain(|&m| !lhr_del.binary_search(&m).is_ok());
let lcnt = lhr.len(); // number of pcp prime pairs
println!("[{}, {}]", n, lcnt); // show n and pcp prime pairs count
println!("[{}, {}]", lhr[0],n-lhr[0]); // show first pcp prime pair of n
println!("[{}, {}]", lhr[lcnt-1], n-lhr[lcnt-1]); // show last pcp prime pair of n
}
fn main() {
let n: u32 = std::env::args()
.nth(1).expect("missing count argument")
.replace('_', "").parse().expect("one input");
let start = Instant::now();
prime_pairs_lohi(n);
println!("{:?}", start.elapsed());
}
---------------------------------------------------------------------------------------------
/*
Rust 1.84.1
Can compile as: $ cargo build --release
or: $ RUSTFLAGS="-C opt-level=3 -C debuginfo=0 -C target-cpu=native" cargo build --release
Run as: $ ./prime_pairs_lohi 123_456
*/
use std::time::Instant;
struct SetDiff<'a> {
r1: &'a [u32],
r2: &'a [u32],
}
impl<'a> SetDiff<'a> {
fn adjust_pos(&mut self) {
while !self.r1.is_empty() {
if self.r2.is_empty() || self.r1[0] < self.r2[0] {
break;
} else if self.r2[0] < self.r1 [0] {
self.r2 = &self.r2[1..];
} else {
self.r1 = &self.r1[1..];
self.r2 = &self.r2[1..];
} } }
fn new(r1: &'a [u32], r2: &'a [u32]) -> Self {
let mut s = SetDiff{ r1, r2 };
s.adjust_pos();
s
} }
impl<'a> Iterator for SetDiff<'a> {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
let val = self.r1.get(0).copied();
if val.is_some() {
self.r1 = &self.r1[1..];
}
self.adjust_pos();
return val
} }
fn coprime(mut m: u32, mut n: u32) -> bool {
while m|1 != 1 { let t = m; m = n % m; n = t }
m > 0
}
fn prime_pairs_lohi(n : u32) { // for u32 input values
if n&1 == 1 || n < 4 { return println!("Input not even n > 2"); }
if n <= 6 { return println!("[{}, {}]\n[{}, {}]\n[{}, {}]",n,1,n/2,n/2,n/2,n/2); };
// generate the low-half-residues (lhr) r < n/2
let (ndiv2, rhi) = (n/2, n-2); // lhr:hhr midpoint, max residue limit
let mut lhr: Vec<u32> = (3..ndiv2).step_by(2).filter(|&r| coprime(r, n)).collect();
// identify and store all powers and cross-products of the lhr members < n-2
let mut lhr_del = Vec::with_capacity(lhr.len() as usize);
for i in 1..lhr.len()-1 { // iterate thru lhr to find prime multiples
let (mut j, r) = (i, lhr[i-1]); // for current residue
let rmax = rhi / r; // ri can't multiply r with values > this
if r < rmax { lhr_del.push(if r*r < ndiv2 {r*r} else {n - r*r} ); }
if lhr[i] > rmax { break } // exit if product of consecutive r’s > n-2
while lhr[j] <= rmax { // stop for r if product with ri > n-2
lhr_del.push(if r*lhr[j] < ndiv2 {r*lhr[j]} else {n - r*lhr[j]});
j += 1; // get next lhr value
} }
lhr_del.sort_unstable(); // remove from lhr its lhr mults
let lhr: Vec<u32> = SetDiff::new(&lhr, &lhr_del).collect();
let lcnt = lhr.len(); // number of pcp prime pairs
println!("[{}, {}]", n, lcnt); // show n and pcp prime pairs count
println!("[{}, {}]", lhr[0],n-lhr[0]); // show first pcp prime pair of n
println!("[{}, {}]", lhr[lcnt-1], n-lhr[lcnt-1]); // show last pcp prime pair of n
}
fn main() {
let n: u32 = std::env::args()
.nth(1).expect("missing count argument")
.replace('_', "").parse().expect("one input");
let start = Instant::now();
prime_pairs_lohi(n);
println!("{:?}", start.elapsed());
}