// The Computer Language Benchmarks Game // http://benchmarksgame.alioth.debian.org/ // // contributed by the Rust Project Developers // Copyright (c) 2013-2014 The Rust Project Developers // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in // the documentation and/or other materials provided with the // distribution. // // - Neither the name of "The Computer Language Benchmarks Game" nor // the name of "The Computer Language Shootout Benchmarks" nor the // names of its contributors may be used to endorse or promote // products derived from this software without specific prior // written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED // OF THE POSSIBILITY OF SUCH DAMAGE. use std::cmp::min; use std::io::{self, Write}; use std::sync::{Arc, Mutex}; use std::thread; const LINE_LEN: usize = 60; const BLOCK_LINES: usize = 512; const BLOCK_THOROUGHPUT: usize = LINE_LEN * BLOCK_LINES; const BLOCK_LEN: usize = BLOCK_THOROUGHPUT + BLOCK_LINES; const STDIN_BUF: usize = (LINE_LEN + 1) * 1024; const LOOKUP_SIZE: usize = 4 * 1024; const LOOKUP_SCALE: f32 = (LOOKUP_SIZE - 1) as f32; const ALU: &'static [u8] = b"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; const IUB: &'static [(u8, f32)] = &[(b'a', 0.27), (b'c', 0.12), (b'g', 0.12), (b't', 0.27), (b'B', 0.02), (b'D', 0.02), (b'H', 0.02), (b'K', 0.02), (b'M', 0.02), (b'N', 0.02), (b'R', 0.02), (b'S', 0.02), (b'V', 0.02), (b'W', 0.02), (b'Y', 0.02)]; const HOMOSAPIENS: &'static [(u8, f32)] = &[(b'a', 0.3029549426680), (b'c', 0.1979883004921), (b'g', 0.1975473066391), (b't', 0.3015094502008)]; // We need a specific Rng, // so implement this manually const MODULUS: u32 = 139968; const MULTIPLIER: u32 = 3877; const ADDITIVE: u32 = 29573; // Why doesn't rust already have this? // Algorithm directly taken from Wikipedia fn powmod(mut base: u64, mut exponent: u32, modulus: u64) -> u64 { let mut ret = 1; base %= modulus; while exponent > 0 { if exponent & 1 == 1 { ret *= base; ret %= modulus; } exponent >>= 1; base *= base; base %= modulus; } ret } // Just a typical LCRNG pub struct Rng { last: u32 } impl Rng { pub fn new() -> Rng { Rng { last: 42 } } pub fn max_value() -> u32 { MODULUS - 1 } pub fn normalize(p: f32) -> u32 { (p * MODULUS as f32).floor() as u32 } pub fn gen(&mut self) -> u32 { self.last = (self.last * MULTIPLIER + ADDITIVE) % MODULUS; self.last } // This allows us to fast-forward the RNG, // allowing us to run it in parallel. pub fn future(&self, n: u32) -> Rng { let a = MULTIPLIER as u64; let b = ADDITIVE as u64; let m = MODULUS as u64; // (a^n - 1) mod (a-1) m // x_k = ((a^n x_0 mod m) + --------------------- b) mod m // a - 1 // // Since (a - 1) divides (a^n - 1) mod (a-1) m, // the subtraction does not overflow and thus can be non-modular. // let new_seed = (powmod(a, n, m) * self.last as u64) % m + (powmod(a, n, (a-1) * m) - 1) / (a-1) * b; Rng { last: (new_seed % m) as u32 } } } // This will end up keeping track of threads, like // in the other multithreaded Rust version, in // order to keep writes in order. // // This is stolen from another multithreaded Rust // implementation, although that implementation // was not able to parallelize the RNG itself. struct BlockSubmitter { writer: W, pub waiting_on: usize, } impl BlockSubmitter { fn submit(&mut self, data: &[u8], block_num: usize) -> Option> { if block_num == self.waiting_on { self.waiting_on += 1; Some(self.submit_async(data)) } else { None } } fn submit_async(&mut self, data: &[u8]) -> io::Result<()> { self.writer.write_all(data) } } // For repeating strings as output fn fasta_static( writer: &mut W, header: &[u8], data: &[u8], mut n: usize ) -> io::Result<()> { // The aim here is to print a short(ish) string cyclically // with line breaks as appropriate. // // The secret technique is to repeat the string such that // any wanted line is a single offset in the string. // // This technique is stolen from the Haskell version. try!(writer.write_all(header)); // Maximum offset is data.len(), // Maximum read len is LINE_LEN let stream = data.iter().cloned().cycle(); let mut extended: Vec = stream.take(data.len() + LINE_LEN + 1).collect(); let mut offset = 0; while n > 0 { let write_len = min(LINE_LEN, n); let end = offset + write_len; n -= write_len; let tmp = extended[end]; extended[end] = b'\n'; try!(writer.write_all(&extended[offset..end + 1])); extended[end] = tmp; offset = end; offset %= data.len(); } Ok(()) } // For RNG streams as output fn fasta( submitter: &Arc>>, header: &[u8], table: &'static [(u8, f32)], rng: &mut Rng, n: usize ) -> io::Result<()> { // Here the lookup table is part of the algorithm and needs the // original probabilities (scaled with the LOOKUP_SCALE), because // Isaac says so :-) fn sum_and_scale(a: &'static [(u8, f32)]) -> Vec<(u8, f32)> { let mut p = 0f32; let mut result: Vec<(u8, f32)> = a.iter().map(|e| { p += e.1; (e.0, p * LOOKUP_SCALE) }).collect(); let result_len = result.len(); result[result_len - 1].1 = LOOKUP_SCALE; result } fn make_lookup(a: &[(u8, f32)]) -> [(u8, f32); LOOKUP_SIZE] { let mut lookup = [(0, 0f32); LOOKUP_SIZE]; let mut j = 0; for (i, slot) in lookup.iter_mut().enumerate() { while a[j].1 < (i as f32) { j += 1; } *slot = a[j]; } lookup } { try!(submitter.lock().unwrap().submit_async(header)); } let lookup_table = Arc::new(make_lookup(&sum_and_scale(table))); let thread_count = 4; let mut threads = Vec::new(); for block_num in (0..thread_count) { let offset = BLOCK_THOROUGHPUT * block_num; let local_submitter = submitter.clone(); let local_lookup_table = lookup_table.clone(); let local_rng = rng.future(offset as u32); threads.push(thread::spawn(move || { gen_block( local_submitter, local_lookup_table, local_rng, n.saturating_sub(offset), block_num, thread_count ) })); } for thread in threads { try!(thread.join().unwrap()); } *rng = rng.future(n as u32); Ok(()) } // A very optimized writer. // I have a feeling a simpler version wouldn't slow // things down too much, though, since the RNG // is the really heavy hitter. fn gen_block( submitter: Arc>>, lookup_table: Arc<[(u8, f32)]>, mut rng: Rng, mut length: usize, mut block_num: usize, block_stride: usize, ) -> io::Result<()> { // Include newlines in block length += length / LINE_LEN; let block: &mut [u8] = &mut [b'\n'; BLOCK_LEN]; while length > 0 { { let gen_into = &mut block[..min(length, BLOCK_LEN)]; // Write random numbers, skipping newlines for (i, byte) in gen_into.iter_mut().enumerate() { if (i + 1) % (LINE_LEN + 1) != 0 { let p = rng.gen() as f32 * (LOOKUP_SCALE / MODULUS as f32); *byte = lookup_table[p as usize..LOOKUP_SIZE].iter().find( |le| le.1 >= p).unwrap().0; } } } let write_out = { if length >= BLOCK_LEN { &mut *block } else if length % (LINE_LEN + 1) == 0 { &mut block[..length] } else { &mut block[..length + 1] } }; *write_out.last_mut().unwrap() = b'\n'; loop { // Make sure to release lock before calling `yield_now` let res = { submitter.lock().unwrap().submit(write_out, block_num) }; match res { Some(result) => { try!(result); break; } None => std::thread::yield_now() } } block_num += block_stride; rng = rng.future((BLOCK_THOROUGHPUT * (block_stride - 1)) as u32); length = length.saturating_sub(BLOCK_LEN * (block_stride - 1)); length = length.saturating_sub(BLOCK_LEN); } Ok(()) } fn run(writer: W) -> io::Result<()> { let n = std::env::args_os().nth(1) .and_then(|s| s.into_string().ok()) .and_then(|n| n.parse().ok()) .unwrap_or(1000); let rng = &mut Rng::new(); // Use automatic buffering for the static version... let mut writer = io::BufWriter::with_capacity(STDIN_BUF, writer); try!(fasta_static(&mut writer, b">ONE Homo sapiens alu\n", ALU, n * 2)); // ...but the dynamic version does its own buffering already let writer = try!(writer.into_inner()); let submitter = Arc::new(Mutex::new(BlockSubmitter { writer: writer, waiting_on: 0 })); { submitter.lock().unwrap().waiting_on = 0; } try!(fasta(&submitter, b">TWO IUB ambiguity codes\n", &IUB, rng, n * 3)); { submitter.lock().unwrap().waiting_on = 0; } try!(fasta(&submitter, b">THREE Homo sapiens frequency\n", &HOMOSAPIENS, rng, n * 5)); Ok(()) } fn main() { run(io::stdout()).unwrap() }