From e76e83cb104f6bf09247879cdf424d1097279710 Mon Sep 17 00:00:00 2001
From: Guillaume Pinot <texitoi@texitoi.eu>
Date: Wed, 4 Dec 2013 09:03:55 +0100
Subject: [PATCH] rewrite of shootout-reverse-complement.rs

This version is inspired by the best version in C by Mr Ledrug,
but without the parallelisation.
---
 src/test/bench/shootout-reverse-complement.rs | 212 ++++++------------
 1 file changed, 70 insertions(+), 142 deletions(-)

diff --git a/src/test/bench/shootout-reverse-complement.rs b/src/test/bench/shootout-reverse-complement.rs
index d0e13865347d8..e9d0d60ac0bd8 100644
--- a/src/test/bench/shootout-reverse-complement.rs
+++ b/src/test/bench/shootout-reverse-complement.rs
@@ -1,151 +1,79 @@
-// xfail-pretty
-// xfail-test
-
-use std::cast::transmute;
-use std::libc::{STDOUT_FILENO, c_int, fdopen, fgets, fopen, fputc, fwrite};
-use std::libc::{size_t};
-use std::ptr::null;
-
-static LINE_LEN: u32 = 80;
-
-static COMPLEMENTS: [u8, ..256] = [
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-    0,
-    'T' as u8,
-    'V' as u8,
-    'G' as u8,
-    'H' as u8,
-    0,
-    0,
-    'C' as u8,
-    'D' as u8,
-    0,
-    0,
-    'M' as u8,
-    0,
-    'K' as u8,
-    'N' as u8,
-    0,
-    0,
-    0,
-    'Y' as u8,
-    'S' as u8,
-    'A' as u8,
-    'A' as u8,
-    'B' as u8,
-    'W' as u8,
-    0,
-    'R' as u8,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0,
-
-    0,
-    'T' as u8,
-    'V' as u8,
-    'G' as u8,
-    'H' as u8,
-    0,
-    0,
-    'C' as u8,
-    'D' as u8,
-    0,
-    0,
-    'M' as u8,
-    0,
-    'K' as u8,
-    'N' as u8,
-    0,
-    0,
-    0,
-    'Y' as u8,
-    'S' as u8,
-    'A' as u8,
-    'A' as u8,
-    'B' as u8,
-    'W' as u8,
-    0,
-    'R' as u8,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0,
-
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-];
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::iter::range_step;
+use std::io::{stdin, stdout, File};
+
+static LINE_LEN: uint = 60;
+
+fn make_complements() -> [u8, ..256] { // returns a 256-entry table: input byte -> output byte
+    let transforms = [ // (code, complement) pairs, written in uppercase
+        ('A', 'T'), ('C', 'G'), ('G', 'C'), ('T', 'A'),
+        ('U', 'A'), ('M', 'K'), ('R', 'Y'), ('W', 'W'),
+        ('S', 'S'), ('Y', 'R'), ('K', 'M'), ('V', 'B'),
+        ('H', 'D'), ('D', 'H'), ('B', 'V'), ('N', 'N'),
+        ('\n', '\n')]; // NOTE(review): the loop below then also maps '\n' + 32 ('*') to '\n'
+    let mut complements: [u8, ..256] = [0, ..256];
+    for (i, c) in complements.mut_iter().enumerate() { // start from the identity mapping
+        *c = i as u8;
+    }
+    let lower = 'A' as u8 - 'a' as u8; // 65 - 97: presumably relies on wrapping u8 arithmetic
+    for &(from, to) in transforms.iter() {
+        complements[from as u8] = to as u8;
+        complements[from as u8 - lower] = to as u8; // lowercase slot (from + 32) -> uppercase `to`
+    }
+    complements
+}
 
 fn main() {
-    unsafe {
-        let mode = "r";
-        //let stdin = fdopen(STDIN_FILENO as c_int, transmute(&mode[0]));
-        let path = "reversecomplement-input.txt";
-        let stdin = fopen(transmute(&path[0]), transmute(&mode[0]));
-        let mode = "w";
-        let stdout = fdopen(STDOUT_FILENO as c_int, transmute(&mode[0]));
-
-        let mut out: ~[u8] = ~[];
-        out.reserve(12777888);
-        let mut pos = 0;
-
-        loop {
-            let needed = pos + (LINE_LEN as uint) + 1;
-            if out.capacity() < needed {
-                out.reserve_at_least(needed);
-            }
-
-            let mut ptr = out.unsafe_mut_ref(pos);
-            if fgets(transmute(ptr), LINE_LEN as c_int, stdin) == null() {
-                break;
-            }
-
-            // Don't change lines that begin with '>' or ';'.
-            let first = *ptr;
-            if first == ('>' as u8) {
-                while *ptr != 0 {
-                    ptr = ptr.offset(1);
-                }
-                *ptr = '\n' as u8;
-
-                pos = (ptr as uint) - (out.unsafe_ref(0) as uint);
-                fwrite(transmute(out.unsafe_ref(0)),
-                       1,
-                       pos as size_t,
-                       stdout);
-
-                pos = 0;
-                continue;
+    let complements = make_complements(); // byte -> complement lookup table
+    let mut data = if std::os::getenv("RUST_BENCH").is_some() { // RUST_BENCH set: read the data file
+        File::open(&Path::init("shootout-k-nucleotide.data")).read_to_end()
+    } else {
+        stdin().read_to_end()
+    };
+
+    for seq in data.mut_split(|c| *c == '>' as u8) { // one chunk per '>' record, edited in place
+        // skip the header line (up to the first \n) and the chunk's last byte (\n)
+        let begin = match seq.iter().position(|c| *c == '\n' as u8) {
+            None => continue, // chunk without any \n (e.g. an empty one): nothing to transform
+            Some(c) => c
+        };
+        let len = seq.len();
+        let seq = seq.mut_slice(begin + 1, len - 1);
+
+        // arrange line breaks: move each \n left by `off` (assumes LINE_LEN-wide input lines)
+        let len = seq.len();
+        let off = LINE_LEN - len % (LINE_LEN + 1); // LINE_LEN minus the length of the last line
+        for i in range_step(LINE_LEN, len, LINE_LEN + 1) {
+            for j in std::iter::count(i, -1).take(off) {
+                seq[j] = seq[j - 1];
             }
+            seq[i - off] = '\n' as u8;
+        }
 
-            // Complement other lines.
-            loop {
-                let ch = *ptr;
-                if ch == 0 {
-                    break;
+        // reverse complement in place, walking inwards from both ends; same result as the slower
+        //    seq.reverse(); for c in seq.mut_iter() {*c = complements[*c]}
+        let mut it = seq.mut_iter();
+        loop {
+            match (it.next(), it.next_back()) {
+                (Some(front), Some(back)) => {
+                    let tmp = complements[*front];
+                    *front = complements[*back];
+                    *back = tmp;
                 }
-                *ptr = COMPLEMENTS.unsafe_get(ch as uint);
-                ptr = ptr.offset(1);
+                (Some(mid), None) => *mid = complements[*mid], // odd length: lone middle byte
+                _ => break // vector exhausted.
             }
-            *ptr = '\n' as u8;
-
-            pos = (ptr as uint) - (out.unsafe_ref(0) as uint);
         }
-
-        fwrite(transmute(out.unsafe_ref(0)), 1, pos as size_t, stdout);
     }
+
+    stdout().write(data); // headers were never touched; sequences were rewritten in place
 }