Skip to content

Commit bd5305f

Browse files
committed
auto merge of #10799 : TeXitoi/rust/shootout-reverse-complement-resurected, r=alexcrichton
This version is inspired by the best version in C by Mr Ledrug, but without the parallelisation.
2 parents 64bcfd2 + e76e83c commit bd5305f

File tree

1 file changed

+70
-142
lines changed

1 file changed

+70
-142
lines changed
+70 -142
Original file line number | Diff line number | Diff line change
@@ -1,151 +1,79 @@
1-
// xfail-pretty
2-
// xfail-test
3-
4-
use std::cast::transmute;
5-
use std::libc::{STDOUT_FILENO, c_int, fdopen, fgets, fopen, fputc, fwrite};
6-
use std::libc::{size_t};
7-
use std::ptr::null;
8-
9-
static LINE_LEN: u32 = 80;
10-
11-
static COMPLEMENTS: [u8, ..256] = [
12-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16-
17-
0,
18-
'T' as u8,
19-
'V' as u8,
20-
'G' as u8,
21-
'H' as u8,
22-
0,
23-
0,
24-
'C' as u8,
25-
'D' as u8,
26-
0,
27-
0,
28-
'M' as u8,
29-
0,
30-
'K' as u8,
31-
'N' as u8,
32-
0,
33-
0,
34-
0,
35-
'Y' as u8,
36-
'S' as u8,
37-
'A' as u8,
38-
'A' as u8,
39-
'B' as u8,
40-
'W' as u8,
41-
0,
42-
'R' as u8,
43-
0,
44-
0,
45-
0,
46-
0,
47-
0,
48-
0,
49-
50-
0,
51-
'T' as u8,
52-
'V' as u8,
53-
'G' as u8,
54-
'H' as u8,
55-
0,
56-
0,
57-
'C' as u8,
58-
'D' as u8,
59-
0,
60-
0,
61-
'M' as u8,
62-
0,
63-
'K' as u8,
64-
'N' as u8,
65-
0,
66-
0,
67-
0,
68-
'Y' as u8,
69-
'S' as u8,
70-
'A' as u8,
71-
'A' as u8,
72-
'B' as u8,
73-
'W' as u8,
74-
0,
75-
'R' as u8,
76-
0,
77-
0,
78-
0,
79-
0,
80-
0,
81-
0,
82-
83-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
91-
];
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use std::iter::range_step;
12+
use std::io::{stdin, stdout, File};
13+
14+
static LINE_LEN: uint = 60;
15+
16+
fn make_complements() -> [u8, ..256] {
17+
let transforms = [
18+
('A', 'T'), ('C', 'G'), ('G', 'C'), ('T', 'A'),
19+
('U', 'A'), ('M', 'K'), ('R', 'Y'), ('W', 'W'),
20+
('S', 'S'), ('Y', 'R'), ('K', 'M'), ('V', 'B'),
21+
('H', 'D'), ('D', 'H'), ('B', 'V'), ('N', 'N'),
22+
('\n', '\n')];
23+
let mut complements: [u8, ..256] = [0, ..256];
24+
for (i, c) in complements.mut_iter().enumerate() {
25+
*c = i as u8;
26+
}
27+
let lower = 'A' as u8 - 'a' as u8;
28+
for &(from, to) in transforms.iter() {
29+
complements[from as u8] = to as u8;
30+
complements[from as u8 - lower] = to as u8;
31+
}
32+
complements
33+
}
92 34

93 35
fn main() {
94-
unsafe {
95-
let mode = "r";
96-
//let stdin = fdopen(STDIN_FILENO as c_int, transmute(&mode[0]));
97-
let path = "reversecomplement-input.txt";
98-
let stdin = fopen(transmute(&path[0]), transmute(&mode[0]));
99-
let mode = "w";
100-
let stdout = fdopen(STDOUT_FILENO as c_int, transmute(&mode[0]));
101-
102-
let mut out: ~[u8] = ~[];
103-
out.reserve(12777888);
104-
let mut pos = 0;
105-
106-
loop {
107-
let needed = pos + (LINE_LEN as uint) + 1;
108-
if out.capacity() < needed {
109-
out.reserve_at_least(needed);
110-
}
111-
112-
let mut ptr = out.unsafe_mut_ref(pos);
113-
if fgets(transmute(ptr), LINE_LEN as c_int, stdin) == null() {
114-
break;
115-
}
116-
117-
// Don't change lines that begin with '>' or ';'.
118-
let first = *ptr;
119-
if first == ('>' as u8) {
120-
while *ptr != 0 {
121-
ptr = ptr.offset(1);
122-
}
123-
*ptr = '\n' as u8;
124-
125-
pos = (ptr as uint) - (out.unsafe_ref(0) as uint);
126-
fwrite(transmute(out.unsafe_ref(0)),
127-
1,
128-
pos as size_t,
129-
stdout);
130-
131-
pos = 0;
132-
continue;
36+
let complements = make_complements();
37+
let mut data = if std::os::getenv("RUST_BENCH").is_some() {
38+
File::open(&Path::init("shootout-k-nucleotide.data")).read_to_end()
39+
} else {
40+
stdin().read_to_end()
41+
};
42+
43+
for seq in data.mut_split(|c| *c == '>' as u8) {
44+
// skip header and last \n
45+
let begin = match seq.iter().position(|c| *c == '\n' as u8) {
46+
None => continue,
47+
Some(c) => c
48+
};
49+
let len = seq.len();
50+
let seq = seq.mut_slice(begin + 1, len - 1);
51+
52+
// arrange line breaks
53+
let len = seq.len();
54+
let off = LINE_LEN - len % (LINE_LEN + 1);
55+
for i in range_step(LINE_LEN, len, LINE_LEN + 1) {
56+
for j in std::iter::count(i, -1).take(off) {
57+
seq[j] = seq[j - 1];
133 58
}
59+
seq[i - off] = '\n' as u8;
60+
}
134 61

135-
// Complement other lines.
136-
loop {
137-
let ch = *ptr;
138-
if ch == 0 {
139-
break;
62+
// reverse complement, as
63+
// seq.reverse(); for c in seq.mut_iter() {*c = complements[*c]}
64+
// but faster:
65+
let mut it = seq.mut_iter();
66+
loop {
67+
match (it.next(), it.next_back()) {
68+
(Some(front), Some(back)) => {
69+
let tmp = complements[*front];
70+
*front = complements[*back];
71+
*back = tmp;
140 72
}
141-
*ptr = COMPLEMENTS.unsafe_get(ch as uint);
142-
ptr = ptr.offset(1);
73+
_ => break // vector exhausted.
143 74
}
144-
*ptr = '\n' as u8;
145-
146-
pos = (ptr as uint) - (out.unsafe_ref(0) as uint);
147 75
}
148-
149-
fwrite(transmute(out.unsafe_ref(0)), 1, pos as size_t, stdout);
150 76
}
77+
78+
stdout().write(data);
151 79
}

0 commit comments

Comments
 (0)