Skip to content

Commit 67692c1

Browse files
committed
lib: Reimplement CacheLeaf mechanics using read_to_string()
This is because BufReader/BufRead don't give us the interface we actually need: their lines() iterators emit Some(Err()) repeatedly when given a directory as the underlying file handle. Consequently, the iterator never terminates, and collect() then becomes an efficient implementation of a memory exhauster. Bug: rust-lang/rust#64144
1 parent 90c1fc0 commit 67692c1

File tree

1 file changed

+17
-29
lines changed

1 file changed

+17
-29
lines changed

src/lib.rs

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,7 @@ impl Default for CacheLeaf {
5555
}
5656
}
5757

58-
use std::{
59-
fs::File,
60-
io::{BufRead, BufReader},
61-
path::PathBuf,
62-
};
58+
use std::{fs::File, io::Read, path::PathBuf};
6359

6460
impl CacheLeaf {
6561
/// Construct a [CacheLeaf] by reading a specified input file
@@ -71,7 +67,7 @@ impl CacheLeaf {
7167
/// ```
7268
pub fn read_file(f: PathBuf) -> Result<Self, ErrorKind> {
7369
let mut me: Self = Default::default();
74-
let my_file = File::open(&f)?;
70+
let mut my_file = File::open(&f)?;
7571

7672
// This is a clusterfuck really, the internal .modified takes a lot of
7773
// mangling to get the internal unix-time value out of the metadata,
@@ -87,18 +83,17 @@ impl CacheLeaf {
8783
0,
8884
);
8985

90-
// Note the default of 8k for BufReader is excessive for us, as it
91-
// accounts for 8/9ths of the overall heap size, which is
92-
// silly when you consider the file we're reading is typically
93-
// under 200 *bytes*, and all lines are under *21* bytes each,
94-
// and the whole point of using BufReader is to get the lines()
95-
// abstraction.
86+
// We would use a BufReader here, but that cocks up amazingly when
87+
// some idiot passes a directory as the PathBuf, and BufReader
88+
// repeatedly invokes File::read() which repeatedly returns an
89+
// Err(), and as an Err() is a Some(Err()), not a None, it
90+
// doesn't end the iteration, and so on the next iteration ... it
91+
// calls read() again, gets the same result, and subsequently
92+
// iterates forever doing nothing.
9693
//
97-
// With an 8k buffer we're twice as bad as the native ccache
98-
// implementation for heap usage, with a 100byte buffer, we're
99-
// 1/5th of the native ccache's heap use :)
100-
let buf = BufReader::with_capacity(100, my_file);
101-
94+
// We have a <1k file, who cares!?
95+
let mut buf = String::new();
96+
my_file.read_to_string(&mut buf)?;
10297
// We collect all lines verbatim, and then use the FIELD_DATA_ORDER
10398
// array to pick values out of it. That way if there are lines in the
10499
// input source that we haven't coded behaviour for yet, it won't
@@ -107,25 +102,18 @@ impl CacheLeaf {
107102
// The input source having fewer items than FIELD_DATA_ORDER is gated
108103
// by the field_addr <= last_line control, so too-few lines will
109104
// result in just a bunch of 0 entries in the dataset.
110-
let lines: Vec<std::io::Result<String>> = buf.lines().collect();
105+
let lines: Vec<&str> = buf.lines().collect();
111106
let last_line = lines.len() - 1;
112107

113108
for field in FIELD_DATA_ORDER {
114109
let field_addr: usize = field.as_usize();
115110
if field_addr <= last_line {
116-
if let Ok(line) = &lines[field_addr] {
117-
if let Ok(v) = line.parse::<u64>() {
118-
me.fields.set_field(*field, v);
119-
} else {
120-
unimplemented!(
121-
"Line {} in {:?} did not parse as u64",
122-
field_addr,
123-
f
124-
);
125-
}
111+
let line = &lines[field_addr];
112+
if let Ok(v) = line.parse::<u64>() {
113+
me.fields.set_field(*field, v);
126114
} else {
127115
unimplemented!(
128-
"Line {} in {:?} did not read correctly",
116+
"Line {} in {:?} did not parse as u64",
129117
field_addr,
130118
f
131119
);

0 commit comments

Comments
 (0)