Skip to content

Commit 45dc58f

Browse files
Merge pull request #373 from Mark-Simulacrum/s3
Utilize S3 as the backing file store
2 parents 9f9fa7c + 46e5331 commit 45dc58f

File tree

11 files changed

+372
-112
lines changed

11 files changed

+372
-112
lines changed

Cargo.lock

+238-25
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ html5ever = "0.22"
3232
cargo = { git = "https://github.com/rust-lang/cargo.git" }
3333
schemamama = "0.3"
3434
schemamama_postgres = "0.2"
35+
rusoto_s3 = "0.40"
36+
rusoto_core = "0.40"
37+
rusoto_credential = "0.40"
3538

3639

3740
# iron dependencies

src/db/file.rs

+116-46
Original file line numberDiff line numberDiff line change
@@ -5,62 +5,111 @@
55
//! filesystem. This module adds files to the database and retrieves them.
66
77

8-
use std::path::Path;
8+
use std::path::{PathBuf, Path};
99
use postgres::Connection;
1010
use rustc_serialize::json::{Json, ToJson};
11-
use std::fs::File;
11+
use std::fs;
1212
use std::io::Read;
1313
use error::Result;
1414
use failure::err_msg;
15-
16-
17-
fn file_path(prefix: &str, name: &str) -> String {
18-
match prefix.is_empty() {
19-
true => name.to_owned(),
20-
false => format!("{}/{}", prefix, name),
21-
}
22-
}
15+
use rusoto_s3::{S3, PutObjectRequest, GetObjectRequest, S3Client};
16+
use rusoto_core::region::Region;
17+
use rusoto_credential::EnvironmentProvider;
2318

2419

2520
fn get_file_list_from_dir<P: AsRef<Path>>(path: P,
26-
prefix: &str,
27-
files: &mut Vec<String>)
21+
files: &mut Vec<PathBuf>)
2822
-> Result<()> {
2923
let path = path.as_ref();
3024

3125
for file in try!(path.read_dir()) {
3226
let file = try!(file);
3327

3428
if try!(file.file_type()).is_file() {
35-
file.file_name().to_str().map(|name| files.push(file_path(prefix, name)));
29+
files.push(file.path());
3630
} else if try!(file.file_type()).is_dir() {
37-
file.file_name()
38-
.to_str()
39-
.map(|name| get_file_list_from_dir(file.path(), &file_path(prefix, name), files));
31+
try!(get_file_list_from_dir(file.path(), files));
4032
}
4133
}
4234

4335
Ok(())
4436
}
4537

4638

47-
pub fn get_file_list<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
39+
pub fn get_file_list<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
4840
let path = path.as_ref();
49-
let mut files: Vec<String> = Vec::new();
41+
let mut files = Vec::new();
5042

5143
if !path.exists() {
5244
return Err(err_msg("File not found"));
5345
} else if path.is_file() {
54-
path.file_name()
55-
.and_then(|name| name.to_str())
56-
.map(|name| files.push(format!("{}", name)));
46+
files.push(PathBuf::from(path.file_name().unwrap()));
5747
} else if path.is_dir() {
58-
try!(get_file_list_from_dir(path, "", &mut files));
48+
try!(get_file_list_from_dir(path, &mut files));
49+
for file_path in &mut files {
50+
// We want the paths in this list to not be {path}/bar.txt but just bar.txt
51+
*file_path = PathBuf::from(file_path.strip_prefix(path).unwrap());
52+
}
5953
}
6054

6155
Ok(files)
6256
}
6357

58+
pub struct Blob {
59+
pub path: String,
60+
pub mime: String,
61+
pub date_updated: time::Timespec,
62+
pub content: Vec<u8>,
63+
}
64+
65+
pub fn get_path(conn: &Connection, path: &str) -> Option<Blob> {
66+
let rows = conn.query("SELECT path, mime, date_updated, content
67+
FROM files
68+
WHERE path = $1", &[&path]).unwrap();
69+
70+
if rows.len() == 0 {
71+
None
72+
} else {
73+
let row = rows.get(0);
74+
let mut content = row.get(3);
75+
if content == b"in-s3" {
76+
let client = s3_client();
77+
content = client.and_then(|c| c.get_object(GetObjectRequest {
78+
bucket: "rust-docs-rs".into(),
79+
key: path.into(),
80+
..Default::default()
81+
}).sync().ok()).and_then(|r| r.body).map(|b| {
82+
let mut b = b.into_blocking_read();
83+
let mut content = Vec::new();
84+
b.read_to_end(&mut content).unwrap();
85+
content
86+
}).unwrap();
87+
};
88+
89+
Some(Blob {
90+
path: row.get(0),
91+
mime: row.get(1),
92+
date_updated: row.get(2),
93+
content,
94+
})
95+
}
96+
}
97+
98+
fn s3_client() -> Option<S3Client> {
99+
// If AWS keys aren't configured, then presume we should use the DB exclusively
100+
// for file storage.
101+
if std::env::var_os("AWS_ACCESS_KEY_ID").is_none() {
102+
return None;
103+
}
104+
Some(S3Client::new_with(
105+
rusoto_core::request::HttpClient::new().unwrap(),
106+
EnvironmentProvider::default(),
107+
std::env::var("S3_ENDPOINT").ok().map(|e| Region::Custom {
108+
name: "us-west-1".to_owned(),
109+
endpoint: e,
110+
}).unwrap_or(Region::UsWest1),
111+
))
112+
}
64113

65114
/// Adds files to the database and returns a list of the files with their MIME types as JSON
66115
pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
@@ -72,30 +121,34 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
72121
try!(cookie.load::<&str>(&[]));
73122

74123
let trans = try!(conn.transaction());
124+
let client = s3_client();
125+
let mut file_list_with_mimes: Vec<(String, PathBuf)> = Vec::new();
75126

76-
let mut file_list_with_mimes: Vec<(String, String)> = Vec::new();
77-
78-
for file_path_str in try!(get_file_list(&path)) {
127+
for file_path in try!(get_file_list(&path)) {
79128
let (path, content, mime) = {
80-
let path = Path::new(path.as_ref()).join(&file_path_str);
129+
let path = Path::new(path.as_ref()).join(&file_path);
81130
// Some files have insufficient permissions (like .lock file created by cargo in
82131
// documentation directory). We are skipping these files.
83-
let mut file = match File::open(path) {
132+
let mut file = match fs::File::open(path) {
84133
Ok(f) => f,
85134
Err(_) => continue,
86135
};
87136
let mut content: Vec<u8> = Vec::new();
88137
try!(file.read_to_end(&mut content));
138+
let bucket_path = Path::new(prefix).join(&file_path)
139+
.into_os_string().into_string().unwrap();
140+
89141
let mime = {
90142
let mime = try!(cookie.buffer(&content));
91143
// CSS files cause problems in browsers:
92144
// magic will return text/plain for css file types
93145
// convert them to text/css
94146
// do the same for javascript files
95147
if mime == "text/plain" {
96-
if file_path_str.ends_with(".css") {
148+
let e = file_path.extension().unwrap_or_default();
149+
if e == "css" {
97150
"text/css".to_owned()
98-
} else if file_path_str.ends_with(".js") {
151+
} else if e == "js" {
99152
"application/javascript".to_owned()
100153
} else {
101154
mime.to_owned()
@@ -105,14 +158,42 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
105158
}
106159
};
107160

108-
file_list_with_mimes.push((mime.clone(), file_path_str.clone()));
161+
let content: Option<Vec<u8>> = if let Some(client) = &client {
162+
let s3_res = client.put_object(PutObjectRequest {
163+
acl: Some("public-read".into()),
164+
bucket: "rust-docs-rs".into(),
165+
key: bucket_path.clone(),
166+
body: Some(content.clone().into()),
167+
content_type: Some(mime.clone()),
168+
..Default::default()
169+
}).sync();
170+
match s3_res {
171+
// we've successfully uploaded the content, so steal it;
172+
// we don't want to put it in the DB
173+
Ok(_) => None,
174+
// Since s3 was configured, we want to panic on failure to upload.
175+
Err(e) => {
176+
panic!("failed to upload to {}: {:?}", bucket_path, e)
177+
},
178+
}
179+
} else {
180+
Some(content.clone().into())
181+
};
182+
183+
file_list_with_mimes.push((mime.clone(), file_path.clone()));
109184

110-
(file_path(prefix, &file_path_str), content, mime)
185+
(
186+
bucket_path,
187+
content,
188+
mime,
189+
)
111190
};
112191

113192
// check if file already exists in database
114193
let rows = try!(conn.query("SELECT COUNT(*) FROM files WHERE path = $1", &[&path]));
115194

195+
let content = content.unwrap_or_else(|| "in-s3".to_owned().into());
196+
116197
if rows.get(0).get::<usize, i64>(0) == 0 {
117198
try!(trans.query("INSERT INTO files (path, mime, content) VALUES ($1, $2, $3)",
118199
&[&path, &mime, &content]));
@@ -130,14 +211,14 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
130211

131212

132213

133-
fn file_list_to_json(file_list: Vec<(String, String)>) -> Result<Json> {
214+
fn file_list_to_json(file_list: Vec<(String, PathBuf)>) -> Result<Json> {
134215

135216
let mut file_list_json: Vec<Json> = Vec::new();
136217

137218
for file in file_list {
138219
let mut v: Vec<String> = Vec::new();
139220
v.push(file.0.clone());
140-
v.push(file.1.clone());
221+
v.push(file.1.into_os_string().into_string().unwrap());
141222
file_list_json.push(v.to_json());
142223
}
143224

@@ -150,8 +231,7 @@ fn file_list_to_json(file_list: Vec<(String, String)>) -> Result<Json> {
150231
mod test {
151232
extern crate env_logger;
152233
use std::env;
153-
use super::{get_file_list, add_path_into_database};
154-
use super::super::connect_db;
234+
use super::get_file_list;
155235

156236
#[test]
157237
fn test_get_file_list() {
@@ -162,16 +242,6 @@ mod test {
162242
assert!(files.unwrap().len() > 0);
163243

164244
let files = get_file_list(env::current_dir().unwrap().join("Cargo.toml")).unwrap();
165-
assert_eq!(files[0], "Cargo.toml");
166-
}
167-
168-
#[test]
169-
#[ignore]
170-
fn test_add_path_into_database() {
171-
let _ = env_logger::try_init();
172-
173-
let conn = connect_db().unwrap();
174-
let res = add_path_into_database(&conn, "example", env::current_dir().unwrap().join("src"));
175-
assert!(res.is_ok());
245+
assert_eq!(files[0], std::path::Path::new("Cargo.toml"));
176246
}
177247
}

src/db/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use r2d2;
1212
use r2d2_postgres;
1313

1414
mod add_package;
15-
mod file;
15+
pub mod file;
1616
mod migrate;
1717

1818

src/docbuilder/chroot_builder.rs

-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ impl DocBuilder {
244244
/// Remove documentation, build directory and sources directory of a package
245245
fn clean(&self, package: &Package) -> Result<()> {
246246
debug!("Cleaning package");
247-
use std::fs::remove_dir_all;
248247
let documentation_path = PathBuf::from(&self.options.destination)
249248
.join(package.manifest().name().as_str());
250249
let source_path = source_path(&package).unwrap();

src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ extern crate toml;
3131
extern crate html5ever;
3232
extern crate schemamama;
3333
extern crate schemamama_postgres;
34+
extern crate rusoto_s3;
35+
extern crate rusoto_core;
36+
extern crate rusoto_credential;
3437

3538
pub use self::docbuilder::DocBuilder;
3639
pub use self::docbuilder::ChrootBuilderResult;

src/web/file.rs

+7-31
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,35 @@ use time;
55
use postgres::Connection;
66
use iron::{Handler, Request, IronResult, Response, IronError};
77
use iron::status;
8+
use crate::db;
89

910

10-
pub struct File {
11-
pub path: String,
12-
pub mime: String,
13-
pub date_added: time::Timespec,
14-
pub date_updated: time::Timespec,
15-
pub content: Vec<u8>,
16-
}
17-
11+
pub struct File(pub db::file::Blob);
1812

1913
impl File {
2014
/// Gets file from database
2115
pub fn from_path(conn: &Connection, path: &str) -> Option<File> {
22-
23-
let rows = conn.query("SELECT path, mime, date_added, date_updated, content
24-
FROM files
25-
WHERE path = $1",
26-
&[&path])
27-
.unwrap();
28-
29-
if rows.len() == 0 {
30-
None
31-
} else {
32-
let row = rows.get(0);
33-
Some(File {
34-
path: row.get(0),
35-
mime: row.get(1),
36-
date_added: row.get(2),
37-
date_updated: row.get(3),
38-
content: row.get(4),
39-
})
40-
}
16+
Some(File(db::file::get_path(conn, path)?))
4117
}
4218

4319

4420
/// Consumes File and creates an iron response
4521
pub fn serve(self) -> Response {
4622
use iron::headers::{CacheControl, LastModified, CacheDirective, HttpDate, ContentType};
4723

48-
let mut response = Response::with((status::Ok, self.content));
24+
let mut response = Response::with((status::Ok, self.0.content));
4925
let cache = vec![CacheDirective::Public,
5026
CacheDirective::MaxAge(super::STATIC_FILE_CACHE_DURATION as u32)];
51-
response.headers.set(ContentType(self.mime.parse().unwrap()));
27+
response.headers.set(ContentType(self.0.mime.parse().unwrap()));
5228
response.headers.set(CacheControl(cache));
53-
response.headers.set(LastModified(HttpDate(time::at(self.date_updated))));
29+
response.headers.set(LastModified(HttpDate(time::at(self.0.date_updated))));
5430
response
5531
}
5632

5733

5834
/// Checks if mime type of file is "application/x-empty"
5935
pub fn is_empty(&self) -> bool {
60-
self.mime == "application/x-empty"
36+
self.0.mime == "application/x-empty"
6137
}
6238
}
6339

src/web/mod.rs

-2
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,6 @@ fn opensearch_xml_handler(_: &mut Request) -> IronResult<Response> {
504504
}
505505

506506
fn ico_handler(req: &mut Request) -> IronResult<Response> {
507-
use iron::Url;
508-
509507
if let Some(&"favicon.ico") = req.url.path().last() {
510508
// if we're looking for exactly "favicon.ico", we need to defer to the handler that loads
511509
// from `public_html`, so return a 404 here to make the main handler carry on

src/web/releases.rs

-2
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,6 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {
476476

477477
let mut resp = Response::with((status::Found, Redirect(url)));
478478
use iron::headers::{Expires, HttpDate};
479-
use time;
480479
resp.headers.set(Expires(HttpDate(time::now())));
481480
return Ok(resp);
482481
}
@@ -515,7 +514,6 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {
515514
let mut resp = Response::with((status::Found, Redirect(url)));
516515

517516
use iron::headers::{Expires, HttpDate};
518-
use time;
519517
resp.headers.set(Expires(HttpDate(time::now())));
520518
return Ok(resp);
521519
}

0 commit comments

Comments
 (0)