Utilize S3 as the backing file store #373

Merged: 8 commits, Jul 12, 2019

263 changes: 238 additions & 25 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
@@ -32,6 +32,9 @@ html5ever = "0.22"
cargo = { git = "https://github.com/rust-lang/cargo.git" }
schemamama = "0.3"
schemamama_postgres = "0.2"
rusoto_s3 = "0.40"
rusoto_core = "0.40"
rusoto_credential = "0.40"


# iron dependencies
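The three new crates divide the work: `rusoto_core` carries the HTTP dispatcher and `Region` type, `rusoto_credential` the credential providers, and `rusoto_s3` the generated S3 API. Below is a minimal sketch of how they compose under the 0.40 sync API; the bucket and key are placeholders, and credentials are assumed to come from the environment rather than from any config this PR defines.

```rust
extern crate rusoto_core;
extern crate rusoto_credential;
extern crate rusoto_s3;

use rusoto_core::request::HttpClient;
use rusoto_core::Region;
use rusoto_credential::EnvironmentProvider;
use rusoto_s3::{HeadObjectRequest, S3, S3Client};

fn main() {
    // rusoto_core supplies the HTTP dispatcher and Region, rusoto_credential
    // the key lookup, rusoto_s3 the typed S3 operations.
    let client = S3Client::new_with(
        HttpClient::new().expect("failed to build TLS client"),
        EnvironmentProvider::default(),
        Region::UsWest1,
    );

    // rusoto 0.40 operations return futures; `.sync()` is the blocking
    // adapter used throughout this PR.
    let found = client
        .head_object(HeadObjectRequest {
            bucket: "example-bucket".into(), // placeholder bucket
            key: "index.html".into(),        // placeholder key
            ..Default::default()
        })
        .sync()
        .is_ok();
    println!("object present: {}", found);
}
```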
162 changes: 116 additions & 46 deletions src/db/file.rs
@@ -5,62 +5,111 @@
//! filesystem. This module is adding files into database and retrieving them.


use std::path::Path;
use std::path::{PathBuf, Path};
use postgres::Connection;
use rustc_serialize::json::{Json, ToJson};
use std::fs::File;
use std::fs;
use std::io::Read;
use error::Result;
use failure::err_msg;


fn file_path(prefix: &str, name: &str) -> String {
match prefix.is_empty() {
true => name.to_owned(),
false => format!("{}/{}", prefix, name),
}
}
use rusoto_s3::{S3, PutObjectRequest, GetObjectRequest, S3Client};
use rusoto_core::region::Region;
use rusoto_credential::EnvironmentProvider;


fn get_file_list_from_dir<P: AsRef<Path>>(path: P,
prefix: &str,
files: &mut Vec<String>)
files: &mut Vec<PathBuf>)
-> Result<()> {
let path = path.as_ref();

for file in try!(path.read_dir()) {
let file = try!(file);

if try!(file.file_type()).is_file() {
file.file_name().to_str().map(|name| files.push(file_path(prefix, name)));
files.push(file.path());
} else if try!(file.file_type()).is_dir() {
file.file_name()
.to_str()
.map(|name| get_file_list_from_dir(file.path(), &file_path(prefix, name), files));
try!(get_file_list_from_dir(file.path(), files));
}
}

Ok(())
}


pub fn get_file_list<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
pub fn get_file_list<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
let path = path.as_ref();
let mut files: Vec<String> = Vec::new();
let mut files = Vec::new();

if !path.exists() {
return Err(err_msg("File not found"));
} else if path.is_file() {
path.file_name()
.and_then(|name| name.to_str())
.map(|name| files.push(format!("{}", name)));
files.push(PathBuf::from(path.file_name().unwrap()));
} else if path.is_dir() {
try!(get_file_list_from_dir(path, "", &mut files));
try!(get_file_list_from_dir(path, &mut files));
for file_path in &mut files {
// We want the paths in this list to not be {path}/bar.txt but just bar.txt
*file_path = PathBuf::from(file_path.strip_prefix(path).unwrap());
}
}

Ok(files)
}
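`get_file_list` now collects full `PathBuf`s while walking and then strips the root, so callers see paths relative to the directory they passed in (`css/style.css` rather than `{path}/css/style.css`). A small sketch of the same `strip_prefix` pattern, with hypothetical directory names:

```rust
use std::path::{Path, PathBuf};

fn main() {
    // A file discovered at {root}/css/style.css is reported as css/style.css.
    let root = Path::new("/opt/docs/build");
    let discovered = PathBuf::from("/opt/docs/build/css/style.css");

    let relative = discovered
        .strip_prefix(root)
        .expect("discovered paths are always under the walked root");
    assert_eq!(relative, Path::new("css/style.css"));
}
```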

pub struct Blob {
pub path: String,
pub mime: String,
pub date_updated: time::Timespec,
pub content: Vec<u8>,
}

pub fn get_path(conn: &Connection, path: &str) -> Option<Blob> {
let rows = conn.query("SELECT path, mime, date_updated, content
FROM files
WHERE path = $1", &[&path]).unwrap();

if rows.len() == 0 {
None
} else {
let row = rows.get(0);
let mut content = row.get(3);
if content == b"in-s3" {
let client = s3_client();
content = client.and_then(|c| c.get_object(GetObjectRequest {
bucket: "rust-docs-rs".into(),
key: path.into(),
..Default::default()
}).sync().ok()).and_then(|r| r.body).map(|b| {
let mut b = b.into_blocking_read();
let mut content = Vec::new();
b.read_to_end(&mut content).unwrap();
content
}).unwrap();
};

Some(Blob {
path: row.get(0),
mime: row.get(1),
date_updated: row.get(2),
content,
})
}
}

fn s3_client() -> Option<S3Client> {
// If AWS keys aren't configured, then presume we should use the DB exclusively
// for file storage.
if std::env::var_os("AWS_ACCESS_KEY_ID").is_none() {
return None;
}
Some(S3Client::new_with(
rusoto_core::request::HttpClient::new().unwrap(),
EnvironmentProvider::default(),
std::env::var("S3_ENDPOINT").ok().map(|e| Region::Custom {
name: "us-west-1".to_owned(),
endpoint: e,
}).unwrap_or(Region::UsWest1),
))
}
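`s3_client` is driven entirely by the environment: with no `AWS_ACCESS_KEY_ID` it returns `None` and files keep going to Postgres, while `S3_ENDPOINT` redirects the client to a custom endpoint (handy for an S3-compatible store such as minio during development). A hypothetical test sketch of those two branches; the key values and endpoint URL are placeholders, the module is not part of the PR, and tests that mutate env vars like this would need to run single-threaded.

```rust
#[cfg(test)]
mod s3_client_env_tests {
    use std::env;
    use super::s3_client;

    #[test]
    fn no_access_key_means_database_only_storage() {
        // Without AWS keys, file contents stay in the files table.
        env::remove_var("AWS_ACCESS_KEY_ID");
        assert!(s3_client().is_none());
    }

    #[test]
    fn custom_endpoint_still_builds_a_client() {
        env::set_var("AWS_ACCESS_KEY_ID", "placeholder-access-key");
        env::set_var("AWS_SECRET_ACCESS_KEY", "placeholder-secret-key");
        // Point uploads at a local S3-compatible server instead of AWS.
        env::set_var("S3_ENDPOINT", "http://localhost:9000");
        assert!(s3_client().is_some());
    }
}
```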

/// Adds files into database and returns list of files with their mime type in Json
pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
@@ -72,30 +121,34 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
try!(cookie.load::<&str>(&[]));

let trans = try!(conn.transaction());
let client = s3_client();
let mut file_list_with_mimes: Vec<(String, PathBuf)> = Vec::new();

let mut file_list_with_mimes: Vec<(String, String)> = Vec::new();

for file_path_str in try!(get_file_list(&path)) {
for file_path in try!(get_file_list(&path)) {
let (path, content, mime) = {
let path = Path::new(path.as_ref()).join(&file_path_str);
let path = Path::new(path.as_ref()).join(&file_path);
// Some files have insufficient permissions (like .lock file created by cargo in
// documentation directory). We are skipping this files.
let mut file = match File::open(path) {
let mut file = match fs::File::open(path) {
Ok(f) => f,
Err(_) => continue,
};
let mut content: Vec<u8> = Vec::new();
try!(file.read_to_end(&mut content));
let bucket_path = Path::new(prefix).join(&file_path)
.into_os_string().into_string().unwrap();

let mime = {
let mime = try!(cookie.buffer(&content));
// css's are causing some problem in browsers
// magic will return text/plain for css file types
// convert them to text/css
// do the same for javascript files
if mime == "text/plain" {
if file_path_str.ends_with(".css") {
let e = file_path.extension().unwrap_or_default();
if e == "css" {
"text/css".to_owned()
} else if file_path_str.ends_with(".js") {
} else if e == "js" {
"application/javascript".to_owned()
} else {
mime.to_owned()
@@ -105,14 +158,42 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
}
};

file_list_with_mimes.push((mime.clone(), file_path_str.clone()));
let content: Option<Vec<u8>> = if let Some(client) = &client {
let s3_res = client.put_object(PutObjectRequest {
acl: Some("public-read".into()),
bucket: "rust-docs-rs".into(),
key: bucket_path.clone(),
body: Some(content.clone().into()),
content_type: Some(mime.clone()),
..Default::default()
}).sync();
match s3_res {
// we've successfully uploaded the content, so steal it;
// we don't want to put it in the DB
Ok(_) => None,
// Since s3 was configured, we want to panic on failure to upload.
Err(e) => {
panic!("failed to upload to {}: {:?}", bucket_path, e)
},
}
} else {
Some(content.clone().into())
};

file_list_with_mimes.push((mime.clone(), file_path.clone()));

(file_path(prefix, &file_path_str), content, mime)
(
bucket_path,
content,
mime,
)
};

// check if file already exists in database
let rows = try!(conn.query("SELECT COUNT(*) FROM files WHERE path = $1", &[&path]));

let content = content.unwrap_or_else(|| "in-s3".to_owned().into());

if rows.get(0).get::<usize, i64>(0) == 0 {
try!(trans.query("INSERT INTO files (path, mime, content) VALUES ($1, $2, $3)",
&[&path, &mime, &content]));
@@ -130,14 +211,14 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,



fn file_list_to_json(file_list: Vec<(String, String)>) -> Result<Json> {
fn file_list_to_json(file_list: Vec<(String, PathBuf)>) -> Result<Json> {

let mut file_list_json: Vec<Json> = Vec::new();

for file in file_list {
let mut v: Vec<String> = Vec::new();
v.push(file.0.clone());
v.push(file.1.clone());
v.push(file.1.into_os_string().into_string().unwrap());
file_list_json.push(v.to_json());
}

@@ -150,8 +231,7 @@ fn file_list_to_json(file_list: Vec<(String, String)>) -> Result<Json> {
mod test {
extern crate env_logger;
use std::env;
use super::{get_file_list, add_path_into_database};
use super::super::connect_db;
use super::get_file_list;

#[test]
fn test_get_file_list() {
@@ -162,16 +242,6 @@ mod test {
assert!(files.unwrap().len() > 0);

let files = get_file_list(env::current_dir().unwrap().join("Cargo.toml")).unwrap();
assert_eq!(files[0], "Cargo.toml");
}

#[test]
#[ignore]
fn test_add_path_into_database() {
let _ = env_logger::try_init();

let conn = connect_db().unwrap();
let res = add_path_into_database(&conn, "example", env::current_dir().unwrap().join("src"));
assert!(res.is_ok());
assert_eq!(files[0], std::path::Path::new("Cargo.toml"));
}
}
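Taken together, the write and read paths in this file agree on a small contract: when the upload succeeds, `add_path_into_database` stores the literal bytes `in-s3` in `files.content`, and `get_path` treats exactly those bytes as an instruction to fetch the real body from the `rust-docs-rs` bucket. A self-contained sketch of that round trip, with in-memory maps standing in for the `files` table and the bucket:

```rust
use std::collections::HashMap;

// The marker the write path stores in files.content when the body went to S3;
// the read path checks for exactly these bytes.
const S3_SENTINEL: &[u8] = b"in-s3";

fn main() {
    // Stand-ins for the Postgres `files` table and the S3 bucket.
    let mut files_table: HashMap<String, Vec<u8>> = HashMap::new();
    let mut bucket: HashMap<String, Vec<u8>> = HashMap::new();

    // Write path: the real bytes go to the bucket, only the sentinel goes to the table.
    let path = "example/index.html".to_string();
    let body = b"<html></html>".to_vec();
    bucket.insert(path.clone(), body);
    files_table.insert(path.clone(), S3_SENTINEL.to_vec());

    // Read path: the sentinel means the content lives in the bucket.
    let stored = files_table.get(&path).cloned().unwrap();
    let content = if stored.as_slice() == S3_SENTINEL {
        bucket.get(&path).cloned().unwrap()
    } else {
        stored
    };
    assert_eq!(content, b"<html></html>");
}
```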
2 changes: 1 addition & 1 deletion src/db/mod.rs
@@ -12,7 +12,7 @@ use r2d2;
use r2d2_postgres;

mod add_package;
mod file;
pub mod file;
mod migrate;


1 change: 0 additions & 1 deletion src/docbuilder/chroot_builder.rs
@@ -244,7 +244,6 @@ impl DocBuilder {
/// Remove documentation, build directory and sources directory of a package
fn clean(&self, package: &Package) -> Result<()> {
debug!("Cleaning package");
use std::fs::remove_dir_all;
let documentation_path = PathBuf::from(&self.options.destination)
.join(package.manifest().name().as_str());
let source_path = source_path(&package).unwrap();
3 changes: 3 additions & 0 deletions src/lib.rs
@@ -31,6 +31,9 @@ extern crate toml;
extern crate html5ever;
extern crate schemamama;
extern crate schemamama_postgres;
extern crate rusoto_s3;
extern crate rusoto_core;
extern crate rusoto_credential;

pub use self::docbuilder::DocBuilder;
pub use self::docbuilder::ChrootBuilderResult;
38 changes: 7 additions & 31 deletions src/web/file.rs
@@ -5,59 +5,35 @@ use time;
use postgres::Connection;
use iron::{Handler, Request, IronResult, Response, IronError};
use iron::status;
use crate::db;


pub struct File {
pub path: String,
pub mime: String,
pub date_added: time::Timespec,
pub date_updated: time::Timespec,
pub content: Vec<u8>,
}

pub struct File(pub db::file::Blob);

impl File {
/// Gets file from database
pub fn from_path(conn: &Connection, path: &str) -> Option<File> {

let rows = conn.query("SELECT path, mime, date_added, date_updated, content
FROM files
WHERE path = $1",
&[&path])
.unwrap();

if rows.len() == 0 {
None
} else {
let row = rows.get(0);
Some(File {
path: row.get(0),
mime: row.get(1),
date_added: row.get(2),
date_updated: row.get(3),
content: row.get(4),
})
}
Some(File(db::file::get_path(conn, path)?))
}


/// Consumes File and creates a iron response
pub fn serve(self) -> Response {
use iron::headers::{CacheControl, LastModified, CacheDirective, HttpDate, ContentType};

let mut response = Response::with((status::Ok, self.content));
let mut response = Response::with((status::Ok, self.0.content));
let cache = vec![CacheDirective::Public,
CacheDirective::MaxAge(super::STATIC_FILE_CACHE_DURATION as u32)];
response.headers.set(ContentType(self.mime.parse().unwrap()));
response.headers.set(ContentType(self.0.mime.parse().unwrap()));
response.headers.set(CacheControl(cache));
response.headers.set(LastModified(HttpDate(time::at(self.date_updated))));
response.headers.set(LastModified(HttpDate(time::at(self.0.date_updated))));
response
}


/// Checks if mime type of file is "application/x-empty"
pub fn is_empty(&self) -> bool {
self.mime == "application/x-empty"
self.0.mime == "application/x-empty"
}
}

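With `File` reduced to a newtype over `db::file::Blob`, handlers only decide whether to serve the blob or fall through. A hedged sketch of that pattern as it might sit next to this `File` type; how the connection and requested path are obtained is project-specific, and `serve_doc_file` is an illustrative name, not a handler defined in the PR.

```rust
use iron::{status, IronResult, Request, Response};
use postgres::Connection;

// Illustrative handler body: look the path up, serve it, or fall back to 404.
fn serve_doc_file(conn: &Connection, req: &Request) -> IronResult<Response> {
    let path = req.url.path().join("/");
    match File::from_path(conn, &path) {
        // `serve` sets Content-Type, Cache-Control and Last-Modified from the Blob.
        Some(file) if !file.is_empty() => Ok(file.serve()),
        // Missing row, or the "application/x-empty" placeholder mime: 404.
        _ => Ok(Response::with(status::NotFound)),
    }
}
```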
2 changes: 0 additions & 2 deletions src/web/mod.rs
@@ -504,8 +504,6 @@ fn opensearch_xml_handler(_: &mut Request) -> IronResult<Response> {
}

fn ico_handler(req: &mut Request) -> IronResult<Response> {
use iron::Url;

if let Some(&"favicon.ico") = req.url.path().last() {
// if we're looking for exactly "favicon.ico", we need to defer to the handler that loads
// from `public_html`, so return a 404 here to make the main handler carry on
2 changes: 0 additions & 2 deletions src/web/releases.rs
@@ -476,7 +476,6 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {

let mut resp = Response::with((status::Found, Redirect(url)));
use iron::headers::{Expires, HttpDate};
use time;
resp.headers.set(Expires(HttpDate(time::now())));
return Ok(resp);
}
@@ -515,7 +514,6 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {
let mut resp = Response::with((status::Found, Redirect(url)));

use iron::headers::{Expires, HttpDate};
use time;
resp.headers.set(Expires(HttpDate(time::now())));
return Ok(resp);
}