Add compression for uploaded documentation #780

Merged (15 commits) on Jun 11, 2020
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 5 additions & 0 deletions Cargo.toml
@@ -38,6 +38,7 @@ lazy_static = "1.0.0"
rustwide = "0.7.1"
mime_guess = "2"
dotenv = "0.15"
zstd = "0.5"

# Data serialization and deserialization
serde = { version = "1.0", features = ["derive"] }
@@ -84,6 +85,10 @@ rand = "0.7.3"
name = "html5ever"
harness = false

[[bench]]
name = "compression"
harness = false

[build-dependencies]
time = "0.1"
git2 = { version = "0.13", default-features = false }
19 changes: 19 additions & 0 deletions benches/compression.rs
@@ -0,0 +1,19 @@
use cratesfyi::storage::{compress, decompress};
use criterion::{black_box, criterion_group, criterion_main, Criterion};

pub fn criterion_benchmark(c: &mut Criterion) {
// this isn't a great benchmark because it only tests one file
// ideally we would build a whole crate and compress each file, taking the average
let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap();
let html_slice = html.as_bytes();
c.bench_function("compress regex html", |b| {
b.iter(|| compress(black_box(html_slice)))
});
let (compressed, alg) = compress(html_slice).unwrap();
c.bench_function("decompress regex html", |b| {
b.iter(|| decompress(black_box(compressed.as_slice()), alg))
});
}

criterion_group!(compression, criterion_benchmark);
criterion_main!(compression);
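
The compress and decompress functions exercised here live in src/storage and are not part of the rendered diff. A minimal sketch consistent with the call sites above, assuming the zstd crate pinned in Cargo.toml and using std::io::Error in place of the crate's real error type (both assumptions):

    use std::io;

    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub enum CompressionAlgorithm {
        Zstd = 0,
    }

    pub fn compress(content: &[u8]) -> io::Result<(Vec<u8>, CompressionAlgorithm)> {
        // &[u8] implements Read, so it can feed the streaming encoder directly;
        // the compression level is an arbitrary placeholder here
        let data = zstd::encode_all(content, 9)?;
        Ok((data, CompressionAlgorithm::Zstd))
    }

    pub fn decompress(content: &[u8], algorithm: CompressionAlgorithm) -> io::Result<Vec<u8>> {
        match algorithm {
            CompressionAlgorithm::Zstd => zstd::decode_all(content),
        }
    }

Because the [[bench]] target sets harness = false, criterion supplies the main function, and cargo bench --bench compression runs this benchmark on its own.
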
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -34,7 +34,7 @@ services:
image: minio/minio
entrypoint: >
/bin/sh -c "
mkdir /data/rust-docs-rs;
mkdir -p /data/rust-docs-rs;
minio server /data;
"
ports:
19 changes: 19 additions & 0 deletions src/db/add_package.rs
@@ -8,6 +8,7 @@ use crate::{
docbuilder::BuildResult,
error::Result,
index::api::{CrateOwner, RegistryCrateData},
storage::CompressionAlgorithm,
utils::MetadataPackage,
};
use log::debug;
@@ -34,6 +35,7 @@ pub(crate) fn add_package_into_database(
cratesio_data: &RegistryCrateData,
has_docs: bool,
has_examples: bool,
compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
) -> Result<i32> {
debug!("Adding package into database");
let crate_id = initialize_package_in_database(&conn, metadata_pkg)?;
@@ -116,6 +118,7 @@ pub(crate) fn add_package_into_database(
add_keywords_into_database(&conn, &metadata_pkg, release_id)?;
add_authors_into_database(&conn, &metadata_pkg, release_id)?;
add_owners_into_database(&conn, &cratesio_data.owners, crate_id)?;
add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?;

// Update the crates table with the new release
conn.execute(
@@ -352,3 +355,19 @@ fn add_owners_into_database(conn: &Connection, owners: &[CrateOwner], crate_id:
}
Ok(())
}

/// Add the compression algorithms used for this crate to the database
fn add_compression_into_database<I>(conn: &Connection, algorithms: I, release_id: i32) -> Result<()>
where
I: Iterator<Item = CompressionAlgorithm>,
{
let sql = "
INSERT INTO compression_rels (release, algorithm)
VALUES ($1, $2)
ON CONFLICT DO NOTHING;";
let prepared = conn.prepare_cached(sql)?;
for alg in algorithms {
prepared.execute(&[&release_id, &(alg as i32)])?;
}
Ok(())
}
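
For illustration, a hypothetical call site inside the same module, assuming Zstd is the only algorithm in use so far (the enum itself is defined in src/storage, outside this diff). The caller's HashSet already deduplicates, and ON CONFLICT DO NOTHING additionally makes re-running a build idempotent against the UNIQUE(release, algorithm) constraint added by the migration below:

    use std::collections::HashSet;

    fn record_release_compression(conn: &Connection, release_id: i32) -> Result<()> {
        let mut algorithms = HashSet::new();
        algorithms.insert(CompressionAlgorithm::Zstd);
        // prepare_cached plans the INSERT once per connection even though
        // add_compression_into_database executes it once per algorithm
        add_compression_into_database(conn, algorithms.into_iter(), release_id)
    }
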
11 changes: 7 additions & 4 deletions src/db/file.rs
@@ -5,7 +5,7 @@
//! filesystem. This module is adding files into database and retrieving them.

use crate::error::Result;
use crate::storage::Storage;
use crate::storage::{CompressionAlgorithms, Storage};
use postgres::Connection;

use serde_json::Value;
@@ -30,10 +30,13 @@ pub fn add_path_into_database<P: AsRef<Path>>(
conn: &Connection,
prefix: &str,
path: P,
) -> Result<Value> {
) -> Result<(Value, CompressionAlgorithms)> {
let mut backend = Storage::new(conn);
let file_list = backend.store_all(conn, prefix, path.as_ref())?;
file_list_to_json(file_list.into_iter().collect())
let (file_list, algorithms) = backend.store_all(conn, prefix, path.as_ref())?;
Ok((
file_list_to_json(file_list.into_iter().collect())?,
algorithms,
))
}

fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Result<Value> {
24 changes: 24 additions & 0 deletions src/db/migrate.rs
@@ -340,6 +340,30 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<(
ADD COLUMN content tsvector,
ADD COLUMN versions JSON DEFAULT '[]';"
),
migration!(
context,
// version
14,
// description
"Add compression",
// upgrade query
"
-- NULL indicates the file was not compressed.
-- There is no meaning assigned to the compression id in the database itself,
-- it is instead interpreted by the application.
ALTER TABLE files ADD COLUMN compression INT;
-- many-to-many table between releases and compression algorithms
-- stores the set of all compression algorithms used in the release files
CREATE TABLE compression_rels (
release INT NOT NULL REFERENCES releases(id),
algorithm INT,
-- make sure we don't store duplicates by accident
UNIQUE(release, algorithm)
);",
// downgrade query
"DROP TABLE compression_rels;
ALTER TABLE files DROP COLUMN compression;"
),
];

for migration in migrations {
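
The migration comment notes that the stored integer is interpreted by the application; the get() path in src/storage/database.rs (below) converts it back with try_into. A minimal sketch of the mapping that implies, assuming a single Zstd variant with discriminant 0 (the real enum is not shown in this diff):

    use std::convert::TryFrom;

    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum CompressionAlgorithm {
        Zstd = 0,
    }

    impl TryFrom<i32> for CompressionAlgorithm {
        type Error = i32;

        fn try_from(value: i32) -> Result<Self, Self::Error> {
            match value {
                0 => Ok(CompressionAlgorithm::Zstd),
                // an unknown id surfaces as the expect() panic in database.rs
                other => Err(other),
            }
        }
    }
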
23 changes: 13 additions & 10 deletions src/docbuilder/rustwide_builder.rs
@@ -6,6 +6,7 @@ use crate::db::{add_build_into_database, add_package_into_database, connect_db};
use crate::docbuilder::{crates::crates_from_path, Limits};
use crate::error::Result;
use crate::index::api::RegistryCrateData;
use crate::storage::CompressionAlgorithms;
use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata};
use failure::ResultExt;
use log::{debug, info, warn, LevelFilter};
@@ -333,6 +334,7 @@ impl RustwideBuilder {

let mut files_list = None;
let mut has_docs = false;
let mut algs = CompressionAlgorithms::default();
let mut successful_targets = Vec::new();
let metadata = Metadata::from_source_dir(&build.host_source_dir())?;
let BuildTargets {
@@ -345,11 +347,10 @@ impl RustwideBuilder {
if res.result.successful {
debug!("adding sources into database");
let prefix = format!("sources/{}/{}", name, version);
files_list = Some(add_path_into_database(
&conn,
&prefix,
build.host_source_dir(),
)?);
let (files, new_algs) =
add_path_into_database(&conn, &prefix, build.host_source_dir())?;
files_list = Some(files);
algs.extend(new_algs);

if let Some(name) = res.cargo_metadata.root().library_name() {
let host_target = build.host_target_dir();
@@ -376,8 +377,9 @@ impl RustwideBuilder {
&metadata,
)?;
}
self.upload_docs(&conn, name, version, local_storage.path())?;
}
let new_algs = self.upload_docs(&conn, name, version, local_storage.path())?;
algs.extend(new_algs);
};

let has_examples = build.host_source_dir().join("examples").is_dir();
if res.result.successful {
@@ -398,6 +400,7 @@ impl RustwideBuilder {
&RegistryCrateData::get_from_network(res.cargo_metadata.root())?,
has_docs,
has_examples,
algs,
)?;
add_build_into_database(&conn, release_id, &res.result)?;

@@ -572,14 +575,14 @@ impl RustwideBuilder {
name: &str,
version: &str,
local_storage: &Path,
) -> Result<()> {
) -> Result<CompressionAlgorithms> {
debug!("Adding documentation into database");
add_path_into_database(
conn,
&format!("rustdoc/{}/{}", name, version),
local_storage,
)?;
Ok(())
)
.map(|t| t.1)
}
}

21 changes: 16 additions & 5 deletions src/storage/database.rs
@@ -17,32 +17,42 @@ impl<'a> DatabaseBackend<'a> {
}

pub(super) fn get(&self, path: &str) -> Result<Blob, Error> {
use std::convert::TryInto;

let rows = self.conn.query(
"SELECT path, mime, date_updated, content FROM files WHERE path = $1;",
"SELECT path, mime, date_updated, content, compression
FROM files
WHERE path = $1;",
&[&path],
)?;

if rows.is_empty() {
Err(PathNotFoundError.into())
} else {
let row = rows.get(0);
let compression = row.get::<_, Option<i32>>("compression").map(|i| {
i.try_into()
.expect("invalid compression algorithm stored in database")
});
Ok(Blob {
path: row.get("path"),
mime: row.get("mime"),
date_updated: DateTime::from_utc(row.get::<_, NaiveDateTime>("date_updated"), Utc),
content: row.get("content"),
compression,
})
}
}

pub(super) fn store_batch(&self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> {
for blob in batch {
let compression = blob.compression.map(|alg| alg as i32);
trans.query(
"INSERT INTO files (path, mime, content)
VALUES ($1, $2, $3)
"INSERT INTO files (path, mime, content, compression)
VALUES ($1, $2, $3, $4)
ON CONFLICT (path) DO UPDATE
SET mime = EXCLUDED.mime, content = EXCLUDED.content",
&[&blob.path, &blob.mime, &blob.content],
SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression",
&[&blob.path, &blob.mime, &blob.content, &compression],
)?;
}
Ok(())
@@ -79,6 +89,7 @@ mod tests {
mime: "text/plain".into(),
date_updated: now.trunc_subsecs(6),
content: "Hello world!".bytes().collect(),
compression: None,
},
backend.get("dir/foo.txt")?
);
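
Callers of get() now have to honor the new field before serving file contents. A hedged sketch of that read path, assuming the decompress helper benchmarked earlier is visible here and that the module's Error type wraps its failures:

    fn blob_bytes(blob: Blob) -> Result<Vec<u8>, Error> {
        match blob.compression {
            // get() already mapped the stored integer back to the enum
            Some(algorithm) => Ok(decompress(&blob.content, algorithm)?),
            // a NULL compression column means the row was stored uncompressed,
            // including all rows that predate this migration
            None => Ok(blob.content),
        }
    }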