From 1a2fbbbd417eba4611fa6589d333bf1e9380efd0 Mon Sep 17 00:00:00 2001 From: "xingqiang.yuan" Date: Tue, 17 Oct 2023 16:43:46 +0800 Subject: [PATCH 1/3] vec opt --- halo2_proofs/src/helpers.rs | 66 ++++++++++++++------------- halo2_proofs/src/plonk/permutation.rs | 8 ++-- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/halo2_proofs/src/helpers.rs b/halo2_proofs/src/helpers.rs index 044798bd..e18a3a17 100644 --- a/halo2_proofs/src/helpers.rs +++ b/halo2_proofs/src/helpers.rs @@ -21,11 +21,7 @@ use num_derive::FromPrimitive; use rayon::prelude::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; use std::io::Seek; use std::marker::PhantomData; -use std::{ - fs::{File, OpenOptions}, - io, - ops::RangeTo, -}; +use std::{fs::{File, OpenOptions}, io, mem, ops::RangeTo}; pub(crate) trait CurveRead: CurveAffine { /// Reads a compressed element from the buffer and attempts to parse it @@ -105,9 +101,9 @@ impl Serializable for (String, u32) { impl ParaSerializable for Vec> { fn vec_fetch(fd: &mut File) -> io::Result { - let columns = read_u32(fd)?; + let columns = read_u32(fd)? as usize; let mut offset = 0; - let mut offsets = vec![]; + let mut offsets = Vec::with_capacity(columns); for _ in 0..columns { let l = read_u32(fd)?; offsets.push((offset, l)); @@ -124,6 +120,7 @@ impl ParaSerializable for Vec> { .map(&fd) .unwrap() }; + //TODO: to be optimized let s: &[(u32, u32)] = unsafe { std::slice::from_raw_parts(mmap.as_ptr() as *const (u32, u32), offsets[i as usize].1 as usize) }; @@ -139,7 +136,7 @@ impl ParaSerializable for Vec> { let u = self.len() as u32; u.store(fd)?; let mut offset = 0; - let mut offsets = vec![]; + let mut offsets = Vec::with_capacity(u as usize); for i in 0..u { let l = self[i as usize].len(); offsets.push((offset, l)); @@ -151,15 +148,15 @@ impl ParaSerializable for Vec> { self.into_par_iter().enumerate().for_each(|(i, s2)| { let mut mmap = unsafe { MmapOptions::new() - .offset(position + (offsets[i as usize].0 as u64 * 8)) - .len(offsets[i as usize].1 as usize * 8) + .offset(position + (offsets[i].0 as u64 * 8)) + .len(offsets[i].1 * 8) .map_mut(&fd) .unwrap() }; let s: &[u8] = unsafe { std::slice::from_raw_parts( s2.as_ptr() as *const u8, - offsets[i as usize].1 as usize * 8, + offsets[i].1 * 8, ) }; (&mut mmap).copy_from_slice(s); @@ -173,10 +170,13 @@ impl Serializable for Polynomial { let u = read_u32(reader)?; let mut buf = vec![0u8; u as usize * 32]; reader.read_exact(&mut buf)?; - let s: &[F] = unsafe { - std::slice::from_raw_parts(buf.as_ptr() as *const F, u as usize) + let ptr = buf.as_ptr() as *mut F; + // Don't run the destructor for buf, because it used by `values` + mem::forget(buf); + let values: Vec = unsafe { + Vec::from_raw_parts(ptr, u as usize, u as usize) }; - Ok(Polynomial::new(s.to_vec())) + Ok(Polynomial::new(values)) } fn store(&self, writer: &mut W) -> io::Result<()> { let u = self.values.len() as u32; @@ -208,9 +208,10 @@ impl Serializable for VerifyingKey { let domain: EvaluationDomain = EvaluationDomain::new(j, k); let cs = read_cs::(reader)?; - let fixed_commitments: Vec<_> = (0..cs.num_fixed_columns) - .map(|_| C::read(reader)) - .collect::>()?; + let mut fixed_commitments = Vec::with_capacity(cs.num_fixed_columns); + for _ in 0..cs.num_fixed_columns { + fixed_commitments.push(C::read(reader)?); + } let permutation = permutation::VerifyingKey::read(reader, &cs.permutation)?; @@ -233,7 +234,7 @@ impl Serializable for Vec { } fn fetch(reader: &mut R) -> io::Result> { let len = read_u32(reader)?; - let mut v = vec![]; + let mut v = Vec::with_capacity(len as usize); for _ in 0..len { v.push(T::fetch(reader)?); } @@ -281,8 +282,8 @@ fn write_arguments( fn read_arguments( reader: &mut R, ) -> std::io::Result { - let len = read_u32(reader)?; - let mut cols = vec![]; + let len = read_u32(reader)? as usize; + let mut cols = Vec::with_capacity(len); for _ in 0..len { cols.push(Column::::fetch(reader)?); } @@ -336,8 +337,8 @@ fn read_queries( reader: &mut R, t: T, ) -> std::io::Result, Rotation)>> { - let mut queries = vec![]; - let len = read_u32(reader)?; + let len = read_u32(reader)? as usize; + let mut queries = Vec::with_capacity(len); for _ in 0..len { let column = read_column(reader, t)?; let rotation = read_u32(reader)?; @@ -348,8 +349,8 @@ fn read_queries( } fn read_virtual_cells(reader: &mut R) -> std::io::Result> { - let mut vcells = vec![]; - let len = read_u32(reader)?; + let len = read_u32(reader)? as usize; + let mut vcells = Vec::with_capacity(len); for _ in 0..len { let column = Column::::fetch(reader)?; let rotation = read_u32(reader)?; @@ -384,8 +385,8 @@ fn write_fixed_columns( } fn read_fixed_columns(reader: &mut R) -> std::io::Result>> { - let len = read_u32(reader)?; - let mut columns = vec![]; + let len = read_u32(reader)? as usize; + let mut columns = Vec::with_capacity(len); for _ in 0..len { columns.push(read_fixed_column(reader)?); } @@ -426,8 +427,8 @@ fn read_cs(reader: &mut R) -> io::Result(reader: &mut R) -> io::Result(reader, Fixed)?; let permutation = read_arguments(reader)?; - let mut lookups = vec![]; - let nb_lookup = read_u32(reader)?; + let nb_lookup = read_u32(reader)? as usize; + let mut lookups = Vec::with_capacity(nb_lookup); for _ in 0..nb_lookup { let input_expressions = Vec::>::fetch(reader)?; let table_expressions = Vec::>::fetch(reader)?; @@ -487,8 +488,8 @@ fn write_gates( fn read_gates( reader: &mut R, ) -> std::io::Result>> { - let nb_gates = read_u32(reader)?; - let mut gates = vec![]; + let nb_gates = read_u32(reader)? as usize; + let mut gates = Vec::with_capacity(nb_gates); for _ in 0..nb_gates { gates.push(Gate::new_with_polys_and_queries( Vec::>::fetch(reader)?, @@ -888,6 +889,7 @@ impl<'a, C: CurveAffine> AssignWitnessCollection<'a, C> { .map(&fd) .unwrap() }; + //TODO: to be optimized let s: &[C::Scalar] = unsafe { std::slice::from_raw_parts(mmap.as_ptr() as *const C::Scalar, 1 << params.k) }; diff --git a/halo2_proofs/src/plonk/permutation.rs b/halo2_proofs/src/plonk/permutation.rs index 60b9ce03..c54a92c0 100644 --- a/halo2_proofs/src/plonk/permutation.rs +++ b/halo2_proofs/src/plonk/permutation.rs @@ -88,9 +88,11 @@ impl VerifyingKey { } pub fn read(reader: &mut R, argument: &Argument) -> io::Result { - let commitments = (0..argument.columns.len()) - .map(|_| C::read(reader)) - .collect::, _>>()?; + let len = argument.columns.len(); + let mut commitments = Vec::with_capacity(len); + for _ in 0..len { + commitments.push(C::read(reader)?); + } Ok(VerifyingKey { commitments }) } } From f4ce2647f59f98b5404ce0b155a93fa416c51bae Mon Sep 17 00:00:00 2001 From: "xingqiang.yuan" Date: Tue, 17 Oct 2023 20:31:35 +0800 Subject: [PATCH 2/3] Assembly opt --- halo2_proofs/src/plonk/permutation/keygen.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/halo2_proofs/src/plonk/permutation/keygen.rs b/halo2_proofs/src/plonk/permutation/keygen.rs index a457dabb..729c8411 100644 --- a/halo2_proofs/src/plonk/permutation/keygen.rs +++ b/halo2_proofs/src/plonk/permutation/keygen.rs @@ -25,10 +25,14 @@ impl Assembly { pub(crate) fn new(n: usize, p: &Argument) -> Self { // Initialize the copy vector to keep track of copy constraints in all // the permutation arguments. - let mut columns = vec![]; + let mut columns = Vec::with_capacity(p.columns.len()); for i in 0..p.columns.len() { // Computes [(i, 0), (i, 1), ..., (i, n - 1)] - columns.push((0..n).map(|j| (i as u32, j as u32)).collect()); + let mut values = Vec::with_capacity(n); + for j in 0..n { + values.push((i as u32, j as u32)); + } + columns.push(values); } // Before any equality constraints are applied, every cell in the permutation is @@ -36,9 +40,9 @@ impl Assembly { // its own distinguished element. Assembly { columns: p.columns.clone(), - mapping: columns.clone(), - aux: columns, - sizes: vec![vec![1usize; n]; p.columns.len()], + mapping: columns, + aux: vec![], //columns, + sizes: vec![], //vec![vec![1usize; n]; p.columns.len()], } } From 5870bc6ba272411790239e8a543f3d06f7007c2a Mon Sep 17 00:00:00 2001 From: "xingqiang.yuan" Date: Thu, 19 Oct 2023 10:05:48 +0800 Subject: [PATCH 3/3] pre allocating --- halo2_proofs/src/plonk/evaluation.rs | 6 ++--- halo2_proofs/src/plonk/keygen.rs | 27 ++++++++++++++------ halo2_proofs/src/plonk/permutation/keygen.rs | 2 +- halo2_proofs/src/plonk/prover.rs | 10 ++++---- halo2_proofs/src/poly/domain.rs | 14 +++++----- 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 312db899..d8739e58 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -716,15 +716,15 @@ impl Evaluator { // Polynomials required for this lookup. // Calculated here so these only have to be kept in memory for the short time // they are actually needed. - let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); + let product_coset = pk.vk.domain.coeff_to_extended(&lookup.product_poly); let permuted_input_coset = pk .vk .domain - .coeff_to_extended(lookup.permuted_input_poly.clone()); + .coeff_to_extended(&lookup.permuted_input_poly); let permuted_table_coset = pk .vk .domain - .coeff_to_extended(lookup.permuted_table_poly.clone()); + .coeff_to_extended(&lookup.permuted_table_poly); parallelize(&mut values, |values, start| { for (i, value) in values.iter_mut().enumerate() { diff --git a/halo2_proofs/src/plonk/keygen.rs b/halo2_proofs/src/plonk/keygen.rs index 7cd9d6b6..8c944736 100644 --- a/halo2_proofs/src/plonk/keygen.rs +++ b/halo2_proofs/src/plonk/keygen.rs @@ -323,7 +323,7 @@ where #[cfg(not(feature = "cuda"))] let fixed_cosets = fixed_polys .iter() - .map(|poly| vk.domain.coeff_to_extended(poly.clone())) + .map(|poly| vk.domain.coeff_to_extended(poly)) .collect(); let timer = start_timer!(|| "assembly build pkey"); @@ -339,7 +339,12 @@ where l0[0] = C::Scalar::one(); let l0 = vk.domain.lagrange_to_coeff(l0); #[cfg(not(feature = "cuda"))] - let l0 = vk.domain.coeff_to_extended(l0); + let l0 = { + let l0_ext = vk.domain.coeff_to_extended(&l0); + drop(l0); + l0_ext + }; + // Compute l_blind(X) which evaluates to 1 for each blinding factor row // and 0 otherwise over the domain. @@ -348,14 +353,15 @@ where *evaluation = C::Scalar::one(); } let l_blind = vk.domain.lagrange_to_coeff(l_blind); - let l_blind_extended = vk.domain.coeff_to_extended(l_blind); + let l_blind_extended = vk.domain.coeff_to_extended(&l_blind); + drop(l_blind); // Compute l_last(X) which evaluates to 1 on the first inactive row (just // before the blinding factors) and 0 otherwise over the domain let mut l_last = vk.domain.empty_lagrange(); l_last[params.n as usize - cs.blinding_factors() - 1] = C::Scalar::one(); let l_last = vk.domain.lagrange_to_coeff(l_last); - let l_last_extended = vk.domain.coeff_to_extended(l_last.clone()); + let l_last_extended = vk.domain.coeff_to_extended(&l_last); // Compute l_active_row(X) let one = C::Scalar::one(); @@ -423,7 +429,7 @@ where #[cfg(not(feature = "cuda"))] let fixed_cosets = fixed_polys .iter() - .map(|poly| vk.domain.coeff_to_extended(poly.clone())) + .map(|poly| vk.domain.coeff_to_extended(poly)) .collect(); let timer = start_timer!(|| "build pk time..."); @@ -437,7 +443,11 @@ where l0[0] = C::Scalar::one(); let l0 = vk.domain.lagrange_to_coeff(l0); #[cfg(not(feature = "cuda"))] - let l0 = vk.domain.coeff_to_extended(l0); + let l0 = { + let l0_ext = vk.domain.coeff_to_extended(&l0); + drop(l0); + l0_ext + }; // Compute l_blind(X) which evaluates to 1 for each blinding factor row // and 0 otherwise over the domain. @@ -446,14 +456,15 @@ where *evaluation = C::Scalar::one(); } let l_blind = vk.domain.lagrange_to_coeff(l_blind); - let l_blind_extended = vk.domain.coeff_to_extended(l_blind); + let l_blind_extended = vk.domain.coeff_to_extended(&l_blind); + drop(l_blind); // Compute l_last(X) which evaluates to 1 on the first inactive row (just // before the blinding factors) and 0 otherwise over the domain let mut l_last = vk.domain.empty_lagrange(); l_last[params.n as usize - cs.blinding_factors() - 1] = C::Scalar::one(); let l_last = vk.domain.lagrange_to_coeff(l_last); - let l_last_extended = vk.domain.coeff_to_extended(l_last.clone()); + let l_last_extended = vk.domain.coeff_to_extended(&l_last); // Compute l_active_row(X) let one = C::Scalar::one(); diff --git a/halo2_proofs/src/plonk/permutation/keygen.rs b/halo2_proofs/src/plonk/permutation/keygen.rs index 729c8411..13bb9cab 100644 --- a/halo2_proofs/src/plonk/permutation/keygen.rs +++ b/halo2_proofs/src/plonk/permutation/keygen.rs @@ -206,7 +206,7 @@ impl Assembly { #[cfg(not(feature = "cuda"))] let cosets = polys .par_iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(poly)) .collect(); ProvingKey { diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 7331d8c0..18a01aa9 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -147,7 +147,7 @@ fn create_single_instances< #[cfg(not(feature = "cuda"))] let instance_cosets: Vec<_> = instance_polys .iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(poly)) .collect(); Ok(InstanceSingle { @@ -578,7 +578,7 @@ pub fn create_proof< #[cfg(not(feature = "cuda"))] let permutation_product_coset = - domain.coeff_to_extended(permutation_product_poly.clone()); + domain.coeff_to_extended(&permutation_product_poly); let permutation_product_commitment = permutation_product_commitment_projective.to_affine(); @@ -643,7 +643,7 @@ pub fn create_proof< #[cfg(not(feature = "cuda"))] let advice_cosets: Vec<_> = advice_polys .iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(poly)) .collect(); AdviceSingle:: { @@ -1068,7 +1068,7 @@ pub fn create_proof_from_witness< #[cfg(not(feature = "cuda"))] let permutation_product_coset = - domain.coeff_to_extended(permutation_product_poly.clone()); + domain.coeff_to_extended(&permutation_product_poly); let permutation_product_commitment = permutation_product_commitment_projective.to_affine(); @@ -1133,7 +1133,7 @@ pub fn create_proof_from_witness< #[cfg(not(feature = "cuda"))] let advice_cosets: Vec<_> = advice_polys .iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(poly)) .collect(); AdviceSingle:: { diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs index a7ecf23f..73bc2c74 100644 --- a/halo2_proofs/src/poly/domain.rs +++ b/halo2_proofs/src/poly/domain.rs @@ -268,19 +268,21 @@ impl EvaluationDomain { /// evaluation domain, rotating by `rotation` if desired. pub fn coeff_to_extended( &self, - mut a: Polynomial, + a: &Polynomial, ) -> Polynomial { assert_eq!(a.values.len(), 1 << self.k); - + // pre allocate memory to avoid vec growing + let mut values = Vec::with_capacity(self.extended_len()); + values.extend_from_slice(&a.values); //let timer = start_timer!(|| format!("prepare {}", self.k)); - self.distribute_powers_zeta(&mut a.values, true); + self.distribute_powers_zeta(&mut values, true); //end_timer!(timer); - a.values.resize(self.extended_len(), G::group_zero()); - best_fft(&mut a.values, self.extended_omega, self.extended_k); + values.resize(self.extended_len(), G::group_zero()); + best_fft(&mut values, self.extended_omega, self.extended_k); Polynomial { - values: a.values, + values, _marker: PhantomData, } }