Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@ homepage = "https://github.com/Wumpf/wgpu-profiler"
repository = "https://github.com/Wumpf/wgpu-profiler"
keywords = ["graphics"]
license = "MIT OR Apache-2.0"
resolver = "2"

[features]
tracy = ["tracy-client", "profiling/profile-with-tracy"]

[lib]

[dependencies]
tracy-client = { version = "0.15", optional = true }
wgpu = "0.17"

[dev-dependencies]
profiling = { version = "1" }
tracy-client = "0.15"
winit = "0.28"
futures-lite = "1"
#env_logger = "0.8.2"

[patch.crates-io]
tracy-client = { git = "https://github.com/cwfitzgerald/rust_tracy_client.git", rev = "1be35c854a7c22e09063ca25efe4438e606c6b50" }
25 changes: 21 additions & 4 deletions examples/demo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32
}

fn console_output(results: &Option<Vec<GpuTimerScopeResult>>, enabled_features: wgpu::Features) {
profiling::scope!("console_output");
print!("\x1B[2J\x1B[1;1H"); // Clear terminal and put cursor to first row first column
println!("Welcome to wgpu_profiler demo!");
println!();
Expand Down Expand Up @@ -111,7 +112,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
surface.configure(&device, &sc_desc);

// Create a new profiler instance
let mut profiler = GpuProfiler::new(4, queue.get_timestamp_period(), device.features());
let mut profiler = GpuProfiler::new(&adapter, &device, &queue, 4);
let mut latest_profiler_results = None;

event_loop.run(move |event, _, control_flow| {
Expand All @@ -137,11 +138,15 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
window.request_redraw();
}
Event::RedrawRequested(_) => {
profiling::scope!("Redraw Requested");

let frame = surface.get_current_texture().expect("Failed to acquire next surface texture");
let frame_view = frame.texture.create_view(&wgpu::TextureViewDescriptor::default());
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

wgpu_profiler!("rendering", &mut profiler, &mut encoder, &device, {
profiling::scope!("Rendering");

let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
Expand Down Expand Up @@ -188,8 +193,16 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
// Resolves any queries that might be in flight.
profiler.resolve_queries(&mut encoder);

queue.submit(Some(encoder.finish()));
frame.present();
{
profiling::scope!("Submit");
queue.submit(Some(encoder.finish()));
}
{
profiling::scope!("Present");
frame.present();
}

profiling::finish_frame!();

// Signal to the profiler that the frame is finished.
profiler.end_frame().unwrap();
Expand Down Expand Up @@ -228,8 +241,12 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
}

/// Entry point of the demo.
///
/// Starts the Tracy client, creates the winit event loop and a borderless-fullscreen window,
/// and then blocks on the async `run` function.
fn main() {
// The Tracy client is started first, before the event loop, window and `run` are set up.
tracy_client::Client::start();
//env_logger::init_from_env(env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "warn"));
let event_loop = EventLoop::new();
// NOTE(review): this `window` binding is shadowed by the builder-created window right below.
// It looks like the pre-change line of the diff — confirm it is not part of the final file.
let window = winit::window::Window::new(&event_loop).unwrap();
// Borderless fullscreen; `None` is passed as the monitor argument.
let window = winit::window::WindowBuilder::new()
.with_fullscreen(Some(winit::window::Fullscreen::Borderless(None)))
.build(&event_loop)
.unwrap();
futures_lite::future::block_on(run(event_loop, window));
}
38 changes: 34 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ use std::{convert::TryInto, ops::Range, thread::ThreadId};
pub mod chrometrace;
pub mod macros;
pub mod scope;
#[cfg(feature = "tracy")]
pub mod tracy;

pub struct GpuTimerScopeResult {
pub label: String,
Expand Down Expand Up @@ -107,6 +109,9 @@ pub struct GpuProfiler {

max_num_pending_frames: usize,
timestamp_to_sec: f64,

#[cfg(feature = "tracy")]
tracy_context: tracy_client::GpuContext,
}

// Public interface
Expand All @@ -133,8 +138,10 @@ impl GpuProfiler {
/// (Typical values for `max_num_pending_frames` are 2~4)
///
/// The timestamp period is read internally from `queue` via [`wgpu::Queue::get_timestamp_period`].
pub fn new(max_num_pending_frames: usize, timestamp_period: f32, active_features: wgpu::Features) -> Self {
pub fn new(_adapter: &wgpu::Adapter, device: &wgpu::Device, queue: &wgpu::Queue, max_num_pending_frames: usize) -> Self {
assert!(max_num_pending_frames > 0);
let active_features = device.features();
let timestamp_period = queue.get_timestamp_period();
GpuProfiler {
enable_pass_timer: active_features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES),
enable_encoder_timer: active_features.contains(wgpu::Features::TIMESTAMP_QUERY),
Expand All @@ -154,6 +161,9 @@ impl GpuProfiler {

max_num_pending_frames,
timestamp_to_sec: timestamp_period as f64 / 1000.0 / 1000.0 / 1000.0,

#[cfg(feature = "tracy")]
tracy_context: tracy::create_tracy_gpu_client(_adapter.get_info().backend, device, queue, timestamp_period),
}
}

Expand All @@ -164,6 +174,7 @@ impl GpuProfiler {
/// May create new wgpu query objects (which is why it needs a [`wgpu::Device`] reference)
///
/// See also [`wgpu_profiler!`], [`GpuProfiler::end_scope`]
#[track_caller]
pub fn begin_scope<Recorder: ProfilerCommandRecorder>(&mut self, label: &str, encoder_or_pass: &mut Recorder, device: &wgpu::Device) {
if (encoder_or_pass.is_pass() && self.enable_pass_timer) || (!encoder_or_pass.is_pass() && self.enable_encoder_timer) {
let start_query = self.allocate_query_pair(device);
Expand All @@ -176,12 +187,16 @@ impl GpuProfiler {
let pid = std::process::id();
let tid = std::thread::current().id();

let _location = std::panic::Location::caller();

self.open_scopes.push(UnprocessedTimerScope {
label: String::from(label),
start_query,
nested_scopes: Vec::new(),
pid,
tid,
#[cfg(feature = "tracy")]
tracy_scope: self.tracy_context.span_alloc(label, "", _location.file(), _location.line()).ok(),
});
}
if self.enable_debug_marker {
Expand All @@ -196,11 +211,19 @@ impl GpuProfiler {
/// See also [`wgpu_profiler!`], [`GpuProfiler::begin_scope`]
pub fn end_scope<Recorder: ProfilerCommandRecorder>(&mut self, encoder_or_pass: &mut Recorder) {
if (encoder_or_pass.is_pass() && self.enable_pass_timer) || (!encoder_or_pass.is_pass() && self.enable_encoder_timer) {
let open_scope = self.open_scopes.pop().expect("No profiler GpuProfiler scope was previously opened");
let mut open_scope = self.open_scopes.pop().expect("No profiler GpuProfiler scope was previously opened");
encoder_or_pass.write_timestamp(
&self.active_frame.query_pools[open_scope.start_query.pool_idx as usize].query_set,
open_scope.start_query.query_idx + 1,
);

#[cfg(feature = "tracy")]
if let Some(ref mut tracy_scope) = open_scope.tracy_scope {
tracy_scope.end_zone();
}
#[cfg(not(feature = "tracy"))]
let _ = &mut open_scope;

if let Some(open_parent_scope) = self.open_scopes.last_mut() {
open_parent_scope.nested_scopes.push(open_scope);
} else {
Expand Down Expand Up @@ -329,8 +352,8 @@ impl GpuProfiler {
// Internals
// --------------------------------------------------------------------------------

const QUERY_SIZE: u32 = 8; // Newer wgpu version have QUERY_SIZE
const QUERY_SET_MAX_QUERIES: u32 = 8192; // Newer wgpu version have QUERY_SET_MAX_QUERIES
const QUERY_SIZE: u32 = wgpu::QUERY_SIZE;
const QUERY_SET_MAX_QUERIES: u32 = wgpu::QUERY_SET_MAX_QUERIES;

impl GpuProfiler {
fn reset_and_cache_unused_query_pools(&mut self, mut query_pools: Vec<QueryPool>) {
Expand Down Expand Up @@ -409,6 +432,11 @@ impl GpuProfiler {
.unwrap(),
);

#[cfg(feature = "tracy")]
if let Some(tracy_scope) = scope.tracy_scope {
tracy_scope.upload_timestamp(start_raw as i64, end_raw as i64);
}

GpuTimerScopeResult {
label: scope.label,
time: (start_raw as f64 * timestamp_to_sec)..(end_raw as f64 * timestamp_to_sec),
Expand All @@ -433,6 +461,8 @@ struct UnprocessedTimerScope {
nested_scopes: Vec<UnprocessedTimerScope>,
pub pid: u32,
pub tid: ThreadId,
#[cfg(feature = "tracy")]
tracy_scope: Option<tracy_client::GpuSpan>,
}

/// A pool of queries, consisting of a single queryset & buffer for query results.
Expand Down
13 changes: 13 additions & 0 deletions src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ pub struct ManualOwningScope<'a, W: ProfilerCommandRecorder> {
impl<'a, W: ProfilerCommandRecorder> Scope<'a, W> {
/// Starts a new profiler scope. Scope is closed on drop.
///
/// Opens the scope by calling [`GpuProfiler::begin_scope`] with `label`, `recorder` and `device`,
/// then stores the `profiler` and `recorder` borrows in the returned value.
///
/// Marked `#[track_caller]` so that a caller location queried further down the call chain
/// refers to the code that called `start` rather than to this function.
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, recorder: &'a mut W, device: &wgpu::Device) -> Self {
profiler.begin_scope(label, recorder, device);
Self { profiler, recorder }
}

/// Starts a scope nested within this one.
///
/// Delegates to [`Scope::start`], handing over this scope's profiler and recorder references.
/// The returned scope borrows from `self` (`Scope<'_, W>`).
///
/// Marked `#[track_caller]` so the caller location is passed through to `Scope::start`.
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
Scope::start(label, self.profiler, self.recorder, device)
}
Expand All @@ -45,13 +47,15 @@ impl<'a, W: ProfilerCommandRecorder> Scope<'a, W> {
impl<'a, W: ProfilerCommandRecorder> OwningScope<'a, W> {
/// Starts a new profiler scope. Scope is closed on drop.
///
/// Takes ownership of `recorder`, opens the scope on it via [`GpuProfiler::begin_scope`],
/// and stores both the profiler reference and the recorder in the returned value.
///
/// Marked `#[track_caller]` so that a caller location queried further down the call chain
/// refers to the code that called `start` rather than to this function.
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, mut recorder: W, device: &wgpu::Device) -> Self {
profiler.begin_scope(label, &mut recorder, device);
Self { profiler, recorder }
}

/// Starts a scope nested within this one.
///
/// Delegates to [`Scope::start`] with this scope's profiler and a mutable borrow of the
/// owned recorder, so the nested scope is a borrowing [`Scope`] tied to `self`.
///
/// Marked `#[track_caller]` so the caller location is passed through to `Scope::start`.
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
Scope::start(label, self.profiler, &mut self.recorder, device)
}
Expand All @@ -60,27 +64,31 @@ impl<'a, W: ProfilerCommandRecorder> OwningScope<'a, W> {
impl<'a, W: ProfilerCommandRecorder> ManualOwningScope<'a, W> {
/// Starts a new profiler scope. Scope is NOT closed on drop and needs to be closed manually with [`ManualOwningScope::end_scope`]
///
/// Takes ownership of `recorder`, opens the scope on it via [`GpuProfiler::begin_scope`],
/// and stores both the profiler reference and the recorder in the returned value.
///
/// Marked `#[track_caller]` so that a caller location queried further down the call chain
/// refers to the code that called `start` rather than to this function.
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, mut recorder: W, device: &wgpu::Device) -> Self {
profiler.begin_scope(label, &mut recorder, device);
Self { profiler, recorder }
}

/// Starts a scope nested within this one.
///
/// Delegates to [`Scope::start`] with this scope's profiler and a mutable borrow of the
/// owned recorder, so the nested scope is a borrowing [`Scope`] tied to `self`.
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
Scope::start(label, self.profiler, &mut self.recorder, device)
}

/// Ends the scope, allowing the extraction of the owned ProfilerCommandRecorder
/// and the mutable reference to the GpuProfiler.
///
/// Calls [`GpuProfiler::end_scope`] on the recorder and returns `(recorder, profiler)`.
#[must_use]
#[track_caller]
pub fn end_scope(mut self) -> (W, &'a mut GpuProfiler) {
self.profiler.end_scope(&mut self.recorder);
(self.recorder, self.profiler)
}
}
impl<'a> Scope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in a OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -92,6 +100,7 @@ impl<'a> Scope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in a OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand All @@ -105,6 +114,7 @@ impl<'a> Scope<'a, wgpu::CommandEncoder> {

impl<'a> OwningScope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -116,6 +126,7 @@ impl<'a> OwningScope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in a OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand All @@ -129,6 +140,7 @@ impl<'a> OwningScope<'a, wgpu::CommandEncoder> {

impl<'a> ManualOwningScope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -140,6 +152,7 @@ impl<'a> ManualOwningScope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand Down
53 changes: 53 additions & 0 deletions src/tracy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
pub(crate) fn create_tracy_gpu_client(
backend: wgpu::Backend,
device: &wgpu::Device,
queue: &wgpu::Queue,
timestamp_period: f32,
) -> tracy_client::GpuContext {
let query_set = device.create_query_set(&wgpu::QuerySetDescriptor {
label: Some("wgpu-profiler gpu -> cpu sync query_set"),
ty: wgpu::QueryType::Timestamp,
count: 1,
});

let resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("wgpu-profiler gpu -> cpu resolve buffer"),
size: crate::QUERY_SIZE as _,
usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});

let map_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("wgpu-profiler gpu -> cpu map buffer"),
size: crate::QUERY_SIZE as _,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});

let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("wgpu-profiler gpu -> cpu sync cmd_buf"),
});
encoder.write_timestamp(&query_set, 0);
encoder.resolve_query_set(&query_set, 0..1, &resolve_buffer, 0);
encoder.copy_buffer_to_buffer(&resolve_buffer, 0, &map_buffer, 0, crate::QUERY_SIZE as _);
queue.submit(Some(encoder.finish()));

let _ = map_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ());
device.poll(wgpu::Maintain::Wait);

let view = map_buffer.slice(..).get_mapped_range();
let timestamp: i64 = i64::from_le_bytes((*view).try_into().unwrap());

let tracy_backend = match backend {
wgpu::Backend::Empty | wgpu::Backend::Metal | wgpu::Backend::BrowserWebGpu => tracy_client::GpuContextType::Invalid,
wgpu::Backend::Vulkan => tracy_client::GpuContextType::Vulkan,
wgpu::Backend::Dx12 => tracy_client::GpuContextType::Direct3D12,
wgpu::Backend::Dx11 => tracy_client::GpuContextType::Direct3D11,
wgpu::Backend::Gl => tracy_client::GpuContextType::OpenGL,
};

tracy_client::Client::running()
.expect("tracy client not running")
.new_gpu_context(Some("wgpu"), tracy_backend, timestamp, timestamp_period)
.unwrap()
}
2 changes: 1 addition & 1 deletion tests/dropped_frame_handling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async fn handle_dropped_frames_gracefully_async() {
.unwrap();

// max_num_pending_frames is one!
let mut profiler = wgpu_profiler::GpuProfiler::new(1, queue.get_timestamp_period(), device.features());
let mut profiler = wgpu_profiler::GpuProfiler::new(&adapter, &device, &queue, 1);

// Two frames without device poll, causing the profiler to drop a frame on the second round.
for _ in 0..2 {
Expand Down