@@ -31,10 +31,15 @@ bitflags::bitflags! {
31
31
32
32
/// Wrapper around a raw CUDA stream handle (`cuda::cudaStream_t`), stored in `raw`.
#[ derive( Debug ) ]
33
33
pub struct Stream {
34
- raw : cuda:: cudaStream_t ,
34
+ pub raw : cuda:: cudaStream_t ,
35
35
}
36
36
37
37
impl Stream {
38
+ // /// Creates a new stream with flags.
39
+ // pub fn new(flags: StreamFlags) -> Self {
40
+ // Self {}
41
+ // }
42
+
38
43
/// Creates a new stream with flags.
39
44
pub fn new ( flags : StreamFlags ) -> CudaResult < Self > {
40
45
let mut stream = MaybeUninit :: uninit ( ) ;
@@ -47,10 +52,11 @@ impl Stream {
47
52
}
48
53
}
49
54
50
- #[ doc( hidden) ]
51
- pub fn launch ( & self , param_buf : * mut c_void ) -> CudaResult < ( ) > {
52
- unsafe { cuda:: cudaLaunchDeviceV2 ( param_buf, self . raw ) . to_result ( ) }
53
- }
55
+ // #[doc(hidden)]
56
+ // pub fn launch(&self, param_buf: *mut c_void) -> CudaResult<()> {
57
+ // unsafe { cuda::cudaLaunchDeviceV2(param_buf, core::ptr::null_mut()).to_result() }
58
+ // // unsafe { cuda::cudaLaunchDeviceV2(param_buf, self.raw).to_result() }
59
+ // }
54
60
}
55
61
56
62
impl Drop for Stream {
@@ -63,13 +69,17 @@ impl Drop for Stream {
63
69
64
70
#[ macro_export]
65
71
macro_rules! launch {
66
- ( $func: ident<<<$grid_dim: expr, $block_dim: expr, $smem_size: expr, $stream: ident>>>( $( $param: expr) ,* $( , ) ?) ) => { {
72
+ // ($func:ident<<<$grid_dim:expr, $block_dim:expr, $smem_size:expr, $stream:ident>>>($($param:expr),* $(,)?)) => {{
73
+ ( $func: ident<<<$grid_dim: expr, $block_dim: expr, ( $smem_size: expr) >>>( $( $param: expr) ,* $( , ) ?) ) => { {
67
74
use $crate:: rt:: ToResult ;
68
75
use $crate:: float:: GpuFloat ;
69
76
let grid_dim = $crate:: rt:: GridSize :: from( $grid_dim) ;
70
77
let block_dim = $crate:: rt:: BlockSize :: from( $block_dim) ;
78
+
79
+ // Get a device buffer for kernel launch.
80
+ let fptr = $func as * const ( ) ;
71
81
let mut buf = $crate:: rt:: sys:: cudaGetParameterBufferV2(
72
- & $func as * const _ as * const :: core:: ffi:: c_void,
82
+ fptr as * const :: core:: ffi:: c_void,
73
83
$crate:: rt:: sys:: dim3 {
74
84
x: grid_dim. x,
75
85
y: grid_dim. y,
@@ -80,24 +90,87 @@ macro_rules! launch {
80
90
y: block_dim. y,
81
91
z: block_dim. z
82
92
} ,
83
- $smem_size
84
- ) as * mut u8 ;
85
- unsafe {
86
- let mut offset = 0 ;
87
- $(
88
- let param = $param;
89
- let size = :: core:: mem:: size_of_val( & param) ;
90
- let mut buf_idx = ( offset as f32 / size as f32 ) . ceil( ) as usize + 1 ;
91
- offset = buf_idx * size;
92
- let ptr = & param as * const _ as * const u8 ;
93
- let dst = buf. add( offset) ;
94
- :: core:: ptr:: copy_nonoverlapping( & param as * const _ as * const u8 , dst, size) ;
95
- ) *
93
+ $smem_size,
94
+ ) ;
95
+
96
+ // Ensure buffer is not a nil ptr.
97
+ if buf. is_null( ) {
98
+ return ;
96
99
}
100
+
101
+ // Load data into buffer.
102
+ let mut offset = 0 ;
103
+ $(
104
+ let param = $param;
105
+ let size = :: core:: mem:: size_of_val( & param) ;
106
+ let param_ptr = & param as * const _ as * const :: core:: ffi:: c_void;
107
+ let dst = buf. add( offset) . copy_from( param_ptr, size) ;
108
+ offset += size;
109
+ ) *
97
110
if false {
98
111
$func( $( $param) ,* ) ;
99
112
}
100
- $stream. launch( buf as * mut :: core:: ffi:: c_void)
113
+ // unsafe {
114
+ // let mut offset = 0;
115
+ // $(
116
+ // let param = $param;
117
+ // let size = ::core::mem::size_of_val(&param);
118
+ // let mut buf_idx = (offset as f32 / size as f32).ceil() as usize + 1;
119
+ // offset = buf_idx * size;
120
+ // let ptr = &param as *const _ as *const u8;
121
+ // let dst = buf.add(offset);
122
+ // ::core::ptr::copy_nonoverlapping(&param as *const _ as *const u8, dst, size);
123
+ // )*
124
+ // }
125
+ // if false {
126
+ // $func($($param),*);
127
+ // }
128
+
129
+ // Launch the kernel.
130
+ $crate:: rt:: sys:: cudaLaunchDeviceV2( buf as * mut :: core:: ffi:: c_void, :: core:: ptr:: null_mut( ) as * mut _)
131
+
132
+ // let mut buf = $crate::rt::sys::cudaGetParameterBuffer(alignment, size) as *mut u8;
133
+
134
+ // // Populate the buffer with given arguments.
135
+ // let mut offset = 0;
136
+ // $(
137
+ // let param = $param;
138
+ // let size = ::core::mem::size_of_val(&param);
139
+ // let buf_bytes_ptr = (buf as *mut u8).add(offset);
140
+ // ::core::ptr::copy_nonoverlapping($param as *const _, buf_bytes_ptr.into(), size);
141
+ // offset += size;
142
+ // )*
143
+
144
+ // let mut offset = 0;
145
+ // $(
146
+ // let param = $param;
147
+ // let size = ::core::mem::size_of_val(&param);
148
+ // let mut buf_idx = (offset as f32 / size as f32).ceil() as usize + 1;
149
+ // offset = buf_idx * size;
150
+ // let ptr = &param as *const _ as *const u8;
151
+ // let dst = buf.add(offset);
152
+ // ::core::ptr::copy_nonoverlapping(&param as *const _ as *const u8, dst, size);
153
+ // )*
154
+
155
+ // // Launch the kernel.
156
+ // let fptr = $func as *const ();
157
+ // $crate::rt::sys::cudaLaunchDevice(
158
+ // fptr as *const ::core::ffi::c_void,
159
+ // buf as *mut ::core::ffi::c_void,
160
+ // $crate::rt::sys::dim3 {
161
+ // x: grid_dim.x,
162
+ // y: grid_dim.y,
163
+ // z: grid_dim.z
164
+ // },
165
+ // $crate::rt::sys::dim3 {
166
+ // x: block_dim.x,
167
+ // y: block_dim.y,
168
+ // z: block_dim.z
169
+ // },
170
+ // $smem_size,
171
+ // ::core::ptr::null_mut() as *mut _,
172
+ // // $stream.raw,
173
+ // )
101
174
} } ;
102
175
}
103
176
0 commit comments