Skip to content
55 changes: 30 additions & 25 deletions caffe2/python/cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import brew
from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper
from caffe2.proto import caffe2_pb2
import logging
Expand All @@ -17,7 +17,7 @@ class CNNModelHelper(ModelHelper):
"""

def __init__(self, order="NCHW", name=None,
use_cudnn=True, cudnn_exhaustive_search=False,
use_gpu_engine=True, gpu_engine_exhaustive_search=False,
ws_nbytes_limit=None, init_params=True,
skip_sparse_optim=False,
param_model=None):
Expand All @@ -31,8 +31,8 @@ def __init__(self, order="NCHW", name=None,

cnn_arg_scope = {
'order': order,
'use_cudnn': use_cudnn,
'cudnn_exhaustive_search': cudnn_exhaustive_search,
'use_gpu_engine': use_gpu_engine,
'gpu_engine_exhaustive_search': gpu_engine_exhaustive_search,
}
if ws_nbytes_limit:
cnn_arg_scope['ws_nbytes_limit'] = ws_nbytes_limit
Expand All @@ -45,8 +45,8 @@ def __init__(self, order="NCHW", name=None,
)

self.order = order
self.use_cudnn = use_cudnn
self.cudnn_exhaustive_search = cudnn_exhaustive_search
self.use_gpu_engine = use_gpu_engine
self.gpu_engine_exhaustive_search = gpu_engine_exhaustive_search
self.ws_nbytes_limit = ws_nbytes_limit
if self.order != "NHWC" and self.order != "NCHW":
raise ValueError(
Expand Down Expand Up @@ -79,9 +79,9 @@ def ConvNd(self, *args, **kwargs):
return brew.conv_nd(
self,
*args,
use_cudnn=self.use_cudnn,
use_gpu_engine=self.use_gpu_engine,
order=self.order,
cudnn_exhaustive_search=self.cudnn_exhaustive_search,
gpu_engine_exhaustive_search=self.gpu_engine_exhaustive_search,
ws_nbytes_limit=self.ws_nbytes_limit,
**kwargs
)
Expand All @@ -90,9 +90,9 @@ def Conv(self, *args, **kwargs):
return brew.conv(
self,
*args,
use_cudnn=self.use_cudnn,
use_gpu_engine=self.use_gpu_engine,
order=self.order,
cudnn_exhaustive_search=self.cudnn_exhaustive_search,
gpu_engine_exhaustive_search=self.gpu_engine_exhaustive_search,
ws_nbytes_limit=self.ws_nbytes_limit,
**kwargs
)
Expand All @@ -101,9 +101,9 @@ def ConvTranspose(self, *args, **kwargs):
return brew.conv_transpose(
self,
*args,
use_cudnn=self.use_cudnn,
use_gpu_engine=self.use_gpu_engine,
order=self.order,
cudnn_exhaustive_search=self.cudnn_exhaustive_search,
gpu_engine_exhaustive_search=self.gpu_engine_exhaustive_search,
ws_nbytes_limit=self.ws_nbytes_limit,
**kwargs
)
Expand All @@ -112,9 +112,9 @@ def GroupConv(self, *args, **kwargs):
return brew.group_conv(
self,
*args,
use_cudnn=self.use_cudnn,
use_gpu_engine=self.use_gpu_engine,
order=self.order,
cudnn_exhaustive_search=self.cudnn_exhaustive_search,
gpu_engine_exhaustive_search=self.gpu_engine_exhaustive_search,
ws_nbytes_limit=self.ws_nbytes_limit,
**kwargs
)
Expand All @@ -123,9 +123,9 @@ def GroupConv_Deprecated(self, *args, **kwargs):
return brew.group_conv_deprecated(
self,
*args,
use_cudnn=self.use_cudnn,
use_gpu_engine=self.use_gpu_engine,
order=self.order,
cudnn_exhaustive_search=self.cudnn_exhaustive_search,
gpu_engine_exhaustive_search=self.gpu_engine_exhaustive_search,
ws_nbytes_limit=self.ws_nbytes_limit,
**kwargs
)
Expand All @@ -147,16 +147,16 @@ def FC_Sparse(self, *args, **kwargs):

def Dropout(self, *args, **kwargs):
    """Add a Dropout op via brew, forwarding this helper's data order
    and GPU-engine preference."""
    return brew.dropout(
        self, *args, order=self.order, use_gpu_engine=self.use_gpu_engine, **kwargs
    )

def LRN(self, *args, **kwargs):
    """Add an LRN (local response normalization) op via brew, forwarding
    this helper's data order and GPU-engine preference."""
    return brew.lrn(
        self, *args, order=self.order, use_gpu_engine=self.use_gpu_engine, **kwargs
    )

def Softmax(self, *args, **kwargs):
    """Add a Softmax op via brew, forwarding this helper's GPU-engine
    preference (no data-order argument for softmax)."""
    return brew.softmax(self, *args, use_gpu_engine=self.use_gpu_engine, **kwargs)

def SpatialBN(self, *args, **kwargs):
    """Add a SpatialBN op via brew, forcing this helper's data order."""
    return brew.spatial_bn(self, *args, order=self.order, **kwargs)
Expand All @@ -169,7 +169,7 @@ def InstanceNorm(self, *args, **kwargs):

def Relu(self, *args, **kwargs):
    """Add a Relu op via brew, forwarding this helper's data order
    and GPU-engine preference."""
    return brew.relu(
        self, *args, order=self.order, use_gpu_engine=self.use_gpu_engine, **kwargs
    )

def PRelu(self, *args, **kwargs):
Expand All @@ -187,7 +187,7 @@ def Sum(self, *args, **kwargs):
return brew.sum(self, *args, **kwargs)

def Transpose(self, *args, **kwargs):
    """Add a Transpose op via brew, forwarding this helper's GPU-engine
    preference."""
    return brew.transpose(self, *args, use_gpu_engine=self.use_gpu_engine, **kwargs)

def Iter(self, *args, **kwargs):
    """Delegate to brew.iter unchanged; takes no order/engine knobs."""
    return brew.iter(self, *args, **kwargs)
Expand All @@ -197,15 +197,15 @@ def Accuracy(self, *args, **kwargs):

def MaxPool(self, *args, **kwargs):
    """Add a MaxPool op via brew, forwarding this helper's GPU-engine
    preference and data order."""
    return brew.max_pool(
        self, *args, use_gpu_engine=self.use_gpu_engine, order=self.order, **kwargs
    )

def MaxPoolWithIndex(self, *args, **kwargs):
    """Add a MaxPoolWithIndex op via brew, forcing this helper's data order."""
    return brew.max_pool_with_index(self, *args, order=self.order, **kwargs)

def AveragePool(self, *args, **kwargs):
    """Add an AveragePool op via brew, forwarding this helper's GPU-engine
    preference and data order."""
    return brew.average_pool(
        self, *args, use_gpu_engine=self.use_gpu_engine, order=self.order, **kwargs
    )

@property
Expand Down Expand Up @@ -235,6 +235,11 @@ def CPU(self):
@property
def GPU(self, gpu_id=0):
    """Return a DeviceOption targeting the GPU backend this build supports:
    HIP when workspace.has_hip_support, otherwise CUDA.

    NOTE(review): as a @property this is always accessed without arguments,
    so gpu_id is effectively fixed at its default of 0 — confirm whether a
    regular method was intended.
    """
    device_option = caffe2_pb2.DeviceOption()
    if workspace.has_hip_support:
        device_option.device_type = caffe2_pb2.HIP
        device_option.hip_gpu_id = gpu_id
    else:
        device_option.device_type = caffe2_pb2.CUDA
        device_option.cuda_gpu_id = gpu_id

    return device_option
51 changes: 35 additions & 16 deletions caffe2/python/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def device_option_equal(opt1, opt2, ignore_node_name=True, ignore_random_seed=Tr
if not opt1.device_type or not opt2.device_type:
# At least one option is for CPU, check if both are for CPU.
return not opt1.device_type and not opt2.device_type
return opt1.cuda_gpu_id == opt2.cuda_gpu_id
return (opt1.cuda_gpu_id == opt2.cuda_gpu_id) and (opt1.hip_gpu_id == opt2.hip_gpu_id)


def InferBlobDevices(net):
Expand Down Expand Up @@ -2021,16 +2021,16 @@ def DeduplicateGradientSlices(self, g, aggregator='sum'):
raise ValueError('{} is not supported'.format(aggregator))
return GradientSlice(indices=unique, values=new_g)

def RunAllOnGPU(self, gpu_id=0, use_gpu_engine=False):
    """A convenient function to run everything on the GPU.

    Targets HIP when the build has HIP support, otherwise CUDA; both GPU-id
    proto fields are set so the option is valid for either backend. When
    use_gpu_engine is True, every op is pinned to the vendor engine
    (MIOPEN on HIP, CUDNN on CUDA).
    """
    device_option = caffe2_pb2.DeviceOption()
    device_option.device_type = (
        caffe2_pb2.HIP if workspace.has_hip_support else caffe2_pb2.CUDA
    )
    device_option.cuda_gpu_id = gpu_id
    device_option.hip_gpu_id = gpu_id
    self._net.device_option.CopyFrom(device_option)
    if use_gpu_engine:
        for op in self._net.op:
            op.engine = "MIOPEN" if workspace.has_hip_support else "CUDNN"

def RunAllOnMKL(self):
"""A convenient function to run everything using MKLDNN."""
Expand Down Expand Up @@ -2189,27 +2189,38 @@ def extend_ops(self, new_ops):

def copy_func_between_devices(src, dst):
    """Return a function that adds a copy of a blob from device src to
    device dst, or None when src and dst are the same device.

    Fixes from the pasted diff: stray backtick after caffe2_pb2.HIP removed,
    and the superseded CUDA-only lines dropped in favor of the HIP-aware
    versions.
    """
    CPU = caffe2_pb2.CPU
    # Pick the GPU backend this build actually supports.
    GPU = caffe2_pb2.HIP if workspace.has_hip_support else caffe2_pb2.CUDA

    if src.device_type == CPU and dst.device_type == CPU:
        return None

    if src.device_type == GPU and dst.device_type == GPU:
        # Same backend: a copy is only needed when the GPU ids differ.
        # HIP and CUDA record their device id in different proto fields.
        if workspace.has_hip_support:
            if src.hip_gpu_id == dst.hip_gpu_id:
                return None
        else:
            if src.cuda_gpu_id == dst.cuda_gpu_id:
                return None

        def fun(net, *args, **kw):
            with DeviceScope(dst):
                return net.Copy(*args, **kw)
        return fun

    if src.device_type == GPU and dst.device_type == CPU:
        def fun(net, *args, **kw):
            with DeviceScope(src):
                return net.CopyGPUToCPU(*args, **kw)
        return fun

    if src.device_type == CPU and dst.device_type == GPU:
        def fun(net, *args, **kw):
            with DeviceScope(dst):
                return net.CopyCPUToGPU(*args, **kw)
        return fun
    # NOTE(review): the original may handle remaining device combinations
    # (e.g. raise) in lines elided by the diff view — confirm against the
    # full file before relying on the implicit None fall-through.
Expand All @@ -2224,7 +2235,12 @@ def device_equal(src, dst):
comparison between empty device_options and {device_type:0, cuda_gpu_id:0}
returns not equal in some cases.
'''
return src.device_type == dst.device_type and src.cuda_gpu_id == dst.cuda_gpu_id
if workspace.has_hip_support:
gpu_id_eq = src.hip_gpu_id == dst.hip_gpu_id
else:
gpu_id_eq = src.cuda_gpu_id == dst.cuda_gpu_id

return src.device_type == dst.device_type and gpu_id_eq


def update_placeholder_op_output(op, blob_to_device):
Expand Down Expand Up @@ -2335,10 +2351,13 @@ def InjectCrossDeviceCopies(net, blob_to_device=None, blob_remap=None,
def _gen_new_name(blob, device_option):
CPU = caffe2_pb2.CPU
CUDA = caffe2_pb2.CUDA
HIP = caffe2_pb2.HIP
if device_option.device_type == CPU:
suffix = '_cpu'
elif device_option.device_type == CUDA:
suffix = '_cuda_' + str(device_option.cuda_gpu_id)
elif device_option.device_type == HIP:
suffix = '_hip_' + str(device_option.hip_gpu_id)
else:
raise RuntimeError(
"Unknown device type: {}".
Expand Down
Loading