Add support for float16 #1

Merged
merged 5 commits into from May 10, 2023

1 change: 1 addition & 0 deletions js/common/lib/tensor-impl.ts
@@ -17,6 +17,7 @@ const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map<string, SupportedTypedArra
['uint8', Uint8Array],
['int8', Int8Array],
['uint16', Uint16Array],
['float16', Uint16Array],
['int16', Int16Array],
['int32', Int32Array],
['bool', Uint8Array],
4 changes: 2 additions & 2 deletions js/common/lib/tensor.ts
@@ -35,7 +35,7 @@ export declare namespace Tensor {
int64: BigInt64Array;
string: string[];
bool: Uint8Array;
float16: never; // hold on using Uint16Array before we have a concrete solution for float 16
float16: Uint16Array; // Keep using Uint16Array until we have a concrete solution for float 16.
float64: Float64Array;
uint32: Uint32Array;
uint64: BigUint64Array;
@@ -54,7 +54,7 @@ export declare namespace Tensor {
int64: bigint;
string: string;
bool: boolean;
float16: never; // hold on before we have a concret solution for float 16
float16: number; // Keep using Uint16Array until we have a concrete solution for float 16.
float64: number;
uint32: number;
uint64: bigint;
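With these changes (the typed-array map in tensor-impl.ts and the float16 entries above), a float16 tensor on the JS side is backed by a Uint16Array of raw IEEE 754 half-precision bits and its element type is a plain number, so callers are responsible for encoding values themselves. A minimal sketch of that encoding (truncating the mantissa rather than rounding to nearest) and of constructing such a tensor, assuming the usual `new Tensor(type, data, dims)` constructor from onnxruntime-common accepts 'float16' after this change:

```typescript
import { Tensor } from 'onnxruntime-common';

// Encode a JS number as IEEE 754 half-precision bits (mantissa is truncated, not rounded).
const toFloat16Bits = (value: number): number => {
  const f32 = new Float32Array(1);
  const u32 = new Uint32Array(f32.buffer);
  f32[0] = value;
  const x = u32[0];
  const sign = (x >>> 16) & 0x8000;
  let exp = (x >>> 23) & 0xff;
  const frac = x & 0x7fffff;
  if (exp === 0xff) return sign | 0x7c00 | (frac ? 0x0200 : 0); // Inf / NaN
  exp = exp - 127 + 15;                                         // re-bias exponent (float32 -> float16)
  if (exp >= 0x1f) return sign | 0x7c00;                        // overflow -> Inf
  if (exp <= 0) {
    if (exp < -10) return sign;                                 // underflow -> signed zero
    return sign | (((frac | 0x800000) >> (1 - exp)) >> 13);     // subnormal
  }
  return sign | (exp << 10) | (frac >> 13);                     // normal
};

// Build a float16 tensor from the encoded half-precision bits.
const f16Data = Uint16Array.from([0.5, -1.25, 3.0, 42], toFloat16Bits);
const t = new Tensor('float16', f16Data, [2, 2]);
```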
2 changes: 2 additions & 0 deletions js/web/lib/onnxjs/util.ts
@@ -426,6 +426,8 @@ export class ProtoUtil {
return onnx.TensorProto.DataType.INT32;
case 'uint32':
return onnx.TensorProto.DataType.UINT32;
case 'float16':
return onnx.TensorProto.DataType.FLOAT16;
case 'float32':
return onnx.TensorProto.DataType.FLOAT;
case 'float64':
4 changes: 4 additions & 0 deletions js/web/lib/wasm/wasm-common.ts
@@ -45,6 +45,8 @@ export const tensorDataTypeStringToEnum = (type: string): DataType => {
return DataType.int32;
case 'uint32':
return DataType.uint32;
case 'float16':
return DataType.float16;
case 'float32':
return DataType.float;
case 'float64':
@@ -80,6 +82,8 @@ export const tensorDataTypeEnumToString = (typeProto: DataType): Tensor.Type =>
return 'int32';
case DataType.uint32:
return 'uint32';
case DataType.float16:
return 'uint16';
case DataType.float:
return 'float32';
case DataType.double:
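One asymmetry worth noting in the two helpers above: the string-to-enum direction now accepts 'float16', while the enum-to-string direction reports 'uint16', i.e. the Uint16Array-backed storage type rather than 'float16'. A rough usage sketch (assuming the DataType enum follows the ONNX TensorProto numbering, where FLOAT16 is 10):

```typescript
tensorDataTypeStringToEnum('float16');        // DataType.float16 (10)
tensorDataTypeEnumToString(DataType.float16); // 'uint16', i.e. the JS storage type, not 'float16'
```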
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/webnn/builders/helper.cc
@@ -103,5 +103,9 @@ std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_v
return supported_node_groups;
}

bool IsSupportedDataType(int32_t data_type) {
return std::find(supported_data_types.begin(), supported_data_types.end(), data_type) != supported_data_types.end();
}

} // namespace webnn
} // namespace onnxruntime
7 changes: 7 additions & 0 deletions onnxruntime/core/providers/webnn/builders/helper.h
@@ -62,5 +62,12 @@ inline bool CheckSingleOp(const std::string& op_type, const emscripten::val& wnn
return op_map.find(op_type) != op_map.end() && wnn_builder_[op_map.find(op_type)->second].as<bool>();
}

constexpr std::array<ONNX_NAMESPACE::TensorProto_DataType, 2> supported_data_types = {
ONNX_NAMESPACE::TensorProto_DataType_FLOAT16,
ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
};

bool IsSupportedDataType(int32_t data_type);

} // namespace webnn
} // namespace onnxruntime
@@ -83,7 +83,7 @@ bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logg
if (!GetType(input, input_type, logger))
return false;

if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
if (!IsSupportedDataType(input_type)) {
LOGS(logger, VERBOSE) << "[" << node.OpType()
<< "] Input type: [" << input_type
<< "] is not supported for now";
16 changes: 13 additions & 3 deletions onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc
@@ -95,7 +95,7 @@ Status AddInitializerInNewLayout(ModelBuilder& model_builder,
bool is_conv) {
const auto& tensor = *model_builder.GetInitializerTensors().at(name);
auto data_type = tensor.data_type();
if (data_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
if (!IsSupportedDataType(data_type)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"The initializer of graph has unsupported type, name: ",
tensor.name(), " type: ", data_type);
@@ -122,7 +122,17 @@

SafeInt<size_t> num_elements = SafeInt<size_t>(Product(dest_shape));

size_t element_size = 4;
size_t element_size{0};
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
element_size = sizeof(uint16_t);
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
element_size = sizeof(float);
break;
default:
break;
}
std::unique_ptr<uint8_t[]> buffer_holder(new uint8_t[element_size * num_elements]);
uint8_t* buffer = buffer_holder.get();

@@ -156,7 +166,7 @@ Status AddInitializerInNewLayout(ModelBuilder& model_builder,
}
}
ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(name, buffer, num_elements * element_size,
dest_shape, 4));
dest_shape, data_type));
return Status::OK();
}

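The same data-type dispatch recurs across the C++ changes above and below: pick a typed-array view, an element size, and a WebNN operand type per ONNX element type, currently for float32 and float16 only. For reference, the mapping amounts to something like the following TypeScript sketch; the numeric constants are assumptions based on the ONNX TensorProto enum (FLOAT is 1, FLOAT16 is 10), and the throw in the default branch reflects the reviewer suggestion further down to fail loudly on unsupported types rather than silently continuing:

```typescript
// Sketch only: map an ONNX tensor element type to the JS view used for its raw bytes,
// the per-element size, and the WebNN operand type string.
const ONNX_FLOAT = 1;    // TensorProto_DataType_FLOAT
const ONNX_FLOAT16 = 10; // TensorProto_DataType_FLOAT16

interface SupportedView {
  ctor: Float32ArrayConstructor | Uint16ArrayConstructor;
  bytesPerElement: number;
  webnnType: 'float32' | 'float16';
}

const viewForDataType = (dataType: number): SupportedView => {
  switch (dataType) {
    case ONNX_FLOAT16:
      return { ctor: Uint16Array, bytesPerElement: 2, webnnType: 'float16' };
    case ONNX_FLOAT:
      return { ctor: Float32Array, bytesPerElement: 4, webnnType: 'float32' };
    default:
      // Fail loudly on anything else, mirroring the review comment below.
      throw new Error(`Unsupported data type: ${dataType}`);
  }
};
```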
73 changes: 55 additions & 18 deletions onnxruntime/core/providers/webnn/builders/model.cc
@@ -29,13 +29,23 @@ Status Model::Predict(const InlinedHashMap<std::string, OnnxTensorData>& inputs,
for (const auto& input : inputs) {
const std::string& name = input.first;
const struct OnnxTensorData tensor = input.second;
if (tensor.tensor_info.data_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"The input of graph has unsupported type, name: ",
name, " type: ", tensor.tensor_info.data_type);
}
auto num_elements = SafeInt<size_t>(Product(tensor.tensor_info.shape));
emscripten::val view{emscripten::typed_memory_view(num_elements, static_cast<const float*>(tensor.buffer))};
emscripten::val view = emscripten::val::undefined();
switch (tensor.tensor_info.data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
view = emscripten::val{emscripten::typed_memory_view(num_elements,
static_cast<const uint16_t*>(tensor.buffer))};
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
view = emscripten::val{emscripten::typed_memory_view(num_elements,
static_cast<const float*>(tensor.buffer))};
break;
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"The input of graph has unsupported type, name: ",
name, " type: ", tensor.tensor_info.data_type);
}

#ifdef ENABLE_WEBASSEMBLY_THREADS
// Copy the inputs from Wasm SharedArrayBuffer to the pre-allocated ArrayBuffers.
wnn_inputs_[name].call<void>("set", view);
@@ -55,13 +65,23 @@ Status Model::Predict(const InlinedHashMap<std::string, OnnxTensorData>& inputs,
for (const auto& output : outputs) {
const std::string& name = output.first;
const struct OnnxTensorData tensor = output.second;
if (tensor.tensor_info.data_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"The input of graph has unsupported type, name: ",
name, " type: ", tensor.tensor_info.data_type);
}
auto num_elements = SafeInt<size_t>(Product(tensor.tensor_info.shape));
emscripten::val view{emscripten::typed_memory_view(num_elements, static_cast<const float*>(tensor.buffer))};
emscripten::val view = emscripten::val::undefined();
switch (tensor.tensor_info.data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
view = emscripten::val{emscripten::typed_memory_view(num_elements,
static_cast<const uint16_t*>(tensor.buffer))};
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
view = emscripten::val{emscripten::typed_memory_view(num_elements,
static_cast<const float*>(tensor.buffer))};
break;
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"The output of graph has unsupported type, name: ",
name, " type: ", tensor.tensor_info.data_type);
}

#ifdef ENABLE_WEBASSEMBLY_THREADS
output_views.insert({name, view});
#else
@@ -102,16 +122,33 @@ void Model::AllocateInputOutputBuffers() {
for (const auto& input : inputs_) {
const auto& input_info = input_output_info_.at(input);
const auto input_shape = input_info.shape;
const auto num_elements = SafeInt<size_t>(Product(input_shape));
wnn_inputs_.set(input,
emscripten::val::global("Float32Array").new_(static_cast<const int>(num_elements)));
const int32_t num_elements = SafeInt<int32_t>(Product(input_shape));
const auto data_type = input_info.data_type;
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
wnn_inputs_.set(input, emscripten::val::global("Uint16Array").new_(num_elements));
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
wnn_inputs_.set(input, emscripten::val::global("Float32Array").new_(num_elements));
break;
default:
break;
@fdwr May 8, 2023

I'd bail here on an unsupported data type rather than silently continuing, probably calling ORT_THROW, unless it's really okay to ignore that output. If not here, then there are some other places where throwing makes sense if they fall into the default clause.

}
}
for (const auto& output : outputs_) {
const auto& output_info = input_output_info_.at(output);
const auto output_shape = output_info.shape;
const auto num_elements = SafeInt<size_t>(Product(output_shape));
wnn_outputs_.set(output,
emscripten::val::global("Float32Array").new_(static_cast<const int>(num_elements)));
const int32_t num_elements = SafeInt<int32_t>(Product(output_shape));
const auto data_type = output_info.data_type;
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
wnn_outputs_.set(output, emscripten::val::global("Uint16Array").new_(num_elements));
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
wnn_outputs_.set(output, emscripten::val::global("Float32Array").new_(num_elements));
break;
default:
break;
}
}
}

43 changes: 36 additions & 7 deletions onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -107,14 +107,26 @@ Status ModelBuilder::RegisterInitializers() {
desc.set("dimensions", emscripten::val::array(dims));
auto data_type = tensor.data_type();
emscripten::val operand = emscripten::val::object();
if (data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
if (IsSupportedDataType(data_type)) {
unpacked_tensors_.push_back({});
std::vector<uint8_t>& unpacked_tensor = unpacked_tensors_.back();
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor));
auto num_elements = SafeInt<size_t>(Product(tensor.dims()));
desc.set("type", emscripten::val("float32"));
emscripten::val view{emscripten::typed_memory_view(num_elements,
reinterpret_cast<float*>(unpacked_tensor.data()))};
emscripten::val view = emscripten::val::undefined();
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
desc.set("type", emscripten::val("float16"));
view = emscripten::val{emscripten::typed_memory_view(num_elements,
reinterpret_cast<uint16_t*>(unpacked_tensor.data()))};
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
desc.set("type", emscripten::val("float32"));
view = emscripten::val{emscripten::typed_memory_view(num_elements,
reinterpret_cast<float*>(unpacked_tensor.data()))};
break;
default:
break;
}
#ifdef ENABLE_WEBASSEMBLY_THREADS
// Workaround for WebAssembly multi-threads enabled since WebNN API only accepts non-shared ArrayBufferView.
// https://www.w3.org/TR/webnn/#typedefdef-mlnamedarraybufferviews
@@ -191,6 +203,9 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i

data_type = type_proto->tensor_type().elem_type();
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
desc.set("type", emscripten::val("float16"));
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
desc.set("type", emscripten::val("float32"));
break;
@@ -244,14 +259,28 @@ Status ModelBuilder::AddOperations() {

Status ModelBuilder::AddOperandFromPersistMemoryBuffer(
const std::string& name, const void* buffer, const size_t size,
const std::vector<uint32_t> shape, const size_t element_size) {
const std::vector<uint32_t> shape, const int32_t data_type) {
auto persist_buffer = std::make_unique<uint8_t[]>(size);
uint8_t* dest = persist_buffer.get();
memcpy(dest, buffer, size);
emscripten::val view{emscripten::typed_memory_view(size / element_size, reinterpret_cast<const float*>(dest))};
emscripten::val view = emscripten::val::undefined();
emscripten::val desc = emscripten::val::object();
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint16_t),
reinterpret_cast<const uint16_t*>(dest))};
desc.set("type", emscripten::val("float16"));
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
view = emscripten::val{emscripten::typed_memory_view(size / sizeof(float),
reinterpret_cast<const float*>(dest))};
desc.set("type", emscripten::val("float32"));
break;
default:
break;
}

desc.set("dimensions", emscripten::val::array(shape));
desc.set("type", emscripten::val("float32"));
emscripten::val operand = emscripten::val::object();
#ifdef ENABLE_WEBASSEMBLY_THREADS
// Workaround for WebAssembly multi-threads enabled since WebNN API only accepts non-shared ArrayBufferView.
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -39,7 +39,7 @@ class ModelBuilder {
// Add a constant operand (allocate persist buffer and move the ownership to mem_persist_buffers_).
Status AddOperandFromPersistMemoryBuffer(
const std::string& name, const void* buffer,
const size_t size, const std::vector<uint32_t> shape, const size_t element_size = 4);
const size_t size, const std::vector<uint32_t> shape, const int32_t data_type);
// Find if an output has a fuseable activation (e.g., Relu).
emscripten::val FindActivation(const Node& node, const NodeArg& output,
const InlinedHashSet<std::string> supported_nodes = {});
@@ -305,6 +305,7 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra

void* output_buffer;
switch (output_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
output_buffer = output_tensor.GetTensorMutableRawData();
break;
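For float16 outputs, the raw buffer handed back to the caller holds half-precision bits, surfaced on the JS side as a Uint16Array, so reading the values requires decoding. A minimal sketch of the inverse of the encoder shown earlier (outputTensor is a hypothetical result tensor from InferenceSession.run):

```typescript
// Decode IEEE 754 half-precision bits into a JS number.
const fromFloat16Bits = (h: number): number => {
  const sign = h & 0x8000 ? -1 : 1;
  const exp = (h >> 10) & 0x1f;
  const frac = h & 0x3ff;
  if (exp === 0) return sign * frac * 2 ** -24;           // zero / subnormal
  if (exp === 0x1f) return frac ? NaN : sign * Infinity;  // Inf / NaN
  return sign * (1 + frac / 1024) * 2 ** (exp - 15);      // normal
};

// e.g. decoding a float16 output tensor's data (a Uint16Array after this change):
// const values = Array.from(outputTensor.data as Uint16Array, fromFloat16Bits);
```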