Skip to content

Commit 3bcdb16

Browse files
authored
Add decode_nv12 to allow convert nv12 to rgb (#874)
* Add decode_nv12 to allow converting NV12 to RGB. This PR adds support for decode_nv12, which converts NV12 to RGB. This is useful for video capture and MP4 decoding on macOS, as macOS's default pixel format is NV12 (hardware-accelerated). This PR fixes #825. Signed-off-by: Yong Tang <[email protected]> * Add decode_yuy2 (YUYV/YUY2/YUYV422) support. This PR adds support for decode_yuy2, which converts YUY2 to RGB. This is useful for video capture on Linux (Video4Linux2), as the default format there is YUYV. This PR fixes #825. Signed-off-by: Yong Tang <[email protected]>
1 parent e0266b9 commit 3bcdb16

File tree

14 files changed

+277
-1
lines changed

14 files changed

+277
-1
lines changed

WORKSPACE

+8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
workspace(name = "org_tensorflow_io")
22

33
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
4+
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
45
load("//third_party/toolchains/tf:tf_configure.bzl", "tf_configure")
56

67
tf_configure(name = "local_config_tf")
@@ -110,6 +111,13 @@ http_archive(
110111
],
111112
)
112113

114+
# Fetch libyuv from its Chromium git remote at a pinned commit and build it with //third_party:libyuv.BUILD.
new_git_repository(
115+
name = "libyuv",
116+
build_file = "//third_party:libyuv.BUILD",
117+
commit = "7f00d67d7c279f13b73d3be9c2d85873a7e2fbaf",
118+
remote = "https://chromium.googlesource.com/libyuv/libyuv",
119+
)
120+
113121
http_archive(
114122
name = "libgeotiff",
115123
build_file = "//third_party:libgeotiff.BUILD",

tensorflow_io/core/BUILD

+3
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,12 @@ cc_library(
163163
"kernels/image_font_kernels.cc",
164164
"kernels/image_hdr_kernels.cc",
165165
"kernels/image_jpeg_kernels.cc",
166+
"kernels/image_nv12_kernels.cc",
166167
"kernels/image_openexr_kernels.cc",
167168
"kernels/image_pnm_kernels.cc",
168169
"kernels/image_tiff_kernels.cc",
169170
"kernels/image_webp_kernels.cc",
171+
"kernels/image_yuy2_kernels.cc",
170172
"ops/image_ops.cc",
171173
],
172174
copts = tf_io_copts(),
@@ -180,6 +182,7 @@ cc_library(
180182
"@libgeotiff",
181183
"@libtiff",
182184
"@libwebp",
185+
"@libyuv",
183186
"@openexr",
184187
"@stb",
185188
],
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/core/framework/op_kernel.h"
17+
#include "tensorflow_io/core/kernels/io_stream.h"
18+
19+
#include "libyuv/convert_argb.h"
20+
21+
namespace tensorflow {
22+
namespace io {
23+
namespace {
24+
25+
class DecodeNV12Op : public OpKernel {
26+
public:
27+
explicit DecodeNV12Op(OpKernelConstruction* context) : OpKernel(context) {
28+
env_ = context->env();
29+
}
30+
31+
void Compute(OpKernelContext* context) override {
32+
const Tensor* input_tensor;
33+
OP_REQUIRES_OK(context, context->input("input", &input_tensor));
34+
35+
const Tensor* size_tensor;
36+
OP_REQUIRES_OK(context, context->input("size", &size_tensor));
37+
38+
const tstring& input = input_tensor->scalar<tstring>()();
39+
40+
int64 channels = 3;
41+
int64 height = size_tensor->flat<int32>()(0);
42+
int64 width = size_tensor->flat<int32>()(1);
43+
44+
Tensor* image_tensor = nullptr;
45+
OP_REQUIRES_OK(
46+
context, context->allocate_output(
47+
0, TensorShape({height, width, channels}), &image_tensor));
48+
uint8* rgb = image_tensor->flat<uint8>().data();
49+
50+
uint8* y = (uint8*)&input[0];
51+
uint8* uv = (uint8*)&input[width * height];
52+
uint32 y_stride = width;
53+
uint32 uv_stride = width;
54+
uint32 rgb_stride = width * 3;
55+
int status = libyuv::NV12ToRAW(y, y_stride, uv, uv_stride, rgb, rgb_stride,
56+
width, height);
57+
OP_REQUIRES(
58+
context, (status == 0),
59+
errors::InvalidArgument("unable to convert nv12 to rgb: ", status));
60+
}
61+
62+
private:
63+
mutex mu_;
64+
Env* env_ GUARDED_BY(mu_);
65+
};
66+
REGISTER_KERNEL_BUILDER(Name("IO>DecodeNV12").Device(DEVICE_CPU), DecodeNV12Op);
67+
68+
} // namespace
69+
} // namespace io
70+
} // namespace tensorflow
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/core/framework/op_kernel.h"
17+
#include "tensorflow_io/core/kernels/io_stream.h"
18+
19+
#include "libyuv/convert_argb.h"
20+
#include "libyuv/convert_from_argb.h"
21+
22+
namespace tensorflow {
23+
namespace io {
24+
namespace {
25+
26+
class DecodeYUY2Op : public OpKernel {
27+
public:
28+
explicit DecodeYUY2Op(OpKernelConstruction* context) : OpKernel(context) {
29+
env_ = context->env();
30+
}
31+
32+
void Compute(OpKernelContext* context) override {
33+
const Tensor* input_tensor;
34+
OP_REQUIRES_OK(context, context->input("input", &input_tensor));
35+
36+
const Tensor* size_tensor;
37+
OP_REQUIRES_OK(context, context->input("size", &size_tensor));
38+
39+
const tstring& input = input_tensor->scalar<tstring>()();
40+
41+
int64 channels = 3;
42+
int64 height = size_tensor->flat<int32>()(0);
43+
int64 width = size_tensor->flat<int32>()(1);
44+
45+
Tensor* image_tensor = nullptr;
46+
OP_REQUIRES_OK(
47+
context, context->allocate_output(
48+
0, TensorShape({height, width, channels}), &image_tensor));
49+
50+
string buffer;
51+
buffer.resize(width * height * 4);
52+
uint8* argb = (uint8*)&buffer[0];
53+
uint8* yuy2 = (uint8*)&input[0];
54+
uint32 yuy2_stride = width * 2;
55+
uint32 argb_stride = width * 4;
56+
int status =
57+
libyuv::YUY2ToARGB(yuy2, yuy2_stride, argb, argb_stride, width, height);
58+
OP_REQUIRES(
59+
context, (status == 0),
60+
errors::InvalidArgument("unable to convert yuy2 to argb: ", status));
61+
62+
uint8* rgb = image_tensor->flat<uint8>().data();
63+
uint32 rgb_stride = width * 3;
64+
status =
65+
libyuv::ARGBToRAW(argb, argb_stride, rgb, rgb_stride, width, height);
66+
OP_REQUIRES(
67+
context, (status == 0),
68+
errors::InvalidArgument("unable to convert argb to rgb: ", status));
69+
}
70+
71+
private:
72+
mutex mu_;
73+
Env* env_ GUARDED_BY(mu_);
74+
};
75+
REGISTER_KERNEL_BUILDER(Name("IO>DecodeYUY2").Device(DEVICE_CPU), DecodeYUY2Op);
76+
77+
} // namespace
78+
} // namespace io
79+
} // namespace tensorflow

tensorflow_io/core/ops/image_ops.cc

+22
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,28 @@ REGISTER_OP("IO>DecodeDICOMData")
172172
loads a dicom file and returns the specified tags values as string.
173173
)doc");
174174

175+
// Op definition for IO>DecodeNV12: a string `input` plus an int32 `size`
// produce a uint8 `image`. Shape inference requires `input` to be rank 0 and
// reports the output as [?, ?, 3].
REGISTER_OP("IO>DecodeNV12")
    .Input("input: string")
    .Input("size: int32")
    .Output("image: uint8")
    .SetShapeFn([](shape_inference::InferenceContext* ctx) {
      // Only the rank check matters here; the resulting handle is discarded.
      shape_inference::ShapeHandle scalar_input;
      TF_RETURN_IF_ERROR(ctx->WithRank(ctx->input(0), 0, &scalar_input));

      const shape_inference::DimensionHandle rows = ctx->UnknownDim();
      const shape_inference::DimensionHandle cols = ctx->UnknownDim();
      const shape_inference::ShapeHandle image_shape =
          ctx->MakeShape({rows, cols, 3});
      ctx->set_output(0, image_shape);
      return Status::OK();
    });
185+
186+
// Op definition for IO>DecodeYUY2: a string `input` plus an int32 `size`
// produce a uint8 `image`. Shape inference requires `input` to be rank 0 and
// reports the output as [?, ?, 3].
REGISTER_OP("IO>DecodeYUY2")
    .Input("input: string")
    .Input("size: int32")
    .Output("image: uint8")
    .SetShapeFn([](shape_inference::InferenceContext* ctx) {
      // Only the rank check matters here; the resulting handle is discarded.
      shape_inference::ShapeHandle scalar_input;
      TF_RETURN_IF_ERROR(ctx->WithRank(ctx->input(0), 0, &scalar_input));

      const shape_inference::DimensionHandle rows = ctx->UnknownDim();
      const shape_inference::DimensionHandle cols = ctx->UnknownDim();
      const shape_inference::ShapeHandle image_shape =
          ctx->MakeShape({rows, cols, 3});
      ctx->set_output(0, image_shape);
      return Status::OK();
    });
196+
175197
} // namespace
176198
} // namespace io
177199
} // namespace tensorflow

tensorflow_io/core/python/api/experimental/image.py

+2
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,6 @@
2323
decode_exr,
2424
decode_pnm,
2525
decode_hdr,
26+
decode_nv12,
27+
decode_yuy2,
2628
)

tensorflow_io/core/python/experimental/image_ops.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def decode_pnm(contents, dtype=tf.uint8, name=None):
137137

138138
def decode_hdr(contents, name=None):
139139
"""
140-
Decode a HDR-encoded image to a uint8 tensor.
140+
Decode a HDR-encoded image to a tf.float tensor.
141141
142142
Args:
143143
contents: A `Tensor` of type `string`. 0-D. The HDR-encoded image.
@@ -147,3 +147,35 @@ def decode_hdr(contents, name=None):
147147
A `Tensor` of type `float` and shape of `[height, width, 3]` (RGB).
148148
"""
149149
return core_ops.io_decode_hdr(contents, name=name)
150+
151+
152+
def decode_nv12(contents, size, name=None):
    """
    Convert raw NV12 image bytes into a uint8 RGB tensor.

    Args:
      contents: A 0-D `Tensor` of type `string` holding the NV12-encoded
        image.
      size: A 1-D int32 Tensor with 2 elements, `height, width`, giving
        the dimensions of the image.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `uint8` with shape `[height, width, 3]` (RGB).
    """
    image = core_ops.io_decode_nv12(contents, size=size, name=name)
    return image
166+
167+
168+
def decode_yuy2(contents, size, name=None):
    """
    Convert raw YUY2 image bytes into a uint8 RGB tensor.

    Args:
      contents: A 0-D `Tensor` of type `string` holding the YUY2-encoded
        image.
      size: A 1-D int32 Tensor with 2 elements, `height, width`, giving
        the dimensions of the image.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `uint8` with shape `[height, width, 3]` (RGB).
    """
    image = core_ops.io_decode_yuy2(contents, size=size, name=name)
    return image

tests/test_image/Jelly-Beans.nv12

+1
Large diffs are not rendered by default.

tests/test_image/Jelly-Beans.nv12.png

65.5 KB
Loading

tests/test_image/Jelly-Beans.tiff

192 KB
Binary file not shown.

tests/test_image/Jelly-Beans.yuy2

+1
Large diffs are not rendered by default.

tests/test_image/Jelly-Beans.yuy2.png

73.5 KB
Loading

tests/test_image_eager.py

+34
Original file line numberDiff line numberDiff line change
@@ -318,5 +318,39 @@ def test_decode_tiff_geotiff():
318318
assert np.all(png_image.numpy() == image.numpy())
319319

320320

321+
def test_decode_nv12():
    """Check that decode_nv12 reproduces the reference PNG for the sample frame."""
    image_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_image")
    nv12_path = os.path.join(image_dir, "Jelly-Beans.nv12")
    reference_path = os.path.join(image_dir, "Jelly-Beans.nv12.png")

    # The PNG holds the expected RGB pixels for the 256x256 NV12 sample.
    expected = tf.image.decode_png(tf.io.read_file(reference_path))

    raw = tf.io.read_file(nv12_path)
    decoded = tfio.experimental.image.decode_nv12(raw, size=[256, 256])

    assert decoded.dtype == tf.uint8
    assert decoded.shape == [256, 256, 3]
    assert np.all(decoded == expected)
336+
337+
338+
def test_decode_yuy2():
    """Check that decode_yuy2 reproduces the reference PNG for the sample frame."""
    image_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_image")
    yuy2_path = os.path.join(image_dir, "Jelly-Beans.yuy2")
    reference_path = os.path.join(image_dir, "Jelly-Beans.yuy2.png")

    # The PNG holds the expected RGB pixels for the 256x256 YUY2 sample.
    expected = tf.image.decode_png(tf.io.read_file(reference_path))

    raw = tf.io.read_file(yuy2_path)
    decoded = tfio.experimental.image.decode_yuy2(raw, size=[256, 256])

    assert decoded.dtype == tf.uint8
    assert decoded.shape == [256, 256, 3]
    assert np.all(decoded == expected)
353+
354+
321355
if __name__ == "__main__":
322356
test.main()

third_party/libyuv.BUILD

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Description:
2+
# libyuv library from Chromium
3+
4+
licenses(["notice"])
5+
6+
exports_files(["LICENSE"])
7+
8+
# libyuv built from the subset of sources needed for the ARGB/RAW conversion
# entry points, plus the row/scale helpers and CPU feature detection.
cc_library(
    name = "libyuv",
    srcs = glob([
        "source/row_*.cc",
        "source/scale_*.cc",
    ]) + [
        "source/convert_argb.cc",
        "source/convert_from_argb.cc",
        "source/cpu_id.cc",
        "source/planar_functions.cc",
    ],
    # Dependents include these headers directly (e.g. "libyuv/convert_argb.h"),
    # so they are published through `hdrs` rather than kept private in `srcs`.
    hdrs = glob([
        "include/libyuv/*.h",
    ]),
    includes = [
        "include",
    ],
    visibility = ["//visibility:public"],
)

0 commit comments

Comments
 (0)