Commit 4477e34

Add Video Capture Support for macOS through AVFoundation/Swift (#821)
* Add Video Capture Support for macOS through AVFoundation/Swift

  This PR is part of the effort to resolve 814. In 814, the feature request is to add video capture support for Linux, likely through Video4Linux. Due to some limitations, Video4Linux needs a compatible USB camera first, so this PR instead tries to resolve the feature request on macOS first.

  On macOS the built-in camera can be accessed through AVFoundation's Swift API. This PR uses Swift to access AVCaptureSession etc., exported as C functions (`cdecl`) so that they can be used by the C++ kernel in tensorflow-io.

  Since macOS's raw video capture format is NV12 (kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange), additional work is needed to convert NV12 into RGB so that a whole pipeline can be built up to allow using video capture for tf.keras inference. This PR does not resolve the NV12 => RGB conversion yet; that will be addressed in separate PRs.

  Also, since video capture is technically a continuous stream and is not repeatable, it is not possible to train on video capture with multiple epochs.

  Finally, the following is a sample usage which takes video capture and saves frames as NV12 raw files. The NV12 raw files can be validated by converting them to JPEG with ffmpeg:

  ```
  ffmpeg -s 1280x720 -pix_fmt nv12 -i frame_{i}.yuv frame_{i}.jpg
  ```

  Usage:

  ```
  dataset = tfio.experimental.IODataset.stream().from_video_capture(
      "device").take(5)

  i = 0
  for frame in dataset:
    print("Frame {}: shape({}) dtype({}) length({})".format(
        i, frame.shape, frame.dtype, tf.strings.length(frame)))
    tf.io.write_file("frame_{}.yuv".format(i), frame)
    i += 1
  ```

  Signed-off-by: Yong Tang <[email protected]>

* Add Video4Linux V2 support on Linux

  Signed-off-by: Yong Tang <[email protected]>

* Update to use device name in API calls

  Signed-off-by: Yong Tang <[email protected]>

* Fix typo in Windows

  Signed-off-by: Yong Tang <[email protected]>

* Fix test typo

  Signed-off-by: Yong Tang <[email protected]>
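For reference on the NV12 => RGB step that this PR explicitly defers: below is a minimal sketch of one possible conversion, assuming the common BT.601 video-range integer approximation and a hypothetical `NV12ToRGB` helper. Neither the helper nor the coefficients are part of this commit; the eventual implementation may differ.

```
// Hypothetical helper, not part of this commit: converts one NV12 frame
// (width*height luma bytes followed by an interleaved half-resolution CbCr
// plane) into packed RGB using BT.601 video-range integer coefficients.
#include <algorithm>
#include <cstdint>
#include <vector>

void NV12ToRGB(const uint8_t* nv12, int64_t width, int64_t height,
               std::vector<uint8_t>* rgb) {
  const uint8_t* y_plane = nv12;
  const uint8_t* uv_plane = nv12 + width * height;
  rgb->resize(width * height * 3);
  auto clamp = [](int v) -> uint8_t {
    return static_cast<uint8_t>(std::min(std::max(v, 0), 255));
  };
  for (int64_t row = 0; row < height; ++row) {
    for (int64_t col = 0; col < width; ++col) {
      int c = y_plane[row * width + col] - 16;
      // Each 2x2 block of luma pixels shares one Cb/Cr pair.
      const uint8_t* uv = uv_plane + (row / 2) * width + (col / 2) * 2;
      int d = uv[0] - 128;  // Cb
      int e = uv[1] - 128;  // Cr
      uint8_t* out = rgb->data() + (row * width + col) * 3;
      out[0] = clamp((298 * c + 409 * e + 128) >> 8);            // R
      out[1] = clamp((298 * c - 100 * d - 208 * e + 128) >> 8);  // G
      out[2] = clamp((298 * c + 516 * d + 128) >> 8);            // B
    }
  }
}
```

With the 1280x720 frames used in the ffmpeg example above, the NV12 input is 1280*720*3/2 bytes and the RGB output is 1280*720*3 bytes.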
1 parent 812f90f commit 4477e34

File tree

9 files changed, +736 -0 lines changed


tensorflow_io/core/BUILD

Lines changed: 22 additions & 0 deletions
@@ -187,6 +187,7 @@ cc_library(
 
 exports_files([
     "swift/audio.swift",
+    "swift/video.swift",
 ])
 
 cc_library(
@@ -219,6 +220,26 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "video_ops",
+    srcs = [
+        "kernels/video_kernels.cc",
+        "kernels/video_kernels.h",
+        "ops/video_ops.cc",
+    ],
+    copts = tf_io_copts(),
+    linkstatic = True,
+    deps = [
+        "//tensorflow_io/core:dataset_ops",
+    ] + select({
+        "@bazel_tools//src/conditions:darwin": [
+            "//tools/build/swift:video_swift",
+        ],
+        "//conditions:default": [],
+    }),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "ffmpeg_3.4_ops",
     srcs = [
@@ -542,6 +563,7 @@ cc_binary(
         "//tensorflow_io/core:serialization_ops",
         "//tensorflow_io/core:sql_ops",
         "//tensorflow_io/core:text_ops",
+        "//tensorflow_io/core:video_ops",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
     ] + select({
tensorflow_io/core/kernels/video_kernels.cc

Lines changed: 185 additions & 0 deletions
@@ -0,0 +1,185 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow_io/core/kernels/video_kernels.h"

extern "C" {
#if defined(__APPLE__)
void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
                               int64_t* width, int64_t* height);
void VideoCaptureNextFunction(void* context, void* data, int64_t size);
void VideoCaptureFiniFunction(void* context);
#elif defined(_MSC_VER)
void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
                               int64_t* width, int64_t* height) {
  return NULL;
}
void VideoCaptureNextFunction(void* context, void* data, int64_t size) {}
void VideoCaptureFiniFunction(void* context) {}
#else
void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
                               int64_t* width, int64_t* height) {
  tensorflow::data::VideoCaptureContext* p =
      new tensorflow::data::VideoCaptureContext();
  if (p != nullptr) {
    tensorflow::Status status = p->Init(device, bytes, width, height);
    if (status.ok()) {
      return p;
    }
    LOG(ERROR) << "unable to initialize video capture: " << status;
    delete p;
  }
  return NULL;
}
void VideoCaptureNextFunction(void* context, void* data, int64_t size) {
  tensorflow::data::VideoCaptureContext* p =
      static_cast<tensorflow::data::VideoCaptureContext*>(context);
  if (p != nullptr) {
    tensorflow::Status status = p->Read(data, size);
    if (!status.ok()) {
      LOG(ERROR) << "unable to read video capture: " << status;
    }
  }
}
void VideoCaptureFiniFunction(void* context) {
  tensorflow::data::VideoCaptureContext* p =
      static_cast<tensorflow::data::VideoCaptureContext*>(context);
  if (p != nullptr) {
    delete p;
  }
}
#endif
}
namespace tensorflow {
namespace data {
namespace {

class VideoCaptureReadableResource : public ResourceBase {
 public:
  VideoCaptureReadableResource(Env* env)
      : env_(env), context_(nullptr, [](void* p) {
          if (p != nullptr) {
            VideoCaptureFiniFunction(p);
          }
        }) {}
  ~VideoCaptureReadableResource() {}

  Status Init(const string& input) {
    mutex_lock l(mu_);

    int64_t bytes, width, height;
    context_.reset(
        VideoCaptureInitFunction(input.c_str(), &bytes, &width, &height));
    if (context_.get() == nullptr) {
      return errors::InvalidArgument("unable to open device ", input);
    }
    bytes_ = static_cast<int64>(bytes);
    width_ = static_cast<int64>(width);
    height_ = static_cast<int64>(height);
    return Status::OK();
  }
  Status Read(
      std::function<Status(const TensorShape& shape, Tensor** value_tensor)>
          allocate_func) {
    mutex_lock l(mu_);

    Tensor* value_tensor;
    TF_RETURN_IF_ERROR(allocate_func(TensorShape({1}), &value_tensor));

    string buffer;
    buffer.resize(bytes_);
    VideoCaptureNextFunction(context_.get(), (void*)&buffer[0],
                             static_cast<int64_t>(bytes_));
    value_tensor->flat<string>()(0) = buffer;

    return Status::OK();
  }
  string DebugString() const override {
    mutex_lock l(mu_);
    return "VideoCaptureReadableResource";
  }

 protected:
  mutable mutex mu_;
  Env* env_ GUARDED_BY(mu_);

  std::unique_ptr<void, void (*)(void*)> context_;
  int64 bytes_;
  int64 width_;
  int64 height_;
};

class VideoCaptureReadableInitOp
    : public ResourceOpKernel<VideoCaptureReadableResource> {
 public:
  explicit VideoCaptureReadableInitOp(OpKernelConstruction* context)
      : ResourceOpKernel<VideoCaptureReadableResource>(context) {
    env_ = context->env();
  }

 private:
  void Compute(OpKernelContext* context) override {
    ResourceOpKernel<VideoCaptureReadableResource>::Compute(context);

    const Tensor* input_tensor;
    OP_REQUIRES_OK(context, context->input("input", &input_tensor));
    const string& input = input_tensor->scalar<string>()();

    OP_REQUIRES_OK(context, resource_->Init(input));
  }
  Status CreateResource(VideoCaptureReadableResource** resource)
      EXCLUSIVE_LOCKS_REQUIRED(mu_) override {
    *resource = new VideoCaptureReadableResource(env_);
    return Status::OK();
  }

 private:
  mutable mutex mu_;
  Env* env_ GUARDED_BY(mu_);
};

class VideoCaptureReadableReadOp : public OpKernel {
 public:
  explicit VideoCaptureReadableReadOp(OpKernelConstruction* context)
      : OpKernel(context) {
    env_ = context->env();
  }

  void Compute(OpKernelContext* context) override {
    VideoCaptureReadableResource* resource;
    OP_REQUIRES_OK(context,
                   GetResourceFromContext(context, "input", &resource));
    core::ScopedUnref unref(resource);

    OP_REQUIRES_OK(
        context, resource->Read([&](const TensorShape& shape,
                                    Tensor** value_tensor) -> Status {
          TF_RETURN_IF_ERROR(context->allocate_output(0, shape, value_tensor));
          return Status::OK();
        }));
  }

 private:
  mutable mutex mu_;
  Env* env_ GUARDED_BY(mu_);
};
REGISTER_KERNEL_BUILDER(Name("IO>VideoCaptureReadableInit").Device(DEVICE_CPU),
                        VideoCaptureReadableInitOp);
REGISTER_KERNEL_BUILDER(Name("IO>VideoCaptureReadableRead").Device(DEVICE_CPU),
                        VideoCaptureReadableReadOp);

}  // namespace
}  // namespace data
}  // namespace tensorflow

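Note: ops/video_ops.cc is listed in the BUILD srcs above but not shown in this section. Below is a hypothetical sketch of the op definitions it might contain, inferred from the kernels: the op names and the "input" input name come from the kernel code, while the output names, attrs, and shape functions are assumptions and may differ from the actual file.

```
// Hypothetical sketch of ops/video_ops.cc (not shown in this section).
// Output names and container/shared_name attrs are assumptions.
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

REGISTER_OP("IO>VideoCaptureReadableInit")
    .Input("input: string")
    .Output("resource: resource")
    .Attr("container: string = ''")
    .Attr("shared_name: string = ''")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      // The init op returns a scalar resource handle.
      c->set_output(0, c->Scalar());
      return Status::OK();
    });

REGISTER_OP("IO>VideoCaptureReadableRead")
    .Input("input: resource")
    .Output("value: string")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      // The read op returns a 1-D string tensor holding raw NV12 frame bytes.
      c->set_output(0, c->MakeShape({c->UnknownDim()}));
      return Status::OK();
    });

}  // namespace tensorflow
```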