diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD
index c6728182c..0306a8c06 100644
--- a/tensorflow_io/core/BUILD
+++ b/tensorflow_io/core/BUILD
@@ -187,6 +187,7 @@ cc_library(
 exports_files([
     "swift/audio.swift",
+    "swift/video.swift",
 ])
 
 cc_library(
@@ -219,6 +220,26 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "video_ops",
+    srcs = [
+        "kernels/video_kernels.cc",
+        "kernels/video_kernels.h",
+        "ops/video_ops.cc",
+    ],
+    copts = tf_io_copts(),
+    linkstatic = True,
+    deps = [
+        "//tensorflow_io/core:dataset_ops",
+    ] + select({
+        "@bazel_tools//src/conditions:darwin": [
+            "//tools/build/swift:video_swift",
+        ],
+        "//conditions:default": [],
+    }),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "ffmpeg_3.4_ops",
     srcs = [
@@ -542,6 +563,7 @@ cc_binary(
         "//tensorflow_io/core:serialization_ops",
         "//tensorflow_io/core:sql_ops",
         "//tensorflow_io/core:text_ops",
+        "//tensorflow_io/core:video_ops",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
     ] + select({
diff --git a/tensorflow_io/core/kernels/video_kernels.cc b/tensorflow_io/core/kernels/video_kernels.cc
new file mode 100644
index 000000000..725461768
--- /dev/null
+++ b/tensorflow_io/core/kernels/video_kernels.cc
@@ -0,0 +1,185 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow_io/core/kernels/video_kernels.h"
+
+extern "C" {
+#if defined(__APPLE__)
+void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
+                               int64_t* width, int64_t* height);
+void VideoCaptureNextFunction(void* context, void* data, int64_t size);
+void VideoCaptureFiniFunction(void* context);
+#elif defined(_MSC_VER)
+void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
+                               int64_t* width, int64_t* height) {
+  return NULL;
+}
+void VideoCaptureNextFunction(void* context, void* data, int64_t size) {}
+void VideoCaptureFiniFunction(void* context) {}
+#else
+void* VideoCaptureInitFunction(const char* device, int64_t* bytes,
+                               int64_t* width, int64_t* height) {
+  tensorflow::data::VideoCaptureContext* p =
+      new tensorflow::data::VideoCaptureContext();
+  if (p != nullptr) {
+    tensorflow::Status status = p->Init(device, bytes, width, height);
+    if (status.ok()) {
+      return p;
+    }
+    LOG(ERROR) << "unable to initialize video capture: " << status;
+    delete p;
+  }
+  return NULL;
+}
+void VideoCaptureNextFunction(void* context, void* data, int64_t size) {
+  tensorflow::data::VideoCaptureContext* p =
+      static_cast<tensorflow::data::VideoCaptureContext*>(context);
+  if (p != nullptr) {
+    tensorflow::Status status = p->Read(data, size);
+    if (!status.ok()) {
+      LOG(ERROR) << "unable to read video capture: " << status;
+    }
+  }
+}
+void VideoCaptureFiniFunction(void* context) {
+  tensorflow::data::VideoCaptureContext* p =
+      static_cast<tensorflow::data::VideoCaptureContext*>(context);
+  if (p != nullptr) {
+    delete p;
+  }
+}
+#endif
+}
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+class VideoCaptureReadableResource : public ResourceBase {
+ public:
+  VideoCaptureReadableResource(Env* env)
+      : env_(env), context_(nullptr, [](void* p) {
+          if (p != nullptr) {
+            VideoCaptureFiniFunction(p);
+          }
+        }) {}
+  ~VideoCaptureReadableResource() {}
+
+  Status Init(const string& input) {
+    mutex_lock l(mu_);
+
+    int64_t bytes, width, height;
+    context_.reset(
+        VideoCaptureInitFunction(input.c_str(), &bytes, &width, &height));
+    if (context_.get() == nullptr) {
+      return errors::InvalidArgument("unable to open device ", input);
+    }
+    bytes_ = static_cast<int64>(bytes);
+    width_ = static_cast<int64>(width);
+    height_ = static_cast<int64>(height);
+    return Status::OK();
+  }
+  Status Read(
+      std::function<Status(const TensorShape& shape, Tensor** value_tensor)>
+          allocate_func) {
+    mutex_lock l(mu_);
+
+    Tensor* value_tensor;
+    TF_RETURN_IF_ERROR(allocate_func(TensorShape({1}), &value_tensor));
+
+    string buffer;
+    buffer.resize(bytes_);
+    VideoCaptureNextFunction(context_.get(), (void*)&buffer[0],
+                             static_cast<int64_t>(bytes_));
+    value_tensor->flat<tstring>()(0) = buffer;
+
+    return Status::OK();
+  }
+  string DebugString() const override {
+    mutex_lock l(mu_);
+    return "VideoCaptureReadableResource";
+  }
+
+ protected:
+  mutable mutex mu_;
+  Env* env_ GUARDED_BY(mu_);
+
+  std::unique_ptr<void, void (*)(void*)> context_;
+  int64 bytes_;
+  int64 width_;
+  int64 height_;
+};
+
+class VideoCaptureReadableInitOp
+    : public ResourceOpKernel<VideoCaptureReadableResource> {
+ public:
+  explicit VideoCaptureReadableInitOp(OpKernelConstruction* context)
+      : ResourceOpKernel<VideoCaptureReadableResource>(context) {
+    env_ = context->env();
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    ResourceOpKernel<VideoCaptureReadableResource>::Compute(context);
+
+    const Tensor* input_tensor;
+    OP_REQUIRES_OK(context, context->input("input", &input_tensor));
+    const string& input = input_tensor->scalar<tstring>()();
+
+    OP_REQUIRES_OK(context, resource_->Init(input));
+  }
+  Status CreateResource(VideoCaptureReadableResource** resource)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) override {
+    *resource = new VideoCaptureReadableResource(env_);
+    return Status::OK();
+  }
+
+ private:
+  mutable mutex mu_;
+  Env* env_ GUARDED_BY(mu_);
+};
+
+class VideoCaptureReadableReadOp : public OpKernel {
+ public:
+  explicit VideoCaptureReadableReadOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    env_ = context->env();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    VideoCaptureReadableResource* resource;
+    OP_REQUIRES_OK(context,
+                   GetResourceFromContext(context, "input", &resource));
+    core::ScopedUnref unref(resource);
+
+    OP_REQUIRES_OK(
+        context, resource->Read([&](const TensorShape& shape,
+                                    Tensor** value_tensor) -> Status {
+          TF_RETURN_IF_ERROR(context->allocate_output(0, shape, value_tensor));
+          return Status::OK();
+        }));
+  }
+
+ private:
+  mutable mutex mu_;
+  Env* env_ GUARDED_BY(mu_);
+};
+REGISTER_KERNEL_BUILDER(Name("IO>VideoCaptureReadableInit").Device(DEVICE_CPU),
+                        VideoCaptureReadableInitOp);
+REGISTER_KERNEL_BUILDER(Name("IO>VideoCaptureReadableRead").Device(DEVICE_CPU),
+                        VideoCaptureReadableReadOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
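A minimal sketch of how the two kernels above are exercised from Python once the shared object is loaded: `io_video_capture_readable_init` returns a resource handle, and each `io_video_capture_readable_read` call returns a shape-`[1]` string tensor holding one raw frame. The device path is a placeholder that assumes a Linux V4L2 device:

```python
import tensorflow as tf
from tensorflow_io.core.python.ops import core_ops

# Open the device once; the resource owns the VideoCaptureContext.
resource = core_ops.io_video_capture_readable_init("/dev/video0")
for i in range(3):
    # Blocks until the next frame is available; returns a [1] string tensor.
    frame = core_ops.io_video_capture_readable_read(resource, i)
    print(tf.strings.length(frame[0]).numpy())  # raw frame size in bytes
```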
diff --git a/tensorflow_io/core/kernels/video_kernels.h b/tensorflow_io/core/kernels/video_kernels.h
new file mode 100644
index 000000000..a3e02a3b4
--- /dev/null
+++ b/tensorflow_io/core/kernels/video_kernels.h
@@ -0,0 +1,183 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
+
+#if defined(__linux__)
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <linux/videodev2.h>
+
+static int xioctl(int fh, int request, void* arg) {
+  int r;
+
+  do {
+    r = ioctl(fh, request, arg);
+  } while (-1 == r && EINTR == errno);
+
+  return r;
+}
+
+namespace tensorflow {
+namespace data {
+
+class VideoCaptureContext {
+ public:
+  VideoCaptureContext()
+      : context_(nullptr,
+                 [](void* p) {
+                   if (p != nullptr) {
+                     free(p);
+                   }
+                 }),
+        fd_scope_(nullptr, [](int* p) {
+          if (p != nullptr) {
+            close(*p);
+          }
+        }) {}
+  ~VideoCaptureContext() {}
+
+  Status Init(const string& device, int64_t* bytes, int64_t* width,
+              int64_t* height) {
+    device_ = device;
+
+    const char* devname = device.c_str();
+    struct stat st;
+    if (-1 == stat(devname, &st)) {
+      return errors::InvalidArgument("cannot identify '", devname,
+                                     "': ", errno, ", ", strerror(errno));
+    }
+
+    if (!S_ISCHR(st.st_mode)) {
+      return errors::InvalidArgument(devname, " is no device");
+    }
+
+    fd_ = open(devname, O_RDWR /* required */ | O_NONBLOCK, 0);
+    if (-1 == fd_) {
+      return errors::InvalidArgument("cannot open '", devname, "': ", errno,
+                                     ", ", strerror(errno));
+    }
+    fd_scope_.reset(&fd_);
+
+    struct v4l2_capability cap;
+    if (-1 == xioctl(fd_, VIDIOC_QUERYCAP, &cap)) {
+      if (EINVAL == errno) {
+        return errors::InvalidArgument(devname, " is no V4L2 device");
+      } else {
+        return errors::InvalidArgument("cannot VIDIOC_QUERYCAP '", devname,
+                                       "': ", errno, ", ", strerror(errno));
+      }
+    }
+
+    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
+      return errors::InvalidArgument(devname, " is no video capture device");
+    }
+
+    if (!(cap.capabilities & V4L2_CAP_READWRITE)) {
+      return errors::InvalidArgument(devname, " does not support read i/o");
+    }
+
+    struct v4l2_format fmt;
+    memset(&(fmt), 0, sizeof(fmt));
+    fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+    if (-1 == xioctl(fd_, VIDIOC_G_FMT, &fmt)) {
+      return errors::InvalidArgument("cannot VIDIOC_G_FMT '", devname,
+                                     "': ", errno, ", ", strerror(errno));
+    }
+
+    /* Buggy driver paranoia. */
+    {
+      unsigned int min;
+      min = fmt.fmt.pix.width * 2;
+      if (fmt.fmt.pix.bytesperline < min) {
+        fmt.fmt.pix.bytesperline = min;
+      }
+      min = fmt.fmt.pix.bytesperline * fmt.fmt.pix.height;
+      if (fmt.fmt.pix.sizeimage < min) {
+        fmt.fmt.pix.sizeimage = min;
+      }
+    }
+
+    if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_YUYV) {
+      return errors::InvalidArgument(
+          "only V4L2_PIX_FMT_YUYV is supported, received ",
+          fmt.fmt.pix.pixelformat);
+    }
+
+    *bytes = fmt.fmt.pix.sizeimage;
+    *width = fmt.fmt.pix.width;
+    *height = fmt.fmt.pix.height;
+
+    return Status::OK();
+  }
+  Status Read(void* data, size_t size) {
+    do {
+      fd_set fds;
+      struct timeval tv;
+      int r;
+
+      FD_ZERO(&fds);
+      FD_SET(fd_, &fds);
+
+      /* Timeout. */
+      tv.tv_sec = 2;
+      tv.tv_usec = 0;
+
+      r = select(fd_ + 1, &fds, NULL, NULL, &tv);
+
+      if (-1 == r) {
+        if (EINTR == errno) {
+          continue;
+        }
+        return errors::InvalidArgument("cannot select: ", errno, ", ",
+                                       strerror(errno));
+      }
+      if (0 == r) {
+        return errors::InvalidArgument("select timeout");
+      }
+
+      if (-1 == read(fd_, data, size)) {
+        if (EAGAIN == errno) {
+          /* EAGAIN - continue select loop. */
+          continue;
+        }
+        if (EIO == errno) {
+          /* Could ignore EIO, see spec. */
+          /* fall through */
+        }
+        return errors::InvalidArgument("cannot read: ", errno, ", ",
+                                       strerror(errno));
+      }
+      // Data obtained, break.
+      break;
+    } while (true);
+    return Status::OK();
+  }
+
+ protected:
+  mutable mutex mu_;
+
+  std::unique_ptr<void, void (*)(void*)> context_;
+  std::unique_ptr<int, void (*)(int*)> fd_scope_;
+  string device_;
+  int fd_;
+};
+
+}  // namespace data
+}  // namespace tensorflow
+#endif
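Since the Linux path only accepts `V4L2_PIX_FMT_YUYV`, each frame is packed YUYV422 (`Y0 U Y1 V` per pair of pixels). A hedged sketch of decoding one such frame into RGB with stock TensorFlow ops; it assumes `bytesperline == width * 2` (no row padding) and the 320x240 geometry the tests expect:

```python
import tensorflow as tf

def yuyv_to_rgb(frame, height=240, width=320):
    """Decode one packed YUYV422 frame (raw bytes) into a float RGB image."""
    raw = tf.reshape(tf.io.decode_raw(frame, tf.uint8), [height, width // 2, 4])
    # Each 4-byte group [Y0, U, Y1, V] encodes two horizontally adjacent pixels.
    y = tf.reshape(tf.stack([raw[..., 0], raw[..., 2]], axis=-1), [height, width])
    # Upsample chroma horizontally by repeating each sample for both pixels.
    u = tf.reshape(tf.stack([raw[..., 1], raw[..., 1]], axis=-1), [height, width])
    v = tf.reshape(tf.stack([raw[..., 3], raw[..., 3]], axis=-1), [height, width])
    yuv = tf.cast(tf.stack([y, u, v], axis=-1), tf.float32)
    # tf.image.yuv_to_rgb expects Y in [0, 1] and U/V centered around 0.
    yuv = (yuv - tf.constant([0.0, 128.0, 128.0])) / 255.0
    return tf.image.yuv_to_rgb(yuv)
```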
diff --git a/tensorflow_io/core/ops/video_ops.cc b/tensorflow_io/core/ops/video_ops.cc
new file mode 100644
index 000000000..3e5539956
--- /dev/null
+++ b/tensorflow_io/core/ops/video_ops.cc
@@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+namespace io {
+namespace {
+
+REGISTER_OP("IO>VideoCaptureReadableInit")
+    .Input("input: string")
+    .Output("resource: resource")
+    .Attr("container: string = ''")
+    .Attr("shared_name: string = ''")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
+REGISTER_OP("IO>VideoCaptureReadableRead")
+    .Input("input: resource")
+    .Input("index: int64")
+    .Output("value: string")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->MakeShape({c->UnknownDim()}));
+      return Status::OK();
+    });
+
+}  // namespace
+}  // namespace io
+}  // namespace tensorflow
diff --git a/tensorflow_io/core/python/experimental/io_dataset_ops.py b/tensorflow_io/core/python/experimental/io_dataset_ops.py
index a694509f6..5786e363e 100644
--- a/tensorflow_io/core/python/experimental/io_dataset_ops.py
+++ b/tensorflow_io/core/python/experimental/io_dataset_ops.py
@@ -28,6 +28,7 @@ from tensorflow_io.core.python.experimental import file_dataset_ops
 from tensorflow_io.core.python.experimental import numpy_dataset_ops
 from tensorflow_io.core.python.experimental import sql_dataset_ops
+from tensorflow_io.core.python.experimental import video_dataset_ops
 
 class IODataset(io_dataset.IODataset):
   """IODataset"""
@@ -269,6 +270,21 @@ def to_file(cls,
 class StreamIODataset(tf.data.Dataset):
   """StreamIODataset"""
 
+  @classmethod
+  def from_video_capture(cls, device, **kwargs):
+    """Creates a `StreamIODataset` from a video capture device.
+
+    Args:
+      device: A string, the name of the device.
+      name: A name prefix for the IODataset (optional).
+
+    Returns:
+      A `StreamIODataset`.
+    """
+    with tf.name_scope(kwargs.get("name", "IOFromVideoCapture")):
+      return video_dataset_ops.VideoCaptureIODataset(
+          device, internal=True)
+
   @classmethod
   def from_prometheus_scrape(cls,
                              metric,
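With the plumbing above in place, the new entry point mirrors the other stream datasets. A short usage sketch (the device path assumes a Linux V4L2 device, matching the test fixture further down):

```python
import tensorflow as tf
import tensorflow_io as tfio

# Grab five raw frames from the capture device and dump them to disk.
dataset = tfio.experimental.IODataset.stream().from_video_capture(
    "/dev/video0").take(5)
for i, frame in enumerate(dataset):
    tf.io.write_file("frame_{}.yuv".format(i), frame)
```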
+ """ + with tf.name_scope(kwargs.get("name", "IOFromVideoCapture")): + return video_dataset_ops.VideoCaptureIODataset( + device, internal=True) + @classmethod def from_prometheus_scrape(cls, metric, diff --git a/tensorflow_io/core/python/experimental/video_dataset_ops.py b/tensorflow_io/core/python/experimental/video_dataset_ops.py new file mode 100644 index 000000000..403c3d55d --- /dev/null +++ b/tensorflow_io/core/python/experimental/video_dataset_ops.py @@ -0,0 +1,54 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""VideoCaptureDataset""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow_io.core.python.ops import core_ops + +class VideoCaptureIODataset(tf.data.Dataset): + """VideoCaptureIODataset""" + + def __init__(self, + device, + internal=True): + """VideoCaptureIODataset""" + with tf.name_scope("VideoCaptureIODataset"): + assert internal + + resource = core_ops.io_video_capture_readable_init(device) + + self._resource = resource + + dataset = tf.data.experimental.Counter() + dataset = dataset.map( + lambda i: core_ops.io_video_capture_readable_read(self._resource, i)) + dataset = dataset.apply( + tf.data.experimental.take_while( + lambda v: tf.greater(tf.shape(v)[0], 0))) + dataset = dataset.unbatch() + + self._dataset = dataset + super(VideoCaptureIODataset, self).__init__( + self._dataset._variant_tensor) # pylint: disable=protected-access + + def _inputs(self): + return [] + + @property + def element_spec(self): + return self._dataset.element_spec diff --git a/tensorflow_io/core/swift/video.swift b/tensorflow_io/core/swift/video.swift new file mode 100644 index 000000000..100cf1b48 --- /dev/null +++ b/tensorflow_io/core/swift/video.swift @@ -0,0 +1,163 @@ +import AVFoundation + +class VideoDataOutputSampleBufferDelegate : NSObject, AVCaptureVideoDataOutputSampleBufferDelegate { + + var bytes: Int64 + var width: Int64 + var height: Int64 + var copied: Int64 + var buffer: UnsafeMutableRawPointer? + var semaphore_in: DispatchSemaphore + var semaphore_out: DispatchSemaphore + + init(semaphore_in: DispatchSemaphore, semaphore_out: DispatchSemaphore) { + self.bytes = 0 + self.width = 0 + self.height = 0 + self.copied = 0 + self.buffer = nil + self.semaphore_in = semaphore_in + self.semaphore_out = semaphore_out + super.init() + } + + deinit { + // TODO: This is not invoked, memory leak? 
+ print("VideoDataOutputSampleBufferDelegate.deinit") + } + + func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + + print("frame dropped: \(sampleBuffer)") + } + + func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + + semaphore_in.wait() + + defer { semaphore_out.signal() } + + if sampleBuffer.numSamples != 1 { + print("number of samples \(sampleBuffer.numSamples) is not supported") + return + } + + let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) + + let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer!) + let planeCount = CVPixelBufferGetPlaneCount(pixelBuffer!) + + if pixelFormat != kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange || planeCount != 2 { + print("PixelFormat \(pixelFormat) or PlaneCount \(planeCount) is not supported") + return + } + + let bytes = Int64(CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer!, 0) * CVPixelBufferGetHeightOfPlane(pixelBuffer!, 0) + CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer!, 1) * CVPixelBufferGetHeightOfPlane(pixelBuffer!, 1)) + let width = Int64(CVPixelBufferGetWidth(pixelBuffer!)) + let height = Int64(CVPixelBufferGetHeight(pixelBuffer!)) + + if (self.bytes == 0 || self.bytes == 0 || self.height == 0) { + self.bytes = bytes + self.width = width + self.height = height + } else if (self.bytes != bytes || self.width != width || self.height != height) { + print("Bytes \(bytes) vs. \(self.bytes), Width \(width) vs. \(self.width), Height \(height) vs. \(self.height)") + return + } + if (self.buffer != nil) { + CVPixelBufferLockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0)) + + let baseAddress0 = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer!, 0) + let bytesPerRow0 = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer!, 0) + let heightOfPlane0 = CVPixelBufferGetHeightOfPlane(pixelBuffer!, 0) + self.buffer!.copyMemory(from: baseAddress0!, byteCount: bytesPerRow0 * heightOfPlane0) + + let baseAddress1 = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer!, 1) + let bytesPerRow1 = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer!, 1) + let heightOfPlane1 = CVPixelBufferGetHeightOfPlane(pixelBuffer!, 1) + + self.buffer!.advanced(by: bytesPerRow0 * Int(height)).copyMemory(from: baseAddress1!, byteCount: bytesPerRow1 * heightOfPlane1) + + CVPixelBufferUnlockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0)) + + self.copied = Int64(bytesPerRow0 * heightOfPlane0 + bytesPerRow1 * heightOfPlane1) + } + } +} + +typealias VideoContext = (session: AVCaptureSession, semaphore_in: DispatchSemaphore, semaphore_out: DispatchSemaphore, delegate: VideoDataOutputSampleBufferDelegate) + +@_silgen_name("VideoCaptureInitFunction") +func VideoCaptureInitFunction(devname: UnsafePointer, bytes: UnsafeMutablePointer, width: UnsafeMutablePointer, height: UnsafeMutablePointer) -> UnsafeMutablePointer? { + + let deviceName = String(cString: devname) + + let session = AVCaptureSession() + let semaphore_in = DispatchSemaphore(value: 0) + let semaphore_out = DispatchSemaphore(value: 0) + let sampleBufferDelegate = VideoDataOutputSampleBufferDelegate(semaphore_in: semaphore_in, semaphore_out: semaphore_out) + + do { + let device = AVCaptureDevice.default(for: .video) + let deviceInput = try AVCaptureDeviceInput(device: device!) 
diff --git a/tests/test_io_dataset_eager.py b/tests/test_io_dataset_eager.py
index 6b6cc6fc6..3ab3e78ac 100644
--- a/tests/test_io_dataset_eager.py
+++ b/tests/test_io_dataset_eager.py
@@ -805,6 +805,44 @@ def func(q):
 
   return args, func, expected
 
+# Video capture streams never repeat, so we only test basic operations.
+@pytest.fixture(name="video_capture")
+def fixture_video_capture():
+  """fixture_video_capture
+  # Note: on Linux v4l2loopback is used, and the following is needed:
+  #   gst-launch-1.0 videotestsrc ! v4l2sink device=/dev/video0
+  # otherwise querying the format will fail:
+  #   $ v4l2-ctl -d /dev/video0 -V
+  #   VIDIOC_G_FMT: failed: Invalid argument
+  # Note: for validation, the captured YUV frames can be converted to JPEG:
+  #   macOS: ffmpeg -s 1280x720 -pix_fmt nv12 -i frame_{i}.yuv frame_{i}.jpg
+  #   Linux: ffmpeg -s 320x240 -pix_fmt yuyv422 -i frame_{i}.yuv frame_{i}.jpg
+  dataset = tfio.experimental.IODataset.stream().from_video_capture(
+      "/dev/video0").take(5)
+  i = 0
+  for frame in dataset:
+    print("Frame {}: shape({}) dtype({}) length({})".format(
+        i, frame.shape, frame.dtype, tf.strings.length(frame)))
+    tf.io.write_file("frame_{}.yuv".format(i), frame)
+    i += 1
+  """
+
+  args = "/dev/video0"
+  def func(q):
+    dataset = tfio.experimental.IODataset.stream().from_video_capture(q)
+    dataset = dataset.map(tf.strings.length)
+    dataset = dataset.take(10)
+    return dataset
+  # macOS (NV12): 1382400 = (1280 + 1280 / 2) * 720
+  # Linux (YUYV): 153600 = 320 * 240 * 2
+  value = 1382400 if sys.platform == "darwin" else 153600
+  expected = [value for _ in range(10)]
+
+  return args, func, expected
+
 # This test make sure dataset works in tf.keras inference.
 # The requirement for tf.keras inference is the support of `iter()`:
 # entries = [e for e in dataset]
@@ -868,6 +906,14 @@ def func(q):
                 reason="TODO PostgreSQL not tested on macOS/Windows"),
         ],
     ),
+    pytest.param(
+        "video_capture",
+        marks=[
+            pytest.mark.skipif(
+                os.environ.get("TEST_VIDEO_CAPTURE", "") != "true",
+                reason="Video capture not enabled"),
+        ],
+    ),
 ],
 ids=[
     "mnist",
@@ -892,6 +938,7 @@ def func(q):
     "kafka[avro]",
     "kafka[stream]",
     "sql",
+    "capture[video]",
 ],
 )
 def test_io_dataset_basic(fixture_lookup, io_dataset_fixture):
@@ -966,6 +1013,14 @@ def test_io_dataset_basic(fixture_lookup, io_dataset_fixture):
                 reason="TODO PostgreSQL not tested on macOS/Windows"),
         ],
     ),
+    pytest.param(
+        "video_capture",
+        marks=[
+            pytest.mark.skipif(
+                os.environ.get("TEST_VIDEO_CAPTURE", "") != "true",
+                reason="Video capture not enabled"),
+        ],
+    ),
 ],
 ids=[
     "mnist",
@@ -988,6 +1043,7 @@ def test_io_dataset_basic(fixture_lookup, io_dataset_fixture):
     "kafka[avro]",
     "kafka[stream]",
     "sql",
+    "capture[video]",
 ],
 )
 def test_io_dataset_basic_operation(fixture_lookup, io_dataset_fixture):
diff --git a/tools/build/swift/BUILD b/tools/build/swift/BUILD
index c65ee403a..31b7d9427 100644
--- a/tools/build/swift/BUILD
+++ b/tools/build/swift/BUILD
@@ -13,3 +13,15 @@ swift_library(
     module_name = "audio",
     alwayslink = True,
 )
+
+swift_library(
+    name = "video_swift",
+    srcs = [
+        "//tensorflow_io/core:swift/video.swift",
+    ],
+    linkopts = [
+        "-L/usr/lib/swift",
+    ],
+    module_name = "video",
+    alwayslink = True,
+)