diff --git a/models/object_detection_nanodet/LICENSE b/models/object_detection_nanodet/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/models/object_detection_nanodet/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/models/object_detection_nanodet/NanodetPlus.py b/models/object_detection_nanodet/NanodetPlus.py
new file mode 100644
index 00000000..84544e22
--- /dev/null
+++ b/models/object_detection_nanodet/NanodetPlus.py
@@ -0,0 +1,138 @@
+import cv2
+import numpy as np
+
+class NanoDet:
+ def __init__(self, modelPath, prob_threshold=0.35, iou_threshold=0.6):
+ with open('coco.names', 'rt') as f:
+ self.classes = f.read().rstrip('\n').split('\n')
+
+ self.num_classes = len(self.classes)
+ self.strides = (8, 16, 32, 64)
+ self.image_shape = (416, 416)
+ self.reg_max = 7
+ self.prob_threshold = prob_threshold
+ self.iou_threshold = iou_threshold
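+        # Bin indices for the GFL integral: decoded distances are expectations
+        # over reg_max + 1 bins (see post_process).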
+ self.project = np.arange(self.reg_max + 1)
+ self.mean = np.array([103.53, 116.28, 123.675], dtype=np.float32).reshape(1, 1, 3)
+ self.std = np.array([57.375, 57.12, 58.395], dtype=np.float32).reshape(1, 1, 3)
+ self.net = cv2.dnn.readNet(modelPath)
+
+        # Precompute the FCOS-style anchor-point centers (grid-cell centers)
+        # for every stride level.
+        self.anchors_mlvl = []
+        for stride in self.strides:
+            feat_h = int(self.image_shape[0] / stride)
+            feat_w = int(self.image_shape[1] / stride)
+            shift_x = np.arange(0, feat_w) * stride
+            shift_y = np.arange(0, feat_h) * stride
+            xv, yv = np.meshgrid(shift_x, shift_y)
+            xv = xv.flatten()
+            yv = yv.flatten()
+            cx = xv + 0.5 * (stride - 1)
+            cy = yv + 0.5 * (stride - 1)
+            anchors = np.column_stack((cx, cy))
+            self.anchors_mlvl.append(anchors)
+
+    def softmax_func(self, x, axis=0):
+        # Numerically stable softmax: subtract the per-row max before exponentiating.
+        x_exp = np.exp(x - x.max(axis=axis, keepdims=True))
+        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)
+
+    def pre_process(self, img):
+        # Normalize with per-channel mean/std (ImageNet statistics in the
+        # 0-255 range), then pack into an NCHW blob.
+        img = img.astype(np.float32)
+        img = (img - self.mean) / self.std
+        return cv2.dnn.blobFromImage(img)
+
+    def infer(self, srcimg, keep_ratio=True):
+        # Letterbox-resize the input to 416x416 (optionally preserving aspect
+        # ratio with constant-border padding), run the network, and decode.
+        top, left, newh, neww = 0, 0, self.image_shape[0], self.image_shape[1]
+ if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
+ hw_scale = srcimg.shape[0] / srcimg.shape[1]
+ if hw_scale > 1:
+ newh, neww = self.image_shape[0], int(self.image_shape[1] / hw_scale)
+ img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+ left = int((self.image_shape[1] - neww) * 0.5)
+ img = cv2.copyMakeBorder(img, 0, 0, left, self.image_shape[1] - neww - left, cv2.BORDER_CONSTANT,
+ value=0) # add border
+ else:
+ newh, neww = int(self.image_shape[0] * hw_scale), self.image_shape[1]
+ img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+ top = int((self.image_shape[0] - newh) * 0.5)
+ img = cv2.copyMakeBorder(img, top, self.image_shape[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
+ else:
+ img = cv2.resize(srcimg, self.image_shape, interpolation=cv2.INTER_AREA)
+
+ blob = self.pre_process(img)
+ self.net.setInput(blob)
+ outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
+ det_bboxes, det_conf, det_classid = self.post_process(outs)
+        ratioh, ratiow = srcimg.shape[0] / newh, srcimg.shape[1] / neww
+
+ return left, top, ratioh, ratiow, det_bboxes, det_conf, det_classid
+
+
+    def post_process(self, preds):
+        # Network outputs alternate (cls_score, bbox_pred) per stride level.
+        cls_scores, bbox_preds = preds[::2], preds[1::2]
+        rescale = False
+        scale_factor = 1
+        bboxes_mlvl = []
+        scores_mlvl = []
+        for stride, cls_score, bbox_pred, anchors in zip(self.strides, cls_scores, bbox_preds, self.anchors_mlvl):
+            if cls_score.ndim == 3:
+                cls_score = cls_score.squeeze(axis=0)
+            if bbox_pred.ndim == 3:
+                bbox_pred = bbox_pred.squeeze(axis=0)
+            # GFL integral: softmax over the reg_max + 1 bins of each box side,
+            # take the expectation, and scale by the stride to get distances.
+            bbox_pred = self.softmax_func(bbox_pred.reshape(-1, self.reg_max + 1), axis=1)
+            bbox_pred = np.dot(bbox_pred, self.project).reshape(-1, 4)
+            bbox_pred *= stride
+
+ nms_pre = 1000
+ if nms_pre > 0 and cls_score.shape[0] > nms_pre:
+ max_scores = cls_score.max(axis=1)
+ topk_inds = max_scores.argsort()[::-1][0:nms_pre]
+ anchors = anchors[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ cls_score = cls_score[topk_inds, :]
+
+            # Decode (left, top, right, bottom) distances from the anchor
+            # centers into corner coordinates, clipped to the input shape.
+            points = anchors
+            distance = bbox_pred
+            max_shape = self.image_shape
+            x1 = points[:, 0] - distance[:, 0]
+            y1 = points[:, 1] - distance[:, 1]
+            x2 = points[:, 0] + distance[:, 2]
+            y2 = points[:, 1] + distance[:, 3]
+
+            if max_shape is not None:
+                x1 = np.clip(x1, 0, max_shape[1])
+                y1 = np.clip(y1, 0, max_shape[0])
+                x2 = np.clip(x2, 0, max_shape[1])
+                y2 = np.clip(y2, 0, max_shape[0])
+
+            bboxes = np.column_stack([x1, y1, x2, y2])
+ bboxes_mlvl.append(bboxes)
+ scores_mlvl.append(cls_score)
+
+        bboxes_mlvl = np.concatenate(bboxes_mlvl, axis=0)
+        if rescale:
+            bboxes_mlvl /= scale_factor
+        scores_mlvl = np.concatenate(scores_mlvl, axis=0)
+        # NMSBoxes expects (x, y, w, h) boxes.
+        bboxes_wh = bboxes_mlvl.copy()
+        bboxes_wh[:, 2:4] = bboxes_wh[:, 2:4] - bboxes_wh[:, 0:2]
+        classIds = np.argmax(scores_mlvl, axis=1)
+        confidences = np.max(scores_mlvl, axis=1)
+
+        indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), confidences.tolist(), self.prob_threshold, self.iou_threshold)
+
+        if len(indices) > 0:
+            # NMSBoxes returns an N x 1 array in some OpenCV versions; flatten
+            # so indexing yields 1-D result arrays either way.
+            indices = np.array(indices).flatten()
+            det_bboxes = bboxes_mlvl[indices]
+            det_conf = confidences[indices]
+            det_classid = classIds[indices]
+        else:
+            det_bboxes = np.array([])
+            det_conf = np.array([])
+            det_classid = np.array([])
+
+        return det_bboxes.astype(np.float32), det_conf, det_classid
diff --git a/models/object_detection_nanodet/README.md b/models/object_detection_nanodet/README.md
new file mode 100644
index 00000000..c78f506e
--- /dev/null
+++ b/models/object_detection_nanodet/README.md
@@ -0,0 +1,140 @@
+# NanoDet
+
+NanoDet is an FCOS-style, one-stage, anchor-free object detection model that uses Generalized Focal Loss for both classification and box regression. NanoDet-Plus adds a novel label assignment strategy, built on a simple assign guidance module (AGM) and a dynamic soft label assigner (DSLA), to address the optimal label assignment problem when training lightweight models.
+
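+At inference time, the Generalized Focal Loss regression head is decoded by taking the expectation of a per-side distance distribution. Below is a minimal sketch of the integral step as implemented in `NanodetPlus.py` (function and variable names here are illustrative):
+
+```python
+import numpy as np
+
+# GFL integral decoding, as in NanoDet.post_process: each box side is a
+# distribution over reg_max + 1 bins; the decoded distance is its
+# expectation, scaled by the feature-map stride.
+reg_max = 7
+project = np.arange(reg_max + 1)
+
+def decode_distances(side_logits, stride):
+    # side_logits: (N * 4, reg_max + 1) raw logits, one row per box side
+    probs = np.exp(side_logits - side_logits.max(axis=1, keepdims=True))
+    probs /= probs.sum(axis=1, keepdims=True)          # softmax per side
+    return (probs @ project).reshape(-1, 4) * stride   # (N, 4) pixel distances
+```
+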
+#### Model metrics:
+Average Precision and Recall values observed for the COCO dataset classes are shown below.
+
+
+**Average Precision**
+
+| area | IoU | Average Precision (AP) |
+|:-------|:----------|:-----------------------|
+| all | 0.50:0.95 | 0.304 |
+| all | 0.50 | 0.459 |
+| all | 0.75 | 0.317 |
+| small | 0.50:0.95 | 0.107 |
+| medium | 0.50:0.95 | 0.322 |
+| large | 0.50:0.95 | 0.478 |
+
+**Average Recall**
+
+| area | IoU | Average Recall (AR) |
+|:-------|:----------|:--------------------|
+| all | 0.50:0.95 | 0.278 |
+| all | 0.50:0.95 | 0.434 |
+| all | 0.50:0.95 | 0.462 |
+| small | 0.50:0.95 | 0.198 |
+| medium | 0.50:0.95 | 0.510 |
+| large | 0.50:0.95 | 0.702 |
+
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# NanoDet inference on an image input
+python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/
+
+# NanoDet inference on video (webcam) input
+python demo.py --model /path/to/model/ --input_type video
+
+# Saving outputs
+# Image output
+python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/ --save True
+
+# Video output
+python demo.py --model /path/to/model/ --input_type video --save True
+```
+Note:
+- By default, input_type is image
+- Image results are saved as "result.jpg"
+- Webcam results are saved as "Webcam_result.mp4"
+
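+The `NanoDet` class from `NanodetPlus.py` can also be called directly from Python. A minimal sketch, assuming `coco.names` and the ONNX model are in the working directory (as the demo expects):
+
+```python
+import cv2
+from NanodetPlus import NanoDet
+
+net = NanoDet(modelPath='object_detection_nanodet-plus-m-1.5x-416.onnx',
+              prob_threshold=0.35, iou_threshold=0.6)
+
+img = cv2.imread('examples/test_data/test1.jpg')
+img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # demo.py feeds RGB to infer()
+
+# Boxes come back in the 416x416 letterboxed space; map them to the
+# original image with the returned padding offsets and resize ratios.
+left, top, ratioh, ratiow, bboxes, conf, classid = net.infer(img)
+for (x1, y1, x2, y2), c, k in zip(bboxes, conf, classid):
+    print(int((x1 - left) * ratiow), int((y1 - top) * ratioh),
+          int((x2 - left) * ratiow), int((y2 - top) * ratioh),
+          round(float(c), 2), int(k))
+```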
+
+## Results
+
+Here are some sample results observed using the model:
+
+
+
+
+
+
+Video inference result:
+
+
+
+
+
+## License
+
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+
+
+## Reference
+
+- Nanodet: https://zhuanlan.zhihu.com/p/306530300
+- Nanodet Plus: https://zhuanlan.zhihu.com/p/449912627
+- Nanodet weight and scripts for training: https://github.com/RangiLyu/nanodet
+
+
+#### Note:
+
+- Model version used: NanoDet-Plus-m-1.5x with 416x416 input
+- The model was trained on the COCO 2017 dataset, link to dataset: https://cocodataset.org/#download
+- Below are per-class AP50 and mAP results from COCO inference
+
+| class | AP50 | mAP | class | AP50 | mAP |
+|:--------------|:-------|:------|:---------------|:-------|:------|
+| person | 67.5 | 41.8 | bicycle | 35.4 | 18.8 |
+| car | 45.0 | 25.4 | motorcycle | 58.9 | 33.1 |
+| airplane | 77.3 | 58.9 | bus | 68.8 | 56.4 |
+| train | 81.1 | 60.5 | truck | 38.6 | 24.7 |
+| boat | 35.5 | 16.7 | traffic light | 30.5 | 14.0 |
+| fire hydrant | 69.8 | 54.5 | stop sign | 60.9 | 54.6 |
+| parking meter | 55.1 | 38.5 | bench | 26.8 | 15.9 |
+| bird | 38.3 | 23.6 | cat | 82.5 | 62.1 |
+| dog | 67.0 | 51.4 | horse | 64.3 | 44.2 |
+| sheep | 57.7 | 35.8 | cow | 61.2 | 39.9 |
+| elephant | 79.9 | 56.2 | bear | 81.8 | 63.0 |
+| zebra | 85.4 | 59.5 | giraffe | 84.1 | 59.9 |
+| backpack | 12.4 | 5.9 | umbrella | 46.5 | 28.8 |
+| handbag | 8.4 | 3.7 | tie | 35.2 | 19.6 |
+| suitcase | 38.1 | 23.8 | frisbee | 60.7 | 43.9 |
+| skis | 30.5 | 14.5 | snowboard | 32.3 | 18.2 |
+| sports ball | 37.6 | 24.5 | kite | 51.1 | 30.4 |
+| baseball bat | 28.9 | 13.6 | baseball glove | 40.1 | 21.6 |
+| skateboard | 59.4 | 35.2 | surfboard | 47.9 | 26.6 |
+| tennis racket | 55.2 | 30.5 | bottle | 34.7 | 20.2 |
+| wine glass | 27.8 | 16.3 | cup | 35.5 | 23.7 |
+| fork | 25.9 | 14.8 | knife | 10.9 | 5.6 |
+| spoon | 8.7 | 4.1 | bowl | 42.8 | 29.4 |
+| banana | 35.5 | 18.5 | apple | 19.4 | 12.9 |
+| sandwich | 46.7 | 33.4 | orange | 35.2 | 25.9 |
+| broccoli | 36.4 | 19.1 | carrot | 30.9 | 17.8 |
+| hot dog | 42.7 | 29.3 | pizza | 61.0 | 44.9 |
+| donut | 47.3 | 34.0 | cake | 39.9 | 24.4 |
+| chair | 28.8 | 16.1 | couch | 60.5 | 42.6 |
+| potted plant | 29.0 | 15.3 | bed | 63.3 | 46.0 |
+| dining table | 39.6 | 27.5 | toilet | 71.3 | 55.3 |
+| tv | 66.5 | 48.1 | laptop | 62.6 | 46.9 |
+| mouse | 63.5 | 44.1 | remote | 19.8 | 10.3 |
+| keyboard | 62.1 | 41.5 | cell phone | 33.7 | 22.8 |
+| microwave | 54.9 | 39.6 | oven | 48.1 | 30.4 |
+| toaster | 30.0 | 16.4 | sink | 44.5 | 27.8 |
+| refrigerator | 63.2 | 46.1 | book | 18.4 | 7.3 |
+| clock | 57.8 | 35.8 | vase | 33.7 | 22.1 |
+| scissors | 27.8 | 17.8 | teddy bear | 54.1 | 35.4 |
+| hair drier | 2.9 | 1.1 | toothbrush | 13.1 | 8.2 |
+
+
+#### Contributor Details
+
+- Google Summer of Code'22
+- Contributor: Sri Siddarth Chakaravarthy
+- GitHub Profile: https://github.com/Sidd1609
+- Organisation: OpenCV
+- Project: Lightweight object detection models using OpenCV
diff --git a/models/object_detection_nanodet/coco.names b/models/object_detection_nanodet/coco.names
new file mode 100644
index 00000000..16315f2b
--- /dev/null
+++ b/models/object_detection_nanodet/coco.names
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
\ No newline at end of file
diff --git a/models/object_detection_nanodet/demo.py b/models/object_detection_nanodet/demo.py
new file mode 100644
index 00000000..69caee69
--- /dev/null
+++ b/models/object_detection_nanodet/demo.py
@@ -0,0 +1,122 @@
+import cv2
+import numpy as np
+import argparse
+import time
+import moviepy.video.io.ImageSequenceClip
+from NanodetPlus import NanoDet
+
+backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
+targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+
+try:
+    backends += [cv2.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv2.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except AttributeError:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/Sidd1609/5bb321c8733110ed613ec120c7c02e41 for more information.')
+
+with open('coco.names', 'rt') as f:
+ classes = f.read().rstrip('\n').split('\n')
+
+def drawPred(frame, classId, conf, left, top, right, bottom):
+    # Draw the bounding box and class label for one detection.
+    cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 0), thickness=2)
+    label = classes[classId]
+    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+    top = max(top, labelSize[1])
+    cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
+    return frame
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='NanoDet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSoC 2022')
+    parser.add_argument('--model', type=str, default='object_detection_nanodet-plus-m-1.5x-416.onnx', help="Path to the model")
+    parser.add_argument('--input_type', type=str, default='image', help="Input types: image or video")
+    parser.add_argument('--image_path', type=str, default='test2.jpg', help="Image path")
+    parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence threshold')
+    parser.add_argument('--nms', default=0.6, type=float, help='NMS IoU threshold')
+    parser.add_argument('--save', '-s', type=lambda s: str(s).lower() in ('true', '1'), default=False, help='Set true to save results (image or webcam video).')
+    args = parser.parse_args()
+    model_net = NanoDet(modelPath=args.model, prob_threshold=args.confidence, iou_threshold=args.nms)
+
+ if (args.input_type=="image"):
+ image = cv2.imread(args.image_path)
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ srcimg = image
+ drawimg = srcimg.copy()
+        a = time.time()
+        left, top, ratioh, ratiow, det_bboxes, det_conf, det_classid = model_net.infer(image)
+        b = time.time()
+        print('Inference time: ' + str(b - a) + ' secs')
+        # Fall back to the undrawn image if there are no detections.
+        frame = drawimg
+ for i in range(det_bboxes.shape[0]):
+ xmin, ymin, xmax, ymax = max(int((det_bboxes[i,0] - left) * ratiow), 0), max(int((det_bboxes[i,1] - top) * ratioh), 0), min(
+ int((det_bboxes[i,2] - left) * ratiow), srcimg.shape[1]), min(int((det_bboxes[i,3] - top) * ratioh), srcimg.shape[0])
+ frame = drawPred(drawimg, det_classid[i], det_conf[i], xmin, ymin, xmax, ymax)
+
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ cv2.namedWindow(args.image_path, cv2.WINDOW_AUTOSIZE)
+ cv2.imshow(args.image_path, frame)
+ cv2.waitKey(0)
+
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv2.imwrite('result.jpg', frame)
+
+ else:
+ print("Press 1 to stop video capture")
+ cap = cv2.VideoCapture(0)
+ tm = cv2.TickMeter()
+ total_frames = 0
+        frame_list = []
+        Video_save = bool(args.save)
+
+ while cv2.waitKey(1) < 0:
+ hasFrame, frame = cap.read()
+ if not hasFrame:
+ print('No frames grabbed!')
+ break
+
+ frame = cv2.flip(frame, 1)
+ srcimg = frame
+ drawimg = srcimg.copy()
+            # Inference
+            tm.start()
+            left, top, ratioh, ratiow, det_bboxes, det_conf, det_classid = model_net.infer(frame)
+            tm.stop()
+            # Fall back to the undrawn frame if there are no detections.
+            image = drawimg
+
+ for i in range(det_bboxes.shape[0]):
+ xmin, ymin, xmax, ymax = max(int((det_bboxes[i,0] - left) * ratiow), 0), max(int((det_bboxes[i,1] - top) * ratioh), 0), min(
+ int((det_bboxes[i,2] - left) * ratiow), srcimg.shape[1]), min(int((det_bboxes[i,3] - top) * ratioh), srcimg.shape[0])
+ image = drawPred(drawimg, det_classid[i], det_conf[i], xmin, ymin, xmax, ymax)
+
+ total_frames += 1
+ fps=tm.getFPS()
+
+ if fps > 0:
+ fps_label = "FPS: %.2f" % fps
+ cv2.putText(image, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+ cv2.imshow("output", image)
+
+ if cv2.waitKey(1) > -1:
+ print("Stream terminated")
+ break
+
+            if args.save:
+                # moviepy expects RGB frames.
+                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                frame_list.append(image)
+
+        if Video_save:
+            clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(frame_list, fps=fps)
+            clip.write_videofile('Webcam_result.mp4')
+
+ print("Total frames: " + str(total_frames))
diff --git a/models/object_detection_nanodet/examples/results/TestResult1.png b/models/object_detection_nanodet/examples/results/TestResult1.png
new file mode 100644
index 00000000..f670101d
Binary files /dev/null and b/models/object_detection_nanodet/examples/results/TestResult1.png differ
diff --git a/models/object_detection_nanodet/examples/results/TestResult2.png b/models/object_detection_nanodet/examples/results/TestResult2.png
new file mode 100644
index 00000000..fa93096d
Binary files /dev/null and b/models/object_detection_nanodet/examples/results/TestResult2.png differ
diff --git a/models/object_detection_nanodet/examples/results/WebCamR.gif b/models/object_detection_nanodet/examples/results/WebCamR.gif
new file mode 100644
index 00000000..887d17db
Binary files /dev/null and b/models/object_detection_nanodet/examples/results/WebCamR.gif differ
diff --git a/models/object_detection_nanodet/examples/test_data/test1.jpg b/models/object_detection_nanodet/examples/test_data/test1.jpg
new file mode 100644
index 00000000..82cf812e
Binary files /dev/null and b/models/object_detection_nanodet/examples/test_data/test1.jpg differ
diff --git a/models/object_detection_nanodet/examples/test_data/test2.jpg b/models/object_detection_nanodet/examples/test_data/test2.jpg
new file mode 100644
index 00000000..570709da
Binary files /dev/null and b/models/object_detection_nanodet/examples/test_data/test2.jpg differ
diff --git a/models/object_detection_nanodet/object_detection_nanodet-plus-m-1.5x-416.onnx b/models/object_detection_nanodet/object_detection_nanodet-plus-m-1.5x-416.onnx
new file mode 100644
index 00000000..066cb860
--- /dev/null
+++ b/models/object_detection_nanodet/object_detection_nanodet-plus-m-1.5x-416.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b82da9944b88577175ee23a459dce2e26e6e4be573def65b1055dc2d9720186
+size 3800954
diff --git a/models/object_detection_yolox/LICENSE b/models/object_detection_yolox/LICENSE
new file mode 100644
index 00000000..1d4dc763
--- /dev/null
+++ b/models/object_detection_yolox/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright (c) 2021-2022 Megvii Inc. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/models/object_detection_yolox/README.md b/models/object_detection_yolox/README.md
new file mode 100644
index 00000000..c52507fc
--- /dev/null
+++ b/models/object_detection_yolox/README.md
@@ -0,0 +1,264 @@
+# YOLOX
+
+YOLOX is an anchor-free version of YOLO with a simpler design and better performance, aiming to bridge the gap between the research and industrial communities. It is a high-performing object detector that improves on the existing YOLO series, which keeps exploring techniques for an optimal speed/accuracy trade-off in real-time detection.
+
+Key features of the YOLOX object detector:
+- **Anchor-free detection** significantly reduces the number of design parameters
+- **A decoupled head for classification, regression, and localization** improves convergence speed
+- **The SimOTA advanced label assignment strategy** reduces training time and avoids additional solver hyperparameters
+- **Strong data augmentations like MixUp and Mosaic** boost YOLOX performance
+
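+Because the detector is anchor-free, decoding the raw outputs only requires the grid-cell index and the stride of each output level. A minimal sketch of one stride level of the decode used in `YoloX.postprocess` (array names here are illustrative):
+
+```python
+import numpy as np
+
+# One stride level of the anchor-free decode in YoloX.postprocess:
+# (cx, cy) are offsets from the grid cell, (w, h) are log-space sizes.
+input_size, stride = 640, 32
+hsize = wsize = input_size // stride
+xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
+grid = np.stack((xv, yv), 2).reshape(-1, 2)   # one (x, y) index per cell
+
+raw = np.zeros((grid.shape[0], 4))            # stand-in network outputs
+cxcy = (raw[:, :2] + grid) * stride           # box centers in pixels
+wh = np.exp(raw[:, 2:4]) * stride             # box sizes in pixels
+```
+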
+#### Model metrics:
+Average Precision and Recall values observed for the COCO dataset classes are shown below.
+
+
+##### YOLOX_S:
+Average forward time: 5.53 ms, Average NMS time: 1.71 ms, Average inference time: 7.25 ms
+
+**Average Precision**
+
+| area | IoU | Average Precision (AP) |
+|:-------|:----------|:-----------------------|
+| all | 0.50:0.95 | 0.405 |
+| all | 0.50 | 0.593 |
+| all | 0.75 | 0.437 |
+| small | 0.50:0.95 | 0.232 |
+| medium | 0.50:0.95 | 0.448 |
+| large | 0.50:0.95 | 0.541 |
+
+**Average Recall**
+
+| area | IoU | Average Recall (AR) |
+|:-------|:----------|:--------------------|
+| all | 0.50:0.95 | 0.326 |
+| all | 0.50:0.95 | 0.531 |
+| all | 0.50:0.95 | 0.574 |
+| small | 0.50:0.95 | 0.365 |
+| medium | 0.50:0.95 | 0.634 |
+| large | 0.50:0.95 | 0.724 |
+
+
+##### YOLOX_tiny:
+Average forward time: 2.07 ms, Average NMS time: 1.71 ms, Average inference time: 3.79 ms
+
+**Average Precision**
+
+| area | IoU | Average Precision (AP) |
+|:-------|:----------|:-----------------------|
+| all | 0.50:0.95 | 0.328 |
+| all | 0.50 | 0.504 |
+| all | 0.75 | 0.346 |
+| small | 0.50:0.95 | 0.139 |
+| medium | 0.50:0.95 | 0.360 |
+| large | 0.50:0.95 | 0.501 |
+
+**Average Recall**
+
+| area | IoU | Average Recall (AR) |
+|:-------|:----------|:--------------------|
+| all | 0.50:0.95 | 0.283 |
+| all | 0.50:0.95 | 0.450 |
+| all | 0.50:0.95 | 0.485 |
+| small | 0.50:0.95 | 0.226 |
+| medium | 0.50:0.95 | 0.550 |
+| large | 0.50:0.95 | 0.687 |
+
+
+##### YOLOX_nano:
+Average forward time: 1.68 ms, Average NMS time: 1.64 ms, Average inference time: 3.31 ms
+
+**Average Precision**
+
+| area | IoU | Average Precision (AP) |
+|:-------|:----------|:-----------------------|
+| all | 0.50:0.95 | 0.258 |
+| all | 0.50 | 0.414 |
+| all | 0.75 | 0.268 |
+| small | 0.50:0.95 | 0.082 |
+| medium | 0.50:0.95 | 0.275 |
+| large | 0.50:0.95 | 0.410 |
+
+**Average Recall**
+
+| area | IoU | Average Recall (AR) |
+|:-------|:----------|:--------------------|
+| all | 0.50:0.95 | 0.241 |
+| all | 0.50:0.95 | 0.384 |
+| all | 0.50:0.95 | 0.420 |
+| small | 0.50:0.95 | 0.157 |
+| medium | 0.50:0.95 | 0.473 |
+| large | 0.50:0.95 | 0.631 |
+
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# YOLOX inference on an image input
+python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/
+
+# YOLOX inference on video (webcam) input
+python demo.py --model /path/to/model/ --input_type video
+
+# Saving outputs
+# Image output
+python demo.py --model /path/to/model/ --input_type image --image_path /path/to/image/ --save True
+
+# Video output
+python demo.py --model /path/to/model/ --input_type video --save True
+
+# Other parameters
+# --confidence: Confidence threshold for predictions (default: 0.5)
+# --nms: NMS IoU threshold (default: 0.5)
+# --obj: Objectness threshold (default: 0.5)
+```
+Note:
+- By default, input_type is image
+- Image results are saved as "result.jpg"
+- Webcam results are saved as "Webcam_result.mp4"
+
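+The `YoloX` class from `YoloX.py` can also be called directly from Python. A minimal sketch, assuming `coco.names` and the ONNX model are in the working directory (as the demo expects):
+
+```python
+import cv2
+from YoloX import YoloX
+
+net = YoloX(modelPath='yolox_s.onnx', confThreshold=0.5,
+            nmsThreshold=0.5, objThreshold=0.5)
+
+img = cv2.imread('examples/test_data/test1.jpg')
+dets = net.infer(img)  # None, or an N x 6 array: x1, y1, x2, y2, score, class_id
+
+if dets is not None:
+    for x1, y1, x2, y2, score, cls_id in dets:
+        print(int(x1), int(y1), int(x2), int(y2),
+              round(float(score), 2), int(cls_id))
+```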
+
+## Results
+
+Here are some sample results observed using the model (**yolox_s.onnx**):
+
+
+
+
+
+
+Video inference result:
+
+
+
+
+
+## License
+
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+
+
+## Reference
+
+- YOLOX article: https://arxiv.org/abs/2107.08430
+- YOLOX weight and scripts for training: https://github.com/Megvii-BaseDetection/YOLOX
+- YOLOX blog: https://arshren.medium.com/yolox-new-improved-yolo-d430c0e4cf20
+- YOLOX-lite: https://github.com/TexasInstruments/edgeai-yolox
+
+
+#### Note:
+
+- This repo uses the following YOLOX variants: YOLOX_S, YOLOX_tiny, YOLOX_nano
+- The models were trained on the COCO 2017 dataset, link to dataset: https://cocodataset.org/#download
+- Below are per-class AP results on the COCO dataset for YOLOX_S, YOLOX_tiny, and YOLOX_nano, respectively
+
+##### YOLOX_S
+| class | AP | class | AP | class | AP |
+|:--------------|:-------|:-------------|:-------|:---------------|:-------|
+| person | 54.109 | bicycle | 31.580 | car | 40.447 |
+| motorcycle | 43.477 | airplane | 66.070 | bus | 64.183 |
+| train | 64.483 | truck | 35.110 | boat | 24.681 |
+| traffic light | 25.068 | fire hydrant | 64.382 | stop sign | 65.333 |
+| parking meter | 48.439 | bench | 22.653 | bird | 33.324 |
+| cat | 66.394 | dog | 60.096 | horse | 58.080 |
+| sheep | 49.456 | cow | 53.596 | elephant | 65.574 |
+| bear | 70.541 | zebra | 66.461 | giraffe | 66.780 |
+| backpack | 13.095 | umbrella | 41.614 | handbag | 12.865 |
+| tie | 29.453 | suitcase | 39.089 | frisbee | 61.712 |
+| skis | 21.623 | snowboard | 31.326 | sports ball | 39.820 |
+| kite | 41.410 | baseball bat | 27.311 | baseball glove | 36.661 |
+| skateboard | 49.374 | surfboard | 35.524 | tennis racket | 45.569 |
+| bottle | 37.270 | wine glass | 33.088 | cup | 39.835 |
+| fork | 31.620 | knife | 15.265 | spoon | 14.918 |
+| bowl | 43.251 | banana | 27.904 | apple | 17.630 |
+| sandwich | 32.789 | orange | 29.388 | broccoli | 23.187 |
+| carrot | 23.114 | hot dog | 33.716 | pizza | 52.541 |
+| donut | 47.980 | cake | 36.160 | chair | 29.707 |
+| couch | 46.175 | potted plant | 24.781 | bed | 44.323 |
+| dining table | 30.022 | toilet | 64.237 | tv | 57.301 |
+| laptop | 58.362 | mouse | 57.774 | remote | 24.271 |
+| keyboard | 48.020 | cell phone | 32.376 | microwave | 57.220 |
+| oven | 36.168 | toaster | 28.735 | sink | 38.159 |
+| refrigerator | 52.876 | book | 15.030 | clock | 48.622 |
+| vase | 37.013 | scissors | 26.307 | teddy bear | 45.676 |
+| hair drier | 7.255 | toothbrush | 19.374 | | |
+
+
+##### YOLOX_tiny
+| class | AP | class | AP | class | AP |
+|:--------------|:-------|:-------------|:-------|:---------------|:-------|
+| person | 45.685 | bicycle | 22.797 | car | 29.265 |
+| motorcycle | 37.980 | airplane | 59.446 | bus | 54.878 |
+| train | 62.459 | truck | 26.850 | boat | 16.724 |
+| traffic light | 17.527 | fire hydrant | 55.155 | stop sign | 57.120 |
+| parking meter | 37.755 | bench | 17.656 | bird | 24.382 |
+| cat | 55.792 | dog | 50.964 | horse | 49.806 |
+| sheep | 39.894 | cow | 42.855 | elephant | 58.863 |
+| bear | 62.345 | zebra | 58.389 | giraffe | 62.362 |
+| backpack | 8.131 | umbrella | 33.650 | handbag | 7.777 |
+| tie | 21.907 | suitcase | 25.593 | frisbee | 48.975 |
+| skis | 16.941 | snowboard | 19.409 | sports ball | 30.718 |
+| kite | 33.956 | baseball bat | 17.912 | baseball glove | 28.793 |
+| skateboard | 38.253 | surfboard | 28.329 | tennis racket | 33.240 |
+| bottle | 23.872 | wine glass | 20.386 | cup | 26.962 |
+| fork | 21.025 | knife | 8.434 | spoon | 6.513 |
+| bowl | 34.706 | banana | 24.050 | apple | 12.745 |
+| sandwich | 28.046 | orange | 24.216 | broccoli | 18.579 |
+| carrot | 16.283 | hot dog | 30.058 | pizza | 44.371 |
+| donut | 35.957 | cake | 29.765 | chair | 22.070 |
+| couch | 41.221 | potted plant | 19.856 | bed | 44.173 |
+| dining table | 29.000 | toilet | 60.369 | tv | 49.868 |
+| laptop | 48.858 | mouse | 47.843 | remote | 14.349 |
+| keyboard | 42.412 | cell phone | 23.536 | microwave | 51.839 |
+| oven | 32.384 | toaster | 24.209 | sink | 32.607 |
+| refrigerator | 50.156 | book | 9.534 | clock | 41.661 |
+| vase | 25.548 | scissors | 17.612 | teddy bear | 39.375 |
+| hair drier | 0.000 | toothbrush | 9.933 | | |
+
+
+##### YOLOX_nano
+| class | AP | class | AP | class | AP |
+|:--------------|:-------|:-------------|:-------|:---------------|:-------|
+| person | 38.444 | bicycle | 16.922 | car | 21.708 |
+| motorcycle | 30.753 | airplane | 47.573 | bus | 49.651 |
+| train | 55.302 | truck | 20.294 | boat | 11.919 |
+| traffic light | 12.026 | fire hydrant | 48.798 | stop sign | 52.446 |
+| parking meter | 33.439 | bench | 13.565 | bird | 16.520 |
+| cat | 42.603 | dog | 43.831 | horse | 37.338 |
+| sheep | 27.807 | cow | 33.155 | elephant | 52.374 |
+| bear | 49.737 | zebra | 52.259 | giraffe | 56.445 |
+| backpack | 5.456 | umbrella | 25.288 | handbag | 2.802 |
+| tie | 17.110 | suitcase | 17.757 | frisbee | 40.878 |
+| skis | 13.245 | snowboard | 11.443 | sports ball | 22.310 |
+| kite | 28.107 | baseball bat | 10.295 | baseball glove | 20.294 |
+| skateboard | 28.285 | surfboard | 19.142 | tennis racket | 25.253 |
+| bottle | 15.064 | wine glass | 13.412 | cup | 19.357 |
+| fork | 13.384 | knife | 4.276 | spoon | 3.460 |
+| bowl | 26.615 | banana | 18.067 | apple | 9.672 |
+| sandwich | 22.817 | orange | 23.574 | broccoli | 14.710 |
+| carrot | 10.180 | hot dog | 18.646 | pizza | 38.244 |
+| donut | 24.204 | cake | 21.330 | chair | 14.644 |
+| couch | 33.018 | potted plant | 13.252 | bed | 38.034 |
+| dining table | 24.287 | toilet | 52.986 | tv | 44.978 |
+| laptop | 44.130 | mouse | 35.173 | remote | 7.349 |
+| keyboard | 33.903 | cell phone | 19.140 | microwave | 38.800 |
+| oven | 25.890 | toaster | 10.665 | sink | 23.293 |
+| refrigerator | 42.697 | book | 6.942 | clock | 35.254 |
+| vase | 18.742 | scissors | 11.866 | teddy bear | 30.907 |
+| hair drier | 0.000 | toothbrush | 7.284 | | |
+
+
+#### Contributor Details
+
+- Google Summer of Code'22
+- Contributor: Sri Siddarth Chakaravarthy
+- GitHub Profile: https://github.com/Sidd1609
+- Organisation: OpenCV
+- Project: Lightweight object detection models using OpenCV
+
diff --git a/models/object_detection_yolox/YoloX.py b/models/object_detection_yolox/YoloX.py
new file mode 100644
index 00000000..7d299a15
--- /dev/null
+++ b/models/object_detection_yolox/YoloX.py
@@ -0,0 +1,125 @@
+import cv2
+import numpy as np
+
+class YoloX:
+    def __init__(self, modelPath, p6=False, confThreshold=0.35, nmsThreshold=0.5, objThreshold=0.5):
+        with open('coco.names', 'rt') as f:
+            self.classes = f.read().rstrip('\n').split('\n')
+        self.num_classes = len(self.classes)
+        self.net = cv2.dnn.readNet(modelPath)
+        self.input_size = (640, 640)
+        # Normalization constants; note that preprocess() below does not apply them.
+        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
+        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
+        # p6 variants add an extra stride-64 output level.
+        if not p6:
+            self.strides = [8, 16, 32]
+        else:
+            self.strides = [8, 16, 32, 64]
+ self.confThreshold = confThreshold
+ self.nmsThreshold = nmsThreshold
+ self.objThreshold = objThreshold
+
+ def preprocess(self, image):
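+        # Letterbox onto a 114-gray canvas (preserving aspect ratio), then
+        # flip BGR to RGB; returns the padded image and the resize ratio.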
+ if len(image.shape) == 3:
+ padded_img = np.ones((self.input_size[0], self.input_size[1], 3)) * 114.0
+ else:
+ padded_img = np.ones(self.input_size) * 114.0
+ img = np.array(image)
+ ratio = min(self.input_size[0] / img.shape[0], self.input_size[1] / img.shape[1])
+ resized_img = cv2.resize(
+ img, (int(img.shape[1] * ratio), int(img.shape[0] * ratio)), interpolation=cv2.INTER_LINEAR
+ ).astype(np.float32)
+ padded_img[: int(img.shape[0] * ratio), : int(img.shape[1] * ratio)] = resized_img
+ image = padded_img
+
+ image = image.astype(np.float32)
+ image = image[:, :, ::-1]
+ return image, ratio
+
+ def infer(self, srcimg):
+ img, ratio = self.preprocess(srcimg)
+ blob = cv2.dnn.blobFromImage(img)
+ self.net.setInput(blob)
+ outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
+ predictions = self.postprocess(outs[0], ratio)
+
+ return predictions
+
+
+ def postprocess(self, outputs, ratio):
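+        # Build per-level grids and stride maps so the raw outputs can be
+        # decoded: centers are grid offsets scaled by stride, and sizes are
+        # exp(log-size) scaled by stride.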
+ grids = []
+ expanded_strides = []
+ hsizes = [self.input_size[0] // stride for stride in self.strides]
+ wsizes = [self.input_size[1] // stride for stride in self.strides]
+
+ for hsize, wsize, stride in zip(hsizes, wsizes, self.strides):
+ xv, yv = np.meshgrid(np.arange(hsize), np.arange(wsize))
+ grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+ grids.append(grid)
+ shape = grid.shape[:2]
+ expanded_strides.append(np.full((*shape, 1), stride))
+
+ grids = np.concatenate(grids, 1)
+ expanded_strides = np.concatenate(expanded_strides, 1)
+ outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
+ outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
+
+ predictions = outputs[0]
+
+ boxes = predictions[:, :4]
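+        # Final score = objectness * per-class probability.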
+ scores = predictions[:, 4:5] * predictions[:, 5:]
+
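+        # Convert (cx, cy, w, h) to corner coordinates, then undo the preprocess resize.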
+ boxes_xyxy = np.ones_like(boxes)
+ boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
+ boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
+ boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
+ boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
+ boxes_xyxy /= ratio
+
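+        # Greedy per-class NMS over boxes that pass the confidence threshold.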
+ final_dets = []
+ num_classes = scores.shape[1]
+ for cls_ind in range(num_classes):
+ cls_scores = scores[:, cls_ind]
+ valid_score_mask = cls_scores > self.confThreshold
+
+ if valid_score_mask.sum() == 0:
+ continue
+
+ else:
+ valid_scores = cls_scores[valid_score_mask]
+ valid_boxes = boxes_xyxy[valid_score_mask]
+
+ keep = []
+ x1 = valid_boxes[:, 0]
+ y1 = valid_boxes[:, 1]
+ x2 = valid_boxes[:, 2]
+ y2 = valid_boxes[:, 3]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = valid_scores.argsort()[::-1]
+
+ while order.size > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= self.nmsThreshold)[0]
+ order = order[inds + 1]
+ if len(keep) > 0:
+ cls_inds = np.ones((len(keep), 1)) * cls_ind
+ dets = np.concatenate([valid_boxes[keep], valid_scores[keep, None], cls_inds], 1)
+ final_dets.append(dets)
+
+ if len(final_dets) == 0:
+ return None
+
+ res_dets = np.concatenate(final_dets, 0)
+ return res_dets
diff --git a/models/object_detection_yolox/coco.names b/models/object_detection_yolox/coco.names
new file mode 100644
index 00000000..16315f2b
--- /dev/null
+++ b/models/object_detection_yolox/coco.names
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
\ No newline at end of file
diff --git a/models/object_detection_yolox/demo.py b/models/object_detection_yolox/demo.py
new file mode 100644
index 00000000..566c0bf3
--- /dev/null
+++ b/models/object_detection_yolox/demo.py
@@ -0,0 +1,132 @@
+import cv2
+import numpy as np
+import argparse
+import time
+import moviepy.video.io.ImageSequenceClip
+from YoloX import YoloX
+
+backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
+targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+
+try:
+    backends += [cv2.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv2.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except AttributeError:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/Sidd1609/5bb321c8733110ed613ec120c7c02e41 for more information.')
+
+with open('coco.names', 'rt') as f:
+ classes = f.read().rstrip('\n').split('\n')
+
+
+def vis(dets, res_img):
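+    # Draw boxes, class names, and scores for detections above the confidence threshold.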
+ if dets is not None:
+ final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
+ for i in range(len(final_boxes)):
+ box = final_boxes[i]
+ cls_id = int(final_cls_inds[i])
+ score = final_scores[i]
+ if score < args.confidence:
+ continue
+
+ x0 = int(box[0])
+ y0 = int(box[1])
+ x1 = int(box[2])
+ y1 = int(box[3])
+
+ text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
+ font = cv2.FONT_HERSHEY_SIMPLEX
+ txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
+ cv2.rectangle(res_img, (x0, y0), (x1, y1), (0, 0, 255), 2)
+ cv2.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
+ cv2.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 255, 0), thickness=1)
+
+ else:
+ print("No detections")
+
+ return res_img
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='YOLOX inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSoC 2022')
+    parser.add_argument('--model', type=str, default='yolox_s.onnx', help="Path to the model")
+    parser.add_argument('--input_type', type=str, default='image', help="Input types: image or video")
+    parser.add_argument('--image_path', type=str, default='test2.jpg', help="Image path")
+    parser.add_argument('--video_path', type=str, default='sample.mp4', help="Video path")
+    parser.add_argument('--confidence', default=0.5, type=float, help='Class confidence threshold')
+    parser.add_argument('--nms', default=0.5, type=float, help='NMS IoU threshold')
+    parser.add_argument('--obj', default=0.5, type=float, help='Objectness threshold')
+    parser.add_argument('--save', '-s', type=lambda s: str(s).lower() in ('true', '1'), default=False, help='Set true to save results (image or webcam video).')
+    args = parser.parse_args()
+
+    model_net = YoloX(modelPath=args.model, confThreshold=args.confidence, nmsThreshold=args.nms, objThreshold=args.obj)
+
+ if (args.input_type=="image"):
+ srcimg = cv2.imread(args.image_path)
+
+ image = srcimg
+        a = time.time()
+        preds = model_net.infer(srcimg)
+        b = time.time()
+        print('Inference time: ' + str(b - a) + ' secs')
+
+ srcimg = vis(preds, image)
+
+ cv2.namedWindow(args.image_path, cv2.WINDOW_AUTOSIZE)
+ cv2.imshow(args.image_path, srcimg)
+ cv2.waitKey(0)
+
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv2.imwrite('result.jpg', image)
+
+ else:
+ print("Press 1 to stop video capture")
+ cap = cv2.VideoCapture(0)
+ tm = cv2.TickMeter()
+ total_frames = 0
+        frame_list = []
+        Video_save = bool(args.save)
+
+ while cv2.waitKey(1) < 0:
+ hasFrame, frame = cap.read()
+ if not hasFrame:
+ print('No frames grabbed!')
+ break
+
+ frame = cv2.flip(frame, 1)
+ srcimg = frame
+
+ # Inference
+ tm.start()
+ preds = model_net.infer(srcimg)
+ tm.stop()
+
+ srcimg = vis(preds, srcimg)
+
+ total_frames += 1
+ fps=tm.getFPS()
+
+ if fps > 0:
+ fps_label = "FPS: %.2f" % fps
+ cv2.putText(srcimg, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+ cv2.imshow("output", srcimg)
+
+ if cv2.waitKey(1) > -1:
+ print("Stream terminated")
+ break
+
+            if args.save:
+                # moviepy expects RGB frames; collect them inside the capture
+                # loop (previously this ran after the loop, saving one frame).
+                srcimg = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
+                frame_list.append(srcimg)
+
+        if Video_save:
+            clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(frame_list, fps=fps)
+            clip.write_videofile('Webcam_result.mp4')
+
+ print("Total frames: " + str(total_frames))
diff --git a/models/object_detection_yolox/examples/results/WebcamR.gif b/models/object_detection_yolox/examples/results/WebcamR.gif
new file mode 100644
index 00000000..fa215c1a
Binary files /dev/null and b/models/object_detection_yolox/examples/results/WebcamR.gif differ
diff --git a/models/object_detection_yolox/examples/results/result1.jpg b/models/object_detection_yolox/examples/results/result1.jpg
new file mode 100644
index 00000000..8544aa7b
Binary files /dev/null and b/models/object_detection_yolox/examples/results/result1.jpg differ
diff --git a/models/object_detection_yolox/examples/results/result2.jpg b/models/object_detection_yolox/examples/results/result2.jpg
new file mode 100644
index 00000000..eb19edab
Binary files /dev/null and b/models/object_detection_yolox/examples/results/result2.jpg differ
diff --git a/models/object_detection_yolox/examples/test_data/test1.jpg b/models/object_detection_yolox/examples/test_data/test1.jpg
new file mode 100644
index 00000000..1208cf1f
Binary files /dev/null and b/models/object_detection_yolox/examples/test_data/test1.jpg differ
diff --git a/models/object_detection_yolox/examples/test_data/test2.jpg b/models/object_detection_yolox/examples/test_data/test2.jpg
new file mode 100644
index 00000000..19023f71
Binary files /dev/null and b/models/object_detection_yolox/examples/test_data/test2.jpg differ
diff --git a/models/object_detection_yolox/yolox_nano.onnx b/models/object_detection_yolox/yolox_nano.onnx
new file mode 100644
index 00000000..03adac0e
Binary files /dev/null and b/models/object_detection_yolox/yolox_nano.onnx differ
diff --git a/models/object_detection_yolox/yolox_s.onnx b/models/object_detection_yolox/yolox_s.onnx
new file mode 100644
index 00000000..42d1fb00
Binary files /dev/null and b/models/object_detection_yolox/yolox_s.onnx differ
diff --git a/models/object_detection_yolox/yolox_tiny.onnx b/models/object_detection_yolox/yolox_tiny.onnx
new file mode 100644
index 00000000..56a25dee
Binary files /dev/null and b/models/object_detection_yolox/yolox_tiny.onnx differ