From 74a66d86c1ca1028e7b3287bfaee8e36b26a9c6f Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Thu, 8 Dec 2022 00:59:15 -0500
Subject: [PATCH 1/6] update README

---
 ...8xb8-16x4x1-10e-tricks_ava-kinetics-rgb.py |  2 +-
 configs/recognition/timesformer/README.md     | 20 +++++++++----------
 configs/recognition/timesformer/metafile.yml  | 12 ++++++++---
 ...aceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py | 18 ++++++++++++++---
 4 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/configs/detection/ava_kinetics/slowonly_k700-pre-r50_8xb8-16x4x1-10e-tricks_ava-kinetics-rgb.py b/configs/detection/ava_kinetics/slowonly_k700-pre-r50_8xb8-16x4x1-10e-tricks_ava-kinetics-rgb.py
index fa65298d8d..4d4a3dea6b 100644
--- a/configs/detection/ava_kinetics/slowonly_k700-pre-r50_8xb8-16x4x1-10e-tricks_ava-kinetics-rgb.py
+++ b/configs/detection/ava_kinetics/slowonly_k700-pre-r50_8xb8-16x4x1-10e-tricks_ava-kinetics-rgb.py
@@ -49,7 +49,7 @@
 # The testing is w/o. any cropping / flipping
 val_pipeline = [
     dict(
-       type='SampleAVAFrames', clip_len=16, frame_interval=4, test_mode=True),
+        type='SampleAVAFrames', clip_len=16, frame_interval=4, test_mode=True),
     dict(type='RawFrameDecode', **file_client_args),
     dict(type='Resize', scale=(-1, 256)),
     dict(type='FormatShape', input_format='NCTHW', collapse=True),
diff --git a/configs/recognition/timesformer/README.md b/configs/recognition/timesformer/README.md
index 8b3fdf2c30..c7f5f439db 100644
--- a/configs/recognition/timesformer/README.md
+++ b/configs/recognition/timesformer/README.md
@@ -20,19 +20,17 @@ We present a convolution-free approach to video classification built exclusively
 
 ### Kinetics-400
 
-| frame sampling strategy |   resolution   | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | inference_time(video/s) | gpu_mem(M) |           config           |           ckpt            |           log            |
-| :---------------------: | :------------: | :--: | :---------------------: | :----------: | :------: | :------: | :---------------------: | :--------: | :------------------------: | :-----------------------: | :----------------------: |
-|         8x32x1          | short-side 320 |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.96   |  93.57   |            x            |   15235    | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          | short-side 320 |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.93   |  93.27   |            x            |   33358    | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          | short-side 320 |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.98   |  92.83   |            x            |   12355    | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-
-1. The **gpus** indicates the number of gpu (80G A100) we used to get the checkpoint. It is noteworthy that the configs we provide are used for 8 gpus as default.
-   According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use different GPUs or videos per GPU,
-   e.g., lr=0.005 for 8 GPUs x 8 videos/gpu and lr=0.00375 for 8 GPUs x 6 videos/gpu.
+| frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | FLOPs | params |               config                |               ckpt                |                log                |
+| :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :---: | :----: | :---------------------------------: | :-------------------------------: | :-------------------------------: |
+|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 588G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   | 539G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 422G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+
+1. The **gpus** indicates the number of gpus we used to get the checkpoint. If you want to use a different number of gpus or videos per gpu, the best way is to set `--auto-scale-lr` when calling `tools/train.py`, this parameter will auto-scale the learning rate according to the actual batch size and the original batch size.
 2. We keep the test setting with the [original repo](https://github.com/facebookresearch/TimeSformer) (three crop x 1 clip).
 3. The pretrained model `vit_base_patch16_224.pth` used by TimeSformer was converted from [vision_transformer](https://github.com/google-research/vision_transformer).
 
-For more details on data preparation, you can refer to the **Prepare videos** part in the [Data Preparation Tutorial](/docs/en/user_guides/2_data_prepare.md).
+For more details on data preparation, you can refer to [Kinetics400](/tools/data/kinetics/README.md).
 
 ## Train
 
@@ -46,7 +44,7 @@ Example: train TimeSformer model on Kinetics-400 dataset in a deterministic opti
 
 ```shell
 python tools/train.py configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py \
-    --cfg-options randomness.seed=0 randomness.deterministic=True
+    --seed=0 --deterministic
 ```
 
 For more details, you can refer to the **Training** part in the [Training and Test Tutorial](/docs/en/user_guides/4_train_test.md).
diff --git a/configs/recognition/timesformer/metafile.yml b/configs/recognition/timesformer/metafile.yml
index 7f7edd40eb..3226870a67 100644
--- a/configs/recognition/timesformer/metafile.yml
+++ b/configs/recognition/timesformer/metafile.yml
@@ -14,7 +14,9 @@ Models:
       Batch Size: 8
       Epochs: 15
       Pretrained: ImageNet-21K
-      Resolution: short-side 320
+      Resolution: 224x224
+      FLOPs: 588G
+      params: 122M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs
     Modality: RGB
@@ -35,7 +37,9 @@ Models:
       Batch Size: 8
       Epochs: 15
       Pretrained: ImageNet-21K
-      Resolution: short-side 320
+      Resolution: 224x224
+      FLOPs: 539G
+      params: 86.11M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs
     Modality: RGB
@@ -56,7 +60,9 @@ Models:
       Batch Size: 8
       Epochs: 15
       Pretrained: ImageNet-21K
-      Resolution: short-side 320
+      Resolution: 224x224
+      FLOPs: 422G
+      params: 86.11M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs
     Modality: RGB
diff --git a/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py b/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
index b969a33d0e..2b1bc559e5 100644
--- a/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
+++ b/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
@@ -35,8 +35,14 @@
 ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
 ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
 
+# file_client_args = dict(
+#      io_backend='petrel',
+#      path_mapping=dict(
+#          {'data/kinetics400': 's3://openmmlab/datasets/action/Kinetics400'}))
+file_client_args = dict(io_backend='disk')
+
 train_pipeline = [
-    dict(type='DecordInit'),
+    dict(type='DecordInit', **file_client_args),
     dict(type='SampleFrames', clip_len=8, frame_interval=32, num_clips=1),
     dict(type='DecordDecode'),
     dict(type='RandomRescale', scale_range=(256, 320)),
@@ -46,7 +52,7 @@
     dict(type='PackActionInputs')
 ]
 val_pipeline = [
-    dict(type='DecordInit'),
+    dict(type='DecordInit', **file_client_args),
     dict(
         type='SampleFrames',
         clip_len=8,
@@ -60,7 +66,7 @@
     dict(type='PackActionInputs')
 ]
 test_pipeline = [
-    dict(type='DecordInit'),
+    dict(type='DecordInit', **file_client_args),
     dict(
         type='SampleFrames',
         clip_len=8,
@@ -136,3 +142,9 @@
 ]
 
 default_hooks = dict(checkpoint=dict(interval=5))
+
+# Default settings for automatic learning-rate (LR) scaling.
+#   - `enable`: whether LR auto-scaling is on by default; it is off here
+#       and is turned on by passing `--auto-scale-lr` to `tools/train.py`.
+#   - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
+auto_scale_lr = dict(enable=False, base_batch_size=64)

From 39815949635580e67f3272f07f8da39041eb1f91 Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Thu, 8 Dec 2022 19:11:32 -0500
Subject: [PATCH 2/6] update TimeSformer metafile accuracy metrics

---
 configs/recognition/timesformer/metafile.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/configs/recognition/timesformer/metafile.yml b/configs/recognition/timesformer/metafile.yml
index 3226870a67..83e4e64dfe 100644
--- a/configs/recognition/timesformer/metafile.yml
+++ b/configs/recognition/timesformer/metafile.yml
@@ -24,8 +24,8 @@ Models:
     - Dataset: Kinetics-400
       Task: Action Recognition
       Metrics:
-        Top 1 Accuracy: 77.96
-        Top 5 Accuracy: 93.57
+        Top 1 Accuracy: 77.69
+        Top 5 Accuracy: 93.45
     Training Log: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log
     Weights: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth
 
@@ -47,8 +47,8 @@ Models:
     - Dataset: Kinetics-400
       Task: Action Recognition
       Metrics:
-        Top 1 Accuracy: 76.93
-        Top 5 Accuracy: 93.27
+        Top 1 Accuracy: 76.95
+        Top 5 Accuracy: 93.28
     Training Log: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log
     Weights: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth
 
@@ -70,7 +70,7 @@ Models:
     - Dataset: Kinetics-400
       Task: Action Recognition
       Metrics:
-        Top 1 Accuracy: 76.98
-        Top 5 Accuracy: 92.83
+        Top 1 Accuracy: 76.93
+        Top 5 Accuracy: 92.88
     Training Log: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log
     Weights: https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth

From 576623e60c57292a136f86d94d99596ff45426ea Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Fri, 9 Dec 2022 00:44:55 -0500
Subject: [PATCH 3/6] update README

---
 configs/recognition/timesformer/README.md    | 10 +++++-----
 configs/recognition/timesformer/metafile.yml |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/configs/recognition/timesformer/README.md b/configs/recognition/timesformer/README.md
index c7f5f439db..563feb0cb8 100644
--- a/configs/recognition/timesformer/README.md
+++ b/configs/recognition/timesformer/README.md
@@ -20,11 +20,11 @@ We present a convolution-free approach to video classification built exclusively
 
 ### Kinetics-400
 
-| frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | FLOPs | params |               config                |               ckpt                |                log                |
-| :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :---: | :----: | :---------------------------------: | :-------------------------------: | :-------------------------------: |
-|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 588G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   | 539G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 422G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+| frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | testing protocol | FLOPs | params |               config                |               ckpt                |                log                |
+| :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :---: | :----: | :---------------------------------: | :-------------------------------: | :-------------------------------: |:---: |:---: |
+|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 1 clips x 3 crop |196G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   |1 clips x 3 crop | 180G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 1 clips x 3 crop |141G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
 
 1. The **gpus** indicates the number of gpus we used to get the checkpoint. If you want to use a different number of gpus or videos per gpu, the best way is to set `--auto-scale-lr` when calling `tools/train.py`, this parameter will auto-scale the learning rate according to the actual batch size and the original batch size.
 2. We keep the test setting with the [original repo](https://github.com/facebookresearch/TimeSformer) (three crop x 1 clip).
diff --git a/configs/recognition/timesformer/metafile.yml b/configs/recognition/timesformer/metafile.yml
index 83e4e64dfe..f144b647e3 100644
--- a/configs/recognition/timesformer/metafile.yml
+++ b/configs/recognition/timesformer/metafile.yml
@@ -15,7 +15,7 @@ Models:
       Epochs: 15
       Pretrained: ImageNet-21K
       Resolution: 224x224
-      FLOPs: 588G
+      FLOPs: 196G
       params: 122M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs
@@ -38,7 +38,7 @@ Models:
       Epochs: 15
       Pretrained: ImageNet-21K
       Resolution: 224x224
-      FLOPs: 539G
+      FLOPs: 180G
       params: 86.11M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs
@@ -61,7 +61,7 @@ Models:
       Epochs: 15
       Pretrained: ImageNet-21K
       Resolution: 224x224
-      FLOPs: 422G
+      FLOPs: 141G
       params: 86.11M
       Training Data: Kinetics-400
       Training Resources: 8 GPUs

From 918d1acb901e9ab2ce138359d1bae04c7c75cb90 Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Fri, 9 Dec 2022 00:47:39 -0500
Subject: [PATCH 4/6] update README

---
 configs/recognition/timesformer/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/configs/recognition/timesformer/README.md b/configs/recognition/timesformer/README.md
index 563feb0cb8..91de635b53 100644
--- a/configs/recognition/timesformer/README.md
+++ b/configs/recognition/timesformer/README.md
@@ -20,11 +20,11 @@ We present a convolution-free approach to video classification built exclusively
 
 ### Kinetics-400
 
-| frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | testing protocol | FLOPs | params |               config                |               ckpt                |                log                |
-| :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :---: | :----: | :---------------------------------: | :-------------------------------: | :-------------------------------: |:---: |:---: |
-|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 1 clips x 3 crop |196G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   |1 clips x 3 crop | 180G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 1 clips x 3 crop |141G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+| frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | testing protocol | FLOPs | params |             config             |             ckpt             |             log             |
+| :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :--------------: | :---: | :----: | :----------------------------: | :--------------------------: | :-------------------------: |
+|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 1 clips x 3 crop | 196G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   | 1 clips x 3 crop | 180G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 1 clips x 3 crop | 141G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
 
 1. The **gpus** indicates the number of gpus we used to get the checkpoint. If you want to use a different number of gpus or videos per gpu, the best way is to set `--auto-scale-lr` when calling `tools/train.py`, this parameter will auto-scale the learning rate according to the actual batch size and the original batch size.
 2. We keep the test setting with the [original repo](https://github.com/facebookresearch/TimeSformer) (three crop x 1 clip).

From c7fcf5a83d8becfb5e950e182e5d6e8ad20cae0b Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Fri, 9 Dec 2022 00:48:18 -0500
Subject: [PATCH 5/6] update README

---
 configs/recognition/timesformer/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/recognition/timesformer/README.md b/configs/recognition/timesformer/README.md
index 91de635b53..df197e0ba9 100644
--- a/configs/recognition/timesformer/README.md
+++ b/configs/recognition/timesformer/README.md
@@ -22,9 +22,9 @@ We present a convolution-free approach to video classification built exclusively
 
 | frame sampling strategy | resolution | gpus |        backbone         |   pretrain   | top1 acc | top5 acc | testing protocol | FLOPs | params |             config             |             ckpt             |             log             |
 | :---------------------: | :--------: | :--: | :---------------------: | :----------: | :------: | :------: | :--------------: | :---: | :----: | :----------------------------: | :--------------------------: | :-------------------------: |
-|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 1 clips x 3 crop | 196G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   | 1 clips x 3 crop | 180G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
-|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 1 clips x 3 crop | 141G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   |   TimeSformer (divST)   | ImageNet-21K |  77.69   |  93.45   | 1 clip x 3 crop  | 196G  |  122M  | [config](/configs/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-a4d0d01f.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   |  TimeSformer (jointST)  | ImageNet-21K |  76.95   |  93.28   | 1 clip x 3 crop  | 180G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb_20220815-8022d1c0.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_jointST_8xb8-8x32x1-15e_kinetics400-rgb.log) |
+|         8x32x1          |  224x224   |  8   | TimeSformer (spaceOnly) | ImageNet-21K |  76.93   |  92.88   | 1 clip x 3 crop  | 141G  | 86.11M | [config](/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py) | [ckpt](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb_20220815-78f05367.pth) | [log](https://download.openmmlab.com/mmaction/v1.0/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.log) |
 
 1. The **gpus** indicates the number of gpus we used to get the checkpoint. If you want to use a different number of gpus or videos per gpu, the best way is to set `--auto-scale-lr` when calling `tools/train.py`, this parameter will auto-scale the learning rate according to the actual batch size and the original batch size.
 2. We keep the test setting with the [original repo](https://github.com/facebookresearch/TimeSformer) (three crop x 1 clip).

From 28179461d740b5b6b27c9aeec5e6056f94a73c66 Mon Sep 17 00:00:00 2001
From: KaiHoo <kaiorhu@gmail.com>
Date: Mon, 12 Dec 2022 19:38:53 -0500
Subject: [PATCH 6/6] remove commented-out petrel file_client_args

---
 .../timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py  | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py b/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
index 2b1bc559e5..e4379bee0c 100644
--- a/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
+++ b/configs/recognition/timesformer/timesformer_spaceOnly_8xb8-8x32x1-15e_kinetics400-rgb.py
@@ -35,10 +35,6 @@
 ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
 ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
 
-# file_client_args = dict(
-#      io_backend='petrel',
-#      path_mapping=dict(
-#          {'data/kinetics400': 's3://openmmlab/datasets/action/Kinetics400'}))
 file_client_args = dict(io_backend='disk')
 
 train_pipeline = [