Commit

first commit
jeffsonyu committed Mar 20, 2023
0 parents commit 7aaeb66
Showing 506 changed files with 66,943 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
. filter=lfs diff=lfs merge=lfs -text
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Songyou Peng, Michael Niemeyer, Lars Mescheder, Marc Pollefeys, Andreas Geiger

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
75 changes: 75 additions & 0 deletions README.md
@@ -0,0 +1,75 @@
# Visual-Tactile Sensing for In-Hand Object Reconstruction
[**Paper**] | [**Project Page**](https://sites.google.com/view/vtaco) <br>

<div style="text-align: center">
<img src="media/VTacO.png" width="1000"/>
</div>

This repository contains the implementation of the paper:

**Visual-Tactile Sensing for In-Hand Object Reconstruction**
Wenqiang Xu*, Zhenjun Yu*, Han Xue, Ruolin Ye, Siqiong Yao, Cewu Lu (* = Equal contribution)
**CVPR 2023**

## Installation
First, make sure you have all dependencies in place.
The simplest way to do so is to use [anaconda](https://www.anaconda.com/).

You can create an anaconda environment called `vtaco` using
```
conda env create -f environment.yaml
conda activate vtaco
```
**Note**: you might need to install **torch-scatter** manually, following [the official instructions](https://github.com/rusty1s/pytorch_scatter#pytorch-140):
```
pip install torch-scatter==2.0.4 -f https://pytorch-geometric.com/whl/torch-1.4.0+cu101.html
```
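The `-f` index URL above encodes the installed torch version and the CUDA tag of your environment. As an illustration only (this helper is hypothetical, not part of the repo), the pattern looks like this:

```
def scatter_wheel_index(torch_version, cuda_tag):
    # Build the pytorch-geometric wheel index URL passed to `pip install -f`.
    # torch_version is e.g. "1.4.0"; cuda_tag is e.g. "cu101" or "cpu".
    return f"https://pytorch-geometric.com/whl/torch-{torch_version}+{cuda_tag}.html"

print(scatter_wheel_index("1.4.0", "cu101"))
```

Pick the tag that matches your installed PyTorch and CUDA; mismatched wheels are the most common cause of torch-scatter import errors.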

Next, compile the extension modules.
You can do this via
```
python setup.py build_ext --inplace
```

## Dataset
<!-- For downloading the training and testing dataset for VTacO and VTacOH, you can simply run the following command to download our preprocessed dataset:
```
bash scripts/download_data.sh
```
This script should download and unpack the data automatically into the `data/` folder, which should look like:
```
VTacO
├── data
│   ├── VTacO_AKB_class
│   │   ├── 001
│   │   │   ├── $class_name
│   │   │   └── metadata.yaml
│   │   ├── 002
│   │   ├── ...
│   │   └── 007
│   ├── VTacO_YCB
│   │   ├── YCB
│   │   └── metadata.yaml
│   └── VTacO_mesh
│       ├── mesh
│       ├── mesh_obj
│       └── depth_origin.txt
``` -->
We will soon release the dataset!

## Training
To train the Depth Estimator $U_I(\cdot)$ and the sensor pose estimator, we provide the config file `configs/tactile/tactile_test.yaml`. You can run the following command to train from scratch:
```
python train_depth.py configs/tactile/tactile_test.yaml
```

With the pretrained $U_I(\cdot)$ and sensor pose estimator, you can train VTacO or VTacOH as follows:
```
python train.py configs/VTacO/VTacO_AKB_001.yaml
python train.py configs/VTacOH/VTacOH_AKB_001.yaml
```
**Note**: you may need to change *path* under *data* and *model_file* under *encoder_t2d_kwargs* in the config file to your own data path and pretrained-model path.
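For reference, those fields appear in `configs/VTacO/VTacO_AKB_001.yaml` as follows (the values shown are the defaults shipped with the repo; replace them with your own paths):

```
data:
  path: ./data/VTacO_AKB_class/001

encoder_t2d_kwargs:
  pretrained: True
  model_file: ../../tactile/test/model_best.pt
```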

All results will be saved in the `out/` folder, including checkpoints, visualization results, and TensorBoard logs.
Binary file not shown.
126 changes: 126 additions & 0 deletions configs/VTacO/VTacO_AKB_001.yaml
@@ -0,0 +1,126 @@
method: vtaco
data:
input_type: pointcloud
classes: null
path: ./data/VTacO_AKB_class/001
pointcloud_n: 3000
pointcloud_noise: 0.005
points_subsample: 100000
num_sample: 2048
points_file: points.npz
points_iou_file: points.npz
voxels_file: null
pointcloud_file: pointcloud.npz
points_unpackbits: False

model:
train_tactile: False
with_img: True
with_contact: False

encoder: pointnet_local_pool
encoder_kwargs:
hidden_dim: 32
plane_type: 'grid'
grid_resolution: 64
unet3d: True
unet3d_kwargs:
num_levels: 4
f_maps: 32
in_channels: 32
out_channels: 32

encoder_hand: pointnet_local_pool
encoder_hand_kwargs:
hidden_dim: 32
plane_type: ['xz', 'xy', 'yz']
plane_resolution: 32
unet: True
unet_kwargs:
depth: 4
merge_mode: concat
start_filts: 32

out_mano: True
out_dim: 51
manolayer_kwargs: &manolayer_k
center_idx: 9
flat_hand_mean: False
ncomps: 45
side: right
mano_root: src/encoder/assets/mano
use_pca: False
root_rot_mode: axisang
joint_rot_mode: axisang
robust_rot: False
return_transf: False
return_full_pose: True

encoder_img: Resnet18
encoder_img_kwargs:
num_classes: 32

encoder_t2d: True
encoder_t2d_kwargs:
pretrained: True
model_file: ../../tactile/test/model_best.pt

encoder_img: UNet
encoder_img_kwargs:
num_classes: 1
in_channel: 3
start_filts: 32
depth: 3

encoder_hand: pointnet_local_pool
encoder_hand_kwargs:
c_dim: 512
hidden_dim: 32
plane_type: ['xz', 'xy', 'yz']
plane_resolution: 64
unet: True
unet_kwargs:
depth: 4
merge_mode: concat
      start_filts: 32

out_mano: True
out_dim: 30
manolayer_kwargs: *manolayer_k


decoder: simple_local
decoder_kwargs:
sample_mode: bilinear # bilinear / nearest
hidden_size: 32
c_dim: 32

training:
out_dir: out/VTacO/AKB_001
opt: Adam
lr: 0.0001
gpu: 2
batch_size: 4
model_selection_metric: iou
model_selection_mode: maximize
print_every: 100
visualize_every: 1
validate_every: 1
checkpoint_every: 2000
backup_every: 10000
n_workers: 8
n_workers_val: 4

test:
threshold: 0.5
eval_mesh: true
eval_pointcloud: False
# model_file: ../AKB_all/model_best.pt
model_file: model.pt

generation:
vis_all: True
refine: false
n_x: 128
n_z: 1
alpha: 0.2
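The config above is plain YAML; a minimal loading sketch, assuming PyYAML (the inline excerpt below mirrors a few fields of the file rather than reading it from disk). One caveat worth knowing: `encoder_img` and `encoder_hand` each appear twice in this file, and PyYAML silently keeps only the last occurrence of a duplicated key.

```
import yaml  # PyYAML

# Inline excerpt mirroring a few fields of configs/VTacO/VTacO_AKB_001.yaml;
# in practice you would open the file and pass its contents instead.
cfg_text = """
method: vtaco
data:
  path: ./data/VTacO_AKB_class/001
  pointcloud_n: 3000
training:
  lr: 0.0001
  batch_size: 4
"""

cfg = yaml.safe_load(cfg_text)
print(cfg["data"]["path"])            # ./data/VTacO_AKB_class/001
print(cfg["training"]["batch_size"])  # 4
```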
98 changes: 98 additions & 0 deletions configs/VTacOH/VTacOH_AKB_001.yaml
@@ -0,0 +1,98 @@
method: conv_onet
data:
input_type: pointcloud
classes: null
path: ./data/VTacO_AKB_class/001
pointcloud_n: 3000
pointcloud_noise: 0.005
points_subsample: 100000
num_sample: 2048
points_file: points.npz
points_iou_file: points.npz
voxels_file: null
pointcloud_file: pointcloud.npz
points_unpackbits: False

model:
train_tactile: False
with_img: True
with_contact: False

encoder: pointnet_local_pool
encoder_kwargs:
hidden_dim: 32
plane_type: 'grid'
grid_resolution: 64
unet3d: True
unet3d_kwargs:
num_levels: 4
f_maps: 32
in_channels: 32
out_channels: 32

encoder_hand: pointnet_local_pool
encoder_hand_kwargs:
hidden_dim: 32
plane_type: ['xz', 'xy', 'yz']
plane_resolution: 32
unet: True
unet_kwargs:
depth: 4
merge_mode: concat
start_filts: 32
out_mano: True
out_dim: 51
manolayer_kwargs:
center_idx: 9
flat_hand_mean: False
ncomps: 45
side: right
mano_root: src/encoder/assets/mano
use_pca: False
root_rot_mode: axisang
joint_rot_mode: axisang
robust_rot: False
return_transf: False
return_full_pose: True

encoder_img: Resnet18
encoder_img_kwargs:
num_classes: 32

encoder_t2d: False
encoder_t2d_kwargs: False

decoder: simple_local
decoder_kwargs:
sample_mode: bilinear # bilinear / nearest
hidden_size: 32
c_dim: 32

training:
out_dir: out/VTacOH/AKB_001
opt: Adam
lr: 0.0001
gpu: 0
batch_size: 6
model_selection_metric: iou
model_selection_mode: maximize
print_every: 100
visualize_every: 1
validate_every: 1
checkpoint_every: 3000
backup_every: 10000
n_workers: 8
n_workers_val: 4
test:
threshold: 0.5
eval_mesh: true
eval_pointcloud: false
# model_file: ../AKB_all/model_best.pt
model_file: model.pt
generation:
vis_all: True
vis_n_outputs: 168
refine: false
n_x: 128
n_z: 1
alpha: 0.2