index.html

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="Where2comm: Communication-Efficient Collaborative Perception via Spatial Confidence Maps">
  <meta name="keywords" content="3D detection, Collaborative perception, Communication-Efficient">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- A document title is required: it is what browser tabs, bookmarks and search results show. -->
  <title>Where2comm: Communication-Efficient Collaborative Perception via Spatial Confidence Maps</title>

  <!-- Global site tag (gtag.js) - Google Analytics (currently disabled) -->
  <!-- <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    window.dataLayer = window.dataLayer || [];

    function gtag() {
      dataLayer.push(arguments);
    }

    gtag('js', new Date());

    gtag('config', 'G-PYVRSFMDRL');
  </script> -->

  <!-- Favicon: declared once, with a MIME type that matches the .jpeg file. -->
  <link rel="icon" type="image/jpeg" href="./static/images/cmic.jpeg">

  <!-- Web fonts. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <!-- Stylesheets: Bulma and its plugins, icon fonts, then the page-specific rules. -->
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">

  <!-- Scripts. NOTE(review): index.js presumably relies on jQuery and the Bulma plugins
       loaded before it, so the load order is left unchanged - confirm before adding defer/async. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<body>

<!-- Top navigation bar. The menu is disabled (commented out below), so only the burger toggle is rendered. -->
<nav class="navbar" role="navigation" aria-label="main navigation">
  <div class="navbar-brand">
    <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false">
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
    </a>
  </div>
  <!-- Disabled menu. The comment now encloses the menu's own closing tag as well, so no
       unmatched end tag is left behind inside <nav>. -->
  <!-- <div class="navbar-menu">
    <div class="navbar-start" style="flex-grow: 1; justify-content: center;">
      <a class="navbar-item" href="https://jaraxxus-me.github.io/">
      <span class="icon">
          <i class="fas fa-home"></i>
      </span>
      </a>

      <div class="navbar-item has-dropdown is-hoverable">
        <a class="navbar-link">
          More Research
        </a>
        <div class="navbar-dropdown">
          <a class="navbar-item" href="https://ieeexplore.ieee.org/document/9561564">
            ADTrack - ICRA 2021
          </a>
          <a class="navbar-item" href="https://openaccess.thecvf.com/content/ICCV2021/papers/Cao_HiFT_Hierarchical_Feature_Transformer_for_Aerial_Tracking_ICCV_2021_paper.pdf">
            HiFT - ICCV 2021
          </a>
        </div>
      </div>
    </div>

  </div> -->
</nav>


<!-- Page header: paper title, venue, authors, affiliations and resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <!-- <h1 class="title is-1 publication-title"><img src="./static/images/drone.svg" width="120">AirDet&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</h1> -->
          <h1 class="title is-2 publication-title">Where2comm: Communication-Efficient Collaborative Perception via Spatial Confidence Maps</h1>
          <div class="column is-full_width">
            <h2 class="title is-4">Accepted by NeurIPS 2022 Spotlight</h2>
          </div>

          <!-- Authors. The superscript refers to the affiliation list below. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <!-- "&" inside an attribute value is written as "&amp;" so the markup validates. -->
              <a href="https://scholar.google.com/citations?user=XBbwb78AAAAJ&amp;hl=zh-CN">Yue Hu</a><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://github.com/dongfeng12">Shaoheng Fang</a><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://chezacar.github.io/">Zixing Lei</a><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://github.com/Kay1794">Yiqi Zhong</a><sup>2</sup>,
            </span>
            <span class="author-block">
              <a href="https://siheng-chen.github.io/">Siheng Chen</a><sup>1</sup>
            </span>
          </div>

          <!-- Affiliations. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup>Cooperative Medianet Innovation Center, Shanghai Jiao Tong University</span>
          </div>
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>2</sup>Department of Engineering Science, University of Southern California</span>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- Paper link (arXiv abstract page). -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2209.12836"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- arXiv link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2209.12836"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Video Link. -->
              <!-- <span class="link-block">
                <a href="https://www.youtube.com/watch?v=MrKrnHhk8IA"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Video</span>
                </a>
              </span> -->
              <!-- Code link. -->
              <span class="link-block">
                <a href="https://github.com/MediaBrain-SJTU/Where2comm"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
              <!-- YouTube video link. -->
              <span class="link-block">
                <a href="https://www.youtube.com/watch?v=xl1zgGy1GA0"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Youtube</span>
                </a>
              </span>
              <!-- Bilibili video link (rendered with the YouTube icon). -->
              <span class="link-block">
                <a href="https://www.bilibili.com/video/BV1Je4y1g7sP/?spm_id_from=333.999.0.0&amp;vd_source=5ecff8efbea6e8fdc7e8e1f75575f7a5"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Bilibili</span>
                </a>
              </span>
              <!-- Dataset Link. -->
              <!-- <span class="link-block">
                <a href="https://github.com/Jaraxxus-Me/AirDet"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="far fa-images"></i>
                  </span>
                  <span>Data</span>
                </a>
              </span> -->
              <!-- <span class="link-block">
                <a href="https://github.com/Jaraxxus-Me/AirDet_ROS"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-robot"></i>
                  </span>
                  <span>ROS</span>
                </a>
              </span> -->
              <!-- <span class="link-block">
                <a href="https://zhuanlan.zhihu.com/p/545249730"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-blog"></i>
                  </span>
                  <span>Blog</span>
                </a>
              </span> -->
            </div>

          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Presentation video, embedded from YouTube. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Video</h2>
        <div class="publication-video">
          <iframe
            src="https://www.youtube.com/embed/xl1zgGy1GA0"
            title="[NeurIPS 2022] Where2comm: Communication-Efficient Collaborative Perception"
            width="951"
            height="535"
            frameborder="0"
            allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
            allowfullscreen></iframe>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Teaser figure with its explanatory caption. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">

      <!-- URL paths use forward slashes; the alt text summarises the scenario described in the caption below. -->
      <img src="./static/images/Intro.png" class="center"
           alt="Motivating scenario for collaborative perception: a white car and an occluded red car may collide unless a blue car shares the red car's position">
      <!-- <video id="teaser" autoplay muted loop height="100%">
        <source src="./static/images/SUBT_video.mp4"
                type="video/mp4">
      </video> -->
      <!-- <img class="rounded" src="./media/nice-slam/teaser.png" > -->
      <br><br><br>
      <!-- <h2 class="subtitle has-text-centered">
        
    </h2> -->
    <!-- <h2 class="subtitle has-text-centered">
      (The <span style="color:#000000;">black</span> / <span style="color:#ff0000;">red</span> lines are the ground truth / predicted camera trajectory)
    </h2> -->
    <h2 class="is-size-6 has-text-centered">Collaborative perception could contribute to safety-critical scenarios, where the white car and the red car may collide due to occlusion. This collision could be avoided when the blue car can share a message about the red car's position. Such a message is spatially sparse, yet perceptually critical. Considering the precious communication bandwidth, each agent needs to speak to the point!</h2>

    </div>
  </div>
</section>

<!-- Paper abstract. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Disabled alternative video embed. -->
    <!-- <div class="columns is-centered has-text-centered">
          <div class="column is-four-fifths">
            <h2 class="title is-3">Video</h2>
            <div class="publication-video">
              <iframe src="https://www.youtube.com/embed/V5hYTz5os0M?rel=0&amp;showinfo=0"
                      frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
            </div>
          </div>
        </div> -->

    <!-- <br> -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Multi-agent collaborative perception could significantly upgrade the perception performance by enabling agents to share complementary information with each other through communication. It inevitably results in a fundamental trade-off between perception performance and communication bandwidth. To tackle this bottleneck issue, we propose a spatial confidence map, which reflects the spatial heterogeneity of perceptual information. It empowers agents to only share spatially sparse, yet perceptually critical information, contributing to where to communicate.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Summary of the paper's contributions. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- <br> -->
    <!-- Contribution. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Contribution</h2>
        <div class="content has-text-justified">
          <!-- List items must be children of a list element, so they are wrapped in <ul>. -->
          <ul>
            <li>
              We propose a novel fine-grained spatial-aware communication strategy, where each agent can decide where to communicate and pack messages only related to the most perceptually critical spatial areas. This strategy not only enables more precise support for other agents, but also more targeted request from other agents in multi-round communication.
            </li>
            <li>
              We propose Where2comm, a novel collaborative perception framework based on the spatial-aware communication strategy. With the guidance of the proposed spatial confidence map, Where2comm leverages novel message packing and communication graph learning to achieve lower communication bandwidth, and adopts confidence-aware multi-head attention to reach better perception performance.
            </li>
            <li>
              We conduct extensive experiments to validate Where2comm achieves state-of-the-art performance-bandwidth trade-off on multiple challenging real/simulated datasets across views and modalities.
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!--/ Contribution. -->
  </div>
</section>

<!-- Method overview followed by qualitative results. Every element is kept inside the
     single container <div>, which is closed once at the end of the section. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Method. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-full_width">
        <hr>
        <h2 class="title is-3">Method</h2>
        <br>
        <img src="./static/images/system-final.png" class="center"
             alt="Overview diagram of the Where2comm framework">
        <div class="content has-text-justified">
          <br>
          <p>
            Framework of Where2comm, a multi-round, multi-modality, multi-agent collaborative perception framework based on a spatial-confidence-aware communication strategy. Where2comm includes an observation encoder, a spatial confidence generator, the spatial confidence-aware communication module, the spatial confidence-aware message fusion module and a detection decoder. Among five modules, the proposed <strong>spatial confidence generator</strong> generates the spatial confidence map. Based on this spatial confidence map, the proposed <strong>spatial confidence-aware communication</strong> generates compact messages and sparse communication graphs to save communication bandwidth; and the proposed <strong>spatial confidence-aware message fusion</strong> module leverages informative spatial confidence priors to achieve better aggregation.
          </p>
        </div>
      </div>
    </div>
    <hr>


    <!-- Qualitative results. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-full_width">
        <h2 class="title is-3">Qualitative Results</h2>
      </div>
    </div>

    <!-- <div class="column is-full_width">
      <img src=static\images\dair_3d_18.gif>
    </div>
    <div class="content has-text-justified">
        <br>  
        <p>
          Where2comm qualitatively outperforms single agent detection. 
        </p>
        </p>
      </div>
    </div> -->
    <div class="columns is-centered has-text-centered">
    </div>
    <div class="column is-full_width">
      <!-- The file name (including its spelling) is kept as-is because it must match the asset on disk. -->
      <img src="./static/images/QualtativeResults.png"
           alt="Qualitative detection results of Where2comm compared with When2com and DiscoNet on the DAIR-V2X dataset">
    </div>
    <div class="content has-text-justified">
      <br>
      <p>
        Where2comm qualitatively outperforms When2com and DiscoNet in DAIR-V2X dataset. <span style="color:#00CC66;">Green</span> and <span style="color:#ff0000;">Red</span> boxes denote ground-truth and detection, respectively. <span style="color:#ffd700;">Yellow</span> and <span style="color:#00ced1;">blue</span> denote the point clouds collected from vehicle and infrastructure, respectively.
      </p>
    </div>
    <br><br>
    <hr>
    <!-- <h3 class="title is-4">Attention of Detection Head</h3> -->
    <!-- <div class="column is-full_width">
      <table><tr>
        <td><img src=static\images\single_bev_00019.png style="margin-right: 30px;"><figcaption><center>No Collaboration</center></figcaption></td>
        <td><img src=static\images\when2com_bev_00019.png style="margin-right: 30px;"><figcaption><center>When2com</center></figcaption></td>
        <td><img src=static\images\disconet_bev_00019.png style="margin-right: 30px;"><figcaption><center>DiscoNet</center></figcaption></td>
        <td><img src=static\images\where2comm_bev_00019.png style="margin-right: 30px;"><figcaption><center>Where2comm</center></figcaption></td>
        </tr></table>        
      
    </div> -->

    <!-- <div class="row">
        <div class="col-md-4">
        <img src="static\images\single_bev_00019.png" alt="agent_0" style="width:30%">
        <figcaption>No Collaboration</figcaption>
        </div>
        <div class="col-md-4">
        <img src="static\images\when2com_bev_00019.png" alt="agent_1" style="width:30%">
        <figcaption><center>When2com</center></figcaption>
        </div>
        <div class="col-md-4">
        <img src="static\images\disconet_bev_00019.png" alt="agent_2" style="width:30%">
        <figcaption>DiscoNet</figcaption>
        </div>
        <div class="col-md-4">
        <img src="static\images\where2comm_bev_00019.png" alt="agent_2" style="width:30%">
        <figcaption>DiscoNet</figcaption>
        </div>
    </div> -->

  </div>
</section>


<!-- Citation entry. Whitespace inside <pre> is rendered literally, so the entry starts
     right after <code> and uses its own two-space indentation. -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@inproceedings{Hu22Where2comm,
  author    = {Yue Hu and Shaoheng Fang and Zixing Lei and Yiqi Zhong and Siheng Chen},
  title     = {Where2comm: Communication-Efficient Collaborative Perception via Spatial Confidence Maps},
  booktitle = {Thirty-sixth Conference on Neural Information Processing Systems (NeurIPS)},
  month     = {November},
  year      = {2022}
}</code></pre>
  </div>
</section>


<!-- <section class="section" id="Acknowledgements">
  <div class="container is-max-desktop content">
    <h2 class="title">Acknowledgements</h2>
    The work was done when Bowen Li and Pranay Reddy were interns at The Robotics Institute, CMU. The authors would like to thank all members of the Team Explorer for providing data collected from the DARPA Subterranean Challenge. Our code is built upon <a href="https://github.com/fanq15/FewX">FewX</a>, for which we sincerely express our gratitude to the authors.
  </div>
</section> -->


<!-- Page footer: licence notice and template credit. -->
<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            This website is licensed under a <a rel="license"
                                                href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
            This webpage template is from <a href="https://github.com/nerfies/nerfies.github.io">Nerfies</a>.
            We sincerely thank <a href="https://keunhong.com/">Keunhong Park</a> for developing and open-sourcing this template.
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>

</body>
</html>