<!-- index.html -->

<!DOCTYPE html>
<!-- The page copy is written in English; declaring it lets screen readers and translation tools pick the right language. -->
<html lang="en">

<head>
  <!-- Basic -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <!-- Mobile Metas -->
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  <!-- Site Metas: keywords/description summarise the topics this page itself presents -->
  <meta name="keywords" content="CodeLMs, language models for code, adversarial attacks, backdoor attacks, security">
  <meta name="description" content="Security of CodeLMs: a survey of attacks on and defenses for language models for code, covering adversarial attacks, backdoor attacks, and related publications.">
  <meta name="author" content="">

  <title>Security of CodeLMs</title>

  <!-- slider stylesheet (Owl Carousel, used by the Publications carousel) -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.3.4/assets/owl.carousel.min.css">

  <!-- bootstrap core css -->
  <link rel="stylesheet" href="htmls/css/bootstrap.css">

  <!-- fonts style: the "&" in the query string is written as &amp; so the attribute value is unambiguous -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans:400,700|Poppins:400,700&amp;display=swap">
  <!-- Custom styles for this template -->
  <link rel="stylesheet" href="htmls/css/style.css">
  <!-- responsive style -->
  <link rel="stylesheet" href="htmls/css/responsive.css">
</head>

<body>
  <div class="hero_area">
    <!-- header section starts -->
    <header class="header_section">
      <div class="container-fluid">
        <!--  container-fluid-->
        <nav class="navbar navbar-expand-lg custom_nav-container pt-3">
          <!-- Bootstrap collapse toggler: expands/collapses #navbarSupportedContent -->
          <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
            <span class="navbar-toggler-icon"></span>
          </button>

          <div class="collapse navbar-collapse" id="navbarSupportedContent">
            <div class="d-flex ml-auto flex-column flex-lg-row align-items-center">
              <ul class="navbar-nav">
                <!-- <li class="nav-item active">
                  <a class="nav-link" href="index.html">Home <span class="sr-only">(current)</span></a>
                </li> -->
                <!-- Spaces in the file names are percent-encoded (%20): a raw space is not a valid URL character.
                     The encoded URLs resolve to the same files. -->
                <li class="nav-item">
                  <a class="nav-link" href="htmls/Adversarial%20Attacks.html"> Adversarial Attacks </a>
                </li>
                <li class="nav-item">
                  <a class="nav-link" href="htmls/Backdoor%20Attacks.html"> Backdoor Attacks </a>
                </li>
                <li class="nav-item">
                  <a class="nav-link" href="htmls/member.html"> Group Members </a>
                </li>
              </ul>
              <!-- update -->
              <!-- <div class="user_option">
                <a href="htmls/member.html" style="color: #fff;">
                  <img src="htmls/images/user.png" alt="" style="height: 25px;">Group Members
                </a>
              </div> -->

            </div>
          </div>
        </nav>
      </div>
    </header>
    <!-- end header section -->

    <!-- slider section: Bootstrap carousel with three slides; the headings repeat and only the paragraph changes -->
    <section class="slider_section position-relative">
      <div class="container">
        <div class="carousel slide" id="carouselExampleIndicators" data-ride="carousel">
          <!-- one indicator per slide; data-slide-to is the zero-based slide index -->
          <ol class="carousel-indicators">
            <li class="active" data-target="#carouselExampleIndicators" data-slide-to="0"></li>
            <li data-target="#carouselExampleIndicators" data-slide-to="1"></li>
            <li data-target="#carouselExampleIndicators" data-slide-to="2"></li>
          </ol>
          <div class="carousel-inner">

            <!-- slide 1 (initially active): what CodeLMs are -->
            <div class="carousel-item active">
              <div class="row">
                <div class="col">
                  <div class="detail-box">
                    <div>
                      <h2>welcome to</h2>
                      <h1>Security of CodeLMs</h1>
                      <p>
                        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Language models for code (CodeLMs) have emerged as powerful tools
                        for code-related tasks, outperforming traditional methods and standard machine learning approaches.
                      </p>
                      <!-- <div class=""><a href="">Contact us</a></div> -->
                    </div>
                  </div>
                </div>
              </div>
            </div>

            <!-- slide 2: the security problem and the missing survey -->
            <div class="carousel-item">
              <div class="row">
                <div class="col">
                  <div class="detail-box">
                    <div>
                      <h2>welcome to</h2>
                      <h1>Security of CodeLMs</h1>
                      <p>
                        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;However, these models are susceptible to security vulnerabilities,
                        drawing increasing research attention from domains such as software engineering,
                        artificial intelligence, and cybersecurity.
                        Despite the growing body of research focused on the security of
                        CodeLMs, a comprehensive survey in this area remains absent.
                      </p>
                      <!-- <div class=""><a href="">Contact us</a></div> -->
                    </div>
                  </div>
                </div>
              </div>
            </div>

            <!-- slide 3: what this survey contributes -->
            <div class="carousel-item">
              <div class="row">
                <div class="col">
                  <div class="detail-box">
                    <div>
                      <h2>welcome to</h2>
                      <h1>Security of CodeLMs</h1>
                      <p>
                        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;To address this gap, we systematically review relevant papers,
                        organizing them based on attack and defense strategies.
                        Furthermore, we provide an overview of commonly used language models, datasets,
                        and evaluation metrics, and highlight open-source
                        tools and promising directions for future research in securing CodeLMs.
                      </p>
                      <!-- <div class=""><a href="">Contact us</a></div> -->
                    </div>
                  </div>
                </div>
              </div>
            </div>

          </div>
        </div>
      </div>
    </section>
    <!-- end slider section -->
  </div>

  <!-- do section -->

  <section class="do_section layout_padding">
    <!-- Category tiles: each box pairs a decorative icon (empty alt) with a text link to the category page. -->
    <div class="container">
      <div class="heading_container">
        <!-- <h2>
          Paper Classification
        </h2> -->
      </div>
      <div class="do_container">
        <div class="box arrow-start arrow_bg">
          <div class="img-box">
              <img src="htmls/images/icons/adv.png" alt="">
          </div>
          <div class="detail-box">
            <!-- <h6>
              Paper Class One
            </h6> -->
            <!-- The space in the file name is percent-encoded (%20) so the URL is valid; it resolves to the same file. -->
            <a href="htmls/Adversarial%20Attacks.html" style="color: rgb(1, 1, 11); text-decoration: none; font-size: 16px; font-weight: bold;">
              Adversarial Attacks
            </a>
          </div>
        </div>

        <div class="box arrow-middle arrow_bg">
          <div class="img-box">
            <img src="htmls/images/icons/backdoor.png" alt="">
          </div>
          <div class="detail-box">
            <a href="htmls/Backdoor%20Attacks.html" style="color: rgb(1, 1, 26); text-decoration: none; font-size: 16px; font-weight: bold;">
              Backdoor Attacks
            </a>
          </div>
        </div>

        <div class="box arrow-middle arrow_bg">
          <div class="img-box">
            <img src="htmls/images/d-3.png" alt="">
          </div>
          <div class="detail-box">
            <!-- Placeholder link: there is no "Other Attacks" page yet. The previous href="" made this a link
                 back to the current page (clicking it just reloaded), so the href is omitted until a real
                 destination exists. TODO: add the href once the page is created. -->
            <a style="color: rgb(1, 1, 14); text-decoration: none; font-size: 16px; font-weight: bold;">
              Other Attacks
            </a>
          </div>
        </div>
      </div>
    </div>
  </section>

  <!-- end do section -->

  <!-- client section -->

  <section class="client_section">
    <div class="container">
      <div class="heading_container">
        <h2>
          Publications
        </h2>
      </div>
      <div class="carousel-wrap ">
        <div class="owl-carousel">
          <!-- Publication card: thumbnail linking to the arXiv page, followed by the title and abstract. -->
          <div class="item">
            <div class="box">
              <div class="img-box">
                <!-- The image is the link's only content, so its alt text is the link's accessible name. -->
                <a href="https://arxiv.org/abs/2305.17506"><img src="htmls/images/recent_papers/BadCode.png" alt="Backdooring Neural Code Search (arXiv:2305.17506)"></a>
              </div>
              <div class="detail-box">
                <h5>
                  Backdooring Neural Code Search
                </h5>
                <!-- <img src="htmls/images/quote.png" alt=""> -->
                <p>
                  Reusing off-the-shelf code snippets from online repositories is a common practice,
                  which significantly enhances the productivity of software developers.
                  To find desired code snippets, developers resort to code search engines through
                  natural language queries. Neural code search models are hence behind many such engines.
                  These models are based on deep learning and gain substantial attention due to their impressive
                  performance. However, the security aspect of these models is rarely studied.
                  Particularly, an adversary can inject a backdoor in neural code search models,
                  which return buggy or even vulnerable code with security/privacy issues.
                  This may impact the downstream software (e.g., stock trading systems and autonomous driving)
                  and cause financial loss and/or life-threatening incidents. In this paper,
                  we demonstrate such attacks are feasible and can be quite stealthy.
                  By simply modifying one variable/function name, the attacker can make
                  buggy/vulnerable code rank in the top 11%. Our attack BADCODE features a
                  special trigger generation and injection procedure, making the attack more
                  effective and stealthy. The evaluation is conducted on two neural code search
                  models and the results show our attack outperforms baselines by 60%. Our user
                  study demonstrates that our attack is more stealthy than the baseline by two times based on the F1 score.
                </p>
              </div>
            </div>
          </div>

          <!-- Publication card: thumbnail linking to the arXiv page, followed by the title and abstract. -->
          <div class="item">
            <div class="box">
              <div class="img-box">
                <!-- The image is the link's only content, so its alt text is the link's accessible name. -->
                <a href="https://arxiv.org/abs/2410.02841"><img src="htmls/images/recent_papers/DICE.png" alt="Demonstration Attack against In-Context Learning for Code Intelligence (arXiv:2410.02841)"></a>
              </div>
              <div class="detail-box">
                <h5>
                  Demonstration Attack against In-Context Learning for Code Intelligence
                </h5>
                <!-- <img src="htmls/images/quote.png" alt=""> -->
                <p>
                  Recent advancements in large language models (LLMs) have revolutionized code intelligence
                  by improving programming productivity and alleviating challenges faced by software developers.
                  To further improve the performance of LLMs on specific code intelligence tasks and reduce training
                  costs, researchers reveal a new capability of LLMs: in-context learning (ICL). ICL allows LLMs to
                  learn from a few demonstrations within a specific context, achieving impressive results without
                  parameter updating. However, the rise of ICL introduces new security vulnerabilities in the
                  code intelligence field. In this paper, we explore a novel security scenario based on the
                  ICL paradigm, where attackers act as third-party ICL agencies and provide users with bad ICL
                  content to mislead LLMs outputs in code intelligence tasks. Our study demonstrates the feasibility
                  and risks of such a scenario, revealing how attackers can leverage malicious demonstrations to
                  construct bad ICL content and induce LLMs to produce incorrect outputs, posing significant threats
                  to system security. We propose a novel method to construct bad ICL content called DICE, which is
                  composed of two stages: Demonstration Selection and Bad ICL Construction, constructing targeted bad
                  ICL content based on the user query and transferable across different query inputs. Ultimately, our
                  findings emphasize the critical importance of securing ICL mechanisms to protect code intelligence
                  systems from adversarial manipulation.
                </p>
              </div>
            </div>
          </div>

          <!-- Publication card: thumbnail linking to the arXiv page, followed by the title and abstract. -->
          <div class="item">
            <div class="box">
              <div class="img-box">
                <!-- The image is the link's only content, so its alt text is the link's accessible name. -->
                <a href="https://arxiv.org/abs/2408.04683"><img src="htmls/images/recent_papers/EliBadcode.png" alt="Eliminating Backdoors in Neural Code Models via Trigger Inversion (arXiv:2408.04683)"></a>
              </div>
              <div class="detail-box">
                <h5>
                  Eliminating Backdoors in Neural Code Models via Trigger Inversion
                </h5>
                <!-- <img src="htmls/images/quote.png" alt=""> -->
                <p>
                  Neural code models (NCMs) have been widely used for addressing various code understanding tasks,
                  such as defect detection and clone detection. However, numerous recent studies reveal that such
                  models are vulnerable to backdoor attacks. Backdoored NCMs function normally on normal code snippets,
                  but exhibit adversary-expected behavior on poisoned code snippets injected with the adversary-crafted
                  trigger. It poses a significant security threat. For example, a backdoored defect detection model may
                  misclassify user-submitted defective code as non-defective. If this insecure code is then integrated
                  into critical systems, like autonomous driving systems, it could lead to life safety. However, there
                  is an urgent need for effective defenses against backdoor attacks targeting NCMs.
                  To address this issue, in this paper, we innovatively propose a backdoor defense technique based
                  on trigger inversion, called EliBadCode. EliBadCode first filters the model vocabulary for trigger
                  tokens to reduce the search space for trigger inversion, thereby enhancing the efficiency of the
                  trigger inversion. Then, EliBadCode introduces a sample-specific trigger position identification
                  method, which can reduce the interference of adversarial perturbations for subsequent trigger inversion,
                  thereby producing effective inverted triggers efficiently. Subsequently, EliBadCode employs a Greedy
                  Coordinate Gradient algorithm to optimize the inverted trigger and designs a trigger anchoring method
                  to purify the inverted trigger. Finally, EliBadCode eliminates backdoors through model unlearning.
                  We evaluate the effectiveness of EliBadCode in eliminating backdoor attacks against multiple NCMs used
                  for three safety-critical code understanding tasks. The results demonstrate that EliBadCode can
                  effectively eliminate backdoors while having minimal adverse effects on the normal functionality of the model.
                </p>
              </div>
            </div>
          </div>

          <!-- <div class="item">
            <div class="box">
              <div class="img-box">
                <a href="https://jos.org.cn/jos/article/abstract/nL021"><img src="htmls/images/recent_papers/EliBadcode.png" alt=""></a>
              </div>
              <div class="detail-box">
                <h5>
                  深度代码模型安全综述
                </h5>
                <p>
                  With the significant success of deep learning technology in fields such as computer vision 
                  and natural language processing, software engineering researchers have begun to explore its 
                  integration into solving software engineering tasks. Existing research results indicate that 
                  deep learning technology exhibits advantages in various software code-related tasks, such as 
                  code retrieval and code summarization, that traditional methods and machine learning approaches 
                  cannot match. These deep learning models, trained for code-related tasks, are collectively referred 
                  to as deep code models. However, similar to natural language processing and image processing models, 
                  deep code model security faces numerous challenges due to the vulnerability and lack of interpretability 
                  of neural networks. It has become a focal point in the field of software engineering. In recent years, 
                  researchers have proposed numerous attack and defense methods specific to deep code models. 
                  Nevertheless, there is currently a lack of a systematic review of security research on deep code models, 
                  hindering rapid understanding of the field for subsequent researchers. To address this gap and provide 
                  a comprehensive overview of the current state, challenges, and latest research findings in this field, 
                  this paper collected 32 relevant papers and categorized existing research results into two main classes: 
                  backdoor attacks and defense techniques, and adversarial attacks and defense techniques. 
                  The paper systematically organizes and summarizes the collected papers based on different 
                  technological categories. Subsequently, the paper outlines commonly used experimental datasets 
                  and evaluation metrics in this field. Finally, the paper analyzes key challenges faced by this 
                  field and suggests feasible future research directions, aiming to provide valuable guidance for 
                  further advancing the security of deep code.   
                </p>
              </div>
            </div>
          </div> -->

        </div>
      </div>
    </div>
  </section>

  <!-- footer section: single-line copyright notice -->
  <!-- NOTE(review): the link targets html.design, which looks like a template vendor URL; confirm the intended destination. -->
  <section class="container-fluid footer_section">
    <p>© 2024 All Rights Reserved By <a href="https://html.design/">The Team</a></p>
  </section>
  <!-- end footer section -->

  <!-- Scripts sit at the end of <body>, after the markup they act on. jQuery is loaded first because
       the inline initialiser below calls $(...).owlCarousel(...). -->
  <script src="htmls/js/jquery-3.4.1.min.js"></script>
  <script src="htmls/js/bootstrap.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/OwlCarousel2/2.3.4/owl.carousel.min.js"></script>
  <!-- owl carousel script -->
  <script>
    (function () {
      // Options passed to Owl Carousel for every element with class "owl-carousel".
      var carouselOptions = {
        loop: true,
        margin: 0,
        navText: [],
        center: true,
        autoplay: true,
        autoplayHoverPause: true,
        // Breakpoint map: 1 item from width 0 upwards, 3 items from width 1000 upwards.
        responsive: {
          0: { items: 1 },
          1000: { items: 3 }
        }
      };

      $(".owl-carousel").owlCarousel(carouselOptions);
    })();
  </script>
  <!-- end owl carousel script -->

</body>

</html>