% ICDE 2026 conference paper. Acronyms in the title are braced so that
% sentence-casing bibliography styles do not lowercase them.
@inproceedings{liang2026icde,
  author    = {Liang, Jie and Wu, Zhiyong and Fu, Jingzhou and Zhang, Chi and Miao, Runpei and Su, Zhuo and Jiang, Yu and Ma, Shuai},
  title     = {Vireo: Human-in-the-Loop {DBMS} Fuzzing with Visualization and {LLM} Support},
  booktitle = {Proceedings of the 42nd IEEE International Conference on Data Engineering (ICDE)},
  year      = {2026},
}
SIGMOD’26
EPSC: Testing Database Management Systems via Equivalent Prepared Statement Construction
Chi Zhang, Jie Liang, Zhiyong Wu, and 3 more authors
% Journal article in Proc. ACM Manag. Data, volume 4, number 3.
% The leading acronym in the title is braced; without the braces a
% sentence-casing style would lowercase everything after its first letter.
@article{zhang2026sigmod,
  author     = {Zhang, Chi and Liang, Jie and Wu, Zhiyong and Shi, Dalong and Wang, Linzhang and Jiang, Yu},
  title      = {{EPSC}: Testing Database Management Systems via Equivalent Prepared Statement Construction},
  journal    = {Proc. ACM Manag. Data},
  volume     = {4},
  number     = {3},
  articleno  = {176},
  numpages   = {25},
  year       = {2026},
  month      = jun,
  issue_date = {June 2026},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  doi        = {10.1145/3802053},
  url        = {https://doi.org/10.1145/3802053},
}
SIGMOD’26
Finding Missed Optimizations in DBMSs through Unbalanced Short-Circuit Query Construction
Jinhui Lai, Chi Zhang, Jie Liang, and 7 more authors
% Journal article in Proc. ACM Manag. Data, volume 4, number 3.
% The acronym in the title is braced so sentence-casing styles keep its
% capitalization.
@article{lai2026sigmod2,
  author     = {Lai, Jinhui and Zhang, Chi and Liang, Jie and Zeng, Zihao and Wu, Zhiyong and Fu, Jingzhou and Zhou, Chijin and Ma, Shuai and Jiang, Yu and Xu, Zichen},
  title      = {Finding Missed Optimizations in {DBMSs} through Unbalanced Short-Circuit Query Construction},
  journal    = {Proc. ACM Manag. Data},
  volume     = {4},
  number     = {3},
  articleno  = {184},
  numpages   = {24},
  year       = {2026},
  month      = jun,
  issue_date = {June 2026},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  doi        = {10.1145/3802061},
  url        = {https://doi.org/10.1145/3802061},
}
TOSEM
Evaluating the Effectiveness of Deep Learning Models for Foundational Program Analysis Tasks
Qian Chen, Rouyi Chen, Chenyang Yu, and 5 more authors
While deep neural networks provide state-of-the-art solutions to a wide range of programming language tasks, their effectiveness in dealing with foundational program analysis tasks remains underexplored. In this paper, we present an empirical study that evaluates four prominent models of code (i.e., CuBERT, CodeBERT, GGNN, and Graph Sandwiches), plus four popular large language models (i.e., GPT-3.5, GPT-4o mini, Qwen2.5-Coder, and DeepSeek Coder), in two such foundational tasks: (1) alias prediction, in which models predict whether two pointers must alias, may alias, or must not alias; and (2) equivalence prediction, in which models predict whether or not two programs are semantically equivalent. At the core of this study is CodeSem, a dataset built upon the source code of real-world flagship software (e.g., Linux Kernel, GCC, MySQL) and manually validated for the two prediction tasks. Results show that all models are accurate in both prediction tasks. We also conduct a comprehensive, in-depth analysis of the results of all models in both tasks, concluding that deep learning models are generally capable of performing foundational tasks in program analysis even though in specific cases their weaknesses are also evident. Our code and evaluation data are publicly available.
2025
SIGMOD’26
SRS: Detecting Logic Bugs of Join Implementation in DBMSs via Set Relation Synthesis
Jinhui Lai, Chi Zhang, Bingyan Li, and 6 more authors
Logic bugs can cause DBMSs to silently produce incorrect results for a given query, posing significant threats to software reliability and remaining challenging to detect. Join is a fundamental operation in DBMSs, enabling the combination of data from multiple tables; however, due to its complexity, it is also susceptible to logic bugs. Existing works detect logic bugs in join optimizations by altering query hints and system variables to alter the optimizer’s choice of execution plans. However, these approaches struggle to detect logic bugs when query hints or system variables fail to influence the optimizer’s behavior, or when the logic bugs reside in join implementation code that is unrelated to optimization. In this paper, we present Set Relation Synthesis (SRS), a black-box testing approach that detects logic bugs of join implementation in DBMSs by leveraging set relations among different join operations. SRS applies transformations to the original join queries, including modifications to join types, join orders, and join conditions, while ensuring that the outputs of both the original and transformed queries preserve the expected set relations. Violations of these set relations indicate potential logic bugs. We realized SRS and evaluated it on five widely-used and extensively-tested DBMSs: MySQL, MariaDB, TiDB, PostgreSQL, and DuckDB. SRS uncovered 33 previously unknown and unique bugs, all of which have been confirmed, with 12 already fixed. Among these, 33 are logic bugs, demonstrating SRS’s effectiveness and practicality in detecting logic bugs in the implementation of join operations within DBMSs.
ASE’25
ARG: Testing Query Rewriters via Abstract Rule Guided Fuzzing
Dawei Li, Yuxiao Guo, Qifan Liu, and 5 more authors
In 2025 40th IEEE/ACM International Conference on Automated Software Engineering (ASE), 2025
Logic bugs are bugs that can cause database management systems (DBMSs) to silently produce incorrect results for given queries. Such bugs are severe, because they can easily be overlooked by both developers and users, and can cause applications that rely on the DBMSs to malfunction. In this work, we propose Constant-Optimization-Driven Database Testing (CODDTest) as a novel approach for detecting logic bugs in DBMSs. This method draws inspiration from two well-known optimizations in compilers: constant folding and constant propagation. Our key insight is that for a certain database state and query containing a predicate, we can apply constant folding on the predicate by replacing an expression in the predicate with a constant, anticipating that the results of this predicate remain unchanged; any discrepancy indicates a bug in the DBMS. We evaluated CODDTest on five mature and extensively-tested DBMSs—SQLite, MySQL, CockroachDB, DuckDB, and TiDB—and found 45 unique, previously unknown bugs in them. Out of these, 24 are unique logic bugs. Our manual analysis of the state-of-the-art approaches indicates that 11 logic bugs are detectable only by CODDTest. We believe that CODDTest is easy to implement, and can be widely adopted in practice.
2024
OOPSLA’24
Finding Cross-Rule Optimization Bugs in Datalog Engines
Chi Zhang, Linzhang Wang, and Manuel Rigger
In Proceedings of the 39th ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications, 2024
% OOPSLA 2024 paper; the entry carries both the proceedings title and the
% Proc. ACM Program. Lang. journal coordinates (volume 8, number OOPSLA1).
% The proper noun in the title is braced against sentence-casing styles, and
% the article number uses the articleno field name, as the other ACM journal
% entries in this file do.
@inproceedings{zhang2024oopsla,
  author    = {Zhang, Chi and Wang, Linzhang and Rigger, Manuel},
  title     = {Finding Cross-Rule Optimization Bugs in {Datalog} Engines},
  booktitle = {Proceedings of the 39th ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  journal   = {Proc. ACM Program. Lang.},
  volume    = {8},
  number    = {OOPSLA1},
  articleno = {98},
  pages     = {1--27},
  year      = {2024},
  publisher = {ACM},
}
OOPSLA’24
Evaluating the Effectiveness of Deep Learning Models for Foundational Program Analysis Tasks
Qian Chen, Chenyang Yu, Ruyan Liu, and 5 more authors
In Proceedings of the 39th ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications, 2024
% OOPSLA 2024 paper; the entry carries both the proceedings title and the
% Proc. ACM Program. Lang. journal coordinates (volume 8, number OOPSLA1).
% The article number uses the articleno field name, as the other ACM journal
% entries in this file do.
@inproceedings{chen2024oopsla,
  author    = {Chen, Qian and Yu, Chenyang and Liu, Ruyan and Zhang, Chi and Wang, Yu and Wang, Ke and Su, Ting and Wang, Linzhang},
  title     = {Evaluating the Effectiveness of Deep Learning Models for Foundational Program Analysis Tasks},
  booktitle = {Proceedings of the 39th ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications},
  journal   = {Proc. ACM Program. Lang.},
  volume    = {8},
  number    = {OOPSLA1},
  articleno = {112},
  pages     = {1--29},
  year      = {2024},
  publisher = {ACM},
}
SPLASH’24
Step-wise Execution of Data-Centric Systems
Chi Zhang
In Companion Proceedings of the 2024 ACM SIGPLAN International Conference on Systems, Programming, Languages, and Applications: Software for Humanity (SPLASH Companion’24), 2024
% Single-author paper in the SPLASH Companion 2024 proceedings.
@inproceedings{zhang2024splash,
  author    = {Zhang, Chi},
  title     = {Step-wise Execution of Data-Centric Systems},
  booktitle = {Companion Proceedings of the 2024 ACM SIGPLAN International Conference on Systems, Programming, Languages, and Applications: Software for Humanity (SPLASH Companion'24)},
  pages     = {1--3},
  year      = {2024},
  publisher = {ACM},
}
2023
IoT-J
Physical Devices-Agnostic Hybrid Fuzzing of IoT Firmware
Lingyun Situ, Chi Zhang, Le Guan, and 5 more authors
% Journal article in the IEEE Internet of Things Journal, volume 10, number 23.
% The mixed-case acronym in the title is braced so sentence-casing styles do
% not lowercase it.
@article{situ2023iotj,
  author    = {Situ, Lingyun and Zhang, Chi and Guan, Le and Zuo, Zhiqiang and Wang, Linzhang and Li, Xuandong and Liu, Peng and Shi, Jin},
  title     = {Physical Devices-Agnostic Hybrid Fuzzing of {IoT} Firmware},
  journal   = {IEEE Internet of Things Journal},
  volume    = {10},
  number    = {23},
  pages     = {20718--20734},
  year      = {2023},
  publisher = {IEEE},
}
2021
Internetware’20
Firmware Fuzzing: The State of the Art
Chi Zhang, Yu Wang, and Linzhang Wang
In Proceedings of the 12th Asia-Pacific Symposium on Internetware (Internetware’20), 2021
% Paper in the Internetware'20 proceedings; the entry records 2021 as the year.
@inproceedings{zhang2021internetware,
  author    = {Zhang, Chi and Wang, Yu and Wang, Linzhang},
  title     = {Firmware Fuzzing: The State of the Art},
  booktitle = {Proceedings of the 12th Asia-Pacific Symposium on Internetware (Internetware'20)},
  pages     = {1--6},
  year      = {2021},
  publisher = {ACM},
}