Publications | Pengxiang LI

* Equal contribution, ✉ Corresponding author

2026

Modality Alignment across Trees on Heterogeneous Hyperbolic Manifolds

Wu Wei , Xiaomeng Fan , Yuwei Wu^✉ , Zhi Gao , Pengxiang Li , Yunde Jia , and Mehrtash Harandi

International Conference on Learning Representations (ICLR), 2026

% ICLR 2026 conference paper. `correspondence` is a non-standard field naming the corresponding author.
@inproceedings{wei2025modality,
  author         = {Wei, Wu and Fan, Xiaomeng and Wu, Yuwei and Gao, Zhi and Li, Pengxiang and Jia, Yunde and Harandi, Mehrtash},
  title          = {Modality Alignment across Trees on Heterogeneous Hyperbolic Manifolds},
  booktitle      = {International Conference on Learning Representations (ICLR)},
  year           = {2026},
  correspondence = {Wu, Yuwei},
}

2025

Efficient Multi-turn RL for GUI Agents via Decoupled Training and Adaptive Data Curation

Pengxiang Li , Zechen Hu , Zirui Shang , Jingrong Wu , Yang Liu , Hui Liu , Zhi Gao^✉ , Chenrui Shi , Bofei Zhang , Zihao Zhang , Xiaochuan Shi , Zedong Yu , Yuwei Wu^✉ , Xinxiao Wu , Yunde Jia , Liuyu Xiang , Zhaofeng He , and Qing Li^✉

arXiv preprint arXiv:2509.23866, 2025

arXiv Bib Website

% arXiv preprint. `journal` is kept as-is for existing consumers; `eprint`/`archiveprefix` expose the
% arXiv identifier in machine-readable form. `correspondence` is a non-standard field listing the
% corresponding authors.
@article{li2025efficient,
  author         = {Li, Pengxiang and Hu, Zechen and Shang, Zirui and Wu, Jingrong and Liu, Yang and Liu, Hui and Gao, Zhi and Shi, Chenrui and Zhang, Bofei and Zhang, Zihao and Shi, Xiaochuan and Yu, Zedong and Wu, Yuwei and Wu, Xinxiao and Jia, Yunde and Xiang, Liuyu and He, Zhaofeng and Li, Qing},
  title          = {Efficient Multi-turn {RL} for {GUI} Agents via Decoupled Training and Adaptive Data Curation},
  journal        = {arXiv preprint arXiv:2509.23866},
  year           = {2025},
  eprint         = {2509.23866},
  archiveprefix  = {arXiv},
  correspondence = {Gao, Zhi and Li, Qing and Wu, Yuwei},
}

From Objects to Anywhere: A Holistic Benchmark for Multi-level Visual Grounding in 3D Scenes

Tianxu Wang , Zhuofan Zhang , Ziyu Zhu , Yue Fan , Jing Xiong , Pengxiang Li , Xiaojian Ma , and Qing Li^✉

Neural Information Processing Systems: Datasets and Benchmarks (NeurIPS D&B), 2025

arXiv Bib Website

% NeurIPS 2025 Datasets and Benchmarks track paper. `&` is escaped so the venue is valid LaTeX text.
% `correspondence` is a non-standard field naming the corresponding author.
@inproceedings{wang2025objects,
  author         = {Wang, Tianxu and Zhang, Zhuofan and Zhu, Ziyu and Fan, Yue and Xiong, Jing and Li, Pengxiang and Ma, Xiaojian and Li, Qing},
  title          = {From Objects to Anywhere: A Holistic Benchmark for Multi-level Visual Grounding in {3D} Scenes},
  booktitle      = {Neural Information Processing Systems: Datasets and Benchmarks (NeurIPS D\&B)},
  year           = {2025},
  correspondence = {Li, Qing},
}

Chain-of-Focus: Adaptive Visual Search and Zooming for Multimodal Reasoning via RL

Xintong Zhang* , Zhi Gao* , Bofei Zhang , Pengxiang Li , Xiaowen Zhang , Yang Liu , Tao Yuan , Yuwei Wu^✉ , Yunde Jia , Song-Chun Zhu , and Qing Li^✉

2025

arXiv Bib Website

% No venue is recorded for this work, so it is typed @misc (@article would require `journal`).
% `equalauthor` and `correspondence` are non-standard fields listing the equal-contribution and
% corresponding authors. TODO: add venue or eprint details once confirmed.
@misc{zhang2025chainoffocusadaptivevisualsearch,
  author         = {Zhang, Xintong and Gao, Zhi and Zhang, Bofei and Li, Pengxiang and Zhang, Xiaowen and Liu, Yang and Yuan, Tao and Wu, Yuwei and Jia, Yunde and Zhu, Song-Chun and Li, Qing},
  title          = {Chain-of-Focus: Adaptive Visual Search and Zooming for Multimodal Reasoning via {RL}},
  year           = {2025},
  equalauthor    = {Zhang, Xintong and Gao, Zhi},
  correspondence = {Wu, Yuwei and Li, Qing},
}

Iterative Tool Usage Exploration for Multimodal Agents via Step-wise Preference Tuning

Pengxiang Li* , Zhi Gao* , Bofei Zhang , Yapeng Mi , Xiaojian Ma , Chenrui Shi , Tao Yuan , Yuwei Wu^✉ , Yunde Jia , Song-Chun Zhu , and Qing Li^✉

Advances in Neural Information Processing Systems (NeurIPS), 2025

arXiv Bib Website

% NeurIPS 2025 conference paper. Stray whitespace removed from `title` and `equalauthor`.
% `equalauthor` and `correspondence` are non-standard fields listing the equal-contribution and
% corresponding authors.
@inproceedings{2025iterative,
  author         = {Li, Pengxiang and Gao, Zhi and Zhang, Bofei and Mi, Yapeng and Ma, Xiaojian and Shi, Chenrui and Yuan, Tao and Wu, Yuwei and Jia, Yunde and Zhu, Song-Chun and Li, Qing},
  title          = {Iterative Tool Usage Exploration for Multimodal Agents via Step-wise Preference Tuning},
  booktitle      = {Advances in Neural Information Processing Systems (NeurIPS)},
  year           = {2025},
  equalauthor    = {Li, Pengxiang and Gao, Zhi},
  correspondence = {Wu, Yuwei and Li, Qing},
}

Multi-modal Agent Tuning: Building a VLM-Driven Agent for Efficient Tool Usage (Spotlight)

Zhi Gao* , Bofei Zhang* , Pengxiang Li* , Xiaojian Ma , Tao Yuan , Yue Fan , Yuwei Wu^✉ , Yunde Jia , Song-Chun Zhu , and Qing Li^✉

International Conference on Learning Representations (ICLR), 2025

Bib Website

Spotlight

% ICLR 2025 conference paper. `equalauthor` and `correspondence` are non-standard fields listing the
% equal-contribution and corresponding authors.
@inproceedings{2025mat,
  author         = {Gao, Zhi and Zhang, Bofei and Li, Pengxiang and Ma, Xiaojian and Yuan, Tao and Fan, Yue and Wu, Yuwei and Jia, Yunde and Zhu, Song-Chun and Li, Qing},
  title          = {Multi-modal Agent Tuning: Building a {VLM}-Driven Agent for Efficient Tool Usage},
  booktitle      = {International Conference on Learning Representations (ICLR)},
  year           = {2025},
  equalauthor    = {Gao, Zhi and Zhang, Bofei and Li, Pengxiang},
  correspondence = {Wu, Yuwei and Li, Qing},
}

A Set-to-Set Distance Measure in Hyperbolic Space

Pengxiang Li , Wei Wu , Zhi Gao , Xiaomeng Fan , Peilin Yu , Yuwei Wu^✉ , Zhipeng Lu , Yunde Jia , and Mehrtash Harandi

arXiv preprint arXiv:2506.18529, 2025

arXiv Bib

% arXiv preprint. `journal` is kept as-is for existing consumers; `eprint`/`archiveprefix` expose the
% arXiv identifier in machine-readable form. `correspondence` is a non-standard field naming the
% corresponding author.
@article{li2025set,
  author         = {Li, Pengxiang and Wu, Wei and Gao, Zhi and Fan, Xiaomeng and Yu, Peilin and Wu, Yuwei and Lu, Zhipeng and Jia, Yunde and Harandi, Mehrtash},
  title          = {A Set-to-Set Distance Measure in Hyperbolic Space},
  journal        = {arXiv preprint arXiv:2506.18529},
  year           = {2025},
  eprint         = {2506.18529},
  archiveprefix  = {arXiv},
  correspondence = {Wu, Yuwei},
}

Geometry-aware Distance Measure for Diverse Hierarchical Structures in Hyperbolic Spaces

Pengxiang Li , Yuwei Wu^✉ , Zhi Gao , Xiaomeng Fan , Wei Wu , Zhipeng Lu , Yunde Jia , and Mehrtash Harandi

arXiv preprint arXiv:2506.18533, 2025

arXiv Bib

% arXiv preprint. `journal` is kept as-is for existing consumers; `eprint`/`archiveprefix` expose the
% arXiv identifier in machine-readable form. `correspondence` is a non-standard field naming the
% corresponding author.
@article{li2025geometry,
  author         = {Li, Pengxiang and Wu, Yuwei and Gao, Zhi and Fan, Xiaomeng and Wu, Wei and Lu, Zhipeng and Jia, Yunde and Harandi, Mehrtash},
  title          = {Geometry-aware Distance Measure for Diverse Hierarchical Structures in Hyperbolic Spaces},
  journal        = {arXiv preprint arXiv:2506.18533},
  year           = {2025},
  eprint         = {2506.18533},
  archiveprefix  = {arXiv},
  correspondence = {Wu, Yuwei},
}

2024

Task-oriented Sequential Grounding in 3D Scenes

Zhuofan Zhang , Ziyu Zhu , Pengxiang Li , Tengyu Liu , Xiaojian Ma , Yixin Chen , Baoxiong Jia , Siyuan Huang , and Qing Li^✉

arXiv preprint arXiv:2408.04034, 2024

arXiv Bib Website

% arXiv preprint. `correspondence` (non-standard field) must only name people from `author`;
% Qing Li is the sole corresponding author of this paper. `journal` is kept as-is for existing
% consumers; `eprint`/`archiveprefix` expose the arXiv identifier in machine-readable form.
@article{2024sg3d,
  author         = {Zhang, Zhuofan and Zhu, Ziyu and Li, Pengxiang and Liu, Tengyu and Ma, Xiaojian and Chen, Yixin and Jia, Baoxiong and Huang, Siyuan and Li, Qing},
  title          = {Task-oriented Sequential Grounding in {3D} Scenes},
  journal        = {arXiv preprint arXiv:2408.04034},
  year           = {2024},
  eprint         = {2408.04034},
  archiveprefix  = {arXiv},
  correspondence = {Li, Qing},
}

FIRE: A Dataset for Feedback Integration and Refinement Evaluation of Multimodal Models

Pengxiang Li* , Zhi Gao* , Bofei Zhang* , Tao Yuan , Yuwei Wu^✉ , Mehrtash Harandi , Yunde Jia , Song-Chun Zhu , and Qing Li^✉

Neural Information Processing Systems: Datasets and Benchmarks (NeurIPS D&B), 2024

arXiv Bib Website

% NeurIPS 2024 Datasets and Benchmarks track paper. `&` is escaped so the venue is valid LaTeX text.
% `equalauthor` and `correspondence` are non-standard fields listing the equal-contribution and
% corresponding authors.
@inproceedings{2024fire,
  author         = {Li, Pengxiang and Gao, Zhi and Zhang, Bofei and Yuan, Tao and Wu, Yuwei and Harandi, Mehrtash and Jia, Yunde and Zhu, Song-Chun and Li, Qing},
  title          = {{FIRE}: A Dataset for Feedback Integration and Refinement Evaluation of Multimodal Models},
  booktitle      = {Neural Information Processing Systems: Datasets and Benchmarks (NeurIPS D\&B)},
  year           = {2024},
  equalauthor    = {Li, Pengxiang and Gao, Zhi and Zhang, Bofei},
  correspondence = {Wu, Yuwei and Li, Qing},
}

Inter-Scale Similarity Guided Cost Aggregation for Stereo Matching

Pengxiang Li , Chengtang Yao , Yuwei Wu^✉ , and Yunde Jia

IEEE Transactions on Circuits and Systems for Video Technology (TCSVT), 2024

Bib

% Journal article (IEEE TCSVT, 2024). `paper` holds the IEEE Xplore link and `correspondence` names
% the corresponding author; both are non-standard fields.
@article{2024issga,
  author         = {Li, Pengxiang and Yao, Chengtang and Wu, Yuwei and Jia, Yunde},
  title          = {Inter-Scale Similarity Guided Cost Aggregation for Stereo Matching},
  journal        = {IEEE Transactions on Circuits and Systems for Video Technology (TCSVT)},
  year           = {2024},
  paper          = {https://ieeexplore.ieee.org/document/10663688},
  correspondence = {Wu, Yuwei}
}

2023

Hyperbolic Learning: Theory and Applications

Pengxiang Li , Peilin Yu , Yangkai Xue , Yuwei Wu , and Zhi Gao

2023

Bib Slides

2021

A Decomposition Model for Stereo Matching

Chengtang Yao , Yunde Jia , Huijun Di , Pengxiang Li , and Yuwei Wu^✉

The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021

arXiv Bib

% CVPR 2021 conference paper. The title is stored in Title Case so citation styles can downcase it
% when needed. `correspondence` is a non-standard field naming the corresponding author.
@inproceedings{yao2021decomposition,
  author         = {Yao, Chengtang and Jia, Yunde and Di, Huijun and Li, Pengxiang and Wu, Yuwei},
  title          = {A Decomposition Model for Stereo Matching},
  booktitle      = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year           = {2021},
  correspondence = {Wu, Yuwei},
}