Abstract
Chinese Word Segmentation (CWS) is essential for a broad spectrum of NLP tasks, but the high inference cost of large pre-trained models limits their scalability. DEEP-CWS distills pre-trained transformer models into lightweight CNNs and combines pruning, an early-exit mechanism, and ONNX optimization, achieving an inference speedup of over 100x without compromising segmentation quality.
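The early-exit mechanism is what lets inference cost scale with input difficulty: easy sentences leave the network at a shallow layer, while ambiguous ones pass through all blocks. Below is a minimal sketch of confidence-based early exit for a character-level CWS tagger. The module names, the BMES-style label scheme, and the confidence threshold are illustrative assumptions, not the paper's actual implementation.

import torch
import torch.nn.functional as F

class EarlyExitCNN(torch.nn.Module):
    # Hypothetical illustration: each CNN block has its own segmentation
    # head, and inference stops at the first head whose prediction
    # confidence clears a threshold. Sizes are placeholders.
    def __init__(self, vocab_size=21128, dim=256, num_labels=4, num_blocks=3):
        super().__init__()
        self.embed = torch.nn.Embedding(vocab_size, dim)
        self.blocks = torch.nn.ModuleList(
            [torch.nn.Conv1d(dim, dim, kernel_size=3, padding=1)
             for _ in range(num_blocks)]
        )
        # One lightweight classifier ("exit head") per block.
        self.heads = torch.nn.ModuleList(
            [torch.nn.Linear(dim, num_labels) for _ in range(num_blocks)]
        )

    @torch.no_grad()
    def infer(self, token_ids, threshold=0.95):
        # token_ids: (batch, seq_len); returns per-character BMES-style labels.
        h = self.embed(token_ids).transpose(1, 2)   # (batch, dim, seq_len)
        for block, head in zip(self.blocks, self.heads):
            h = torch.relu(block(h))
            logits = head(h.transpose(1, 2))        # (batch, seq_len, labels)
            probs = F.softmax(logits, dim=-1)
            # Exit early if every character's top label is confident enough.
            if probs.max(dim=-1).values.min() >= threshold:
                return logits.argmax(dim=-1)
        return logits.argmax(dim=-1)                # fall through to last head

In a setup like this, the threshold trades accuracy for speed: a held-out calibration set can be used to pick the largest threshold that keeps segmentation F1 within tolerance.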
Citation
@article{xu2025deepcws,
  title={DEEP-CWS: Distilling Efficient pre-trained models with Early exit and Pruning for scalable Chinese Word Segmentation},
  author={Xu, Shiting},
  journal={Information Sciences},
  volume={719},
  pages={122470},
  year={2025},
  publisher={Elsevier}
}