@inproceedings{feca4be0785248cf972426baa6b7f0b7,
title = "Efficient Visual Tracking Using Local Information Patch Attention Free Transformer",
abstract = "The state-of-the-art (SOTA) transformer tracker TransT achieves high tracking accuracy. Nevertheless, the time and space complexity of its attention operation is quadratic to the spatial dimension of feature vectors. Thus it is difficult to deploy TransT on resource constrained devices. This paper proposes Local Information Patch Attention Free Transformer (LIP-AFT) based Local Information Patch Self-Attention Free Transformer (LIPS-AFT) and Local Information Patch Cross-Attention Free Transformer (LIPC-AFT) for linear time and space complexity and high accuracy. LIP-AFT benefits from global connectivity between patches while it focuses on na{\"i}ve strong local attention patterns. The proposed tracker outperforms both SOTA trackers and TransT with various SOTA attention algorithms on accuracy and complexity. Moreover, its inference phase runs at 41 fps on RTX 2070S GPUs.",
keywords = "attention, space and time complexity, tracking, transformer",
author = "Wang, {Pin Feng} and Tang, {Chih Wei}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 IEEE International Conference on Consumer Electronics - Taiwan, ICCE-Taiwan 2022 ; Conference date: 06-07-2022 Through 08-07-2022",
year = "2022",
doi = "10.1109/ICCE-Taiwan55306.2022.9869107",
language = "???core.languages.en_GB???",
series = "Proceedings - 2022 IEEE International Conference on Consumer Electronics - Taiwan, ICCE-Taiwan 2022",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "447--448",
booktitle = "Proceedings - 2022 IEEE International Conference on Consumer Electronics - Taiwan, ICCE-Taiwan 2022",
}