@inproceedings{f16268e31865463a8eff80d7a4143299,
title = "Dynamic tracking attention model for action recognition",
abstract = "This paper proposes a dynamic tracking attention model (DTAM), which comprises a motion attention mechanism, a convolutional neural network (CNN), and long short-term memory (LSTM), to recognize human actions in video sequences. In the motion attention mechanism, local dynamic tracking follows moving objects in the feature domain, while global dynamic tracking corrects the motion in the spectral domain. The CNN performs feature extraction, and the LSTM models the sequential action information extracted from the video. The DTAM effectively captures information between consecutive frames and achieves a higher recognition rate than the plain CNN-LSTM. Combined with the visual attention model, the proposed algorithm achieves a recognition rate 3.6% and 4.5% higher than that of the CNN-LSTM with and without the visual attention model, respectively.",
keywords = "Action recognition, attention model, convolutional neural network, deep learning, long short-term memory (LSTM)",
author = "Wang, {Chien Yao} and Chiang, {Chin Chin} and Ding, {Jian Jiun} and Wang, {Jia Ching}",
note = "Publisher Copyright: {\textcopyright} 2017 IEEE; 2017 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2017; Conference date: 05-03-2017 through 09-03-2017",
year = "2017",
month = jun,
day = "16",
doi = "10.1109/ICASSP.2017.7952430",
language = "English",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "1617--1621",
booktitle = "2017 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2017 - Proceedings",
}
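
The CNN-plus-LSTM pipeline with attention-weighted pooling described in the abstract can be illustrated with a minimal sketch. This is not the authors' DTAM (the paper's local/global dynamic tracking in the feature and spectral domains is not reproduced here); it is a generic CNN-LSTM action recognizer with soft spatial attention, assuming PyTorch, with illustrative layer sizes and class count. BibTeX ignores text outside entries, so this note and sketch act as a comment in the .bib file.

import torch
import torch.nn as nn

class AttentionCNNLSTM(nn.Module):
    """Generic CNN-LSTM with soft spatial attention (illustrative, not the paper's DTAM)."""
    def __init__(self, num_classes=10, feat_dim=64, hidden_dim=128):
        super().__init__()
        # Per-frame CNN feature extractor (stand-in for the paper's CNN).
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, feat_dim, 3, stride=2, padding=1), nn.ReLU(),
        )
        # 1x1 conv scores each spatial location; softmax turns scores into attention weights.
        self.attn = nn.Conv2d(feat_dim, 1, 1)
        # LSTM models the sequence of per-frame feature vectors.
        self.lstm = nn.LSTM(feat_dim, hidden_dim, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, video):                          # video: (B, T, 3, H, W)
        B, T = video.shape[:2]
        feats = self.cnn(video.flatten(0, 1))          # (B*T, C, h, w)
        weights = torch.softmax(self.attn(feats).flatten(1), dim=1)  # (B*T, h*w)
        feats = feats.flatten(2)                       # (B*T, C, h*w)
        pooled = (feats * weights.unsqueeze(1)).sum(-1)  # attention-weighted spatial pooling
        out, _ = self.lstm(pooled.view(B, T, -1))      # temporal modelling over frames
        return self.classifier(out[:, -1])             # classify from the last hidden state

# Usage: a batch of 2 clips, 8 frames each, 64x64 RGB.
logits = AttentionCNNLSTM(num_classes=10)(torch.randn(2, 8, 3, 64, 64))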