% Conference paper: speaker localization combining YOLOX-tiny video detection
% with TDOA sound-source direction (ICCE-Taiwan 2022, pp. 161--162).
% Maintenance notes:
%   - `language` previously held an unresolved export placeholder
%     ("???core.languages.en_GB???"); replaced with a real language name.
%   - `series` was an exact duplicate of `booktitle` and has been dropped so
%     styles that print both do not repeat the proceedings title.
%   - Acronyms/proper names are brace-protected so sentence-casing styles
%     keep their capitalisation.
%   - Given names are braced as exported ("{Chen Chiung}") so each is treated
%     as a single name token; left untouched on purpose.
@inproceedings{32d46a073982497b83ff15a5ec4acdc7,
  author    = {Hsieh, {Chen Chiung} and Lu, {Meng Ju} and Zheng, {You Zhan} and Tseng, {Hsiao Ting}},
  title     = {Automatic Speaker Localization in Conference Based on {Yolox-Tiny} and {TDOA}},
  booktitle = {Proceedings - 2022 {IEEE} International Conference on Consumer Electronics - {Taiwan}, {ICCE-Taiwan} 2022},
  pages     = {161--162},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2022},
  doi       = {10.1109/ICCE-Taiwan55306.2022.9869193},
  language  = {english},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 IEEE International Conference on Consumer Electronics - Taiwan, ICCE-Taiwan 2022 ; Conference date: 06-07-2022 Through 08-07-2022},
  abstract  = {In remote conferences, it is often necessary to adjust the angle of the camera so that only the speaker's part remains in the picture. However, most of the current methods of adjusting the camera are done manually. This study uses the YOLOX-tiny neural network to detect the mouth movements and upper body positions of everyone in the picture captured by the camera in time, and cooperates with TDOA to detect the direction of the sound source to enhance the accuracy of the detection results. The recall of YOLOX-tiny is 93%, the recall of TDOA is 88%, the recall of only using video for speaker positioning is 77%, and the recall of integrating video and sound is about 80.3%, which can quickly and effectively retain the speaker's picture.},
}