% Conference paper (ICCE 2018). The citation key is kept exactly as it was so
% existing \cite commands keep resolving.
% Cleanup relative to the raw export:
%   - names normalised to "Last, First" form;
%   - literal percent signs in the abstract escaped;
%   - the unresolved language placeholder (en_GB) replaced with a real value;
%   - the `series` field, which merely repeated `booktitle`, dropped;
%   - the export-generated `note` split into `copyright` and `eventdate`
%     (original dates "12-01-2018 Through 14-01-2018" read as DD-MM-YYYY,
%     i.e. 12--14 January 2018 -- NOTE(review): confirm against the proceedings).
@inproceedings{cf57da317416412595bf63caf90c7946,
  author    = {Dang, An and Vu, Toan H. and Wang, Jia Ching},
  title     = {Acoustic scene classification using convolutional neural networks and multi-scale multi-feature extraction},
  booktitle = {2018 {IEEE} International Conference on Consumer Electronics, {ICCE} 2018},
  editor    = {Mohanty, Saraju P. and Corcoran, Peter and Li, Hai and Sengupta, Anirban and Lee, Jong-Hyouk},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {1--4},
  year      = {2018},
  month     = mar,
  day       = {26},
  eventdate = {2018-01-12/2018-01-14},
  doi       = {10.1109/ICCE.2018.8326315},
  language  = {english},
  copyright = {{\textcopyright} 2018 IEEE},
  abstract  = {Audio scenes are often composed of a variety of sound events from different sources. Their content exhibits wide variations in both frequency and time domain. Convolutional neural networks (CNNs) provide an effective way to extract spatial information of multidimensional data such as image, audio, and video; they have the ability to learn hierarchical representation from time-frequency features of audio signals. In this paper, we develop a convolutional neural network and employ a multi-scale multi-feature extraction methods for acoustic scene classification. We conduct experiments on the TUT Acoustic Scenes 2016 dataset. Experimental results show that the use of multi-scale multi-feature extraction methods improves significantly the performance of the system. Our proposed approach obtains a high accuracy of 85.9\% that outperforms the baseline approach by a large margin of 8.7\%.},
}