@article{ca94977d4e6d4ca1ac2bfd8064489fd8,
  author    = {Parker, James E. K. and Dockray, Sean},
  title     = {{\textquoteleft}All possible sounds{\textquoteright}: speech, music, and the emergence of machine listening},
  journal   = {Sound Studies},
  year      = {2023},
  volume    = {9},
  number    = {2},
  pages     = {253--281},
  doi       = {10.1080/20551940.2023.2195057},
  issn      = {2055-1959},
  publisher = {Taylor \& Francis},
  language  = {English},
  keywords  = {artificial intelligence, big data, computer music, machine hearing, machine learning, Machine listening, speech recognition},
  abstract  = {{\textquotedblleft}Machine listening{\textquotedblright} is one common term for a fast-growing interdisciplinary field of science and engineering that {\textquotedblleft}uses signal processing and machine learning to extract useful information from sound{\textquotedblright}. This article contributes to the critical literature on machine listening by presenting some of its history as a field. From the 1940s to the 1990s, work on artificial intelligence and audio developed along two streams. There was work on speech recognition/understanding, and work in computer music. In the early 1990s, another stream began to emerge. At institutions such as MIT Media Lab and Stanford{\textquoteright}s CCRMA, researchers started turning towards {\textquotedblleft}more fundamental problems of audition{\textquotedblright}. Propelled by work being done by and alongside musicians, speech and music would increasingly be understood by computer scientists as particular sounds within a broader {\textquotedblleft}auditory scene{\textquotedblright}. Researchers began to develop machine listening systems for a more diverse range of sounds and classification tasks: often in the service of speech recognition, but also increasingly for their own sake. The soundscape itself was becoming an object of computational concern. Today, the ambition is {\textquotedblleft}to cover all possible sounds{\textquotedblright}. That is the aspiration with which we must now contend politically, and which this article sets out to historicise and understand.},
  note      = {Funding Information: This article contributes to the growing critical literature on machine listening by presenting some of its history: as a field, or constellation of fields, but also as a commercial enterprise, sociotechnical imaginary (Jasanoff and Kim ), desire, or network. As we will see, from the 1940s to roughly the end of the 1980s, work on artificial intelligence and audio developed along two major streams. There was work on speech recognition/understanding, which was mostly funded by and arranged according to the interests of Bell, the US Department of Defence{\textquoteright}s Advanced Research Project Agency (originally ARPA, later DARPA), and IBM. And there was work in computer music: on the aesthetic possibilities of composing and improvising with {\textquotedblleft}intelligent{\textquotedblright} listening systems, as well as certain applications with more commercial appeal. It was in relation to such systems that the term {\textquotedblleft}machine listening{\textquotedblright} was originally, or most durably, coined. Funding Information: Such a cure required financing, of course, and by 1987 the Media Lab had fostered an annual budget of around \$7 million a year, \$6 million of which came from {\textquotedblleft}almost one hundred sponsors {\ldots} each of whom had paid a minimum of two hundred thousand dollars to join{\textquotedblright} (Turner , 178). These sponsors included IBM and DARPA, along with Apple, BBN, General Motors, Sony, Nippon Telephone and Telegraph, major newspaper and television companies, the National Science Foundation, and many others. The Music and Cognition group was sponsored by the System Development Foundation, the not-for-profit arm of a company that began life making software for the US Air Force, and has since been subsumed into L3Harris. Rowe{\textquoteright}s doctoral work on Cypher was funded in part by Yamaha. Publisher Copyright: {\textcopyright} 2023 Informa UK Limited, trading as Taylor \& Francis Group.},
}