<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0233968</article-id>
<article-id pub-id-type="publisher-id">PONE-D-19-28712</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognition</subject><subj-group><subject>Memory</subject><subj-group><subject>Visual object recognition</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Memory</subject><subj-group><subject>Visual object recognition</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Visual object recognition</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Visual object recognition</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Visual object recognition</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Population groupings</subject><subj-group><subject>Age groups</subject><subj-group><subject>Children</subject><subj-group><subject>Infants</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Population groupings</subject><subj-group><subject>Families</subject><subj-group><subject>Children</subject><subj-group><subject>Infants</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Semantics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Mammals</subject><subj-group><subject>Dogs</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Algebra</subject><subj-group><subject>Polynomials</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Physiology</subject><subj-group><subject>Sensory physiology</subject><subj-group><subject>Visual system</subject><subj-group><subject>Eye movements</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Physiology</subject><subj-group><subject>Sensory physiology</subject><subj-group><subject>Visual system</subject><subj-group><subject>Eye movements</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Sensory systems</subject><subj-group><subject>Visual system</subject><subj-group><subject>Eye movements</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Infants’ conceptual representations of meaningful verbal and nonverbal sounds</article-title>
<alt-title alt-title-type="running-head">Activation of conceptual representations in adults and infants</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-5951-8320</contrib-id>
<name name-style="western">
<surname>Sirri</surname>
<given-names>Louah</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-2294-1138</contrib-id>
<name name-style="western">
<surname>Guerra</surname>
<given-names>Ernesto</given-names>
</name>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Linnert</surname>
<given-names>Szilvia</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Smith</surname>
<given-names>Eleanor S.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Reid</surname>
<given-names>Vincent</given-names>
</name>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Parise</surname>
<given-names>Eugenio</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Education, Manchester Metropolitan University, Manchester, United Kingdom</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of Psychology, Lancaster University, Lancaster, United Kingdom</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Institute of Education and Center for Advanced Research in Education, Universidad de Chile, Santiago, Chile</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Department of Experimental Psychology, University of Cambridge, Cambridge, United Kingdom</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>School of Psychology, University of Waikato, Waikato, New Zealand</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Nomikou</surname>
<given-names>Iris</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Portsmouth, UNITED KINGDOM</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>In accordance with ethical obligations, we declare that we have no conflict of interest with respect to our authorship or the publication of this article.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">l.sirri@mmu.ac.uk</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>8</day>
<month>6</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>15</volume>
<issue>6</issue>
<elocation-id>e0233968</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>10</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>5</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Sirri et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="pone.0233968.pdf"/>
<abstract>
<p>In adults, words are more effective than sounds at activating conceptual representations. We aimed to replicate these findings and extend them to infants. In a series of experiments using an eye tracker object recognition task, suitable for both adults and infants, participants heard either a word (e.g. cow) or an associated sound (e.g. mooing) followed by an image illustrating a target (e.g. cow) and a distracter (e.g. telephone). The results showed that adults reacted faster when the visual object matched the auditory stimulus and even faster in the word relative to the associated sound condition. Infants, however, did not show a similar pattern of eye-movements: only eighteen-month-olds, but not 9- or 12-month-olds, were equally fast at recognizing the target object in both conditions. Looking times, however, were longer for associated sounds, suggesting that processing sounds elicits greater allocation of attention. Our findings suggest that the advantage of words over associated sounds in activating conceptual representations emerges at a later stage during language development.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100000269</institution-id>
<institution>Economic and Social Research Council</institution>
</institution-wrap>
</funding-source>
<award-id>ES/L008955/1</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Reid</surname>
<given-names>Vincent</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution>ANID/PIA/Basal Funds for Centers of Excellence</institution>
</funding-source>
<award-id>FB0003</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-2294-1138</contrib-id>
<name name-style="western">
<surname>Guerra</surname>
<given-names>Ernesto</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was funded by the: - Economic and Social Research Council (ES/L008955/1) International Centre for Language and Communication Development (LuCiD) - LS, SL, VR, EP. - Basal Funds for Centers of Excellence (Project FB0003 - from the CONICYT Associative Research Program) - EG.</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="4"/>
<page-count count="18"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All data and analysis scripts are available online in <ext-link ext-link-type="uri" xlink:href="https://osf.io/ze429/" xlink:type="simple">https://osf.io/ze429/</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Two key features of human cognition are language and conceptual categorization [<xref ref-type="bibr" rid="pone.0233968.ref001">1</xref>]. Developmental studies have shown that during the first years of life, verbal (spoken words)—as opposed to non-verbal meaningless—sounds facilitate conceptual categorization [<xref ref-type="bibr" rid="pone.0233968.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>]. Conceptual categorization implies constructing an abstract mental representation of a category by grouping different exemplars or objects into clusters based on shared features, such as perceptual, functional, taxonomic or thematic [<xref ref-type="bibr" rid="pone.0233968.ref004">4</xref>]. Consequently, upon hearing the label of an object (e.g. ‘dog’), all category related objects (e.g. exemplars of dogs, and/or associated animals such as cat or sheep) are activated, which, in turn, results in faster visual object recognition [<xref ref-type="bibr" rid="pone.0233968.ref005">5</xref>, <xref ref-type="bibr" rid="pone.0233968.ref006">6</xref>]. Meaningful auditory information, however, originates not only from spoken words (e.g. “dog”), but also from environmental associated sounds (e.g. bark). Although words and associated sounds are both informative and semantically related to their referents (e.g. dog), they fundamentally differ from one another [<xref ref-type="bibr" rid="pone.0233968.ref007">7</xref>]. While associated sounds are based on causal relationships being strictly related to their generating source, words are arbitrarily linked to their referents, have phonological forms that are reproduced by a person, carry an informative intent, and are used to label objects or name a category to which these objects belong [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>, <xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>].</p>
<p>Though both words and associated sounds carry semantic knowledge, research on language processing and object recognition has focused mostly on spoken words, and much less on associated sounds. It remains unclear whether during language development, infants process words and associated sounds similarly, or whether the former has an advantage over the latter. Uncovering what effects words have on developing conceptual representations can contribute to a better understanding of the relation between language and cognition. An important question is whether words and associated sounds activate conceptual representations differently, and consequently, whether recognizing visual information can be modulated by a preceding auditory cue (e.g. word <italic>versus</italic> associated sounds). In adults, a few studies that investigated the semantic organization of words and associated sounds have shown that compared to words, associated sounds enhanced visual object detection (judging whether an object was present on the visual display) [<xref ref-type="bibr" rid="pone.0233968.ref009">9</xref>], especially when the stimulus onset asynchrony (SOA; time from the beginning of the auditory stimulus to the appearance of the image) was short (e.g. 350 ms) [<xref ref-type="bibr" rid="pone.0233968.ref010">10</xref>]. Chen &amp; Spence [<xref ref-type="bibr" rid="pone.0233968.ref010">10</xref>] suggested that words access their meanings via lexical representations, whereas associated sounds access their meaning faster and more directly. The findings of a more recent study using the visual world paradigm (VWP) revealed that participants looked faster at the target (e.g. puppy) and longer at its competitor (e.g. bone) compared to two other distractors (e.g. candle and daffodil), suggesting similar graded effects for both associated sounds and words [<xref ref-type="bibr" rid="pone.0233968.ref011">11</xref>]. In addition, the results showed that this graded pattern was more pronounced in the associated sounds condition [<xref ref-type="bibr" rid="pone.0233968.ref011">11</xref>].</p>
<p>On the other hand, when investigating the activation of conceptual representations during object recognition, Lupyan and Thompson-Schill [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>] found that words (e.g. ‘dog’) activate conceptual representations more effectively than associated sounds do (e.g. dog bark). In a series of visual identification tasks, adults systematically reacted faster to a target image when primed by a word compared to an associated sound. In another VWP study [<xref ref-type="bibr" rid="pone.0233968.ref006">6</xref>], the results also revealed that sound primes led participants to look more at one category exemplar (e.g. robin) compared to the three others (e.g. three different exemplars of <italic>bird</italic>), most likely the source of the sound, whereas in response to word primes, participants looked equally at the four images. These findings suggest that while words are somehow detached from the perceptual information, sounds are tightly linked to the perceptual details of the generating source [<xref ref-type="bibr" rid="pone.0233968.ref006">6</xref>]. Furthermore, although both words and sounds yielded a similar N400 response (an event-related brain potential (ERP) known to reflect semantic processing; [<xref ref-type="bibr" rid="pone.0233968.ref012">12</xref>]), words elicited an earlier and larger P1 ERP response, which is related to perceptual categorization [<xref ref-type="bibr" rid="pone.0233968.ref005">5</xref>].</p>
<p>Altogether, Lupyan and colleagues’ work contrasts with previous hypotheses that words and sounds are processed similarly [<xref ref-type="bibr" rid="pone.0233968.ref013">13</xref>], or that sounds access their meanings faster than words [<xref ref-type="bibr" rid="pone.0233968.ref010">10</xref>]. They demonstrate that although both words and sounds activate conceptual representations, the representations activated by words are enhanced, facilitating the match to the category exemplars. Unlike associated sounds, labels are abstract symbols “standing for” and referring to objects. They are used by humans to communicate and convey abstract information that is not strictly linked to the ‘here and now’ of an object, whereas associated sounds are mere features of objects [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>, <xref ref-type="bibr" rid="pone.0233968.ref014">14</xref>]. According to Edmiston and Lupyan [<xref ref-type="bibr" rid="pone.0233968.ref006">6</xref>], sounds act as “motivated” cues, and are idiosyncratically linked to their referents, whereas words are decontextualized “unmotivated” cues, and activate conceptual categories abstractly. By taking this stance, we were interested in determining whether differences in processing words and associated sounds occur early in language development. This will contribute to a better understanding of how we form categories, and to the theoretical account stating that ‘words refer to’ [<xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>], rather than being merely ‘associated with’, objects (see [<xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>] for a review). According to this account, words enable more abstract conceptual representations and are not directly linked to the context or event, therefore enhancing object recognition.</p>
<p>To the best of our knowledge, there are only two developmental studies that investigated whether young children process known words and sounds similarly [<xref ref-type="bibr" rid="pone.0233968.ref015">15</xref>, <xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>]. In Cummings et al.’s [<xref ref-type="bibr" rid="pone.0233968.ref015">15</xref>] study, 15-, 20-, and 25-month-old toddlers participated in a looking-while-listening task, during which they viewed pairs of images (e.g. dog–piano) and heard either associated sounds (e.g. dog barking or piano playing) or words. The results showed that across ages, infants were equally fast at recognizing the target object in both word and associated sound conditions. Faster object recognition preceded by words was correlated with infants’ productive skills. In their recent ERP study, Hendrickson et al. [<xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>] investigated the semantic organization of words and associated sounds in the developing brain of 20-month-olds, including three control conditions. Toddlers viewed the target images (e.g. dog) while hearing matching words (e.g. “dog”) or associated sounds (e.g. barking), within-category violations (“cat” or meowing), and between-category violations (e.g. “pen” or scribbling). The ERP results showed that 20-month-olds exhibit different patterns of brain activation in response to words and associated sounds. While between-category violations (e.g. dog–“pen” or scribbling) elicited similar ERP responses across words and associated sounds, within-category violations (e.g. dog–“cat”) for words elicited earlier and greater negativity than for associated sounds (e.g. dog–meowing), suggesting that young children exhibit greater sensitivity to the relationship between words than that of associated sounds in the semantic system.</p>
<p>The current study aimed at extending these findings, exploring whether words have an advantage over associated sounds in activating conceptual representations in infancy as they have in adulthood. If early in development, infants, like adults, interpret words as abstract, “unmotivated” and arbitrary symbols, and sounds as “motivated” and idiosyncratic cues, the visual object recognition should be modulated by the preceding auditory information. Thus, as in Lupyan and Thompson-Schill’s [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>] study, activation of conceptual representations would be more efficient when target objects are cued by words than by associated sounds. Alternatively, if words and sounds activate conceptual representations similarly, object recognition will not be modulated by its preceding cue. Our first goal was therefore to replicate the study of Lupyan and Thompson-Schill [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>] with adults, by using a similar behavioural visual identification task (Experiment 1A). We then conducted an object recognition task with adults (Experiment 1B), measuring eye movements. This eye tracking task was also suitable for infants at 9- (Experiment 2A), 12- (Experiment 2B), and 18 months of age (Experiment 2C). We predicted that adults will react faster to the target image (e.g. cow) when preceded by a spoken word (e.g. “cow”) compared to meaningful associated sounds (e.g. cow mooing). This should also be reflected by faster and longer looking behaviour to the target image (e.g. cow) compared to a distractor (e.g. train). Similarly, we predicted that infants will look faster and longer at the target object when preceded by word compared to associated sound primes, indicating that the advantage of words emerges early during language development.</p>
</sec>
<sec id="sec002">
<title>Experiment 1A</title>
<sec id="sec003">
<title>Methods</title>
<sec id="sec004">
<title>Participants</title>
<p>Thirty healthy adults (20 females; age range: 23;2 y to 41;4 y) from the Department of Psychology (<italic>n</italic> = 29) and Computer Science (<italic>n</italic> = 1) volunteered in the experiment. All participants were right-handed. An additional two left-handed participants were excluded from the final sample. Participants were informed about the aim of the study and gave written consent before their participation. The study was approved by the University Research Ethics Committee and conducted in conformity with the declaration of Helsinki.</p>
</sec>
<sec id="sec005">
<title>Stimuli</title>
<p>We selected six objects that have basic level nouns and characteristic sounds (car, cow, dog, sheep, telephone, train), suitable for both adults and infants experiments. The auditory stimuli included spoken words and their associated sounds. A native female speaker recorded the words uttered in neutral and adult-directed speech (ADS); and the associated sounds were selected from the internet. Audio files were digitized and edited with Adobe Audition (CS 5.5), at 16-bit resolution and 44 kHz sampling rate and had mean length of 601 ms for words and 883 ms for sounds. The visual stimuli were selected online and included images (see <xref ref-type="supplementary-material" rid="pone.0233968.s001">S1 Data</xref>) of the six objects, and presented on a 19” CRT monitor.</p>
</sec>
<sec id="sec006">
<title>Procedure</title>
<p>The procedure matched closely that of the study by Lupyan and Thompson-Schill [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>]. Participants sat in front of the monitor and were given a gamepad to respond by button-press. On each trial, participants heard either a word or an associated sound while fixating a central black fixation cross on a grey screen, followed by an image. The inter stimulus interval (ISI) from the offset of the auditory stimulus to the onset of the image was fixed at 1000 ms. The images matched the auditory stimulus 50% of the time, and the order of trials was randomised. Each image remained on the screen for 2 seconds, and participants were instructed to respond as fast as possible by pressing a <italic>match</italic> or <italic>mismatch</italic> button on a gamepad. The side (left and right buttons) of the correct response was counterbalanced across participants. After every response, participants received auditory feedback, indicating whether their response was correct (a beep) or not (a buzz). As the image disappeared, another trial began. Each of the six objects was preceded by a word or a sound, match and mismatch, and repeated four times, yielding 96 verification trials. The experiment lasted approximately five minutes.</p>
</sec>
<sec id="sec007">
<title>Data analysis</title>
<p>Before the analysis, all incorrect responses were removed. As in Lupyan &amp; Thompson-Schill [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>], reaction times (RTs) below 200 ms and above 1500 ms were also excluded as well as any trial with no answer (less than 2% of the data, and less than 4% before excluding incorrect trials). The number of trials included was 22 (<italic>SD</italic> = 1.4) for sound-match and 22 (<italic>SD</italic> = 1.8) for sound-mismatch, and 22 (<italic>SD</italic> = 1.6) for word-match and 23 (<italic>SD</italic> = 1.1) for word-mismatch. RTs and accuracy were analysed with a within-subject <italic>2</italic> (stimulus type: word or sound) x <italic>2</italic> (congruency: match or mismatch) analysis of variance (ANOVA). All data and analysis scripts are available online in <ext-link ext-link-type="uri" xlink:href="https://osf.io/ze429/" xlink:type="simple">https://osf.io/ze429/</ext-link>.</p>
</sec>
</sec>
<sec id="sec008">
<title>Results and discussion</title>
<p>The results showed a marginal main effect of auditory stimulus (<italic>F</italic>(1,29) = 4.11; <italic>p</italic> = 0.051; <italic>η<sup>2</sup>g</italic> = 0.004) and a significant main effect of congruency (<italic>F</italic>(1,29) = 52.35; <italic>p</italic>&lt;0.001; <italic>η<sup>2</sup>g</italic> = 0.08), indicating that for adults, both words and associated sounds activate conceptual representations with greater sensitivity to congruency. A paired-sample <italic>t</italic>-test revealed faster responses (<italic>t</italic>(59) = 2.13; <italic>p</italic>&lt;0.05; <italic>Cohen’s d =</italic> 0.27) to words (572 ms; <italic>SD</italic> = 0.11) relative to associated sounds (589 ms; <italic>SD</italic> = 0.13), especially in the congruent trials (<italic>t</italic>(29) = 2.18; <italic>p</italic>&lt;0.05; <italic>Cohen’s d =</italic> 0.39). This advantage of words over sounds is similar to that reported by Lupyan and Thompson-Schill [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>] (cf. <xref ref-type="fig" rid="pone.0233968.g001">Fig 1</xref>). Accuracy analysis revealed a significant effect of congruency (<italic>F</italic>(1,29) = 4.93; <italic>p</italic>&lt;0.05; <italic>η<sup>2</sup>g</italic> = 0.04) and an interaction between stimulus type and congruency (<italic>F</italic>(1,29) = 4.63; <italic>p</italic>&lt;0.05; <italic>η<sup>2</sup>g</italic> = 0.02), but no main effect of stimulus type (<italic>F</italic>(1,29)&lt;1). A paired-sample <italic>t</italic>-test showed that participants were equally accurate across words and associated sound conditions (97% sound-match; 94% sound-mismatch; 96% word-match; and 95% word-mismatch, cf. <xref ref-type="fig" rid="pone.0233968.g002">Fig 2</xref>), but more accurate in the sound-match compared to the sound-mismatch condition (<italic>t</italic>(29) = 3.37; <italic>p</italic>&lt;0.01; <italic>Cohen’s d</italic> = 0.62).</p>
<fig id="pone.0233968.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Mean response times (aggregated by participants) as a function of stimulus type and congruency.</title>
<p>Error bars without caps represent standard error of the means (SE).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g001.tif" xlink:type="simple"/>
</fig>
<fig id="pone.0233968.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Mean response accuracy (aggregated by participants) as a function of stimulus type and congruency.</title>
<p>Error bars without caps represent standard error of the means (SE).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g002.tif" xlink:type="simple"/>
</fig>
</sec>
</sec>
<sec id="sec009">
<title>Experiment 1B</title>
<sec id="sec010">
<title>Participants</title>
<p>Twenty healthy adults (18 female, age range: 24;7 y to 42;7 y) from the Department of Psychology took part in the study, and received £3.50 for their participation.</p>
</sec>
<sec id="sec011">
<title>Stimuli</title>
<p>The auditory and the visual stimuli were the same used in Experiment 1. The visual stimuli were arranged into 12 images (709 x 425 pixels) of paired objects, one on the left and one on the right side of the screen. Each pair included a target (e.g. dog) and a distractor (e.g. car) from two different semantic categories, presented on a 1920 x 1080 computer screen.</p>
</sec>
<sec id="sec012">
<title>Procedure</title>
<p>Participants sat at 50–70 cm in front of the computer screen. A Tobii X120 eyetracker (Tobii Pro, Stockholm, Sweden) located beneath the screen recorded their gaze at 60 Hz sampling rate. The eye tracker was first calibrated, using a five-point calibration (shrinking blue and red attention grabber) procedure delivered through Matlab<sup>®</sup> (v. 2013b). The calibration was controlled with a key press and repeated if necessary. Each trial began with the appearance of a black fixation cross centred on a grey screen for 1000 ms after which an auditory stimulus was played, a word or an associated sound, while the fixation cross remained on the screen. The visual stimulus depicting two objects simultaneously–target and distractor–appeared, and remained on the screen for 2000 ms while the eye tracker recorded participant’s gaze. The inter stimulus interval (ISI) from the offset of the auditory stimulus to the onset of the image was fixed at 1000 ms. After 2000 ms the image disappeared, and another trial began. The side of target and distractor was counterbalanced, resulting in one block of 24 trials. The experimental block was repeated 4 times, yielding 96 trials in total. The order of trials within a block and across participants was randomised. The experiment lasted approximately 9 minutes.</p>
</sec>
<sec id="sec013">
<title>Data analysis</title>
<p>Two areas of interest that matched size and location of the displayed target and distractor images were defined using Matlab<sup>®</sup> (v. 2014b), and a summary of participants’ fixations with their duration and coordinates on the display was produced using the same software.</p>
<p>After data pre-processing, we calculated fixation proportions for each of the images on the display in both stimulus type conditions (words vs. sounds) using R software [<xref ref-type="bibr" rid="pone.0233968.ref017">17</xref>]. A value of 1 was given to an object when participants were fixating its region on the display at a given moment, while a value of 0 was given to the other region. If no fixation was detected by the eye tracker, both regions were given a 0 value. We defined fixation proportion as the percentage of looks to an object on each trial and across time. This measure was then aggregated, first by participant and stimulus type, and then into 100 ms time windows. The first aggregation allows us to calculate confidence intervals, which were corrected for within-subject designs and for number of multiple comparisons. The second aggregation helps to lessen auto-correlation between fixation proportions over time.</p>
<p>To evaluate the effects of words and sounds on participants’ looks to the pictures on the display, we used a complementary approach based on confidence intervals and quantifiable effect size of proportion of fixation over time [<xref ref-type="bibr" rid="pone.0233968.ref018">18</xref>, <xref ref-type="bibr" rid="pone.0233968.ref019">19</xref>, <xref ref-type="bibr" rid="pone.0233968.ref020">20</xref>], plus a quasi-logistic growth curve analysis (GCA) approach [<xref ref-type="bibr" rid="pone.0233968.ref021">21</xref>, <xref ref-type="bibr" rid="pone.0233968.ref022">22</xref>, <xref ref-type="bibr" rid="pone.0233968.ref023">23</xref>] on empirical logit transformation of the proportion of fixations [<xref ref-type="bibr" rid="pone.0233968.ref024">24</xref>, <xref ref-type="bibr" rid="pone.0233968.ref025">25</xref>]. These two analyses allow complementary inferences by tackling different aspects of eye tracking data in the VWP. Following Baayen [<xref ref-type="bibr" rid="pone.0233968.ref026">26</xref>], we considered all t-values &gt; |2| as significant effects (e.g. <italic>p</italic>&lt;0.05).</p>
</sec>
<sec id="sec014">
<title>Results and discussion</title>
<p><xref ref-type="fig" rid="pone.0233968.g003">Fig 3</xref> shows mean proportion of fixation by object and stimulus type. Shaded areas around the lines represent the within-subject adjusted 95% confidence intervals. Points mark 100 ms time bins from the onset of auditory stimuli windows and distinguish between types of stimulus (i.e. words vs. sounds). The results show greater preference for the target objects, both when hearing the label of the object (word) and its associated sound. This preference for the target over the distractor was also independent of the nature of the item, animals or objects (see <xref ref-type="supplementary-material" rid="pone.0233968.s001">S1 Data</xref>). <xref ref-type="fig" rid="pone.0233968.g003">Fig 3</xref> shows that this preference is slightly stronger for the words compared to the associated sounds. After 200 ms from stimuli onset, a larger fixation proportion on the target object is observed when participants heard the label of the object. This advantage is evident for about 400 ms, disappearing around 700 ms after stimuli onset.</p>
<fig id="pone.0233968.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Mean fixation proportion (aggregated by participants) as a function of object in the visual context and type of auditory stimulus in Experiment 1B.</title>
<p>Shaded areas around lines represented 95% confidence intervals adjusted for within-subject designs and multiple time windows.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g003.tif" xlink:type="simple"/>
</fig>
<p>Model comparison for Experiment 1B resulted in the selection of a model that included all four orthogonal polynomial terms (all <italic>χ²</italic>-values &gt; 142.56, <italic>df</italic> = 11, all <italic>p</italic>-values &lt; .001). The results of the GCA model are presented in <xref ref-type="table" rid="pone.0233968.t001">Table 1</xref>. All polynomial terms show reliable main effects and interaction with the difference between objects (target vs. distractor), except for the quadratic polynomial, which exhibits only the interaction but no main effect in the model. Model comparison, nonetheless, shows that a model with all four terms produces a better fit of the data relative to one without the quadratic term (<italic>χ²</italic> = 2633.5, <italic>df</italic> = 11, <italic>p</italic> &lt; .001). As expected, based on the confidence intervals analysis, the results of the GCA model showed a reliable main effect of object (<italic>β</italic> = -6.41, <italic>se</italic> = 0.28, <italic>t</italic> = -22.71); however, the word preference is not captured in the model (<italic>t</italic> &lt; |2|). <xref ref-type="fig" rid="pone.0233968.g004">Fig 4</xref> shows that the shape of the fixations towards the targets assumes a quartic form with an initial quadratic form, in contrast to the gaze pattern to distractors, which takes a more pronounced cubic and linear shape.</p>
<fig id="pone.0233968.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g004</object-id>
<label>Fig 4</label>
<caption>
<title>GCA model fit (lines) of empirical logit (points) as a function of object in the visual context and type of auditory stimulus in Experiment 1B.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g004.tif" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0233968.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.t001</object-id>
<label>Table 1</label> <caption><title>Main and interaction effect in the quasi-logistic GCA mixed model analysis in Experiment 1B.</title></caption>
<alternatives>
<graphic id="pone.0233968.t001g" mimetype="image" position="float" xlink:href="pone.0233968.t001.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center"><italic>Estimate</italic></th>
<th align="center"><italic>se</italic></th>
<th align="center" colspan="2"><italic>t</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-1.96</td>
<td align="right">0.38</td>
<td align="right">-5.10</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Linear</td>
<td align="right">1.36</td>
<td align="right">0.59</td>
<td align="right">2.29</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quadratic</td>
<td align="right">-0.60</td>
<td align="right">0.49</td>
<td align="right">-1.24</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Cubic</td>
<td align="right">1.10</td>
<td align="right">0.20</td>
<td align="right">5.46</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic</td>
<td align="right">-0.68</td>
<td align="right">0.12</td>
<td align="right">-5.44</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Object</td>
<td align="right">-6.41</td>
<td align="right">0.28</td>
<td align="right">-22.71</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Condition</td>
<td align="right">-0.08</td>
<td align="right">0.12</td>
<td align="right">-0.69</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Linear * Object</td>
<td align="right">-3.54</td>
<td align="right">0.41</td>
<td align="right">-8.63</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Linear * Condition</td>
<td align="right">0.01</td>
<td align="right">0.23</td>
<td align="right">0.03</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Quadratic * Object</td>
<td align="right">2.83</td>
<td align="right">0.47</td>
<td align="right">6.03</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quadratic * Condition</td>
<td align="right">-0.03</td>
<td align="right">0.14</td>
<td align="right">-0.22</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Cubic * Object</td>
<td align="right">-1.76</td>
<td align="right">0.19</td>
<td align="right">-9.40</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic * Condition</td>
<td align="right">-0.02</td>
<td align="right">0.10</td>
<td align="right">-0.18</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Quartic * Object</td>
<td align="right">0.50</td>
<td align="right">0.12</td>
<td align="right">3.99</td>
<td align="left"><xref ref-type="table-fn" rid="t001fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic * Condition</td>
<td align="right">0.06</td>
<td align="right">0.08</td>
<td align="right">0.76</td>
<td align="left"/>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001"><p>* = <italic>p</italic>&lt;0.05</p></fn>
</table-wrap-foot>
</table-wrap>
<p>These results strengthen and support our replication in Exp. 1A by showing that adults looked faster at the target object in the word compared to the sound condition. This preference for words was also reflected by longer early looking time to the target in response to words. The analysis of mean looking times revealed that longer looking to the target was more prominent in the words compared to the associated sounds condition (see <xref ref-type="supplementary-material" rid="pone.0233968.s001">S1 Data</xref>).</p>
<p>Both experiments further confirm the theory that conceptual representations are activated more effectively through verbal (words) than nonverbal (associated sounds) means, suggesting that words exert stronger effect on the activation of visual components of the related conceptual representations.</p>
<p>The question of whether this phenomenon emerges early in infancy is studied in the following set of experiments. Previous developmental studies have shown that words, compared to non-linguistic sounds, enhance object categorization (3, for a review) in infants. And, under specific circumstances (e.g. mother’s voice or presenting two objects from different categories), 9-month-old infants have the capacity to understand the meaning of some common words [<xref ref-type="bibr" rid="pone.0233968.ref027">27</xref>], and detect the match or mismatch between the auditory label and visual object [<xref ref-type="bibr" rid="pone.0233968.ref028">28</xref>]. By 18 months, infants are more sensitive to the relationship between words (e.g. dog–“cat”) than that of associated sounds (e.g. dog–meowing) [<xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>]. We therefore hypothesized that at 9 months, words will have an advantage over associated sounds in activating conceptual representations. We expected infants to look faster and longer at the target relative to the distractor object, in particular, when preceded by words compared to associated sounds.</p>
</sec>
</sec>
<sec id="sec015">
<title>Experiment 2</title>
<sec id="sec016">
<title>Methods</title>
<sec id="sec017">
<title>Participants</title>
<p>Thirty-two healthy 9-month-old infants (15 girls; age range: 8m3d to 9m23d) took part in Exp. 2A. In Exp. 2B, there were thirty-two 12-month-olds (17 girls; age range: 11m2d to 12m23d), and in Exp. 2C twenty-three 18-month-old (12 girls; age range: 17m14d to 18m8d) infants. Participants were recruited from a database of parents from the local area who expressed an interest in taking part in developmental research studies. Parents were informed about the aim of the study and gave written consent before participation. An additional forty infants took part in the study but were not included in the final sample due to an insufficient number of trials per condition (word or sound; <italic>n</italic> = 35), no familiarization phase (<italic>n</italic> = 1), participating twice (at 9 and 12 months; <italic>n</italic> = 1), low birth weight (&lt;2500 g; <italic>n</italic> = 2) or premature birth (&lt;37 weeks of gestation; <italic>n</italic> = 1). All infants received a book for their participation and parents were reimbursed £10 for travel expenses. The study was approved by the University Research Ethics Committee and conducted in conformity with the declaration of Helsinki.</p>
</sec>
<sec id="sec018">
<title>Stimuli</title>
<p>The auditory stimuli were the basic level spoken words and their associated sounds as in Experiment 1. A different native female speaker recorded the words uttered in infant-directed speech (IDS). Audio files were digitized and edited with Adobe Audition (CS 5.5), at 16-bit resolution and 44 kHz sampling rate and had mean length of 819 ms for words and 883 ms for sounds. The visual stimuli were the same 24 images from Experiment 1B.</p>
</sec>
<sec id="sec019">
<title>Procedure and data analysis</title>
<p>We adapted the procedure from Experiment 1B to infants by adding a familiarization phase (using a slide presentation (Microsoft Office 2016) on an iPad mini (7.9”) tablet), and by increasing the time of the fixation cross on the screen to 3000 ms. During this time, caregivers were encouraged to maintain their infant’s attention and interest in the task by saying for instance, “<italic>Oh look</italic>!” or “<italic>Look …</italic>”. Infants sat on their caregiver’s laps, and caregivers were asked to sit at a 90° angle from their infant to ensure the eye tracker recorded the infants’ eye movements only, and to facilitate the interaction between trials. Caregivers were also instructed to avoid verbal communication when the auditory and visual stimuli were displayed, pointing to the screen or naming the objects. The visual stimulus remained on the screen for 4.5 seconds while the eye tracker recorded infants’ gaze. After 4.5 seconds, the image disappeared, and another trial began. Infants were presented with one block of 24 trials in total. A break was taken when needed, and the experiment lasted approximately 5 minutes. The data analysis was identical to that of Experiment 1B, and was applied to each of the age groups separately.</p>
</sec>
</sec>
<sec id="sec020">
<title>Results and discussion</title>
<sec id="sec021">
<title>Experiment 2A: 9-month-olds</title>
<p><xref ref-type="fig" rid="pone.0233968.g005">Fig 5</xref> reveals that target objects were preferred relative to the distractors, particularly between 2000 ms and 2500 ms. However, the confidence intervals suggest that this effect is too small to be considered significant. Similarly, the gaze pattern to the target does not appear to differ between stimulus types. We now turn to the GCA approach to corroborate these results.</p>
<fig id="pone.0233968.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Mean fixation proportion (aggregated by participants) as a function of object in the visual context and type of auditory stimulus.</title>
<p>Shaded areas around lines represented 95% confidence intervals adjusted for within-subject designs and multiple time windows.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g005.tif" xlink:type="simple"/>
</fig>
<p>An adult saccadic eye movement is generally assumed to take about 200 ms [<xref ref-type="bibr" rid="pone.0233968.ref029">29</xref>, <xref ref-type="bibr" rid="pone.0233968.ref030">30</xref>, <xref ref-type="bibr" rid="pone.0233968.ref031">31</xref>]. Arguably, however, children take longer than adults to program and initiate saccades [<xref ref-type="bibr" rid="pone.0233968.ref032">32</xref>]. Thus, GCA regressions consider time windows from 300 ms to 3000 ms after the onset of the stimuli. The results show significant main effects of all polynomial terms, reflecting that the overall changes over time in the proportion of fixations can be reliably depicted by linear, quadratic, cubic and quartic components (all <italic>t</italic>-values &gt; |2|).</p>
<p>More importantly, the model shows no reliable differences between conditions or objects (both <italic>t</italic>-values &lt; |2|), corroborating the conclusions inferred in the first analysis approach. However, for the interaction between the third-order (cubic) polynomial predictor of changes over time and object, we found a reliable effect (<italic>β</italic> = 0.73, <italic>se</italic> = 0.32, <italic>t</italic> = 2.25), suggesting subtler overall differences in the time course of looks for target and distractor objects (see <xref ref-type="table" rid="pone.0233968.t002">Table 2</xref>). <xref ref-type="fig" rid="pone.0233968.g006">Fig 6</xref> shows GCA model fits on empirical logit data time-locked to 300 ms after stimuli onset. The graph is divided into panels per condition where lines represent GCA model fits (solid for the target and dashed for the distractors), and points represent the empirical logit data per condition (circles for the target and triangles for the distractors). In sum, the results of both analyses suggest a subtle preference for the target object in both stimulus type conditions, despite displaying no differences between conditions.</p>
<fig id="pone.0233968.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g006</object-id>
<label>Fig 6</label>
<caption>
<title>GCA model fit (lines) of empirical logit (points) as a function of object in the visual context and type of auditory stimulus.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g006.tif" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0233968.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.t002</object-id>
<label>Table 2</label> <caption><title>Main and interaction effect in the quasi-logistic GCA mixed model analysis.</title></caption>
<alternatives>
<graphic id="pone.0233968.t002g" mimetype="image" position="float" xlink:href="pone.0233968.t002.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center"><italic>Estimate</italic></th>
<th align="center"><italic>se</italic></th>
<th align="center" colspan="2"><italic>t</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-3.89</td>
<td align="right">0.63</td>
<td align="right">-6.12</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Linear</td>
<td align="right">2.04</td>
<td align="right">0.79</td>
<td align="right">2.57</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quadratic</td>
<td align="right">-1.74</td>
<td align="right">0.50</td>
<td align="right">-3.46</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic</td>
<td align="right">1.40</td>
<td align="right">0.38</td>
<td align="right">3.67</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic</td>
<td align="right">-1.02</td>
<td align="right">0.30</td>
<td align="right">-3.38</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Object</td>
<td align="right">-0.19</td>
<td align="right">0.19</td>
<td align="right">-1.01</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Condition</td>
<td align="right">0.05</td>
<td align="right">0.14</td>
<td align="right">0.34</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Object</td>
<td align="right">-0.86</td>
<td align="right">0.66</td>
<td align="right">-1.31</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Condition</td>
<td align="right">-0.29</td>
<td align="right">0.42</td>
<td align="right">-0.71</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic * Object</td>
<td align="right">0.00</td>
<td align="right">0.45</td>
<td align="right">0.01</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic * Condition</td>
<td align="right">0.03</td>
<td align="right">0.28</td>
<td align="right">0.12</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Cubic * Object</td>
<td align="right">0.73</td>
<td align="right">0.32</td>
<td align="right">2.25</td>
<td align="right"><xref ref-type="table-fn" rid="t002fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic * Condition</td>
<td align="right">-0.26</td>
<td align="right">0.27</td>
<td align="right">-0.95</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quartic * Object</td>
<td align="right">0.07</td>
<td align="right">0.27</td>
<td align="right">0.25</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quartic * Condition</td>
<td align="right">0.17</td>
<td align="right">0.22</td>
<td align="right">0.76</td>
<td align="right"/>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t002fn001"><p>* = <italic>p</italic>&lt;0.05</p></fn>
</table-wrap-foot>
</table-wrap>
<p>These findings are not in line with our prediction that word advantage emerges at 9 months when infants show semantic understanding of common words. Nine-month-old infants recognized the visual target object, however, looking time and fixations were similar across conditions. Consequently, we hypothesized that words will become more effective at activating conceptual representations at 12 months, when their mental representation of words as abstract referential symbols might be more consolidated.</p>
</sec>
</sec>
<sec id="sec022">
<title>Experiment 2B: 12-month-olds</title>
<p><xref ref-type="fig" rid="pone.0233968.g007">Fig 7</xref> shows a distinctive pattern for the sound and the word experimental conditions. Target objects show a small and short-lived preference in the sound condition with a peak around 1750 ms after stimuli onset, while a similar pattern is observed for the distractor in the word condition but with a later peak (around 2150 ms after stimuli onset). Confidence intervals, however, suggest that these effects are too small to be considered significant.</p>
<fig id="pone.0233968.g007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g007</object-id>
<label>Fig 7</label>
<caption>
<title>Mean fixation proportion (aggregated by participants) as a function of object in the visual context and type of auditory stimulus.</title>
<p>Shaded areas around lines represented 95% confidence intervals adjusted for within-subject designs and multiple time windows.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g007.tif" xlink:type="simple"/>
</fig>
<p>Model comparisons showed that the inclusion of each polynomial term increased the fit of the model (all <italic>χ²</italic>-values &gt; 51.86, <italic>df</italic> = 11, all <italic>p</italic>-values &lt; .001). Critically, and as for the 9-month-olds, we observed no reliable overall differences between objects or conditions, and an interaction between the cubic polynomial and object (see <xref ref-type="table" rid="pone.0233968.t003">Table 3</xref>). However, the pattern observed is different to that in 9-month-old infants. As can be observed in <xref ref-type="fig" rid="pone.0233968.g008">Fig 8</xref>, the model fit for the target in both conditions takes the form of a cubic curve, while that for the distractor can be better described as a quartic curve. Consequently, the combination of the two analysis approaches suggests that, as in Exp. 2A, there are no differences between the experimental conditions, and that there might be subtle differences between the time-course patterns of visual attention for target objects and distractors.</p>
<fig id="pone.0233968.g008" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g008</object-id>
<label>Fig 8</label>
<caption>
<title>GCA model fit (lines) of empirical logit (points) as a function of object in the visual context and type of auditory stimulus.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g008.tif" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0233968.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.t003</object-id>
<label>Table 3</label> <caption><title>Main and interaction effect in the quasi-logistic GCA mixed model analysis in Experiment 2B.</title></caption>
<alternatives>
<graphic id="pone.0233968.t003g" mimetype="image" position="float" xlink:href="pone.0233968.t003.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center"><italic>Estimate</italic></th>
<th align="center"><italic>se</italic></th>
<th align="center" colspan="2"><italic>t</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-3.77</td>
<td align="right">0.67</td>
<td align="right">-5.63</td>
<td align="right"><xref ref-type="table-fn" rid="t003fn001">*</xref></td>
</tr>
<tr>
<td align="left">Linear</td>
<td align="right">1.30</td>
<td align="right">0.75</td>
<td align="right">1.74</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic</td>
<td align="right">-1.13</td>
<td align="right">0.52</td>
<td align="right">-2.17</td>
<td align="right"><xref ref-type="table-fn" rid="t003fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic</td>
<td align="right">1.38</td>
<td align="right">0.41</td>
<td align="right">3.39</td>
<td align="right"><xref ref-type="table-fn" rid="t003fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic</td>
<td align="right">-0.75</td>
<td align="right">0.35</td>
<td align="right">-2.13</td>
<td align="right"><xref ref-type="table-fn" rid="t003fn001">*</xref></td>
</tr>
<tr>
<td align="left">Object</td>
<td align="right">-0.06</td>
<td align="right">0.11</td>
<td align="right">-0.56</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Condition</td>
<td align="right">0.04</td>
<td align="right">0.13</td>
<td align="right">0.33</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Object</td>
<td align="right">0.20</td>
<td align="right">0.52</td>
<td align="right">0.39</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Condition</td>
<td align="right">0.25</td>
<td align="right">0.46</td>
<td align="right">0.55</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic * Object</td>
<td align="right">-0.05</td>
<td align="right">0.38</td>
<td align="right">-0.13</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic * Condition</td>
<td align="right">0.29</td>
<td align="right">0.40</td>
<td align="right">0.73</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Cubic * Object</td>
<td align="right">-1.08</td>
<td align="right">0.37</td>
<td align="right">-2.90</td>
<td align="right"><xref ref-type="table-fn" rid="t003fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic * Condition</td>
<td align="right">-0.04</td>
<td align="right">0.24</td>
<td align="right">-0.17</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quartic * Object</td>
<td align="right">-0.37</td>
<td align="right">0.23</td>
<td align="right">-1.58</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quartic * Condition</td>
<td align="right">-0.08</td>
<td align="right">0.32</td>
<td align="right">-0.26</td>
<td align="right"/>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t003fn001"><p>* = <italic>p</italic>&lt;0.05</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Unexpectedly, we obtained similar results to Exp. 2A. Twelve-month-old infants did not show a preference for words over associated sounds during object recognition. However, for each age group, 9 and 12 months, the analysis per item and mean proportion of fixations provided a slightly clearer pattern, and revealed that infants looked longer at the target compared to the distractor only when items were animals as opposed to objects (see <xref ref-type="supplementary-material" rid="pone.0233968.s001">S1 Data</xref>). This preference was independent of the conditions, words or associated sounds, and could be explained either by familiarity or by animacy.</p>
<p>Earlier studies have shown that the second year is marked by an accelerated rate of word learning and understanding, yielding a more efficient recognition [<xref ref-type="bibr" rid="pone.0233968.ref033">33</xref>], and greater sensitivity to the relationships between words than that of associated sounds [<xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>]. Thus, we hypothesized that at 18 months, infants will exhibit greater sensitivity to words, and visual object recognition will be more effective when cued by words than associated sounds.</p>
</sec>
<sec id="sec023">
<title>Experiment 2C: 18-month-olds</title>
<p>In contrast to Exp. 2A and Exp. 2B, the pattern of fixation proportion in <xref ref-type="fig" rid="pone.0233968.g009">Fig 9</xref> shows a clear preference for the target object (compared to the distractor) in both conditions. This preference starts around 600 ms after stimuli onset and it is maintained beyond 2500 ms after word onset in the sound condition, but only until 1500 ms after word onset in the word condition. Nevertheless, the confidence intervals suggest that while a larger difference between target and distractors is evident in the sound condition relative to the word condition, there are no clear differences between the two experimental conditions.</p>
<fig id="pone.0233968.g009" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g009</object-id>
<label>Fig 9</label>
<caption>
<title>Mean fixation proportion (aggregated by participants) as a function of object in the visual context and type of auditory stimulus.</title>
<p>Shaded areas around lines represented 95% confidence intervals adjusted for within-subject designs and multiple time windows.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g009.tif" xlink:type="simple"/>
</fig>
<p>Model comparison resulted in the selection of a model that included all four orthogonal polynomial terms (all <italic>χ²</italic>-values &gt; 63.84, <italic>df</italic> = 11, all <italic>p</italic>-values &lt; .001). Importantly, and in contrast to Exp. 2A and Exp. 2B, the results of the GCA model showed a reliable main effect of object (<italic>β</italic> = -1.19, <italic>se</italic> = 0.30, <italic>t</italic> = -3.96), but no reliable main effect of condition (<italic>t</italic> &lt; |2|). This is coherent with what can be inferred based on the confidence intervals approach (see <xref ref-type="fig" rid="pone.0233968.g010">Fig 10</xref>). Additionally, the GCA model shows three significant interaction effects between object and the polynomial terms quadratic, cubic, and quartic (see <xref ref-type="table" rid="pone.0233968.t004">Table 4</xref>). <xref ref-type="fig" rid="pone.0233968.g010">Fig 10</xref> shows that the shape of the fixations on the targets over time takes a quartic form with an initial strong quadratic shape. In contrast, the fixations to distractors assume a much more pronounced cubic shape relative to the target objects.</p>
<fig id="pone.0233968.g010" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.g010</object-id>
<label>Fig 10</label>
<caption>
<title>GCA model fit (lines) of empirical logit (points) as a function of object in the visual context and type of auditory stimulus in Experiment 2C.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0233968.g010.tif" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0233968.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233968.t004</object-id>
<label>Table 4</label> <caption><title>Main and interaction effect in the quasi-logistic GCA mixed model analysis.</title></caption>
<alternatives>
<graphic id="pone.0233968.t004g" mimetype="image" position="float" xlink:href="pone.0233968.t004.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center"><italic>Estimate</italic></th>
<th align="center"><italic>se</italic></th>
<th align="center" colspan="2"><italic>T</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-2.94</td>
<td align="right">0.56</td>
<td align="right">-5.26</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Linear</td>
<td align="right">1.52</td>
<td align="right">0.98</td>
<td align="right">1.55</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic</td>
<td align="right">-1.41</td>
<td align="right">0.73</td>
<td align="right">-1.93</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Cubic</td>
<td align="right">1.22</td>
<td align="right">0.55</td>
<td align="right">2.23</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic</td>
<td align="right">-1.15</td>
<td align="right">0.44</td>
<td align="right">-2.60</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Object</td>
<td align="right">-1.19</td>
<td align="right">0.30</td>
<td align="right">-3.96</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Condition</td>
<td align="right">0.25</td>
<td align="right">0.21</td>
<td align="right">1.20</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Object</td>
<td align="right">0.61</td>
<td align="right">0.71</td>
<td align="right">0.86</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Linear * Condition</td>
<td align="right">-0.37</td>
<td align="right">0.72</td>
<td align="right">-0.52</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quadratic * Object</td>
<td align="right">2.31</td>
<td align="right">0.61</td>
<td align="right">3.80</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quadratic * Condition</td>
<td align="right">-0.27</td>
<td align="right">0.39</td>
<td align="right">-0.69</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Cubic * Object</td>
<td align="right">-2.12</td>
<td align="right">0.50</td>
<td align="right">-4.21</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Cubic * Condition</td>
<td align="right">0.08</td>
<td align="right">0.32</td>
<td align="right">0.25</td>
<td align="right"/>
</tr>
<tr>
<td align="left">Quartic * Object</td>
<td align="right">1.04</td>
<td align="right">0.39</td>
<td align="right">2.68</td>
<td align="right"><xref ref-type="table-fn" rid="t004fn001">*</xref></td>
</tr>
<tr>
<td align="left">Quartic * Condition</td>
<td align="right">-0.34</td>
<td align="right">0.25</td>
<td align="right">-1.36</td>
<td align="right"/>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t004fn001"><p>* = <italic>p</italic>&lt;0.05</p></fn>
</table-wrap-foot>
</table-wrap>
<p>These results show that 18-month-olds were equally fast at recognizing the target object in both the word and sound conditions, and independently of the nature of items (animals or objects; see <xref ref-type="supplementary-material" rid="pone.0233968.s001">S1 Data</xref>). The difference between both conditions was not significant, but as reflected in the GCA model, infants displayed longer looking time in the associated sound compared to the word condition.</p>
</sec>
</sec>
<sec id="sec024">
<title>General discussion</title>
<p>In this study, we aimed to determine whether during language development, words are more effective than associated sounds at activating conceptual representations. We conducted one behavioural visual identification and one eye tracker object recognition experiment with adults to replicate Lupyan and colleagues’ [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>] findings. We then adapted the object recognition task so that it was suitable for infants. Our successful replication revealed that adults identified and recognized the target object faster when preceded by its label compared to its associated sound, supporting further the theory that although both words and associated sounds activate conceptual representations, words have the advantage of being more efficient in activating the visual representation of an object. In Lupyan and Thompson-Schill’s study [<xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>], the word advantage was also evident in the accuracy measure, whereas in our study, participants were equally accurate across words and associated sounds conditions. The eye movement measures, however, strengthened the RT findings and yielded a similar word advantage.</p>
<p>It is possible that upon hearing the word “<italic>dog</italic>” for instance, all dog features including their generic visual appearance are activated, accelerating the reaction times, while hearing dog barking might require increased verification time to create the direct link between the source of the sound and the image. This is unlikely, however, as even with a longer average sound duration (relative to word duration) providing participants with additional processing time, RTs and looking times were faster in the word compared to the associated sound condition. Thus, unlike associated sounds, words enhance the abstraction of conceptual categories leading to faster activation of the category representations: words are “special” because they enable activation of conceptual representations in a more categorical way [<xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>, <xref ref-type="bibr" rid="pone.0233968.ref008">8</xref>].</p>
<p>However, this phenomenon did not emerge in our experiments with infants at 9-, 12-, or 18-months. Nine- and 12-month-olds did not display any differences between words and associated sounds conditions. Moreover, the distinction between the target and distractor object in either condition was not reliable. Consequently, it can be assumed that at these ages, infants process both auditory stimuli differently, but our empirical paradigm was not sensitive enough to detect these nuances. Unexpectedly, although 18-month-old infants were equally fast at recognizing the target object in both conditions, we observed a shift towards larger fixations and longer looking time at the target when preceded by the associated sound compared to the word. Our findings suggest that infants allocated greater attention to the target image in the associated sound compared to the word condition. This result must be taken with caution and needs to be considered carefully. Though it matches Hendrickson et al.’s [<xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>] suggestion that associated sounds require longer time to process the semantic match between the visual object and the generated sound, it contradicts the results of Cummings and colleagues [<xref ref-type="bibr" rid="pone.0233968.ref015">15</xref>], whereby object recognition was similar in the words and associated sounds condition. Toon and Kukona’s [<xref ref-type="bibr" rid="pone.0233968.ref011">11</xref>] VWP study with adults found greater looking times and semantic activation of the competitors in the associated sounds relative to the words conditions. Just like in our study, however, this source of differences requires further replication and investigation.</p>
<p>Alternatively, developmental studies commonly claim that words have a special status and are preferred over other non-linguistic sounds because of their <italic>referential</italic> nature [<xref ref-type="bibr" rid="pone.0233968.ref003">3</xref>]. In this account, words enhance categorization and learning because unlike other sounds, words <italic>refer</italic> to object kinds. Therefore, it could be that during infancy, words activate conceptual representations more efficiently than associated sounds, but another methodology would be more sensitive to these differences. We also want to note that, although the ISI was kept constant (1000 ms), for both adults and infants, the average duration of words was shorter compared to sounds, and participants had more time to process sounds over words. As Exp. 1B indicates, this had no effect on adult results but might have had an effect on infant results.</p>
<p>Another possibility is that words become more effective at activating conceptual representations, but their advantage over associated sounds emerges at later stages of language development. The only two studies to date that investigated the processing of words and associated sounds tackled the question differently. Cummings et al. [<xref ref-type="bibr" rid="pone.0233968.ref015">15</xref>] studied the speed of word-object recognition and its correlation with chronological age and infants’ productive skills, while Hendrickson et al. [<xref ref-type="bibr" rid="pone.0233968.ref016">16</xref>] investigated the organization of words and associated sounds in the semantic memory. Here, we were interested in studying whether visual object recognition is modulated by the preceding auditory cue to determine whether words have a ‘special’ status compared to associated sounds.</p>
<p>Our study demonstrates that by 18 months, infants process words and associated sounds differently, possibly allocating more attention to target objects when cued by associated sounds relative to words. The question of whether and when infants reach the pattern of results we observed in adults remains open: a different experimental methodology or different ages might yield the initially expected results.</p>
</sec>
<sec id="sec025">
<title>Supporting information</title>
<supplementary-material id="pone.0233968.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="pone.0233968.s001.docx" xlink:type="simple">
<label>S1 Data</label>
<caption>
<title/>
<p>(DOCX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We thank all families for their participation and contribution to this research.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0233968.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Gelman</surname> <given-names>S. A.</given-names></name>, <name><surname>Roberts</surname> <given-names>S. O.</given-names></name> <article-title>How language shapes the cultural inheritance of categories</article-title>. <source>Proceedings of the National Academy of Sciences of the United States of America</source>. <year>2017</year>; <volume>114</volume>(<issue>30</issue>): <fpage>7900</fpage>–<lpage>7907</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1621073114" xlink:type="simple">10.1073/pnas.1621073114</ext-link></comment> <object-id pub-id-type="pmid">28739931</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Ferguson</surname> <given-names>B</given-names></name>, <name><surname>Waxman</surname> <given-names>S.</given-names></name> <article-title>Linking language and categorization in infancy</article-title>. <source>Journal of Child Language</source>. <year>2017</year>; <volume>44</volume>(<issue>3</issue>): <fpage>527</fpage>–<lpage>552</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1017/S0305000916000568" xlink:type="simple">10.1017/S0305000916000568</ext-link></comment> <object-id pub-id-type="pmid">27830633</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Waxman</surname> <given-names>SR</given-names></name>, <name><surname>Gelman</surname> <given-names>SA</given-names></name>. <article-title>Early word-learning entails reference, not merely associations</article-title>. <source>Trends in Cognitive Sciences</source>. <year>2009</year>; <volume>13</volume>(<issue>6</issue>): <fpage>258</fpage>–<lpage>263</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.tics.2009.03.006" xlink:type="simple">10.1016/j.tics.2009.03.006</ext-link></comment> <object-id pub-id-type="pmid">19447670</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hills</surname> <given-names>TT</given-names></name>, <name><surname>Maouene</surname> <given-names>M</given-names></name>, <name><surname>Maouene</surname> <given-names>J</given-names></name>, <name><surname>Sheya</surname> <given-names>A</given-names></name>, <name><surname>Smith</surname> <given-names>L.</given-names></name> <article-title>Categorical structure among shared features in networks of early-learned nouns</article-title>. <source>Cognition</source>. <year>2009</year>; <volume>112</volume>(<issue>3</issue>): <fpage>381</fpage>–<lpage>396</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cognition.2009.06.002" xlink:type="simple">10.1016/j.cognition.2009.06.002</ext-link></comment> <object-id pub-id-type="pmid">19576579</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Boutonnet</surname> <given-names>B</given-names></name>, <name><surname>Lupyan</surname> <given-names>G.</given-names></name> <article-title>Words jump-start vision: a label advantage in object recognition</article-title>. <source>Journal of Neuroscience</source>. <year>2015</year>, <volume>35</volume>(<issue>25</issue>): <fpage>9329</fpage>–<lpage>9335</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.5111-14.2015" xlink:type="simple">10.1523/JNEUROSCI.5111-14.2015</ext-link></comment> <object-id pub-id-type="pmid">26109657</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Edmiston</surname> <given-names>P</given-names></name>, <name><surname>Lupyan</surname> <given-names>G.</given-names></name> <article-title>What makes words special? Words as unmotivated cues</article-title>. <source>Cognition</source>. <year>2015</year>; <volume>143</volume>: <fpage>93</fpage>–<lpage>100</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cognition.2015.06.008" xlink:type="simple">10.1016/j.cognition.2015.06.008</ext-link></comment> <object-id pub-id-type="pmid">26117488</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Ballas</surname> <given-names>J.A.</given-names></name>, <name><surname>Mullins</surname> <given-names>T.</given-names></name> <article-title>Effects of context on the identification of everyday sounds</article-title>. <source>Human Performance</source>. <year>1991</year>; <volume>4</volume>(<issue>3</issue>): <fpage>199</fpage>–<lpage>219</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Lupyan</surname> <given-names>G</given-names></name>, <name><surname>Thompson-Schill</surname> <given-names>SL</given-names></name>. <article-title>The evocative power of words: Activation of concepts by verbal and nonverbal means</article-title>. <source>Journal of Experimental Psychology-General</source>. <year>2012</year>; <volume>141</volume>(<issue>1</issue>): <fpage>170</fpage>–<lpage>186</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/a0024904" xlink:type="simple">10.1037/a0024904</ext-link></comment> <object-id pub-id-type="pmid">21928923</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Chen</surname> <given-names>Y.C.</given-names></name>, <name><surname>Spence</surname> <given-names>C.</given-names></name> <article-title>Crossmodal semantic priming by naturalistic sounds and spoken words enhances visual sensitivity</article-title>. <source>Journal of Experimental Psychology: Human Perception and Performance</source>. <year>2011</year>; <volume>37</volume>: <fpage>1554</fpage>–<lpage>1568</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/a0024329" xlink:type="simple">10.1037/a0024329</ext-link></comment> <object-id pub-id-type="pmid">21688942</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Chen</surname> <given-names>Y.C.</given-names></name>, <name><surname>Spence</surname> <given-names>C.</given-names></name> <article-title>Dissociating the time courses of the cross-modal semantic priming effects elicited by naturalistic sounds and spoken words</article-title>. <source>Psychonomic Bulletin &amp; Review</source>. <year>2018</year>; <volume>25</volume>(<issue>3</issue>): <fpage>1138</fpage>–<lpage>1146</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Toon</surname> <given-names>J.</given-names></name>, <name><surname>Kukona</surname> <given-names>A.</given-names></name> <article-title>Activating semantic knowledge during spoken words and environmental sounds: Evidence from the visual world paradigm</article-title>. <source>Cognitive Science</source>. <year>2019</year>; <volume>43</volume>: <fpage>e12810</fpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kutas</surname> <given-names>M</given-names></name>, <name><surname>Federmeier</surname> <given-names>KD</given-names></name>. <article-title>Thirty years and counting: finding meaning in the N400 component of the event-related brain potential (ERP)</article-title>. <source>Annual Review of Psychology</source>. <year>2011</year>; <volume>62</volume>: <fpage>621</fpage>–<lpage>647</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1146/annurev.psych.093008.131123" xlink:type="simple">10.1146/annurev.psych.093008.131123</ext-link></comment> <object-id pub-id-type="pmid">20809790</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Ballas</surname> <given-names>JA</given-names></name>, <name><surname>Howard</surname> <given-names>JH</given-names> <suffix>Jr</suffix></name>. <article-title>Interpreting the language of environmental sounds</article-title>. <source>Environment and Behavior</source>. <year>1987</year>; <volume>19</volume>(<issue>1</issue>): <fpage>91</fpage>–<lpage>114</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Lupyan</surname> <given-names>G</given-names></name>, <name><surname>Lewis</surname> <given-names>M.</given-names></name> <article-title>From words-as-mappings to words-as-cues: the role of language in semantic knowledge</article-title>. <source>Language, Cognition and Neuroscience</source>. <year>2017</year>; <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/23273798.2017.1404114" xlink:type="simple">10.1080/23273798.2017.1404114</ext-link></comment></mixed-citation></ref>
<ref id="pone.0233968.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Cummings</surname> <given-names>A</given-names></name>, <name><surname>Saygin</surname> <given-names>A P</given-names></name>, <name><surname>Bates</surname> <given-names>E</given-names></name>, <name><surname>Dick</surname> <given-names>F</given-names></name>. <article-title>Infants’ recognition of meaningful verbal and nonverbal sounds</article-title>. <source>Language Learning and Development</source>. <year>2009</year>; <volume>5</volume>(<issue>3</issue>): <fpage>172</fpage>–<lpage>190</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/15475440902754086" xlink:type="simple">10.1080/15475440902754086</ext-link></comment> <object-id pub-id-type="pmid">20228882</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hendrickson</surname> <given-names>K</given-names></name>, <name><surname>Love</surname> <given-names>T</given-names></name>, <name><surname>Walenski</surname> <given-names>M</given-names></name>, <name><surname>Friend</surname> <given-names>M.</given-names></name> <article-title>The organization of words and environmental sounds in the second year: behavioural and electrophysiological evidence</article-title>. <source>Developmental Science</source>. <year>2019</year>; <volume>22</volume>:<fpage>e12746</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/desc.12746" xlink:type="simple">10.1111/desc.12746</ext-link></comment> <object-id pub-id-type="pmid">30159958</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref017"><label>17</label><mixed-citation publication-type="book" xlink:type="simple"><collab>R Core Team</collab>. <source>R: A language and environment for statistical computing</source>. <publisher-name>R Foundation for Statistical Computing</publisher-name>, <publisher-loc>Vienna, Austria</publisher-loc>. <year>2018</year>. URL <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/" xlink:type="simple">https://www.R-project.org/</ext-link>.</mixed-citation></ref>
<ref id="pone.0233968.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Cumming</surname> <given-names>G.</given-names></name> <article-title>The new statistics: Why and how</article-title>. <source>Psychological science</source>. <year>2014</year>; <volume>25</volume>(<issue>1</issue>): <fpage>7</fpage>–<lpage>29</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/0956797613504966" xlink:type="simple">10.1177/0956797613504966</ext-link></comment> <object-id pub-id-type="pmid">24220629</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Huettig</surname> <given-names>F</given-names></name>, <name><surname>Janse</surname> <given-names>E.</given-names></name> <article-title>Individual differences in working memory and processing speed predict anticipatory spoken language processing in the visual world</article-title>. <source>Language, Cognition and Neuroscience</source>. <year>2016</year>; <volume>31</volume>(<issue>1</issue>): <fpage>80</fpage>–<lpage>93</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Huettig</surname> <given-names>F</given-names></name>, <name><surname>Guerra</surname> <given-names>E.</given-names></name> <article-title>Effects of speech rate, preview time of visual context, and participant instructions reveal strong limits on prediction in language processing</article-title>. <source>Brain Research</source>. <year>2019</year>; <volume>1706</volume>: <fpage>196</fpage>–<lpage>208</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.brainres.2018.11.013" xlink:type="simple">10.1016/j.brainres.2018.11.013</ext-link></comment> <object-id pub-id-type="pmid">30439351</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Mirman</surname> <given-names>D</given-names></name>, <name><surname>Dixon</surname> <given-names>JA</given-names></name>, <name><surname>Magnuson</surname> <given-names>JS</given-names></name>. <article-title>Statistical and computational models of the visual world paradigm: Growth curves and individual differences</article-title>. <source>Journal of memory and language</source>. <year>2008</year>; <volume>59</volume>(<issue>4</issue>): <fpage>475</fpage>–<lpage>494</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jml.2007.11.006" xlink:type="simple">10.1016/j.jml.2007.11.006</ext-link></comment> <object-id pub-id-type="pmid">19060958</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref022"><label>22</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Mirman</surname> <given-names>D.</given-names></name> <source>Growth curve analysis and visualization using R</source>. <publisher-name>CRC Press</publisher-name>. <year>2014</year>.</mixed-citation></ref>
<ref id="pone.0233968.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bates</surname> <given-names>D</given-names></name>, <name><surname>Maechler</surname> <given-names>M</given-names></name>, <name><surname>Bolker</surname> <given-names>B</given-names></name>, <name><surname>Walker</surname> <given-names>S.</given-names></name> <article-title>Fitting Linear Mixed-Effects Models Using lme4</article-title>. <source>Journal of Statistical Software</source>. <year>2015</year>; <volume>67</volume>(<issue>1</issue>): <fpage>1</fpage>–<lpage>48</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Barr</surname> <given-names>DJ</given-names></name>. <article-title>Analyzing ‘visual world’ eyetracking data using multilevel logistic regression</article-title>. <source>Journal of memory and language</source>, <year>2008</year>; <volume>59</volume>(<issue>4</issue>): <fpage>457</fpage>–<lpage>474</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Barr</surname> <given-names>DJ</given-names></name>, <name><surname>Levy</surname> <given-names>R</given-names></name>, <name><surname>Scheepers</surname> <given-names>C</given-names></name>, <name><surname>Tily</surname> <given-names>HJ</given-names></name>. <article-title>Random effects structure for confirmatory hypothesis testing: Keep it maximal</article-title>. <source>Journal of memory and language</source>. <year>2013</year>; <volume>68</volume>(<issue>3</issue>): <fpage>255</fpage>–<lpage>278</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref026"><label>26</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Baayen</surname> <given-names>R.H.</given-names></name> <source>Analyzing linguistic data: A practical introduction to statistics using R</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>. <year>2008</year>.</mixed-citation></ref>
<ref id="pone.0233968.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bergelson</surname> <given-names>E</given-names></name>, <name><surname>Swingley</surname> <given-names>D.</given-names></name> <article-title>At 6–9 months, human infants know the meanings of many common nouns</article-title>. <source>Proceedings of the National Academy of Sciences of the United States of America</source>. <year>2012</year>; <volume>109</volume>(<issue>9</issue>): <fpage>3253</fpage>–<lpage>3258</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1113380109" xlink:type="simple">10.1073/pnas.1113380109</ext-link></comment> <object-id pub-id-type="pmid">22331874</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Parise</surname> <given-names>E</given-names></name>, <name><surname>Csibra</surname> <given-names>G.</given-names></name> <article-title>Electrophysiological evidence for the understanding of maternal speech by 9-month-old infants</article-title>. <source>Psychological Science</source>. <year>2012</year>; <volume>23</volume>(<issue>7</issue>): <fpage>728</fpage>–<lpage>733</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/0956797612438734" xlink:type="simple">10.1177/0956797612438734</ext-link></comment> <object-id pub-id-type="pmid">22692337</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Dahan</surname> <given-names>D</given-names></name>, <name><surname>Magnuson</surname> <given-names>JS</given-names></name>, <name><surname>Tanenhaus</surname> <given-names>MK</given-names></name>, <name><surname>Hogan</surname> <given-names>EM</given-names></name>. <article-title>Subcategorical mismatches and the time course of lexical access: evidence for lexical competition</article-title>. <source>Language and Cognitive Processes</source>. <year>2001</year>; <volume>16</volume>(<issue>5/6</issue>): <fpage>507</fpage>–<lpage>534</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Matin</surname> <given-names>E</given-names></name>, <name><surname>Shao</surname> <given-names>KC</given-names></name>, <name><surname>Boff</surname> <given-names>KR</given-names></name>. <article-title>Saccadic overhead: information-processing time with and without saccades</article-title>. <source>Perception &amp; Psychophysics</source>. <year>1993</year>; <volume>53</volume>(<issue>4</issue>): <fpage>372</fpage>–<lpage>380</lpage>.</mixed-citation></ref>
<ref id="pone.0233968.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Saslow</surname> <given-names>M.</given-names></name> <article-title>Effects of components of displacement-step stimuli upon latency for saccadic eye movement</article-title>. <source>Journal of the Optical Society of America</source>. <year>1967</year>; <volume>57</volume>: <fpage>1024</fpage>–<lpage>1029</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1364/josa.57.001024" xlink:type="simple">10.1364/josa.57.001024</ext-link></comment> <object-id pub-id-type="pmid">6035296</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Fernald</surname> <given-names>A</given-names></name>, <name><surname>Swingley</surname> <given-names>D</given-names></name>, <name><surname>Pinto</surname> <given-names>JP</given-names></name>. <article-title>When half a word is enough: infants can recognize spoken words using partial phonetic information</article-title>. <source>Child Development</source>. <year>2001</year>; <volume>72</volume>(<issue>4</issue>): <fpage>1003</fpage>–<lpage>1015</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/1467-8624.00331" xlink:type="simple">10.1111/1467-8624.00331</ext-link></comment> <object-id pub-id-type="pmid">11480931</object-id></mixed-citation></ref>
<ref id="pone.0233968.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Fernald</surname> <given-names>A</given-names></name>, <name><surname>Perfors</surname> <given-names>A</given-names></name>, <name><surname>Marchman</surname> <given-names>VA</given-names></name>. <article-title>Picking up speed in understanding: Speech processing efficiency and vocabulary growth across the 2nd year</article-title>. <source>Developmental Psychology</source>. <year>2006</year>; <volume>42</volume>(<issue>1</issue>): <fpage>98</fpage>–<lpage>116</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/0012-1649.42.1.98" xlink:type="simple">10.1037/0012-1649.42.1.98</ext-link></comment> <object-id pub-id-type="pmid">16420121</object-id></mixed-citation></ref>
</ref-list>
</back>
</article>