<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-02520169</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-02T21:20:08+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">A taxonomy-free approach based on machine learning to assess the quality of rivers with diatoms</title>
            <author role="aut">
              <persName>
                <forename type="first">Maria João</forename>
                <surname>Feio</surname>
              </persName>
              <idno type="halauthorid">1592721-0</idno>
              <affiliation ref="#struct-499412"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Sonia R.Q.</forename>
                <surname>Serra</surname>
              </persName>
              <email type="md5">4800988db0ec20659bce2528dbfc0b54</email>
              <email type="domain">gmail.com</email>
              <idno type="idhal" notation="numeric">977516</idno>
              <idno type="halauthorid" notation="string">1187566-977516</idno>
              <affiliation ref="#struct-499412"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Andreia</forename>
                <surname>Mortágua</surname>
              </persName>
              <idno type="halauthorid">1812732-0</idno>
              <affiliation ref="#struct-60947"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Agnes</forename>
                <surname>Bouchez</surname>
              </persName>
              <email type="md5">3db48840ed445dc00b4ffeea1349205e</email>
              <email type="domain">inrae.fr</email>
              <idno type="idhal" notation="string">agnes-bouchez</idno>
              <idno type="idhal" notation="numeric">737113</idno>
              <idno type="halauthorid" notation="string">49840-737113</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-8802-6966</idno>
              <idno type="IDREF">https://www.idref.fr/034094660</idno>
              <orgName ref="#struct-577435"/>
              <affiliation ref="#struct-1002124"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Frédéric</forename>
                <surname>Rimet</surname>
              </persName>
              <email type="md5">f9c77d0f6eb5f0df870afa97bb477b0e</email>
              <email type="domain">inrae.fr</email>
              <idno type="idhal" notation="string">frederic-rimet</idno>
              <idno type="idhal" notation="numeric">1136618</idno>
              <idno type="halauthorid" notation="string">669915-1136618</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-5514-869X</idno>
              <idno type="IDREF">https://www.idref.fr/164303758</idno>
              <affiliation ref="#struct-1002124"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Valentin</forename>
                <surname>Vasselon</surname>
              </persName>
              <email type="md5">a494ec6acd73f6173c511a16fadbee56</email>
              <email type="domain">inrae.fr</email>
              <idno type="idhal" notation="string">valentin-vasselon</idno>
              <idno type="idhal" notation="numeric">20047</idno>
              <idno type="halauthorid" notation="string">32952-20047</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-5038-7918</idno>
              <idno type="IDREF">https://www.idref.fr/253127432</idno>
              <affiliation ref="#struct-1002124"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Salomé F.P.</forename>
                <surname>Almeida</surname>
              </persName>
              <idno type="halauthorid">1812733-0</idno>
              <affiliation ref="#struct-60947"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Sabine</forename>
                <surname>ROSSI</surname>
              </persName>
              <email type="md5">a5e8dbe8a7959c08b4b3ddfa9cc6bfa4</email>
              <email type="domain">inrae.fr</email>
            </editor>
            <funder>Portuguese Foundation for Science and Technology; UID/GEO/04035/2019; UID/MAR/04292/2019; COST (European Cooperation in Science and Technology) program CA15219; PROAQUA</funder>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2020-03-26 14:53:54</date>
              <date type="whenModified">2025-07-29 16:42:04</date>
              <date type="whenReleased">2020-03-26 14:53:54</date>
              <date type="whenProduced">2020</date>
              <fs>
                <f name="inra_etatDocument_local" notation="numeric">
                  <numeric>0</numeric>
                </f>
                <f name="inra_etatDocument_local" notation="string" n="0">
                  <string>inra_etatDocument_local_0</string>
                </f>
                <f name="inra_publicVise_local" notation="string" n="SC">
                  <string>inra_publicVise_local_SC</string>
                </f>
              </fs>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="642558">
                <persName>
                  <forename>Sabine</forename>
                  <surname>ROSSI</surname>
                </persName>
                <email type="md5">a5e8dbe8a7959c08b4b3ddfa9cc6bfa4</email>
                <email type="domain">inrae.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-02520169</idno>
            <idno type="halUri">https://hal.inrae.fr/hal-02520169</idno>
            <idno type="halBibtex">feio:hal-02520169</idno>
            <idno type="halRefHtml">&lt;i&gt;Science of the Total Environment&lt;/i&gt;, 2020, 722, &lt;a target="_blank" href="https://dx.doi.org/10.1016/j.scitotenv.2020.137900"&gt;&amp;#x27E8;10.1016/j.scitotenv.2020.137900&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Science of the Total Environment, 2020, 722, ⟨10.1016/j.scitotenv.2020.137900⟩</idno>
            <availability status="restricted"/>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="SDE">Sciences De l'Environnement</idno>
            <idno type="stamp" n="UNIV-SAVOIE">Université Savoie Mont Blanc</idno>
            <idno type="stamp" n="OSUG">Observatoire des Sciences de la Terre, de l'Univers et de l'Environnement de Grenoble</idno>
            <idno type="stamp" n="GIP-BE">GIP Bretagne Environnement</idno>
            <idno type="stamp" n="INRAE">Institut National de Recherche en Agriculture, Alimentation et Environnement</idno>
            <idno type="stamp" n="INRAE-AQUA">INRAE-AQUA</idno>
            <idno type="stamp" n="CARRTEL">Centre Alpin de Recherche sur les Réseaux Trophiques et Ecosystèmes Limniques</idno>
            <idno type="stamp" n="RESEAU-EAU">Réseau "Systèmes Agricoles et Eau"</idno>
            <idno type="stamp" n="INEE-CNRS">Institut écologie et environnement du CNRS</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">A taxonomy-free approach based on machine learning to assess the quality of rivers with diatoms</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Maria João</forename>
                    <surname>Feio</surname>
                  </persName>
                  <idno type="halauthorid">1592721-0</idno>
                  <affiliation ref="#struct-499412"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Sonia R.Q.</forename>
                    <surname>Serra</surname>
                  </persName>
                  <email type="md5">4800988db0ec20659bce2528dbfc0b54</email>
                  <email type="domain">gmail.com</email>
                  <idno type="idhal" notation="numeric">977516</idno>
                  <idno type="halauthorid" notation="string">1187566-977516</idno>
                  <affiliation ref="#struct-499412"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Andreia</forename>
                    <surname>Mortágua</surname>
                  </persName>
                  <idno type="halauthorid">1812732-0</idno>
                  <affiliation ref="#struct-60947"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Agnes</forename>
                    <surname>Bouchez</surname>
                  </persName>
                  <email type="md5">3db48840ed445dc00b4ffeea1349205e</email>
                  <email type="domain">inrae.fr</email>
                  <idno type="idhal" notation="string">agnes-bouchez</idno>
                  <idno type="idhal" notation="numeric">737113</idno>
                  <idno type="halauthorid" notation="string">49840-737113</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-8802-6966</idno>
                  <idno type="IDREF">https://www.idref.fr/034094660</idno>
                  <orgName ref="#struct-577435"/>
                  <affiliation ref="#struct-1002124"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Frédéric</forename>
                    <surname>Rimet</surname>
                  </persName>
                  <email type="md5">f9c77d0f6eb5f0df870afa97bb477b0e</email>
                  <email type="domain">inrae.fr</email>
                  <idno type="idhal" notation="string">frederic-rimet</idno>
                  <idno type="idhal" notation="numeric">1136618</idno>
                  <idno type="halauthorid" notation="string">669915-1136618</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-5514-869X</idno>
                  <idno type="IDREF">https://www.idref.fr/164303758</idno>
                  <affiliation ref="#struct-1002124"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Valentin</forename>
                    <surname>Vasselon</surname>
                  </persName>
                  <email type="md5">a494ec6acd73f6173c511a16fadbee56</email>
                  <email type="domain">inrae.fr</email>
                  <idno type="idhal" notation="string">valentin-vasselon</idno>
                  <idno type="idhal" notation="numeric">20047</idno>
                  <idno type="halauthorid" notation="string">32952-20047</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-5038-7918</idno>
                  <idno type="IDREF">https://www.idref.fr/253127432</idno>
                  <affiliation ref="#struct-1002124"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Salomé F.P.</forename>
                    <surname>Almeida</surname>
                  </persName>
                  <idno type="halauthorid">1812733-0</idno>
                  <affiliation ref="#struct-60947"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">18830</idno>
                <idno type="issn">0048-9697</idno>
                <idno type="eissn">1879-1026</idno>
                <title level="j">Science of the Total Environment</title>
                <imprint>
                  <publisher>Elsevier</publisher>
                  <biblScope unit="volume">722</biblScope>
                  <date type="datePub">2020</date>
                </imprint>
              </monogr>
              <idno type="doi">10.1016/j.scitotenv.2020.137900</idno>
              <idno type="pubmed">32199386</idno>
              <idno type="wos">000535720900009</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Machine learning</term>
                <term xml:lang="en">OTUs</term>
                <term xml:lang="en">Metabarcoding</term>
                <term xml:lang="en">Bioassessment</term>
                <term xml:lang="en">Rivers</term>
                <term xml:lang="en">HYDRA</term>
              </keywords>
              <classCode scheme="halDomain" n="sde.be">Environmental Sciences/Biodiversity and Ecology</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Diatoms are a compulsory biological quality element in the ecological assessment of rivers according to the Water Framework Directive. The application of current official indices requires the identification of individuals to species or lower rank under a microscope based on the valve morphology. This is a highly time-consuming task, often susceptible of disagreements among analysts. In alternative, the use of DNA metabarcoding combined with High-Throughput Sequencing (HTS) has been proposed. The sequences obtained from environmental DNA are clustered into Operational Taxonomic Units (OTUs), which can be assigned to a taxon using reference databases, and from there calculate biotic indices. However, there is still a high percentage of unassigned OTUs to species due to the incompleteness of reference libraries. Alternatively, we tested a new taxonomy-free approach based on diatom community samples to assess rivers. A combination of three machine learning techniques is used to build models that predict diatom OTUs expected in test sites, under reference conditions, from environmental data. The Observed/Expected OTUs ratio indicates the deviation from reference condition and is converted into a quality class. This approach was never used with diatoms neither with OTUs data. To evaluate its efficiency, we built a model based on OTUs lists (HYDGEN) and another based on taxa lists from morphological identification (HYDMORPH), and also calculated a biotic index (IPS). The models were trained and tested with data from 81 sites (44 reference sites) from central Portugal. Both models were considered accurate (linear regression for Observed and Expected richness: R² ≈ 0.7, interception ≈ 0.8) and sensitive to global anthropogenic disturbance (Rs² &gt; 0.30, p &lt; 0.006 for global disturbance). Yet, the HYDGEN model based on molecular data was sensitive to more types of pressures (such as, changes in land use and habitat quality), which gives promising insights to its use for bioassessment of rivers.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="laboratory" xml:id="struct-499412" status="INCOMING">
          <orgName>Marine and Environmental Sciences Centre</orgName>
          <orgName type="acronym">MARE</orgName>
          <desc>
            <address>
              <addrLine>Largo Marquês de Pombal</addrLine>
              <country key="PT"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-300707" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-60947" status="VALID">
          <idno type="ROR">https://ror.org/00nt41z93</idno>
          <orgName>Universidade de Aveiro = University of Aveiro</orgName>
          <desc>
            <address>
              <addrLine>Campus Universitário de Santiago 3810-193 Aveiro</addrLine>
              <country key="PT"/>
            </address>
            <ref type="url">http://www.ua.pt/</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-1002124" status="VALID">
          <idno type="IdRef">07444879X</idno>
          <idno type="ISNI">0000000406383030</idno>
          <idno type="RNSR">196417921R</idno>
          <idno type="ROR">https://ror.org/037vdeb02</idno>
          <orgName>Centre Alpin de Recherche sur les Réseaux Trophiques et Ecosystèmes Limniques</orgName>
          <orgName type="acronym">CARRTEL</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>74200 Thonon-les-Bains, France - 73376 Le Bourget du Lac, France</addrLine>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-432896" type="direct"/>
            <relation name="UMR0042" active="#struct-577435" type="direct"/>
            <relation active="#struct-1002499" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-300707" status="VALID">
          <idno type="IdRef">026430339</idno>
          <idno type="ISNI">0000 0000 9511 4342</idno>
          <idno type="ROR">https://ror.org/04z8k9a98</idno>
          <idno type="Wikidata">Q368643</idno>
          <orgName>Universidade de Coimbra = University of Coimbra [Portugal]</orgName>
          <orgName type="acronym">UC</orgName>
          <date type="start">1290-01-01</date>
          <desc>
            <address>
              <addrLine>Rua Larga, Edifício Faculdade de Medicina (R/Ch. Esq.), 3004-504 Coimbra</addrLine>
              <country key="PT"/>
            </address>
            <ref type="url">http://www.uc.pt/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-432896" status="VALID">
          <idno type="ROR">https://ror.org/04gqg1a07</idno>
          <orgName>Université Savoie Mont Blanc</orgName>
          <orgName type="acronym">USMB [Université de Savoie] [Université de Chambéry]</orgName>
          <date type="start">1979-10-01</date>
          <desc>
            <address>
              <addrLine>27, rue Marcoz - 73000 Chambéry</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.univ-smb.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-577435" status="VALID">
          <idno type="ROR">https://ror.org/003vg9w96</idno>
          <orgName>Institut National de Recherche pour l’Agriculture, l’Alimentation et l’Environnement</orgName>
          <orgName type="acronym">INRAE</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
        <org type="institution" xml:id="struct-1002499" status="VALID">
          <idno type="IdRef">186067518</idno>
          <idno type="ISNI">0000000417832786</idno>
          <idno type="RNSR">200310841A</idno>
          <idno type="ROR">https://ror.org/03vte9x46</idno>
          <orgName>Observatoire des Sciences de l'Univers de Grenoble</orgName>
          <orgName type="acronym">Fédération OSUG</orgName>
          <date type="start">1985-01-01</date>
          <desc>
            <address>
              <addrLine>Domaine universitaire, bâtiment OSUG-D, 122 rue de la piscine, 38 400 Saint Martin d’Hères</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.osug.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>