<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-03034477</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-24T04:51:46+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Could spatial features help the matching of textual data?</title>
            <author role="crp">
              <persName>
                <forename type="first">Jacques</forename>
                <surname>Fize</surname>
              </persName>
              <email type="md5">6bb98f59aa4944ac232253e31d5cf488</email>
              <email type="domain">teledetection.fr</email>
              <idno type="idhal" notation="numeric">1564702</idno>
              <idno type="halauthorid" notation="string">1264889-1564702</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-1783-934X</idno>
              <affiliation ref="#struct-1002492"/>
              <affiliation ref="#struct-420902"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Mathieu</forename>
                <surname>Roche</surname>
              </persName>
              <email type="md5">f3369d939820713d626eff81740a4eeb</email>
              <email type="domain">cirad.fr</email>
              <idno type="idhal" notation="string">mathieu-roche</idno>
              <idno type="idhal" notation="numeric">4967</idno>
              <idno type="halauthorid" notation="string">20190-4967</idno>
              <idno type="IDREF">https://www.idref.fr/09042087X</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-3272-8568</idno>
              <affiliation ref="#struct-1002492"/>
              <affiliation ref="#struct-420902"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Maguelonne</forename>
                <surname>Teisseire</surname>
              </persName>
              <email type="md5">bfed7f55123bad5a6ddc404f64f1a920</email>
              <email type="domain">teledetection.fr</email>
              <idno type="idhal" notation="string">maguelonne-teisseire</idno>
              <idno type="idhal" notation="numeric">8645</idno>
              <idno type="halauthorid" notation="string">24802-8645</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-9313-6414</idno>
              <idno type="IDREF">https://www.idref.fr/117436593</idno>
              <idno type="VIAF">https://viaf.org/viaf/164498326</idno>
              <idno type="ISNI">http://isni.org/isni/0000000117179295</idno>
              <affiliation ref="#struct-1002492"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Isabelle</forename>
                <surname>NAULT</surname>
              </persName>
              <email type="md5">257a01240d864fba4fd9d7a650162fd2</email>
              <email type="domain">inrae.fr</email>
            </editor>
            <funder ref="#projanr-47613"/>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2022-03-29 15:16:11</date>
              <date type="whenModified">2025-12-10 17:52:28</date>
              <date type="whenReleased">2022-03-29 15:16:11</date>
              <date type="whenProduced">2020-09-30</date>
              <fs>
                <f name="inra_etatDocument_local" notation="string" n="VE">
                  <string>publisher version</string>
                </f>
                <f name="inra_publicVise_local" notation="string" n="SC">
                  <string>Scientists</string>
                </f>
              </fs>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="439979">
                <persName>
                  <forename>Isabelle</forename>
                  <surname>NAULT</surname>
                </persName>
                <email type="md5">257a01240d864fba4fd9d7a650162fd2</email>
                <email type="domain">inrae.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-03034477</idno>
            <idno type="halUri">https://hal.inrae.fr/hal-03034477</idno>
            <idno type="halBibtex">fize:hal-03034477</idno>
            <idno type="halRefHtml">&lt;i&gt;Intelligent Data Analysis&lt;/i&gt;, 2020, 24 (5), pp.1043-1064. &lt;a target="_blank" href="https://dx.doi.org/10.3233/IDA-194749"&gt;&amp;#x27E8;10.3233/IDA-194749&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Intelligent Data Analysis, 2020, 24 (5), pp.1043-1064. ⟨10.3233/IDA-194749⟩</idno>
            <availability status="restricted"/>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CIRAD">CIRAD - Centre de coopération internationale en recherche agronomique pour le développement</idno>
            <idno type="stamp" n="AGROPARISTECH">AgroParisTech</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="TETIS">TETIS</idno>
            <idno type="stamp" n="AGREENIUM">Archive ouverte en agrobiosciences</idno>
            <idno type="stamp" n="INRAE">Institut National de Recherche en Agriculture, Alimentation et Environnement</idno>
            <idno type="stamp" n="INRAEOCCITANIEMONTPELLIER" corresp="INRAE">INRAE Occitanie Montpellier</idno>
            <idno type="stamp" n="ANR">ANR</idno>
            <idno type="stamp" n="TEST3-HALCNRS">TEST3-HALCNRS</idno>
            <idno type="stamp" n="TEST4-HALCNRS">collection test</idno>
            <idno type="stamp" n="MATHNUM">Département MathNum</idno>
            <idno type="stamp" n="TEST5-HALCNRS">collection test 5</idno>
            <idno type="stamp" n="INEE-CNRS">Institut écologie et environnement du CNRS</idno>
            <idno type="stamp" n="TEST-MATHNUM">Test MathNum</idno>
            <idno type="stamp" n="DIGITAG">#DigitAg, l’Institut Convergences Agriculture Numérique ou Digital Agriculture Convergence Lab</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Could spatial features help the matching of textual data?</title>
                <author role="crp">
                  <persName>
                    <forename type="first">Jacques</forename>
                    <surname>Fize</surname>
                  </persName>
                  <email type="md5">6bb98f59aa4944ac232253e31d5cf488</email>
                  <email type="domain">teledetection.fr</email>
                  <idno type="idhal" notation="numeric">1564702</idno>
                  <idno type="halauthorid" notation="string">1264889-1564702</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-1783-934X</idno>
                  <affiliation ref="#struct-1002492"/>
                  <affiliation ref="#struct-420902"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Mathieu</forename>
                    <surname>Roche</surname>
                  </persName>
                  <email type="md5">f3369d939820713d626eff81740a4eeb</email>
                  <email type="domain">cirad.fr</email>
                  <idno type="idhal" notation="string">mathieu-roche</idno>
                  <idno type="idhal" notation="numeric">4967</idno>
                  <idno type="halauthorid" notation="string">20190-4967</idno>
                  <idno type="IDREF">https://www.idref.fr/09042087X</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-3272-8568</idno>
                  <affiliation ref="#struct-1002492"/>
                  <affiliation ref="#struct-420902"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Maguelonne</forename>
                    <surname>Teisseire</surname>
                  </persName>
                  <email type="md5">bfed7f55123bad5a6ddc404f64f1a920</email>
                  <email type="domain">teledetection.fr</email>
                  <idno type="idhal" notation="string">maguelonne-teisseire</idno>
                  <idno type="idhal" notation="numeric">8645</idno>
                  <idno type="halauthorid" notation="string">24802-8645</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-9313-6414</idno>
                  <idno type="IDREF">https://www.idref.fr/117436593</idno>
                  <idno type="VIAF">https://viaf.org/viaf/164498326</idno>
                  <idno type="ISNI">http://isni.org/isni/0000000117179295</idno>
                  <affiliation ref="#struct-1002492"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">14249</idno>
                <idno type="issn">1088-467X</idno>
                <title level="j">Intelligent Data Analysis</title>
                <imprint>
                  <publisher>IOS Press</publisher>
                  <biblScope unit="volume">24</biblScope>
                  <biblScope unit="issue">5</biblScope>
                  <biblScope unit="pp">1043-1064</biblScope>
                  <date type="datePub">2020-09-30</date>
                </imprint>
              </monogr>
              <idno type="doi">10.3233/IDA-194749</idno>
              <idno type="wos">000582728600006</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">spatial similarity</term>
                <term xml:lang="en">graph matching</term>
                <term xml:lang="en">text matching</term>
                <term xml:lang="en">heterogeneity</term>
                <term xml:lang="en">Textual data</term>
              </keywords>
              <classCode scheme="halDomain" n="info">Computer Science [cs]</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Textual data is available to an increasing extent through different media (social networks, companies data, data catalogues, etc.). New information extraction methods are needed since these new resources are highly heterogeneous. In this article, we propose a text matching process based on spatial features and assessed through heterogeneous textual data. Besides being compatible with heterogeneous data, it comprises two contributions: first, spatial information is extracted for comparison purposes and subsequently stored in a dedicated spatial textual representation (STR); and then two transformations are applied on STR to improve the spatial similarity estimation. This article outlines the proposed approach with new contributions: (i) a new geocoding method using general co-occurrences between entities, and (ii) a thorough evaluation followed by (iii) an in-depth discussion. The results obtained on two corpora demonstrate that good spatial matches (approximately 80% precision on major criteria) can be obtained between the most similar STRs with further enhancement achieved via STR transformation.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="laboratory" xml:id="struct-1002492" status="VALID">
          <idno type="IdRef">181148498</idno>
          <idno type="RNSR">200718239Z</idno>
          <idno type="ROR">https://ror.org/0458hw939</idno>
          <orgName>Territoires, Environnement, Télédétection et Information Spatiale</orgName>
          <orgName type="acronym">UMR TETIS</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>Maison de la télédétection - 500 rue Jean-François Breton - 34093 Montpellier Cedex 5</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.umr-tetis.fr/</ref>
          </desc>
          <listRelation>
            <relation name="UMR91" active="#struct-11574" type="direct"/>
            <relation active="#struct-148117" type="direct"/>
            <relation name="UMR9000" active="#struct-441569" type="direct"/>
            <relation name="UMR1470" active="#struct-577435" type="direct"/>
          </listRelation>
        </org>
        <org type="regrouplaboratory" xml:id="struct-420902" status="VALID">
          <orgName>Département Environnements et Sociétés</orgName>
          <orgName type="acronym">Cirad-ES</orgName>
          <date type="start">2007-01-01</date>
          <desc>
            <address>
              <addrLine>Campus international de Baillarguet TA C-DIR / B 34398 Montpellier Cedex 5 France</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cirad.fr/qui-sommes-nous/organigramme/departements-scientifiques/environnements-et-societes-es/presentation</ref>
          </desc>
          <listRelation>
            <relation active="#struct-11574" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-11574" status="VALID">
          <idno type="ISNI">0000000121539871</idno>
          <idno type="ROR">https://ror.org/05kpkpg04</idno>
          <orgName>Centre de Coopération Internationale en Recherche Agronomique pour le Développement</orgName>
          <orgName type="acronym">Cirad</orgName>
          <date type="start">1984-06-01</date>
          <desc>
            <address>
              <addrLine>Siège 42, rue Scheffer 75116 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.cirad.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-148117" status="VALID">
          <idno type="IdRef">139408088</idno>
          <idno type="ROR">https://ror.org/02kbmgc12</idno>
          <orgName>AgroParisTech</orgName>
          <date type="start">2007-01-01</date>
          <desc>
            <address>
              <addrLine>22 place de l'Agronomie CS 20040 91123 Palaiseau cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.agroparistech.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-577435" status="VALID">
          <idno type="ROR">https://ror.org/003vg9w96</idno>
          <orgName>Institut National de Recherche pour l’Agriculture, l’Alimentation et l’Environnement</orgName>
          <orgName type="acronym">INRAE</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
      </listOrg>
      <listOrg type="projects">
        <org type="anrProject" xml:id="projanr-47613" status="VALID">
          <idno type="anr">ANR-16-CONV-0004</idno>
          <orgName>DIGITAG</orgName>
          <desc>Institut Convergences en Agriculture Numérique</desc>
          <date type="start">2016</date>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>