Item talk:Q318447

From geokb

{

 "DOI": {
   "doi": "10.5066/p1bdpxkz",
   "identifiers": [],
   "creators": [
     {
       "name": "Emily T Abbott",
       "nameType": "Personal",
       "affiliation": [
         "Contractor to the United States Geological Survey"
       ],
       "nameIdentifiers": [
         {
           "schemeUri": "https://orcid.org",
           "nameIdentifier": null,
           "nameIdentifierScheme": "ORCID"
         }
       ]
     }
   ],
   "titles": [
     {
       "title": "Grammar transformations of topographic feature type annotations of the U.S. to structured graph data"
     }
   ],
   "publisher": "U.S. Geological Survey",
   "container": {},
   "publicationYear": 2024,
   "subjects": [
     {
       "subject": "data management"
     },
     {
       "subject": "information sciences"
     },
     {
       "subject": "data release"
     },
     {
       "subject": "datasets"
     },
     {
       "subject": "structure"
     },
     {
       "subject": "society"
     },
     {
       "subject": "information architecture"
     },
     {
       "subject": "data integration"
     },
     {
       "subject": "improvement of scientific data usability"
     },
     {
       "subject": "information technology methods"
     },
     {
       "subject": "knowledge organization system development"
     },
     {
       "subject": "scientific data usability"
     },
     {
       "subject": "natural language processing"
     },
     {
       "subject": "knowledge graph"
     },
     {
       "subject": "semantic knowledge graph"
     },
     {
       "subject": "semantic data processing"
     },
     {
       "subject": "informal semantics"
     },
     {
       "subject": "geospatial lexicon"
     }
   ],
   "contributors": [],
   "dates": [],
   "language": null,
   "types": {
     "ris": "DATA",
     "bibtex": "misc",
     "citeproc": "dataset",
     "schemaOrg": "Dataset",
     "resourceType": "Dataset",
     "resourceTypeGeneral": "Dataset"
   },
   "relatedIdentifiers": [],
   "relatedItems": [],
   "sizes": [],
   "formats": [
     "xml",
     "csv"
   ],
   "version": null,
   "rightsList": [
     {
       "rights": "Creative Commons Zero v1.0 Universal",
       "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
       "schemeUri": "https://spdx.org/licenses/",
       "rightsIdentifier": "cc0-1.0",
       "rightsIdentifierScheme": "SPDX"
     }
   ],
   "descriptions": [
     {
       "description": "These data were used to examine grammatical structures and patterns within a set of geospatial glossary definitions. Objectives of our study were to analyze the semantic structure of input definitions, use this information to build triple structures of RDF graph data, upload our lexicon to a knowledge graph software, and perform SPARQL queries on the data. Upon completion of this study, SPARQL queries were proven to effectively convey graph triples which displayed semantic significance. These data represent and characterize the lexicon of our input text which are used to form graph triples. These data were collected in 2024 by passing text through multiple Python programs utilizing spaCy (a natural language processing library) and its pre-trained English transformer pipeline. Before data was processed by the Python programs, input definitions were first rewritten as natural language and formatted as tabular data. Passages were then tokenized and characterized by their part-of-speech, tag, dependency relation, dependency head, and lemma. Each word within the lexicon was tokenized. A stop-words list was utilized only to remove punctuation and symbols from the text, excluding hyphenated words (ex. bowl-shaped) which remained as such. The tokens\u2019 lemmas were then aggregated and totaled to find their recurrences within the lexicon. This procedure was repeated for tokenizing noun chunks using the same glossary definitions.",
       "descriptionType": "Abstract"
     }
   ],
   "geoLocations": [],
   "fundingReferences": [],
   "url": "https://www.sciencebase.gov/catalog/item/664cb465d34e1955f5a4f57e",
   "contentUrl": null,
   "metadataVersion": 1,
   "schemaVersion": "http://datacite.org/schema/kernel-4",
   "source": "api",
   "isActive": true,
   "state": "findable",
   "reason": null,
   "viewCount": 0,
   "downloadCount": 0,
   "referenceCount": 0,
   "citationCount": 0,
   "partCount": 0,
   "partOfCount": 0,
   "versionCount": 0,
   "versionOfCount": 0,
   "created": "2024-07-12T17:47:57Z",
   "registered": "2024-07-12T17:47:57Z",
   "published": null,
   "updated": "2024-07-12T17:48:06Z"
 }

}