<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<processing-meta>
<custom-meta-group content-type="composition">
<custom-meta specific-use="newgen" xlink:href="https://www.newgen.co/">
<meta-name>Composition Vendor</meta-name>
<meta-value>Newgen KnowledgeWorks (P) Ltd.</meta-value>
</custom-meta>
</custom-meta-group>
</processing-meta>
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLOS Digit Health</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosdh</journal-id>
<journal-title-group>
<journal-title>PLOS Digital Health</journal-title>
</journal-title-group>
<issn pub-type="epub">2767-3170</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pdig.0000755</article-id>
<article-id pub-id-type="publisher-id">PDIG-D-24-00401</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Radiology and imaging</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Diagnostic medicine</subject><subj-group><subject>Diagnostic radiology</subject><subj-group><subject>Magnetic resonance imaging</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Imaging techniques</subject><subj-group><subject>Diagnostic radiology</subject><subj-group><subject>Magnetic resonance imaging</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Radiology and imaging</subject><subj-group><subject>Diagnostic radiology</subject><subj-group><subject>Magnetic resonance imaging</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Malignant tumors</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancer treatment</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Chemistry</subject><subj-group><subject>Chemical compounds</subject><subj-group><subject>Organic compounds</subject><subj-group><subject>Steroids</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Chemistry</subject><subj-group><subject>Organic chemistry</subject><subj-group><subject>Organic compounds</subject><subj-group><subject>Steroids</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Information technology</subject><subj-group><subject>Natural language processing</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>From manual clinical criteria to machine learning algorithms: Comparing outcome endpoints derived from diverse electronic health record data modalities</article-title>
<alt-title alt-title-type="running-head">Automating endpoint extraction using diverse data modalities and machine learning algorithms</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-7042-5766</contrib-id>
<name name-style="western">
<surname>Chappidi</surname>
<given-names>Shreya</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Belue</surname>
<given-names>Mason J.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Harmon</surname>
<given-names>Stephanie A.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Jagasia</surname>
<given-names>Sarisha</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Zhuge</surname>
<given-names>Ying</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Tasci</surname>
<given-names>Erdal</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Turkbey</surname>
<given-names>Baris</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/resources/">Resources</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Singh</surname>
<given-names>Jatinder</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Camphausen</surname>
<given-names>Kevin</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<name name-style="western">
<surname>Krauze</surname>
<given-names>Andra V.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/resources/">Resources</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Radiation Oncology Branch, Center for Cancer Research, National Cancer Institute, National Institutes of Health, Bethesda, Maryland, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of Computer Science and Technology, University of Cambridge, Cambridge, United Kingdom</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Artificial Intelligence Resource, Center for Cancer Research, National Cancer Institute, National Institutes of Health, Bethesda, Maryland, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Research Center Trustworthy Data Science and Security, University Alliance Ruhr, Duisburg-Essen, Germany</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Grosan</surname>
<given-names>Crina</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/></contrib>
</contrib-group>
<aff id="edit1"><addr-line>King’s College London, UNITED KINGDOM OF GREAT BRITAIN AND NORTHERN IRELAND</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">andra.krauze@nih.gov</email></corresp>
</author-notes>
<pub-date pub-type="epub"><day>14</day><month>5</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><month>5</month><year>2025</year></pub-date>
<volume>4</volume>
<issue>5</issue>
<elocation-id>e0000755</elocation-id>
<history>
<date date-type="received"><day>17</day><month>9</month><year>2024</year></date>
<date date-type="accepted"><day>17</day><month>1</month><year>2025</year></date>
</history>
<permissions>
<copyright-year>2025</copyright-year>
<copyright-holder>Chappidi et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license>
</permissions>
<self-uri content-type="pdf" xlink:href="pdig.0000755.pdf"/>
<abstract>
<sec id="sec040">
<title>Background</title>
<p>Progression free survival (PFS) is a critical clinical outcome endpoint during cancer management and treatment evaluation. Yet, PFS is often missing from publicly available datasets due to the current subjective, expert, and time-intensive nature of generating PFS metrics. Given emerging research in multi-modal machine learning (ML), we explored the benefits and challenges associated with mining different electronic health record (EHR) data modalities and automating extraction of PFS metrics via ML algorithms.</p>
</sec>
<sec id="sec041">
<title>Methods</title>
<p>We analyzed EHR data from 92 pathology-proven GBM patients, obtaining 233 corticosteroid prescriptions, 2080 radiology reports, and 743 brain MRI scans. Three methods were developed to derive clinical PFS: 1) frequency analysis of corticosteroid prescriptions, 2) natural language processing (NLP) of reports, and 3) computer vision (CV) volumetric analysis of imaging. Outputs from these methods were compared to manually annotated clinical guideline PFS metrics.</p>
</sec>
<sec id="sec042">
<title>Results</title>
<p>Employing data-driven methods, standalone progression rates were 63% (prescription), 78% (NLP), and 54% (CV), compared to the 99% progression rate from manually applied clinical guidelines using integrated data sources. The prescription method identified progression an average of 5.2 months later than the clinical standard, while the CV and NLP algorithms identified progression earlier by 2.6 and 6.9 months, respectively. While lesion growth is a clinical guideline progression indicator, only half of patients exhibited increasing contrast-enhancing tumor volumes during scan-based CV analysis.</p>
</sec>
<sec id="sec043">
<title>Conclusion</title>
<p>Our results indicate that data-driven algorithms can extract tumor progression outcomes from existing EHR data. However, ML methods are subject to varying availability bias, supporting contextual information, and pre-processing resource burdens that influence the extracted PFS endpoint distributions. Our scan-based CV results also suggest that the automation of clinical criteria may not align with human intuition. Our findings indicate a need for improved data source integration, validation, and revisiting of clinical criteria in parallel to multi-modal ML algorithm development.</p>
</sec>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Progression free survival is an important outcome in cancer research used to evaluate new treatments. However, this data is often not publicly available as it requires labor-intensive, subjective judgement from clinicians. Different data modalities, such as text reports and imaging, stored in the electronic health record could be used to automate the extraction of progression events from a patient’s medical record. This paper explores three automated and/or machine learning (ML) methods to extract progression from integrated electronic health data, including 1) analysis of patient prescription frequencies, 2) natural language processing algorithms applied to radiology reports, and 3) computer vision tumor segmentation algorithms applied to brain MRI scans. These automated results were compared to the current manual clinical standard method of determining progression. Our study found that various ML algorithms can automate the extraction of progression outcomes from diverse patient data. Yet, manual evaluation identified progression at a higher rate compared to data-driven algorithms. Our results indicated that “ground truth” labels obtained for training ML algorithms are influenced by both the data source and method used to obtain them. Future research should consider that varying data sources, availability, and reliability can create methodological bias during ML projects.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source><institution>National Cancer Institute Intramural Research Program</institution>
</funding-source><award-id>010990</award-id>
<principal-award-recipient><name name-style="western">
<surname>Camphausen</surname> <given-names>Kevin</given-names></name></principal-award-recipient></award-group>
<funding-statement>This work was supported by the National Institutes of Health (ZID BC 010990 to KC). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="1"/>
<page-count count="29"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All imaging sequences processed in this paper are part of the University of Pennsylvania’s Federated Tumor Segmentation (FeTS) data sharing and federated learning initiative. These scans will be available for application of pre-trained segmentation models and later appear in The Cancer Imaging Archive (TCIA). More information about this FeTS initiative project can be found at <ext-link ext-link-type="uri" xlink:href="https://fets-ai.github.io/FL-PoST/" xlink:type="simple">https://fets-ai.github.io/FL-PoST/</ext-link> or by contacting study lead Even Calabrese at <email xlink:type="simple">admin@fets.ai</email>. All associated volumes extracted from the implemented tumor segmentation model will be made available in supplemental material. Full radiology reports cannot be shared due to institutional and patient privacy policies. However, interested researchers may reach out to <email xlink:type="simple">NCIOfficeofDataSharing@mail.nih.gov</email> with reasonable requests for report data, including de-identified terms and their frequencies associated with each report. De-identified steroid prescription data, including name and dosage, clinical standard PFS dates, tumor segmentation volumes, and term frequencies extracted from each report will be reported per data modality item.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Glioblastoma multiforme (GBM), a form of high-grade glioma, is amongst the most aggressive brain tumors with a median survival of 14 months [<xref ref-type="bibr" rid="pdig.0000755.ref001">1</xref>]. Yet, brain tumor outcomes have seen limited improvement despite ongoing imaging, radiation therapy, and systemic management advancements. The ability to identify biomarkers associated with progression and treatment response is limited by data that often only includes survival as outcome endpoints.</p>
<p>Overall survival (OS) is commonly employed in patient datasets given its simpler calculation from date of diagnosis to date of death. However, OS is an imperfect outcome endpoint as it reflects the summation of multiple interventions beyond standard of care (SOC) upfront chemoirradiation (CRT), such as potential re-resection and use of study agents upon recurrence. Conversely, progression free survival (PFS), defined as the time between diagnosis to disease progression, is derived from a complex set of data sources using a subjective, labor-intensive process that surveys a patient’s medical record [<xref ref-type="bibr" rid="pdig.0000755.ref002">2</xref>]. PFS data is instrumental for guiding disease management and biomarker research as it can indicate treatment response or failure, allowing for rapid intervention to treat lower tumor burdens or initiation of novel treatment options [<xref ref-type="bibr" rid="pdig.0000755.ref003">3</xref>].</p>
<sec id="sec002">
<title>Clinical standards for generating PFS data</title>
<p>Current neuro-oncology practice standards involve using Response Assessment in Neuro-Oncology (RANO) criteria to determine progression for glioma patients [<xref ref-type="bibr" rid="pdig.0000755.ref002">2</xref>]. These criteria allow for a combination of clinical and imaging features. Progression is defined by Wen <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref002">2</xref>] as including any of the following factors:</p>
<list list-type="order">
<list-item>
<p><inline-formula id="pdig.0000755.e001"><alternatives><graphic id="pdig.0000755.e001g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e001.tif" xlink:type="simple"/><mml:math display="inline" id="M1"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>25</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increase in T1 gadolinium enhancing disease,</p>
</list-item>
<list-item>
<p>increasing T2/FLAIR volume,</p>
</list-item>
<list-item>
<p>any new lesions, and/or</p>
</list-item>
<list-item>
<p>deteriorating clinical status.</p>
</list-item>
</list>
<p>Determining true progression in glioma is difficult due to the temporary clinical and radiographic deterioration that patients may experience following completion of CRT. This deterioration is termed <italic>pseudoprogression</italic> if these symptoms result from acute effects of management and reduce over time [<xref ref-type="bibr" rid="pdig.0000755.ref004">4</xref>–<xref ref-type="bibr" rid="pdig.0000755.ref006">6</xref>]. While previous RECIST progression criteria did not account for deteriorating clinical factors, the MacDonald criteria update eventually incorporated clinical status and corticosteroid administration [<xref ref-type="bibr" rid="pdig.0000755.ref007">7</xref>] and successive RANO iterations added caveats for pseudoprogression. Despite these changes, there are still limitations towards obtaining consensus on tumor progression. For example, progression of disease is based on a <inline-formula id="pdig.0000755.e002"><alternatives><graphic id="pdig.0000755.e002g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e002.tif" xlink:type="simple"/><mml:math display="inline" id="M2"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>25</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increase in the product of perpendicular diameters on contrast enhanced imaging, which can be subjective and represent pseudoprogression without changes outside the radiotherapy (RT) field. Moreover, it should be noted that the extent and location of the RT dose cloud are not readily available for visualization to radiologists or even some neuro-oncology teams. Thus, tumor size or lesion counts are often not explicitly captured or recorded in a patient’s electronic health record (EHR). 
While ongoing revisions to RANO currently include adapting to the use of immunotherapeutics and molecular disease classification [<xref ref-type="bibr" rid="pdig.0000755.ref008">8</xref>], there are still limitations in the quantification of tumors identified by imaging.</p>
<p>Given the aforementioned limitations, non-clinical-trial glioma data sets do not have a straightforward progression date for patients unless retrospectively assigned in small cohorts. Most publicly available brain tumor data sets do not include PFS data, including The Cancer Genome Atlas (TCGA) [<xref ref-type="bibr" rid="pdig.0000755.ref009">9</xref>], The Cancer Imaging Archive (TCIA) [<xref ref-type="bibr" rid="pdig.0000755.ref010">10</xref>], Georgetown Database of Cancer (G-DOC) [<xref ref-type="bibr" rid="pdig.0000755.ref011">11</xref>], and the Chinese Glioma Genome Atlas (CGGA) [<xref ref-type="bibr" rid="pdig.0000755.ref012">12</xref>].</p>
</sec>
<sec id="sec003">
<title>Data integration and multi-modal machine learning (ML)</title>
<p>Clinical application of RANO criteria involves review of multiple EHR data sources and modalities by skilled clinicians. This process reflects human attempts to integrate and extract insights from multiple modalities of medical data, including scans, radiology reports, progress notes, and other clinical context over time and potentially across institutions. ML research has begun to focus on multi-modal algorithms with the goal of more closely aligning with clinical practice, where a totality of information is processed during diagnosis and treatment [<xref ref-type="bibr" rid="pdig.0000755.ref013">13</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref014">14</xref>]. Some studies have demonstrated that multi-modal algorithms achieve superior performance over unimodal algorithms trained on a singular stream of data [<xref ref-type="bibr" rid="pdig.0000755.ref015">15</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref016">16</xref>]. However, it is not yet clear how various data modalities may influence the predictions of a multi-modal algorithm, either due to the information encoded within the data or biases surrounding the data collection process. As a result, data integration for multi-modal ML analysis has remained underexplored, particularly in the areas of endpoint extraction and brain cancer.</p>
<p>Recent attempts to compute PFS metrics from data using artificial intelligence (AI) have successfully used clinical features [<xref ref-type="bibr" rid="pdig.0000755.ref017">17</xref>] and radiomic features extracted from brain MRI scans, including texture and morphological features [<xref ref-type="bibr" rid="pdig.0000755.ref018">18</xref>] and quantified tumor volumes [<xref ref-type="bibr" rid="pdig.0000755.ref019">19</xref>]. While Kwiatkowska-Miernik <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref018">18</xref>] identify that four out of six of their models demonstrate appropriate predictive performance via mean absolute percentage error, they do not detail specific criteria applied to obtain ground truth progression (“determined based on follow-up MRI exams evaluated by an experienced radiologist”) and only evaluate a cohort of 51 patients meeting their inclusion criteria. Meanwhile, Kickingereder <italic>et al</italic>. obtain 87% agreement between automated neural network and radiologist-drawn volumes; however, this volumetric approach demonstrated lower agreement (between 73% and 51% depending on the test set) with manually applied RANO criteria, indicating a need to explore other volumetric approaches or definitions [<xref ref-type="bibr" rid="pdig.0000755.ref019">19</xref>].</p>
<p>At the same time, others suggest that these ML outcome prediction studies may lack complete inclusion of histologic, pathologic, and molecular data sources that mirror clinical practice [<xref ref-type="bibr" rid="pdig.0000755.ref020">20</xref>]. Some retrospective analyses on clinical GBM data sets have integrated imaging data sources including histopathology imaging [<xref ref-type="bibr" rid="pdig.0000755.ref021">21</xref>] and genetic alterations [<xref ref-type="bibr" rid="pdig.0000755.ref022">22</xref>]. Yet, these studies generally do not study overall or progression free survival as an outcome endpoint. Clinical practice guidelines currently do not stipulate PFS capture by means other than manually applied RANO criteria. Thus, there is a need for approaches to increase PFS availability and further mine for linkages between progression and imaging, -omic, and other clinical features.</p>
</sec>
<sec id="sec004">
<title>Data capture in the electronic health record during cancer treatment and management</title>
<p>Several clinical data elements are collected and stored over the natural history course of a patient’s cancer diagnosis (<xref ref-type="fig" rid="pdig.0000755.g001">Fig 1</xref>). The following subsections discuss cancer standard of care and corresponding diverse data sources that could be used to obtain progression free survival.</p>
<fig id="pdig.0000755.g001" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g001</object-id><label>Fig 1</label><caption><title>Sample cancer patient treatment timeline with data generated and captured within the EHR.</title></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g001.tif" xlink:type="simple"/></fig>
<sec id="sec005">
<title>Clinical standard of care.</title>
<p>For glioblastoma multiforme (GBM), treatment standard of care involves maximal surgical resection followed by radiotherapy (RT) with administration of concurrent and maintenance temozolomide (altogether termed chemoirradiation (CRT)). Following completion of CRT, patients are followed clinically with contrast-enhanced MRI completed 2-8 weeks post CRT, then repeated every 2-4 months for 3 years, and then every 3-6 months indefinitely per national and international guidelines [<xref ref-type="bibr" rid="pdig.0000755.ref023">23</xref>].</p>
</sec>
<sec id="sec006">
<title>Tabular prescription data.</title>
<p>GBM patients often experience devastating neurological symptoms and are usually prescribed corticosteroids to manage these acute effects. Corticosteroids act by decreasing inflammation in the brain and may be administered prior to surgical intervention, post-surgical intervention (most common), during CRT, following completion of CRT to manage acute effects, or upon tumor progression. Oral dexamethasone is the most commonly prescribed, while intravenous loading may be selected when a more rapid effect or loading dose is indicated. A “tapering schedule” for gradual discontinuation of dexamethasone is employed to mitigate potential adrenal insufficiency and worsening of neurological symptoms. Corticosteroid prescriptions are captured in the EHR and their use can theoretically be correlated with radiographic report findings and clinical records. However, there is widespread heterogeneity in prescription patterns and tapering schedules, as well as subjectivity involved in the initiation of steroids. Thus, steroid usage is often difficult to implement and retrospectively interpret.</p>
</sec>
<sec id="sec007">
<title>Free text documents.</title>
<p>Hundreds of documents can be generated over the course of a patient’s cancer diagnosis and treatment (<xref ref-type="fig" rid="pdig.0000755.g001">Fig 1</xref>). EHR systems are often dated and lack infrastructure to share information with other systems which limits bulk and longitudinal analysis. Free text documents held within the EHR are often reviewed manually by a clinician prior to a patient’s visit or update in care. However, this process can be repetitive, time-consuming, and prone to error as details may be omitted or redundant between documents. As a result, natural language processing of clinical documents has been an increasingly popular method to improve efficiency of medical record analysis.</p>
</sec>
<sec id="sec008">
<title>Imaging.</title>
<p>Numerous medical images from various imaging modalities are collected over the course of a patient’s diagnosis, treatment, and care management, including magnetic resonance imaging (MRI), computed tomography (CT), and cone-beam CT scans (<xref ref-type="fig" rid="pdig.0000755.g001">Fig 1</xref>). However, cone-beam CT scans are not typically available outside of the radiation oncology department where they are used for treatment verification. Moreover, while RANO criteria indicate a quantitative metric to observe 25% volume increases in enhancing lesions, in practice, it is uncommon to quantify lesions or other enhancing regions identified on MRI scans, especially in community or non-neuro-oncology specialized settings. Moreover, when measurements are obtained, the rate of agreement between radiologists is generally <inline-formula id="pdig.0000755.e003"><alternatives><graphic id="pdig.0000755.e003g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e003.tif" xlink:type="simple"/><mml:math display="inline" id="M3"><mml:mrow><mml:mo>≤</mml:mo><mml:mn>50</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> which limits the utility of these metrics during analysis [<xref ref-type="bibr" rid="pdig.0000755.ref024">24</xref>]. In addition, radiologists are generally not privy to radiation treatment dose cloud data, such as the 80% isodose line which can indicate recurrent disease outside of the high dose field, making it more difficult to distinguish <italic>pseudoprogression</italic> from progression given any increased enhancement [<xref ref-type="bibr" rid="pdig.0000755.ref006">6</xref>].</p>
</sec>
<sec id="sec009">
<title>Human influences on EHR data.</title>
<p>Automated methods to derive data outcome labels may not necessarily be “objective” as EHR data sources are subject to inclusion bias, representativeness issues, and other types of biases [<xref ref-type="bibr" rid="pdig.0000755.ref025">25</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref026">26</xref>]. The increased accessibility of radiology reports compared to their source imaging files could make a free-text, natural language processing (NLP)-based method for obtaining progression metrics more desirable due to increased data point availability within a given patient timeline. However, radiology reports usually reflect a single author’s judgment based on the medical conventions of the time [<xref ref-type="bibr" rid="pdig.0000755.ref027">27</xref>] and studies document differences in interrater reliability during imaging analysis [<xref ref-type="bibr" rid="pdig.0000755.ref028">28</xref>]. Sole reliance on radiology reports can create positive bias in a reconstruction of a patient’s medical history from EHR data, as we only have access to what was explicitly measured and included in the report [<xref ref-type="bibr" rid="pdig.0000755.ref025">25</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref026">26</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref029">29</xref>]. Human-generated textual data can mirror issues with the “file drawer problem” in scientific publishing where information deemed as non-notable cannot be accessed by other potentially interested parties [<xref ref-type="bibr" rid="pdig.0000755.ref030">30</xref>]. This reflects a common tension in medical machine learning (ML) where data annotation requirements for ML include information about the presence and absence of every possible diagnostic option, as opposed to clinical practice, where clinicians usually only document notable features that require further attention or potential follow-up [<xref ref-type="bibr" rid="pdig.0000755.ref027">27</xref>]. 
Other datapoints, such as MRI scan frequency and acquired scan parameters, are also constrained by provider-based practices at the time including follow-up frequency and machine availability.</p>
</sec>
</sec>
<sec id="sec010">
<title>Automated approaches to derive PFS</title>
<p>Over the last decade, the medical field has seen an explosion in accessible and queryable EHR data, though there are still large gaps in retroactively transferring older patient data and integrating various sources. Barriers to digitization of medical data also persist, including fear of documentation due to stigma related to diagnosis and treatment of certain diseases such as HIV [<xref ref-type="bibr" rid="pdig.0000755.ref031">31</xref>]. The subjective and labor-intensive process of generating annotations for supervised machine learning has also highlighted issues such as label bias and low inter-rater reliability [<xref ref-type="bibr" rid="pdig.0000755.ref028">28</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref032">32</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref033">33</xref>]. These issues have led to increased interest in ML label generation methods, though current annotation algorithms carry their own set of issues, including narrower labeling abilities and technical onboarding challenges [<xref ref-type="bibr" rid="pdig.0000755.ref034">34</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref035">35</xref>]. Given critical challenges in generating clinically-relevant labels/annotations for supervised machine learning, we discuss and survey the current literature on automated approaches to generate outcome endpoints using EHR patient data.</p>
<sec id="sec011">
<title>Natural language processing (NLP).</title>
<p>Natural language processing (NLP) algorithms attempt to understand human-generated text by computationally encoding and representing text [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>]. A large portion of current NLP research is centered on text generation [<xref ref-type="bibr" rid="pdig.0000755.ref037">37</xref>] and knowledge checking [<xref ref-type="bibr" rid="pdig.0000755.ref038">38</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref039">39</xref>] due to current advances in large language models (LLM); however, there is growing literature focused on extracting structured details from unstructured free text in applications including multiple sclerosis traits [<xref ref-type="bibr" rid="pdig.0000755.ref040">40</xref>], chronic disease [<xref ref-type="bibr" rid="pdig.0000755.ref041">41</xref>], activities of daily living [<xref ref-type="bibr" rid="pdig.0000755.ref042">42</xref>], social determinants of health [<xref ref-type="bibr" rid="pdig.0000755.ref043">43</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref044">44</xref>], and other clinical traits [<xref ref-type="bibr" rid="pdig.0000755.ref045">45</xref>–<xref ref-type="bibr" rid="pdig.0000755.ref048">48</xref>]. Rule-based NLP approaches capitalize on domain knowledge by matching to human-specified keywords or patterns in text [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>]. In contrast, other deep learning approaches tend to employ more complex algorithm architectures to predict or classify text based on larger training data sets and concept-level annotations [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>].</p>
<p>In the context of cancer care and management, NLP has been used to extract pathological information for prostate cancer [<xref ref-type="bibr" rid="pdig.0000755.ref049">49</xref>], BI-RADS assessments from radiology reports in breast cancer [<xref ref-type="bibr" rid="pdig.0000755.ref050">50</xref>], initial treatment types [<xref ref-type="bibr" rid="pdig.0000755.ref051">51</xref>], breast cancer phenotypes [<xref ref-type="bibr" rid="pdig.0000755.ref052">52</xref>], and other quantitative clinical information [<xref ref-type="bibr" rid="pdig.0000755.ref053">53</xref>]. A scoping review of 123 publications by Wang <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref054">54</xref>] found that most cancer-related NLP algorithms were built with the aims of general information extraction and cohort identification, with only 3 studies attempting to visualize disease history and the authors explicitly identifying outcome analysis as a current gap in NLP-assisted mining of EHR text data.</p>
<p>For outcome identification, NLP algorithms have been deployed to identify recurrence in breast cancer [<xref ref-type="bibr" rid="pdig.0000755.ref055">55</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref056">56</xref>], response events and progression events in lung cancer [<xref ref-type="bibr" rid="pdig.0000755.ref057">57</xref>], progression using structured and embedded free text in glaucoma [<xref ref-type="bibr" rid="pdig.0000755.ref058">58</xref>], and progression across cancer types using EHR-derived Framingham risk scores [<xref ref-type="bibr" rid="pdig.0000755.ref059">59</xref>]. Sangariyavanich <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>] conduct a systematic review of 267 models across 17 studies using NLP to identify recurrent cancer, with a majority relying on statistical text representation. The authors find slightly superior performance between studies using deep learning NLP compared to rule-based algorithms, but acknowledge a lack of comparative literature in developing and deploying algorithms to detect recurrence or progression.</p>
<p>Most papers reviewed by [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>] evaluate algorithm performance through calculated area under the receiver operating characteristic curve (AUROC), F1, precision, and/or recall scores, requiring manually curated ground truth data sets to identify report-level labels of either recurrence or stable disease. The review reports median F1 scores of 0.71, 0.43, and 0.76, for the rule-based, ML, and deep learning approaches evaluated, respectively [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>]. However, given current challenges in medical data sharing, there are few to no publicly available datasets with report-level progression annotations for cross-validation. At the time of this publication, there are also few studies investigating suitable proxies for progression via free text or NLP methods. Thus, the current state of NLP-supported structured endpoint extraction relies on hand-crafted, report-level ground truth, which is time-intensive to curate and not often shared for further validation.</p>
<p>Outside of predictive performance, NLP algorithms may also be evaluated in other dimensions including algorithmic complexity, privacy and security, interpretability, and veracity. While deep learning algorithms may often achieve comparable [<xref ref-type="bibr" rid="pdig.0000755.ref060">60</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref061">61</xref>] or superior accuracy [<xref ref-type="bibr" rid="pdig.0000755.ref036">36</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref062">62</xref>] to rule-based approaches, they are often subject to differences in required resources for training and deployment, training data set sizes, developer and clinician user familiarity, output verification processes, privacy and security concerns, and methods to achieve interpretability [<xref ref-type="bibr" rid="pdig.0000755.ref060">60</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref061">61</xref>]. Berge <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref061">61</xref>] emphasize the specific need for local approaches in the medical domain, which motivates the use of rule-based approaches or transfer learning in the context of larger foundation models for deep learning approaches. Bhattarai <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref062">62</xref>] also note that outputs from local rule-based models such as spaCy are also deterministic (compared to emerging LLM approaches using models including GPT-4 which provide non-deterministic outputs without current widely accepted gold standard methods for verification).</p>
</sec>
<sec id="sec012">
<title>Computer vision (CV).</title>
<p>Computer vision is a field of computer science dedicated to extracting information from visual or image data. There is extensive literature dedicated to machine learning pre-processing and processing of MRI scans [<xref ref-type="bibr" rid="pdig.0000755.ref063">63</xref>]. Many of these applications involve signal processing, segmentation, auto-contouring, and other disease detection algorithms. However, brain scans require additional processing for anonymization/de-identification purposes, which represents a barrier to public data sharing [<xref ref-type="bibr" rid="pdig.0000755.ref064">64</xref>]. Thus, there are also few studies aimed at quantifying and tracking progression in brain tumors directly via imaging.</p>
<p>Direct volumetric imaging analysis may appear to be a more “objective” method to determine tumor progression. However, medical image processing is a far more resource- and expertise-intensive task that can conflict with changing and evolving technologies in image processing and data storage over time. Even with a sufficiently large imaging data set, pre-processing is a labor- and time-intensive task requiring several registration, skull-stripping, contouring, and de-identification steps to allow for comparisons within and between patients. While extra steps can be taken to integrate and share scans between institutions, including federated learning initiatives [<xref ref-type="bibr" rid="pdig.0000755.ref065">65</xref>], current computer vision (CV) research indicates reduced transferability and generalizability of ML-based decision-assisting algorithms when patient scans are obtained from different imaging machines and facilities [<xref ref-type="bibr" rid="pdig.0000755.ref066">66</xref>–<xref ref-type="bibr" rid="pdig.0000755.ref068">68</xref>].</p>
<p>Some studies have explored the use of data-driven algorithms to detect tumor features, including primary gross tumor volume (GTV) contouring in patients with nasopharyngeal carcinoma [<xref ref-type="bibr" rid="pdig.0000755.ref069">69</xref>] and peritumoral edema in recurrent GBM [<xref ref-type="bibr" rid="pdig.0000755.ref070">70</xref>]. A review of literature linking radiomic features to other biomarkers [<xref ref-type="bibr" rid="pdig.0000755.ref071">71</xref>] found three studies linking lesion or necrotic volume to genetic features, but none of the cited studies explored ML segmentation algorithms or linked data to outcomes [<xref ref-type="bibr" rid="pdig.0000755.ref072">72</xref>–<xref ref-type="bibr" rid="pdig.0000755.ref074">74</xref>]. Kidd <italic>et al</italic>. [<xref ref-type="bibr" rid="pdig.0000755.ref075">75</xref>] used convolutional neural networks to extract volumes from malignant pleural mesothelioma patients and compare against modified RECIST (Response Evaluation Criteria in Solid Tumors) criteria, finding a significant difference in AI-derived volume changes between partial response and progression patients. These studies indicate the need for further exploration and validation of automated tumor segmentation volumes, particularly when linking to clinical features and outcomes.</p>
</sec>
</sec>
<sec id="sec013">
<title>Contributions</title>
<p>Given current barriers in generating PFS data in the context of GBM, this paper aims to mine, integrate, and automate large-scale EHR data to arrive at PFS endpoints efficiently, and compare automated and/or machine learning PFS endpoints to manually-derived PFS metrics using clinical guidelines. This data integration framework can be replicated to add PFS outcomes in other large-scale data sets given acute clinical need and lack of data availability in other cancer disease sites and medical disciplines [<xref ref-type="bibr" rid="pdig.0000755.ref019">19</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref076">76</xref>, <xref ref-type="bibr" rid="pdig.0000755.ref077">77</xref>]. In this paper, we showcase:</p>
<list list-type="order">
<list-item>
<p>the integration of clinical, imaging, and prescription medication data within a queryable framework;</p>
</list-item>
<list-item>
<p>the automated identification of a progression free survival date using corticosteroid administration, natural language processing (NLP)-analyzed radiology reports, and computer vision (CV)-derived MRI tumor volumes;</p>
</list-item>
<list-item>
<p>the comparison of these automated endpoints to manual chart review as the clinical gold standard for progression according to RANO.</p>
</list-item>
</list>
</sec>
</sec>
<sec id="sec014" sec-type="materials|methods">
<title>Materials and methods</title>
<p>This analysis set out to mine various EHR data modalities, automate the extraction of PFS metrics via ML algorithms, and evaluate the ability of these methods to extract relevant progression evidence from a given modality compared to the current clinical standard approach using manually applied RANO criteria.</p>
<sec id="sec015">
<title>Patient cohort</title>
<p>The patient cohort initially included 423 brain malignancy patients who received treatment on protocol at the NIH. All patients were treated on NCI NIH IRB (IRB00011862) approved protocols. Given the significant radiographic differences between GBM and lower grade glioma patients, the current analysis focused on patients with GBM confirmed via histopathology to ensure homogeneity. Patients were excluded if a manual progression date could not be determined due to loss to follow-up or patient expiration without overt progression evidence (i.e. death occurring from non-glioma cause or less than 1 month following completion of CRT) (<xref ref-type="fig" rid="pdig.0000755.g002">Fig 2</xref>). Patients were also excluded for lacking at least one queryable radiology report, one medication prescription, and two brain MRI scans (for comparative purposes) dated after their completion of chemoirradiation. Data was aggregated and queried through the NIH Integrated Data Analysis Platform (NIDAP). Available patient data included demographic and clinical attributes, MRI reports, progress notes, lab results, medication lists, and imaging scans.</p>
<fig id="pdig.0000755.g002" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g002</object-id><label>Fig 2</label><caption><title>Overall patient cohort with overlapping data source availability.</title></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g002.tif" xlink:type="simple"/></fig>
</sec>
<sec id="sec016">
<title>Clinical standard for assigning PFS</title>
<p>A clinical standard progression date for each patient was assigned via manual review of patient charts with RANO criteria progression (<xref ref-type="fig" rid="pdig.0000755.g003">Fig 3</xref>). Progression in clinic was determined based on clinical and specifically neurological status, need for symptom management (e.g., use of steroids, recurrence of seizures requiring augmentation or initiation of seizure medication, etc.), and any alterations in patient status from previous functionality. These factors were concurrently considered by a clinical team with imaging alterations in tumor volumes treated with RT. Determination of progression was not the result of a single data modality or a single individual but rather the result of multidisciplinary discussion with consensus being reached after evaluation of all the features, which was then captured as progression in clinical progress notes. The nuances of this discussion are to some extent captured in clinical notes; however, data quantitatively documenting the number of individuals in the discussion (minimally <inline-formula id="pdig.0000755.e004"><alternatives><graphic id="pdig.0000755.e004g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e004.tif" xlink:type="simple"/><mml:math display="inline" id="M4"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> and typically &gt;5) and their level of agreement are not captured. The consensus (agreement) of the group is based on real time application of RANO criteria and manually captured as consensus for progression or stability in this study. Other studies [<xref ref-type="bibr" rid="pdig.0000755.ref027">27</xref>] have cited disconnects between the method and physical/virtual equipment used for ground truth labeling in clinical practice versus ML data annotation. 
Thus, to avoid this limitation, clinical standard RANO criteria were applied in the exact clinical context using the same equipment and software that providers used when treating patients.</p>
<fig id="pdig.0000755.g003" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g003</object-id><label>Fig 3</label><caption><title>Paradigm for manual and automated methods to derive progression free survival.</title></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g003.tif" xlink:type="simple"/></fig>
</sec>
<sec id="sec017">
<title>Corticosteroid administration analysis</title>
<p>All available prescriptions throughout a patient’s medical history were queried from the NIH Integrated Data Analysis Platform (NIDAP). Trends in prescription types, frequencies, doses, and sequences were analyzed. Prescriptions matching the generic key word ‘dexamethasone’ and associated brand names of any dosage and any administration route were selected for further analysis. Since GBM standard of care involves prescribing steroids immediately after surgery and CRT, analyses were limited to prescriptions dated 1 month after the end of CRT. Since steroids are prescribed on a tapering schedule, the first date of the largest dose prescription was followed continuously until the last date of the smallest prescription to determine the window of steroid tapering. During all subsequent analyses, this window was treated as a single course of steroids post-CRT.</p>
<p>The first date of the post-CRT steroids course was compared to the manually obtained clinical standard progression date. The number of prescriptions and months after treatment completion were compared to year of treatment to identify any department-level changes in prescription practices over time.</p>
</sec>
<sec id="sec018">
<title>Natural language processing of radiology reports</title>
<p>All available medical documents throughout the patient’s medical history were pulled from the integrated data framework. Free text document analyses were limited to brain MRI radiology reports.</p>
<p>Documents dated prior to the end date of a patient’s radiation therapy course were dropped to maintain consistency between variable-length patient histories. Document text was pre-processed to standardize paragraph formatting and spacing. The open-source Python <ext-link ext-link-type="uri" xlink:href="https://spacy.io/" xlink:type="simple">spaCy</ext-link> package was used to perform standard natural language processing tasks including part of speech tagging, lemmatization, and dependency parsing (<xref ref-type="fig" rid="pdig.0000755.g003">Fig 3</xref>). The add-on <ext-link ext-link-type="uri" xlink:href="https://github.com/medspacy/medspacy" xlink:type="simple">medspaCy</ext-link> package was used for further handling of medical context and document section parsing [<xref ref-type="bibr" rid="pdig.0000755.ref078">78</xref>].</p>
<p>Overall trends in word frequencies were analyzed within and across reports. We annotated the clinical standard RANO criteria for verbs and adjectives related to both progression and stability, as described in [<xref ref-type="bibr" rid="pdig.0000755.ref079">79</xref>]. A trained clinician also viewed the descending term frequency list obtained from an aggregate of reports (<xref ref-type="supplementary-material" rid="pdig.0000755.s004">S3a Fig</xref>) and sorted terms potentially relevant to determining progression criteria into either progression or stability categories, similar to the method employed by [<xref ref-type="bibr" rid="pdig.0000755.ref080">80</xref>]. Using clinical standard RANO criteria and these observed frequency trends, we created a list of words hypothesized to indicate either progression or stability (listed in <xref ref-type="supplementary-material" rid="pdig.0000755.s002">S1 Table</xref>).</p>
<p>Rule-based matchers with these term lists were created to search and tag any lemmatized instances of progression- and stability-related tokens within each document.</p>
<p>The medspaCy extension package was used to identify and handle contextual modifiers of these key terms, including negations and familial, historical, and hypothetical mentions. A custom ‘surgical’ contextual pipeline was constructed to match any tokens modified by surgical or postoperative terms to separate out tumor changes related to post-surgical effects of treatment. The ‘negation’ contextual pipeline was also expanded to include other terms commonly indicating no change in clinical practice given the high likelihood of radiology reports to indicate stability as a lack of positive findings (<xref ref-type="supplementary-material" rid="pdig.0000755.s002">S1 Table</xref>).</p>
<p>Each patient document was processed via the custom spaCy and medspaCy NLP pipeline implementation, and progression- and stability-related terms were extracted and categorized per document. Progression terms modified by negated or historical contextual terms in the document were re-categorized as ‘stability’ terms. Progression terms modified by surgical context were dropped from the progression category term list due to their high likelihood of indicating <italic>pseudoprogression</italic> as compared to actual progression.</p>
<p>The frequency of progression-related words for a given document was compared to the frequency of stability-related words to determine the overall document status. A higher frequency of progression-related words indicated overall progression within the document. If the number of progression-related terms equaled the number of stability-related terms, then surgical-context modified terms were included in the analysis to provide additional context. Various weightings and thresholds for obtaining a report-level determination from each term categories were tested. We also tested various approaches to using RANO criteria as a proxy for report level ground truth (e.g., selecting all reports within a time window of clinician ground truth); however, given the goal of independently testing results derived from various data modalities, we wanted to avoid using the results of one modality to optimize or constrain the predictions of another modality (e.g., using manual ground truth to optimize weights for the NLP-based methods). Thus, given a lack of report-level ground truth and publicly available reports for validation tests, a one-to-one weighting was ultimately selected in this study. This weighting was selected with the goal of testing a rule that could be straightforwardly communicated to clinicians and with acknowledgment that alternate approaches should be evaluated and optimized in future work.</p>
<p>Report-derived progression dates were obtained by selecting the date of the first report that indicated progression overall based on the term frequency formulas described above. These report-derived progression dates were compared to both manually-obtained clinical standard dates and to other data-derived progression methods.</p>
</sec>
<sec id="sec019">
<title>Computer vision analysis of MRI scans</title>
<p>All available brain MRI imaging throughout the patient’s medical history was pulled from the integrated framework. Only patients with at least two post-RT scans were included. The following 3T MRI sequences were acquired: T1-weighted pre-contrast, T1-weighted post-contrast, T2-weighted, and T2-weighted fluid-attenuated inversion recovery (FLAIR). The complete methods for deriving the brain MRI volumes are further detailed and published in [<xref ref-type="bibr" rid="pdig.0000755.ref081">81</xref>]. The tumor segmentation pipeline classified four tissue types: 1) background, 2) contrast-enhancing tumor, 3) non-contrast-enhancing tumor, and 4) edema.</p>
<p>Given that current clinical standard RANO criteria involve observing a 25% increase in contrast-enhancing lesions to indicate progression [<xref ref-type="bibr" rid="pdig.0000755.ref002">2</xref>], we chose to limit our analysis of relative volume changes to <bold>contrast-enhancing tumor</bold>. Volumetric changes were calculated by dividing a given scan volume by the volume from the initial reference or baseline brain MRI scan available post-surgery but pre-CRT intervention (<xref ref-type="disp-formula" rid="pdig.0000755.e006">Eq 1</xref>). To ensure adequate capture of alteration in contrast enhancement for patients with both large and small tumor volumes while also avoiding false positives created by small segmentation errors, we elected to treat a <inline-formula id="pdig.0000755.e005"><alternatives><graphic id="pdig.0000755.e005g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e005.tif" xlink:type="simple"/><mml:math display="inline" id="M5"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>5</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increase in volume as an indication of progression.</p>
<disp-formula id="pdig.0000755.e006"><alternatives><graphic id="pdig.0000755.e006g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e006.tif" xlink:type="simple"/><mml:math display="block" id="M6"><mml:mrow><mml:mrow><mml:mstyle displaystyle="false" scriptlevel="0"><mml:mtext>relative volumetric change</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mstyle displaystyle="false" scriptlevel="0"><mml:mtext>post-CRT contrast enhancing tumor volume</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle displaystyle="false" scriptlevel="0"><mml:mtext>baseline contrast enhancing tumor volume</mml:mtext></mml:mstyle></mml:mrow></mml:mfrac></mml:mrow></mml:mrow></mml:math></alternatives> <label>(1)</label></disp-formula>
<p>Imaging-derived progression dates were obtained by selecting the earliest date of scans with a <inline-formula id="pdig.0000755.e007"><alternatives><graphic id="pdig.0000755.e007g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e007.tif" xlink:type="simple"/><mml:math display="inline" id="M7"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>5</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> relative increase in contrast-enhancing tumor volume. These image-derived progression dates were compared to manually-obtained clinical standard dates and compared to other data-derived progression methods.</p>
</sec>
<sec id="sec020">
<title>Comparative analysis</title>
<p>The data-derived progression methods were aggregated by patient for overall comparison and analyzed for statistically significant differences in the overall distributions and individual differences between data-derived dates. Given that not every patient met the criteria for progression under each progression method, many of these comparisons reflected a smaller subset of the overall cohort.</p>
<p>Non-parametric statistics were used to compare progression timeline dates given that the normality assumption for the progression date distributions was violated (i.e., very long-term survivors lead to a right-skewed distribution as seen in <xref ref-type="fig" rid="pdig.0000755.g004">Fig 4a</xref>). The dependent variable was the calculated PFS (in months) and the independent (grouping) variable was the method used to obtain the calculated PFS metric.</p>
<fig id="pdig.0000755.g004" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g004</object-id><label>Fig 4</label><caption><title>a) Boxplot and b) scatterplot distributions of manual and data-derived progression free survival dates.</title> <p>The dark green line represents the clinical standard PFS dates, with points falling above the dark green line indicating that the automated method derived an earlier PFS date compared to the clinical standard and points falling below indicating that the method derived a later PFS date. The light blue, dark blue, and light green trendlines reflect the Ordinary Least Squares linear regression for the radiology report, MRI scan, and prescription methods, respectively.</p></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g004.tif" xlink:type="simple"/></fig>
<p>The Kruskal-Wallis Test was used to examine differences in datapoint progression timelines. The data met the test criteria as the observed PFS metrics (i.e., number of months) were continuous, the methods to obtain each PFS metric were not dependent on each other, and each method contained a sufficiently large number of positive observations.</p>
<p>The Wilcoxon signed-rank test with Bonferroni correction was used for pairwise comparisons between different datapoint timelines. The data met the test criteria as observations were 1) not normally distributed (<xref ref-type="fig" rid="pdig.0000755.g004">Fig 4a</xref>), 2) dependent or naturally paired samples (i.e., each method calculated a PFS metric for the same given patient), and 3) independent from other pairs (i.e., metrics were calculated for each patient separately).</p>
</sec>
</sec>
<sec id="sec021" sec-type="results">
<title>Results</title>
<p>While the brain malignancy cohort receiving treatment at the National Institutes of Health (NIH) was around 423 patients, this analysis required integration of data from various sources. 331 patients were excluded for lacking either a confirmed GBM diagnosis or at least one instance of each EHR data modality queried in this paper. Ultimately, all four types of data were available for 92 patients receiving treatment between 2004 and 2023 at the NIH.</p>
<sec id="sec022">
<title>Manual clinical standard</title>
<p>Following manual determination of patient progression using RANO criteria with MRI report and clinical exam review, 99% (n=91) of patients experienced tumor progression. These patients progressed an average 404 days or 13 months (stddev: 20.9 months) after the end of their last day of RT (<xref ref-type="table" rid="pdig.0000755.t001">Table 1</xref>).</p>
<table-wrap id="pdig.0000755.t001" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.t001</object-id><label>Table 1</label><caption><title>Descriptive statistics for a) manual and automated methods to derive PFS dates and b) relative differences between the manual PFS method and each automated PFS datapoint. PFS statistics are reported in months. Negative statistics indicate that the automated PFS date occurred prior to the manual PFS date.</title></caption>
<alternatives><graphic id="pdig.0000755.t001g" mimetype="image" position="float" xlink:href="pdig.0000755.t001.tif" xlink:type="simple"/><table><colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left"/>
<th align="left">Clinical standard</th>
<th align="left">Prescription</th>
<th align="left">Radiology report</th>
<th align="left">MRI scan</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">a)</td>
<td align="left"><bold>% progressed</bold></td>
<td align="left">99% (n=91)</td>
<td align="left">63% (n=58)</td>
<td align="left">79% (n=73)</td>
<td align="left">54% (n=50)</td>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>mean PFS</bold></td>
<td align="left">13.3</td>
<td align="left">17.9</td>
<td align="left">8.0</td>
<td align="left">6.2</td>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>std dev</bold></td>
<td align="left">20.9</td>
<td align="left">18.8</td>
<td align="left">13.2</td>
<td align="left">11.6</td>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>median PFS</bold></td>
<td align="left">6.5</td>
<td align="left">11.8</td>
<td align="left">3.6</td>
<td align="left">1.9</td>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>range PFS</bold></td>
<td align="left">0-137.1</td>
<td align="left">1-84.8</td>
<td align="left">0.3-73.8</td>
<td align="left">0.6-55.5</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left"><bold>Prescription</bold></td>
<td align="left"><bold>Radiology report</bold></td>
<td align="left"><bold>MRI scan</bold></td>
<td align="left"/>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>mean PFS difference</bold></td>
<td align="left">4.5</td>
<td align="left">-6.9</td>
<td align="left">-2.6</td>
<td align="left"/>
</tr>
<tr>
<td align="left">b)</td>
<td align="left"><bold>std dev</bold></td>
<td align="left">8.3</td>
<td align="left">19.2</td>
<td align="left">5.8</td>
<td align="left"/>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>median PFS difference</bold></td>
<td align="left">2.5</td>
<td align="left">-1.6</td>
<td align="left">-0.03</td>
<td align="left"/>
</tr>
<tr>
<td align="left"/>
<td align="left"><bold>range PFS difference</bold></td>
<td align="left">-6.1-33.8</td>
<td align="left">-103.0-14.9</td>
<td align="left">-33.5-2.1</td>
<td align="left"/>
</tr>
</tbody>
</table>
</alternatives></table-wrap>
<p>There was no association observed between the date of treatment received and length of clinical standard progression free survival timelines (<italic>R</italic><sup>2</sup> = 0.0, <italic>F</italic>(1,89) = 0.0004110, <italic>p</italic> = .865) (<xref ref-type="supplementary-material" rid="pdig.0000755.s001">S1 Fig</xref>).</p>
</sec>
<sec id="sec023">
<title>Corticosteroid prescription analysis</title>
<p>23928 total prescription orders across the entire medical history of 92 patients were identified. 223 or 0.9% of these prescriptions across 58 patients were specifically for dexamethasone. Given the need for a tapering schedule for dexamethasone, patients often received multiple prescriptions of varying doses for a given “course” of steroids (<xref ref-type="fig" rid="pdig.0000755.g005">Fig 5a</xref>). In comparison, 91 (99%) patients were identified as having progressed via the manual clinical standard method. The median date of these steroid prescriptions was 11.8 months after end of radiotherapy (stddev: 18.8 months). When compared to a given ground truth progression date for a patient, post-radiotherapy steroid prescriptions occurred an average of 4.5 months (range -6 to 34 months, median 2.5 months, stddev 8.3 months) after clinical standard progression dates.</p>
<fig id="pdig.0000755.g005" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g005</object-id><label>Fig 5</label><caption><title>Patient timelines and progression results for available a) steroid prescriptions, b) radiology reports, and c) brain MRI scans.</title> <p>c) Red and blue points indicate scans with a relative increase and decrease, respectively, in contrast-enhancing tumor volumes compared to the baseline post-surgery, pre-RT scan.</p></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g005.tif" xlink:type="simple"/></fig>
</sec>
<sec id="sec024">
<title>Natural language processing of radiology reports</title>
<p>1993 available radiology reports across 92 patients were identified. 1862 documents were dated on or after the patient’s diagnosis date, and 1677 documents were dated after a patient received CRT. Within reports dated after a patient’s diagnosis, the most common document types included “MRI BRAIN-Perfusion (IP)” <inline-formula id="pdig.0000755.e008"><alternatives><graphic id="pdig.0000755.e008g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e008.tif" xlink:type="simple"/><mml:math display="inline" id="M8"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1245</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>, “IP Perfusion” <inline-formula id="pdig.0000755.e009"><alternatives><graphic id="pdig.0000755.e009g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e009.tif" xlink:type="simple"/><mml:math display="inline" id="M9"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>90</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>, “CT Cerebrum” <inline-formula id="pdig.0000755.e010"><alternatives><graphic id="pdig.0000755.e010g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e010.tif" xlink:type="simple"/><mml:math display="inline" id="M10"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>97</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>, and “DX Chest - PA + Lat” <inline-formula id="pdig.0000755.e011"><alternatives><graphic id="pdig.0000755.e011g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e011.tif" xlink:type="simple"/><mml:math display="inline" id="M11"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>69</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>. 
Only reports including brain MRI results after a patient’s diagnosis date were selected for further analysis.</p>
<p>A total of 1243 brain MRI radiology reports dated after treatment completion were available across 92 patients, with an average document length of 347 words. Frequency analysis indicated that the most common disease-relevant terms used in these reports included ‘enhancement’, ‘perfusion’, ‘enhancing’, ‘increased’, ‘tumor’, and ‘abnormal.’ In consultation with RANO criteria and frequency analysis of these documents, lists of words conceptually related to progression and stability were generated and used to write matcher rules for NLP-based text analysis. Using these lists, documents were analyzed for terms mentioned on either list (<xref ref-type="supplementary-material" rid="pdig.0000755.s004">S3a Fig</xref>).</p>
<p>Terms indicating stability were mentioned 2641 times across 1243 documents, while words indicating progression were mentioned 1233 times (<xref ref-type="supplementary-material" rid="pdig.0000755.s004">S3a Fig</xref>). 803 additional terms were related to progression but modified by surgical context. Documents often contained terms pertaining to multiple categories. 70% of documents contained at least one term related to ‘stable,’ and 35% of documents contained a term relating to both ‘stable’ and ‘progression.’ 11% of documents contained terms relating to ‘stable,’ ‘progression,’ and ‘surgical progression’ simultaneously.</p>
<p>After applying ‘progression’ and ‘stable’ category term-frequency formulas to each report, overall progression was identified in 222 reports (18% of post-RT reports) belonging to 73 patients (79%) (<xref ref-type="fig" rid="pdig.0000755.g005">Fig 5b</xref>). The average date of these first progression reports was 8 months after end of radiotherapy (stddev: 13 months). When compared to a given ground truth progression date for a patient, radiology reports indicating progression occurred an average of 6.9 months (range -103 to 14.9 months, median -1.6 months, stddev 19 months) prior to clinical standard progression dates.</p>
</sec>
<sec id="sec025">
<title>Computer vision analysis of MRI scans</title>
<p>A total of 743 scans were available across all 92 patients following surgery. On average, edema was the largest identified volume, followed by non-contrast-enhancing tumor and contrast-enhancing tumor (<xref ref-type="supplementary-material" rid="pdig.0000755.s006">S2 Table</xref>). Total tumor, defined as the sum between non-contrast-enhancing and contrast-enhancing tumor, displayed a right-tailed distribution of values with large variation in the fourth quartile (<xref ref-type="supplementary-material" rid="pdig.0000755.s005">S4 Fig</xref>). Total burden, defined as the sum between total tumor and edema volumes, reflected a wider range of scan-level volumes.</p>
<p>To identify scans that indicated progression from a pre-RT baseline scan, the relative change in <bold>contrast-enhancing tumor</bold> was calculated between each baseline scan and subsequent follow-up scan. 134 (23%) scans across 52 (57%) patients indicated <italic>any</italic> increase in contrast-enhancing tumor volume from an initial baseline scan and 125 scans across 50 (54%) patients exhibited a <inline-formula id="pdig.0000755.e012"><alternatives><graphic id="pdig.0000755.e012g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e012.tif" xlink:type="simple"/><mml:math display="inline" id="M12"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>5</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increase. Given the wide range in patient brain volumes and volume changes, <xref ref-type="fig" rid="pdig.0000755.g005">Fig 5c</xref> visualizes the logarithmic relative slope change in contrast-enhancing tumor for all available patient scans over time.</p>
<p>The average date of the first progression-indicating scans was 6.2 months after end of radiotherapy (stddev: 11.6 months). When compared to a given ground truth progression date for a patient, scans with at least 5% increasing contrast-enhancing lesions occurred an average of 2.6 months (range -33 to 2.1 months, median -0.03 months, stddev 5.8 months) prior to clinical standard progression dates.</p>
</sec>
<sec id="sec026">
<title>Comparative analysis</title>
<p>An average of 2.4 steroid prescriptions, 2.4 progression-indicating radiology reports, and 1.4 progression-indicating MRI brain scans were available per patient. The total months to first progression-indicating datapoint were compiled for each method in <xref ref-type="table" rid="pdig.0000755.t001">Table 1a</xref> and the relative time span compared to clinical standard were calculated for the three automated progression data methods in <xref ref-type="table" rid="pdig.0000755.t001">Table 1b</xref>.</p>
<p>There were significant differences observed between the four methods of determining progression for patients that progressed via all four methods <inline-formula id="pdig.0000755.e013"><alternatives><graphic id="pdig.0000755.e013g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e013.tif" xlink:type="simple"/><mml:math display="inline" id="M13"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>χ</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mn>39.7</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>1.2</mml:mn><mml:mi>e</mml:mi><mml:mspace width="0.167em"/><mml:mrow><mml:mo>−</mml:mo></mml:mrow><mml:mspace width="0.167em"/><mml:mn>8</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>. Post hoc pairwise comparisons showed significant differences between the clinical standard progression timelines and those obtained from scans with relative <inline-formula id="pdig.0000755.e014"><alternatives><graphic id="pdig.0000755.e014g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e014.tif" xlink:type="simple"/><mml:math display="inline" id="M14"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>5</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increases in contrast-enhancing tumor volumes <inline-formula id="pdig.0000755.e015"><alternatives><graphic id="pdig.0000755.e015g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e015.tif" xlink:type="simple"/><mml:math display="inline" id="M15"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>133.0</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>5.7</mml:mn><mml:mi>e</mml:mi><mml:mspace width="0.167em"/><mml:mrow><mml:mo>−</mml:mo></mml:mrow><mml:mspace width="0.167em"/><mml:mn>4</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>, steroid prescriptions <inline-formula 
id="pdig.0000755.e016"><alternatives><graphic id="pdig.0000755.e016g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e016.tif" xlink:type="simple"/><mml:math display="inline" id="M16"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>234.5</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>2.555</mml:mn><mml:mi>e</mml:mi><mml:mspace width="0.167em"/><mml:mrow><mml:mo>−</mml:mo></mml:mrow><mml:mspace width="0.167em"/><mml:mn>6</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>, and radiology reports <inline-formula id="pdig.0000755.e017"><alternatives><graphic id="pdig.0000755.e017g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e017.tif" xlink:type="simple"/><mml:math display="inline" id="M17"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>672.5</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mi>.002</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula> after Bonferroni correction. 
Progression dates derived from reports were significantly different from those derived from steroids <inline-formula id="pdig.0000755.e018"><alternatives><graphic id="pdig.0000755.e018g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e018.tif" xlink:type="simple"/><mml:math display="inline" id="M18"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>136.5</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>2.2</mml:mn><mml:mi>e</mml:mi><mml:mspace width="0.167em"/><mml:mrow><mml:mo>−</mml:mo></mml:mrow><mml:mspace width="0.167em"/><mml:mn>06</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula> but not scans <inline-formula id="pdig.0000755.e019"><alternatives><graphic id="pdig.0000755.e019g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e019.tif" xlink:type="simple"/><mml:math display="inline" id="M19"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>281.0</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mi>.418</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>. All but one scan progression date occurred earlier than the respective steroid prescriptions for patients with both datapoints available <inline-formula id="pdig.0000755.e020"><alternatives><graphic id="pdig.0000755.e020g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e020.tif" xlink:type="simple"/><mml:math display="inline" id="M20"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>12.0</mml:mn><mml:mo>,</mml:mo><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>8.1</mml:mn><mml:mi>e</mml:mi><mml:mspace width="0.167em"/><mml:mrow><mml:mo>−</mml:mo></mml:mrow><mml:mspace width="0.167em"/><mml:mn>09</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula>.</p>
<p>Compared to the clinical standard method that identified progression in 99% (n = 91) of patients, the report NLP indicated the highest number of recurrent patients (n = 73), followed by steroid prescription analysis (n = 58), and lastly volumetric-based analysis of scans (n = 50) (<xref ref-type="fig" rid="pdig.0000755.g006">Fig 6</xref>). The data modality that came closest to the clinical standard progression dates was steroids (avg 4.5 months later), followed by volumetric-based scan analysis (avg 2.6 months earlier), and then report-based NLP (avg 6.9 months earlier).</p>
<fig id="pdig.0000755.g006" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0000755.g006</object-id><label>Fig 6</label><caption><title>Progression-indicating datapoints for studied patient cohort.</title></caption>
<graphic mimetype="image" position="float" xlink:href="pdig.0000755.g006.tif" xlink:type="simple"/></fig>
<p><xref ref-type="fig" rid="pdig.0000755.g004">Fig 4</xref> visualizes the boxplot and scatterplot distribution of progression dates for each method. The available data-derived progression dates were within 2 months of the clinical standard progression dates for 36% of report-derived dates, 66% of scan-derived dates, and 36% of steroid-derived dates.</p>
</sec>
</sec>
<sec id="sec027" sec-type="conclusions">
<title>Discussion</title>
<p>We compare results between manual and different data-driven and/or machine learning methods to capture progression events using diverse data modalities within an integrated patient data framework.</p>
<sec id="sec028">
<title>Clinical standard</title>
<p>RANO criteria are the current standard for determining progression for a given brain malignancy patient. Standard clinical application of RANO criteria involves review of multiple sources of medical data available to a specialized or skilled clinician. This process can be disrupted without complete compilation of scans, radiology reports, progress notes, and other clinical context over long periods and potentially across multiple medical institutions. Manual review of patient charts is also time-consuming and labor-intensive. However, expert clinicians hold rich domain knowledge and can incorporate additional context and judgment available in clinic or during dual review of other chart elements.</p>
<p>Readers during the study noted difficulty in making definitive progression determinations during many patient cases. For example, sometimes progression would be indicated within a specific radiology report despite no changes made in a patient’s treatment protocol. This indicated that another clinician likely chose to follow-up and/or wait for further information before adjusting or changing their treatment approach. These results highlighted that the clinical standard manual approach of determining progression mixes objective factors, such as the appearance of new lesions, with other subjective factors such as worsening of neurological symptoms. Thus, there is potential for high variability in RANO judgments between clinicians and between patients even when using the same sources of data. These variable factors could impact results during patient care and data analysis, especially if PFS dates are shared in public data sets without reviewable context on the RANO criteria decision.</p>
</sec>
<sec id="sec029">
<title>Corticosteroid prescription analysis</title>
<p>Post-radiotherapy steroid prescriptions may provide context about a patient’s disease management that allows clinicians and researchers to further probe for progression evidence. However, in this paper, corticosteroid prescription analysis identified fewer patients as experiencing progression overall compared to the other manual and data-driven methods. This could be due to patients receiving care management from outside providers after completion of treatment at our center, highlighting barriers that remain within an integrated data framework approach. Conversely, it is important to be cautious when using this method as steroids can be prescribed for non-progression-related reasons, including post-surgical changes. Given that treatment protocols may vary from center to center, it may be appropriate to adjust the date periods in which steroid prescriptions are filtered after surgery and radiotherapy. Thus, steroid-driven progression analysis may include both false positive and false negative errors due to inclusion of non-progression and exclusion of progression-related steroid prescriptions.</p>
<p>This method requires access to patient prescriptions, and can be done with simple data analysis techniques using tabular format data. Moreover, given the finite and structured nature of prescription EHR data, this method required the least amount of data preparation and cleaning. Review of steroid prescriptions also does not necessarily require a specialized clinical expert to query or review the data. As a result, this method may be more straightforward and accessible to non-oncologists researching PFS outcomes in patient cohorts.</p>
<p>However, given that steroids are commonly prescribed for neurological symptoms associated with radiotherapy treatment, it is important to acknowledge that a prescription database may not actually reflect real-world patient medication schedules. It is not uncommon for providers to adjust their dosage and recommendations to patients based on their symptoms after receiving a given prescription. We observed high variability in prescription doses, frequencies, and types of administration in this study (<xref ref-type="fig" rid="pdig.0000755.g005">Fig 5a</xref>). Given that steroids are also generally prescribed during RT, this prescription data method of determining progression may identify later progression dates if patients hold onto a previous dose of steroids and administer them later on. This disconnect between digital data and real-world behavior remains an issue across multiple areas of clinical research.</p>
</sec>
<sec id="sec030">
<title>Natural language processing of radiology reports</title>
<p>Ultimately, the rule-based NLP method identified the greatest number of patients as having progressed in the cohort. While the method displayed the furthest date difference from the clinical standard method, it was also the only method to identify progression in patients with very long stable disease (&gt;100 months) (<xref ref-type="supplementary-material" rid="pdig.0000755.s003">S2 Fig</xref>). These results suggest potential overall benefits from deploying an NLP method, but with a need for further algorithmic design and parameter tuning if close clinical correlation is desired.</p>
<p>The rule-based NLP approach employed in this paper provided a summary of progression-related terms and the context in which they were mentioned for each available report. We opted for a rule-based implementation over other large-scale language models in order to employ a simple, reproducible framework that could be deployed locally. The rule-based approach was also selected to provide improved decision interpretability and reviewability, as the custom progression- and stability-related term matchers allowed researchers to verify progression evidence over the entire course of medical history and seek further context within the original report, if desired (<xref ref-type="supplementary-material" rid="pdig.0000755.s004">S3b Fig</xref>). This method could be embedded into real-world practice where an interested clinician or researcher is provided with an overall graphic interpretation of a patient’s medical history based on these key terms, with the ability to further investigate the actual free text and associated results for time periods of interest. Further research is needed to develop appropriate tutorials for expert users of these systems and evaluate various approaches to report term weighting, evidence presentation, and overall method interpretability in practice.</p>
<p>In order to translate these progression-related terms into a report-level judgment, we weighed terms indicating progression against terms indicating stability or surgical changes. To avoid calling progression too early given the wide range of clinical standard progression patient timelines, we also decided not to weight mentions of progression within a report more strongly than mentions of stability or surgical changes. Given high likelihoods of surgical changes being correlated with <italic>pseudoprogression</italic>, we also chose to handle progression-related changes in surgical cavities as indicating “stability” for a patient. The net effect of these choices was a smaller subset of patients in the overall cohort having a report indicating progression.</p>
<p>There was likely some tradeoff in implementing stricter linguistic criteria, as identifying a first date of progression later in long-term stable patients likely came at the expense of identifying progression at all in short-term progressors. It is worth noting that these term formulas could be manipulated in different contexts to give more weight to terms indicating progression versus stability or surgical changes, or a specific subset of terms within each overall category. Adjustments to these formulas may have the net effect of identifying a higher or lower number of progressed patients and/or adjusting the timelines in which patient progression is identified via radiology report. These decisions require judgement as to a preference for high sensitivity or specificity, and the impact of a false positive or false negative may change based on the context in which progression data is deployed. Future studies may explore other NLP approaches to mine radiology reports for progression evidence, including the use of large language models (LLMs), algorithms trained with document-level “ground truth” labels for overall progression and stability, and evaluation of the area under the receiver operating characteristic curve in order to determine optimal formula weighting and thresholds.</p>
</sec>
<sec id="sec031">
<title>Computer vision analysis of MRI scans</title>
<p>Imaging reflects a patient’s real-time disease state and can be used in the clinic to guide treatment decisions for a given patient. Radiomic algorithms may provide increased quantitative evidence for decisions in the clinic, as volumetric parameters may be difficult to estimate in practice given the limitations of viewing only two dimensions of a 3D scan slice at any point in time. Human intuitions about volumetric imaging can be subject to errors due to differences in search techniques and cognitive load [<xref ref-type="bibr" rid="pdig.0000755.ref082">82</xref>]. The ability to identify regions of contrast-enhancing tumor, non-contrast-enhancing tumor, and edema in a scan closely reflects current clinical imaging practices dictated by RANO progression criteria.</p>
<p>In this paper, we set out to examine the influence of increases in <bold>contrast-enhancing tumor</bold> regions given the independence of growth in relation to steroids and its inclusion within RANO criteria. Perhaps surprisingly, only around half of patients actually progressed by definition of a <inline-formula id="pdig.0000755.e021"><alternatives><graphic id="pdig.0000755.e021g" mimetype="image" position="anchor" xlink:href="pdig.0000755.e021.tif" xlink:type="simple"/><mml:math display="inline" id="M21"><mml:mrow><mml:mo>≥</mml:mo><mml:mn>5</mml:mn><mml:mi>%</mml:mi></mml:mrow></mml:math></alternatives></inline-formula> increase in contrast-enhancing tumor despite a majority of patients progressing by manual clinical standards. Our findings align with Kickingereder <italic>et al</italic>., who also observed reduced patient progression rates when comparing increases in contrast-enhancing tumor volumes to manual RANO assessments [<xref ref-type="bibr" rid="pdig.0000755.ref019">19</xref>]. This suggests practical differences between the way that RANO criteria are implemented in clinic and how contrast-enhancing tumors manifest on imaging, both volumetrically and perceptually. Given that all but one patient progressed under the manual clinical standard criteria, our scan-based progression findings indicate that clinicians may be overestimating the growth of tumor volumes on scans, or that they are often using other RANO criteria, including worsening clinical symptoms, to determine progression. These results suggest a gap between the underlying logic of RANO criteria and how the clinical principles are applied in practice.</p>
</sec>
<sec id="sec032">
<title>Comparative analysis</title>
<p>Defining tumor progression is a critical, yet imperfect challenge in cancer management and treatment. The ability to “objectively” determine progression is limited by complex, poorly understood cancer biology and tumor proliferation mechanisms. As a result, any attempt to determine tumor progression within a patient will amount to an imperfect proxy of the underlying ground truth state. Given diverse motivations to study tumor progression, the ideal definition and data points of interest will likely shift between audiences.</p>
<p>Overall, automating progression from only one type of EHR data often resulted in an earlier progression date compared to the manually determined ground truth. If these automated PFS metrics were consulted during clinical treatment, this could result in earlier implementation of more drastic interventions, such as potential re-irradiation or initiation of other therapeutic agents. If the same methods were implemented retroactively during data analysis, earlier progression dates would imply that certain subpopulations of patients had more aggressive disease.</p>
<p>Given that the scan-based progression method identified almost all progressed patients earlier than by clinical criteria, but also identified the fewest patients as experiencing any progression, the scan-based method demonstrated a propensity to commit both false positive and false negative errors. This indicates that the CV method may benefit from a more nuanced definition of progression, such as taking into account the initial tumor volume size, or incorporating factors of non-contrast-enhancing tumor tissue or edema into future scan-based progression methods.</p>
<p>The report-based method also identified patients as progressing earlier, with the largest number of patients being identified compared to the other automated data-driven methods. Given the rule-based nature of the NLP method deployed in this paper, report-level decisions could be adjusted based on the disease aspects most relevant to a given research team. Thus, these results indicate the distribution of outcomes from an automated endpoint extraction framework can be shaped by both data source and algorithmic design.</p>
</sec>
<sec id="sec033">
<title>Sociotechnical considerations</title>
<p>Human patient and clinician behavior may interact with the design of information systems to shape the process of ground truth construction and extraction of outcome endpoints from EHR data.</p>
<sec id="sec034">
<title>Changing practices over time.</title>
<p>The use of RANO criteria, radiology reports, and steroid prescriptions all reflect attempts to use human behavior as a proxy for a biological process. Human behavior is cataloged into the electronic health record, either by structured fields via medication prescriptions or by unstructured text via radiology reports and progress notes. As a result, these measures can only capture decisions made in the real world and may undergo “dataset shift” [<xref ref-type="bibr" rid="pdig.0000755.ref066">66</xref>] when reflecting medical practices and choices made at the time [<xref ref-type="bibr" rid="pdig.0000755.ref027">27</xref>]. Given the relatively small size of the data set in the study, future studies may opt to analyze changes in term frequency and prescription patterns over time.</p>
</sec>
<sec id="sec035">
<title>Tradeoffs between data modalities.</title>
<p>Many patients in the studied cohort had far more radiology reports available for analysis compared to actual imaging scans. This is potentially surprising given that radiology reports are an interpretation of the processed imaging file and thus, are one degree removed from the original data source. We speculate that the increased accessibility of radiology reports may be due to patient choices in cancer management. If patients are choosing to continue follow-up care at local facilities, it is possible that current data sharing infrastructure better supports the distribution of radiology reports compared to raw or processed imaging files.</p>
<p>Many available, pre-processed images in the study had to be excluded due to poor image quality or inability of the trained CV algorithm to identify appropriate areas of contrast-enhancing and non-contrast-enhancing tumor. Moreover, the high number of radiology reports that did not correspond to an available scan indicates existing infrastructure challenges in sharing and querying imaging files. Clinicians and researchers looking to automate PFS via quantitative tumor volume parameters may be limited to fewer datapoints in their analysis given the higher processing burdens of imaging. Conversely, researchers may prefer the use of other higher frequency data types to provide a more continuous picture of a patient’s disease [<xref ref-type="bibr" rid="pdig.0000755.ref083">83</xref>].</p>
</sec>
</sec>
<sec id="sec036">
<title>Limitations</title>
<p><italic>Pseudoprogression.</italic> Tumor progression can be difficult to objectively determine for a patient as patients may exhibit signs of pseudoprogression immediately following treatment. When conducting document-level analysis, a patient may demonstrate progression in one scan, a slowing down of progression in a following scan, and then a reversal of slowed progression in the future. This can make it more challenging to rely on a single document to obtain progression data given the importance of context during clinical evaluation. Thus, a framework relying on multiple points of data, such as CV-based volumetric imaging changes, may make it easier to identify between-visit changes such as pseudoprogression and stability from a previous progression instance.</p>
<p><italic>Application of RANO criteria.</italic> One limitation of the study could be the application of RANO criteria and its use as a benchmark against other automated methods. While RANO criteria are the current clinical gold standard, their application requires clinical context that may not have been retrospectively queryable within a system’s EHR. The application of the criteria is a subjective, collaborative process during which we did not have access to individual physician datapoints and thus, were not able to report interrater reliability or agreement on the application of RANO criteria. Bulk analysis methods may omit documents that are not available via a queryable framework, such as scanned, faxed, and/or handwritten notes from historical charts. Radiation treatment plan data was not available at the time of analysis, so 80% isodose lines could not be used to verify progression versus pseudoprogression when evaluating growth in the size or number of lesions. However, given that most radiologists do not have access to this data either, this limitation closely mirrors and reflects real world practice. Future studies may incorporate non-digital documents and radiation treatment plan data to evaluate the extent to which progression versus pseudoprogression is actually captured by observing changes within and outside of the isodose lines.</p>
<p><italic>Report-level ground truth for NLP.</italic> We were constrained by time- and expert-related resources in obtaining report-level ground truth for the nearly 2,000 radiology reports analyzed in this study. Given that the treatment response and disease progression timeline can vary greatly between patients (e.g., one patient demonstrating no change consistently until a given scan indicates a significant change vs. another patient with alternating periods punctuated by slow change and stability), we were not able to identify a satisfactory proxy in determining the overall evidence for progression or stability in a given report. As a result, we were not able to refine or test our selected one-to-one threshold weighting for terms indicating progression or stability. Future studies could curate (or when possible, employ any newly available public) datasets with report-level ground truth to test and robustly benchmark various rule-based weightings to obtain overall report-level progression determinations.</p>
<p><italic>Inferring behavior from data.</italic> The analysis of post-radiotherapy steroid prescriptions may have been limited by the fact that we only had access to visits conducted at our medical facility. It is possible that patients may have received medications, scans, and visits from outside providers. Additionally, given varying practices in tapering prescription schedules for steroids, it was difficult to draw finer insights from differences in prescribed doses or lengths of tapering schedules. This reflects challenges of siloed medical data systems and limits the ability of queryable data frameworks to better approximate “ground truth” determinations.</p>
<p><italic>Data cohort.</italic> The collection of data at the NIH may also reflect a more unique context in which patients are diagnosed, treated, and managed for complex diseases. Given that patients are often referred from other centers to the NIH where treatment is not associated with insurance billing, it is difficult to assess the representativeness and generalizability of data sets collected at this institution, compared to the general population of individuals affected by a given disease. To our knowledge, this is the first paper that attempts to collect and contrast different modalities of data in order to obtain a subjective patient outcome, and there are no other publicly available data sets to validate this approach yet. Future studies may try incorporating data available outside the NIH, such as radiology reports authored by different clinicians or insurance billing codes, to validate and probe differences in data sets generated between institutions.</p>
<p><italic>Missing clinical context and accountability.</italic> Some may have valid concerns with non-specialized researchers making progression determinations from the only data that they have available. Further work needs to explore the explainability and interpretability of NLP- and CV-based methods to obtain progression from free-text documents and imaging. There may also be concerns that using single data sources, such as prescriptions or free text documents, may inadvertently result in individuals ignoring relevant information contained in other data modalities. Efforts to improve data sharing and integrated frameworks also need to consider privacy and security concerns when attempting to aggregate large, multi-site sources of data for a given patient.</p>
<p><italic>Single stream analysis.</italic> Lastly, it is worth noting that all of these automated data-derived progression methods focused on using only one type of data, while the multidisciplinary team clinical standard method incorporated multiple data sources in the EHR to manually determine progression. This paper intentionally set out to focus on the abilities and limitations of individual data sources in identifying “ground truth” within a patient’s clinical history timeline. This decision was made to approximate many real-world clinical scenarios where complete, integrated datasets are not available or are difficult to compile and curate. However, with these insights in mind, future studies may investigate multimodal learning techniques to provide progression free survival dates based on a totality of available patient data, including late stage fusion, or aggregation, of the individual models developed for this paper.</p>
</sec>
</sec>
<sec id="sec037" sec-type="conclusions">
<title>Conclusion</title>
<p>Progression free survival (PFS) is a critical yet underutilized endpoint during biomarker analysis of various malignancies. The current clinical standard to determine progression within a glioblastoma patient involves the application of RANO criteria, a composite of clinical events and imaging findings, during consultation with a multidisciplinary team. This paper set out to explore the benefits and challenges associated with mining different EHR data modalities and automating the extraction of progression free survival metrics via machine learning algorithms. We developed three separate methods to automatically identify progression within a cohort of 92 glioblastoma patients treated on study at the NIH, including 1) selection of categorical corticosteroid prescriptions, 2) rule-based natural language processing of free text radiology reports, and 3) computer vision-based volumetric analysis of brain MRI scans.</p>
<p>Though all three methods were able to provide a progression date for a majority of the patient cohort, they identified fewer patients as having progressed overall compared to the manual clinical standard. Steroid prescriptions were more likely to identify progression later than the manual clinical standard, while CV-based volumetric scan and NLP-based report analysis identified progression much earlier. Approximately half of analyzed patients did not show an increase in tumor volumes, indicating that human intuitions about tumor changes during disease progression may not align with quantified volumetric parameters. Our results suggest that various EHR data modalities can be queried to automate PFS analysis, though algorithm design choices, including data modality and progression parameters, will have downstream impacts on clinical decision making or biomedical analysis. Future research directions may explore the benefits and challenges of integrating multiple EHR data modalities, also known as multi-modal analysis, during automated analysis.</p>
</sec>
<sec id="sec039" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pdig.0000755.s001" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s001.pdf" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Clinical standard timeline.</title>
<p>Regression indicates that there is no statistical relationship between total PFS and time of treatment.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000755.s002" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s002.pdf" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Custom NLP pipeline terms.</title>
<p>Terms flagged in the progression and stable category, as well as additional modifier terms added to the negation, historical, and custom surgical contextual pipelines. An asterisk (*) indicates that any stem related to or lemma derived from the term was captured.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000755.s003" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s003.pdf" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>Progression free survival dates for all data-driven methods by patient.</title>
<p>Patients are listed in descending order of clinical standard PFS.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000755.s004" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s004.pdf" xlink:type="simple">
<label>S3 Fig</label>
<caption>
<title>a) Term frequency statistics from local, customized spaCy-based NLP of radiology reports. b) Example term timeline with sentence-level context.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000755.s005" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s005.pdf" xlink:type="simple">
<label>S4 Fig</label>
<caption>
<title>Boxplot distributions for volumes extracted from brain MRI scans.</title>
<p>All volumes are reported in <italic>cm</italic><sup>3</sup>. NE tumor = non-contrast-enhancing tumor, CE tumor = contrast-enhancing tumor, Total tumor = NE tumor + CE tumor, Total burden = Total tumor + Edema.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000755.s006" mimetype="application/pdf" position="float" xlink:href="pdig.0000755.s006.pdf" xlink:type="simple">
<label>S2 Table</label>
<caption>
<title>Descriptive summary statistics for CV-computed tumor volumes across all available MRI brain scans.</title>
<p>All volumes are reported in <italic>cm</italic><sup>3</sup>. NE tumor = non-contrast-enhancing tumor, CE tumor = contrast-enhancing tumor, Total tumor = NE tumor + CE tumor, Total burden = Total tumor + Edema.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>This research was supported in part by the Intramural Research Program of the NIH. The authors would like to thank all patient participants, the NIDAP Engineering Team, Dr. Govind Nair and the qMRI Core at the National Institute of Neurological Disorders and Stroke, Dr. Frank Maldarelli, and the NCI Center for Bioinformatics and Information Technology. SC is a PhD student in the NIH Oxford-Cambridge Scholars Program.</p>
</ack>
<ref-list>
<ref id="pdig.0000755.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Mohammed</surname> <given-names>S</given-names></name>, <name><surname>Dinesan</surname> <given-names>M</given-names></name>, <name><surname>Ajayakumar</surname> <given-names>T</given-names></name>. <article-title>Survival and quality of life analysis in glioblastoma multiforme with adjuvant chemoradiotherapy: a retrospective study</article-title>. <source>Rep Pract Oncol Radiother</source>. <year>2022</year>;<volume>27</volume>(<issue>6</issue>):<fpage>1026</fpage>–<lpage>36</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5603/RPOR.a2022.0113" xlink:type="simple">10.5603/RPOR.a2022.0113</ext-link></comment> <object-id pub-id-type="pmid">36632307</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Wen</surname> <given-names>PY</given-names></name>, <name><surname>Macdonald</surname> <given-names>DR</given-names></name>, <name><surname>Reardon</surname> <given-names>DA</given-names></name>, <name><surname>Cloughesy</surname> <given-names>TF</given-names></name>, <name><surname>Sorensen</surname> <given-names>AG</given-names></name>, <name><surname>Galanis</surname> <given-names>E</given-names></name>, <etal>et al</etal>. <article-title>Updated response assessment criteria for high-grade gliomas: response assessment in neuro-oncology working group</article-title>. <source>J Clin Oncol</source>. <year>2010</year>;<volume>28</volume>(<issue>11</issue>):<fpage>1963</fpage>–<lpage>72</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/JCO.2009.26.3541" xlink:type="simple">10.1200/JCO.2009.26.3541</ext-link></comment> <object-id pub-id-type="pmid">20231676</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Henriksen</surname> <given-names>OM</given-names></name>, <name><surname>Del Mar Álvarez-Torres</surname> <given-names>M</given-names></name>, <name><surname>Figueiredo</surname> <given-names>P</given-names></name>, <name><surname>Hangel</surname> <given-names>G</given-names></name>, <name><surname>Keil</surname> <given-names>VC</given-names></name>, <name><surname>Nechifor</surname> <given-names>RE</given-names></name>, <etal>et al</etal>. <article-title>High-grade glioma treatment response monitoring biomarkers: a position statement on the evidence supporting the use of advanced MRI techniques in the clinic, the latest bench-to-bedside developments. part 1: perfusion and diffusion techniques</article-title>. <source>Front Oncol</source>. <year>2022</year>;<volume>12</volume>:<fpage>810263</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fonc.2022.810263" xlink:type="simple">10.3389/fonc.2022.810263</ext-link></comment> <object-id pub-id-type="pmid">35359414</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Le Fèvre</surname> <given-names>C</given-names></name>, <name><surname>Lhermitte</surname> <given-names>B</given-names></name>, <name><surname>Ahle</surname> <given-names>G</given-names></name>, <name><surname>Chambrelant</surname> <given-names>I</given-names></name>, <name><surname>Cebula</surname> <given-names>H</given-names></name>, <name><surname>Antoni</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Pseudoprogression versus true progression in glioblastoma patients: a multiapproach literature review: Part 1 - Molecular, morphological and clinical features</article-title>. <source>Crit Rev Oncol Hematol</source>. <year>2021</year>;<volume>157</volume>:<fpage>103188</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.critrevonc.2020.103188" xlink:type="simple">10.1016/j.critrevonc.2020.103188</ext-link></comment> <object-id pub-id-type="pmid">33307200</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Le Fèvre</surname> <given-names>C</given-names></name>, <name><surname>Constans</surname> <given-names>J-M</given-names></name>, <name><surname>Chambrelant</surname> <given-names>I</given-names></name>, <name><surname>Antoni</surname> <given-names>D</given-names></name>, <name><surname>Bund</surname> <given-names>C</given-names></name>, <name><surname>Leroy-Freschini</surname> <given-names>B</given-names></name>, <etal>et al</etal>. <article-title>Pseudoprogression versus true progression in glioblastoma patients: a multiapproach literature review. Part 2 - Radiological features and metric markers</article-title>. <source>Crit Rev Oncol Hematol</source>. <year>2021</year>;<volume>159</volume>:<fpage>103230</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.critrevonc.2021.103230" xlink:type="simple">10.1016/j.critrevonc.2021.103230</ext-link></comment> <object-id pub-id-type="pmid">33515701</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Young</surname> <given-names>JS</given-names></name>, <name><surname>Al-Adli</surname> <given-names>N</given-names></name>, <name><surname>Scotford</surname> <given-names>K</given-names></name>, <name><surname>Cha</surname> <given-names>S</given-names></name>, <name><surname>Berger</surname> <given-names>MS</given-names></name>. <article-title>Pseudoprogression versus true progression in glioblastoma: what neurosurgeons need to know</article-title>. <source>J Neurosurg</source>. <year>2023</year>;<volume>139</volume>(<issue>3</issue>):<fpage>748</fpage>–<lpage>59</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3171/2022.12.JNS222173" xlink:type="simple">10.3171/2022.12.JNS222173</ext-link></comment> <object-id pub-id-type="pmid">36790010</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Macdonald</surname> <given-names>DR</given-names></name>, <name><surname>Cascino</surname> <given-names>TL</given-names></name>, <name><surname>Schold</surname> <given-names>SC</given-names> <suffix>Jr</suffix></name>, <name><surname>Cairncross</surname> <given-names>JG</given-names></name>. <article-title>Response criteria for phase II studies of supratentorial malignant glioma</article-title>. <source>J Clin Oncol</source>. <year>1990</year>;<volume>8</volume>(<issue>7</issue>):<fpage>1277</fpage>–<lpage>80</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/JCO.1990.8.7.1277" xlink:type="simple">10.1200/JCO.1990.8.7.1277</ext-link></comment> <object-id pub-id-type="pmid">2358840</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Wen</surname> <given-names>PY</given-names></name>, <name><surname>van den Bent</surname> <given-names>M</given-names></name>, <name><surname>Youssef</surname> <given-names>G</given-names></name>, <name><surname>Cloughesy</surname> <given-names>TF</given-names></name>, <name><surname>Ellingson</surname> <given-names>BM</given-names></name>, <name><surname>Weller</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>RANO 2.0: Update to the response assessment in neuro-oncology criteria for high- and low-grade gliomas in adults</article-title>. <source>J Clin Oncol</source>. <year>2023</year>;<volume>41</volume>(<issue>33</issue>):<fpage>5187</fpage>–<lpage>99</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/JCO.23.01059" xlink:type="simple">10.1200/JCO.23.01059</ext-link></comment> <object-id pub-id-type="pmid">37774317</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hutter</surname> <given-names>C</given-names></name>, <name><surname>Zenklusen</surname> <given-names>JC</given-names></name>. <article-title>The cancer genome atlas: creating lasting value beyond its data</article-title>. <source>Cell</source>. <year>2018</year>;<volume>173</volume>(<issue>2</issue>):<fpage>283</fpage>–<lpage>5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cell.2018.03.042" xlink:type="simple">10.1016/j.cell.2018.03.042</ext-link></comment> <object-id pub-id-type="pmid">29625045</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Clark</surname> <given-names>K</given-names></name>, <name><surname>Vendt</surname> <given-names>B</given-names></name>, <name><surname>Smith</surname> <given-names>K</given-names></name>, <name><surname>Freymann</surname> <given-names>J</given-names></name>, <name><surname>Kirby</surname> <given-names>J</given-names></name>, <name><surname>Koppel</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>The Cancer Imaging Archive (TCIA): maintaining and operating a public information repository</article-title>. <source>J Digit Imaging</source>. <year>2013</year>;<volume>26</volume>(<issue>6</issue>):<fpage>1045</fpage>–<lpage>57</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10278-013-9622-7" xlink:type="simple">10.1007/s10278-013-9622-7</ext-link></comment> <object-id pub-id-type="pmid">23884657</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bhuvaneshwar</surname> <given-names>K</given-names></name>, <name><surname>Belouali</surname> <given-names>A</given-names></name>, <name><surname>Singh</surname> <given-names>V</given-names></name>, <name><surname>Johnson</surname> <given-names>RM</given-names></name>, <name><surname>Song</surname> <given-names>L</given-names></name>, <name><surname>Alaoui</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>G-DOC Plus - an integrative bioinformatics platform for precision medicine</article-title>. <source>BMC Bioinformatics</source>. <year>2016</year>;<volume>17</volume>(<issue>1</issue>):<fpage>193</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12859-016-1010-0" xlink:type="simple">10.1186/s12859-016-1010-0</ext-link></comment> <object-id pub-id-type="pmid">27130330</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Zhao</surname> <given-names>Z</given-names></name>, <name><surname>Zhang</surname> <given-names>K-N</given-names></name>, <name><surname>Wang</surname> <given-names>Q</given-names></name>, <name><surname>Li</surname> <given-names>G</given-names></name>, <name><surname>Zeng</surname> <given-names>F</given-names></name>, <name><surname>Zhang</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>Chinese glioma genome atlas (CGGA): a comprehensive resource with functional genomic data from Chinese glioma patients</article-title>. <source>Genomics Proteom Bioinform</source>. <year>2021</year>;<volume>19</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>12</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.gpb.2020.10.005" xlink:type="simple">10.1016/j.gpb.2020.10.005</ext-link></comment> <object-id pub-id-type="pmid">33662628</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Huang</surname> <given-names>J</given-names></name>, <name><surname>Galal</surname> <given-names>G</given-names></name>, <name><surname>Etemadi</surname> <given-names>M</given-names></name>, <name><surname>Vaidyanathan</surname> <given-names>M</given-names></name>. <article-title>Evaluation and mitigation of racial bias in clinical machine learning models: scoping review</article-title>. <source>JMIR Med Inform</source>. <year>2022</year>;<volume>10</volume>(<issue>5</issue>):e36388. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2196/36388" xlink:type="simple">10.2196/36388</ext-link></comment> <object-id pub-id-type="pmid">35639450</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Nie</surname> <given-names>D</given-names></name>, <name><surname>Lu</surname> <given-names>J</given-names></name>, <name><surname>Zhang</surname> <given-names>H</given-names></name>, <name><surname>Adeli</surname> <given-names>E</given-names></name>, <name><surname>Wang</surname> <given-names>J</given-names></name>, <name><surname>Yu</surname> <given-names>Z</given-names></name>, <etal>et al</etal>. <article-title>Multi-channel 3D deep feature learning for survival time prediction of brain tumor patients using multi-modal neuroimages</article-title>. <source>Sci Rep</source>. <year>2019</year>;<volume>9</volume>(<issue>1</issue>):<fpage>1103</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41598-018-37387-9" xlink:type="simple">10.1038/s41598-018-37387-9</ext-link></comment> <object-id pub-id-type="pmid">30705340</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Huang</surname> <given-names>S-C</given-names></name>, <name><surname>Pareek</surname> <given-names>A</given-names></name>, <name><surname>Seyyedi</surname> <given-names>S</given-names></name>, <name><surname>Banerjee</surname> <given-names>I</given-names></name>, <name><surname>Lungren</surname> <given-names>MP</given-names></name>. <article-title>Fusion of medical imaging and electronic health records using deep learning: a systematic review and implementation guidelines</article-title>. <source>NPJ Digit Med</source>. <year>2020</year>;<volume>3</volume>:<fpage>136</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41746-020-00341-z" xlink:type="simple">10.1038/s41746-020-00341-z</ext-link></comment> <object-id pub-id-type="pmid">33083571</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Soenksen</surname> <given-names>LR</given-names></name>, <name><surname>Ma</surname> <given-names>Y</given-names></name>, <name><surname>Zeng</surname> <given-names>C</given-names></name>, <name><surname>Boussioux</surname> <given-names>L</given-names></name>, <name><surname>Villalobos Carballo</surname> <given-names>K</given-names></name>, <name><surname>Na</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>Integrated multimodal artificial intelligence framework for healthcare applications</article-title>. <source>NPJ Digit Med</source>. <year>2022</year>;<volume>5</volume>(<issue>1</issue>):<fpage>149</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41746-022-00689-4" xlink:type="simple">10.1038/s41746-022-00689-4</ext-link></comment> <object-id pub-id-type="pmid">36127417</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Zhao</surname> <given-names>R</given-names></name>, <name><surname>Zhuge</surname> <given-names>Y</given-names></name>, <name><surname>Camphausen</surname> <given-names>K</given-names></name>, <name><surname>Krauze</surname> <given-names>AV</given-names></name>. <article-title>Machine learning based survival prediction in glioma using large-scale registry data</article-title>. <source>Health Informatics J</source>. <year>2022</year>;<volume>28</volume>(<issue>4</issue>):<fpage>14604582221135427</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/14604582221135427" xlink:type="simple">10.1177/14604582221135427</ext-link></comment> <object-id pub-id-type="pmid">36264067</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kwiatkowska-Miernik</surname> <given-names>A</given-names></name>, <name><surname>Wasilewski</surname> <given-names>PG</given-names></name>, <name><surname>Mruk</surname> <given-names>B</given-names></name>, <name><surname>Sklinda</surname> <given-names>K</given-names></name>, <name><surname>Bujko</surname> <given-names>M</given-names></name>, <name><surname>Walecki</surname> <given-names>J</given-names></name>. <article-title>Estimating progression-free survival in patients with primary high-grade glioma using machine learning</article-title>. <source>J Clin Med</source>. <year>2024</year>;<volume>13</volume>(<issue>20</issue>):<fpage>6172</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/jcm13206172" xlink:type="simple">10.3390/jcm13206172</ext-link></comment> <object-id pub-id-type="pmid">39458122</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kickingereder</surname> <given-names>P</given-names></name>, <name><surname>Isensee</surname> <given-names>F</given-names></name>, <name><surname>Tursunova</surname> <given-names>I</given-names></name>, <name><surname>Petersen</surname> <given-names>J</given-names></name>, <name><surname>Neuberger</surname> <given-names>U</given-names></name>, <name><surname>Bonekamp</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Automated quantitative tumour response assessment of MRI in neuro-oncology with artificial neural networks: a multicentre, retrospective study</article-title>. <source>Lancet Oncol</source>. <year>2019</year>;<volume>20</volume>(<issue>5</issue>):<fpage>728</fpage>–<lpage>40</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S1470-2045(19)30098-1" xlink:type="simple">10.1016/S1470-2045(19)30098-1</ext-link></comment> <object-id pub-id-type="pmid">30952559</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Nakhate</surname> <given-names>V</given-names></name>, <name><surname>Gonzalez Castro</surname> <given-names>LN</given-names></name>. <article-title>Artificial intelligence in neuro-oncology</article-title>. <source>Front Neurosci</source>. <year>2023</year>;<volume>17</volume>:<fpage>1217629</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fnins.2023.1217629" xlink:type="simple">10.3389/fnins.2023.1217629</ext-link></comment> <object-id pub-id-type="pmid">38161802</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Redlich</surname> <given-names>J-P</given-names></name>, <name><surname>Feuerhake</surname> <given-names>F</given-names></name>, <name><surname>Weis</surname> <given-names>J</given-names></name>, <name><surname>Schaadt</surname> <given-names>NS</given-names></name>, <name><surname>Teuber-Hanselmann</surname> <given-names>S</given-names></name>, <name><surname>Buck</surname> <given-names>C</given-names></name>, <etal>et al</etal>. <article-title>Applications of artificial intelligence in the analysis of histopathology images of gliomas: a review</article-title>. <source>NPJ Imaging</source>. <year>2024</year>;<volume>2</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>16</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s44303-024-00020-8" xlink:type="simple">10.1038/s44303-024-00020-8</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Calabrese</surname> <given-names>E</given-names></name>, <name><surname>Villanueva-Meyer</surname> <given-names>JE</given-names></name>, <name><surname>Cha</surname> <given-names>S</given-names></name>. <article-title>A fully automated artificial intelligence method for non-invasive, imaging-based identification of genetic alterations in glioblastomas</article-title>. <source>Sci Rep</source>. <year>2020</year>;<volume>10</volume>(<issue>1</issue>):<fpage>11852</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41598-020-68857-8" xlink:type="simple">10.1038/s41598-020-68857-8</ext-link></comment> <object-id pub-id-type="pmid">32678261</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref023"><label>23</label><mixed-citation publication-type="other" xlink:type="simple">NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines) for Guideline. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines) for Guideline Name 9.4.2024. Copyright National Comprehensive Cancer Network, Inc. 2024. All rights reserved. Accessed [September 4, 2024]. To view the most recent and complete version of the guideline, go online to NCCN.org</mixed-citation></ref>
<ref id="pdig.0000755.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Provenzale</surname> <given-names>JM</given-names></name>, <name><surname>Ison</surname> <given-names>C</given-names></name>, <name><surname>Delong</surname> <given-names>D</given-names></name>. <article-title>Bidimensional measurements in brain tumors: assessment of interobserver variability</article-title>. <source>AJR Am J Roentgenol</source>. <year>2009</year>;<volume>193</volume>(<issue>6</issue>):<fpage>W515</fpage>–<lpage>22</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2214/AJR.09.2615" xlink:type="simple">10.2214/AJR.09.2615</ext-link></comment> <object-id pub-id-type="pmid">19933626</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Verheij</surname> <given-names>RA</given-names></name>, <name><surname>Curcin</surname> <given-names>V</given-names></name>, <name><surname>Delaney</surname> <given-names>BC</given-names></name>, <name><surname>McGilchrist</surname> <given-names>MM</given-names></name>. <article-title>Possible sources of bias in primary care electronic health record data use and reuse</article-title>. <source>J Med Internet Res</source>. <year>2018</year>;<volume>20</volume>(<issue>5</issue>):e185. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2196/jmir.9134" xlink:type="simple">10.2196/jmir.9134</ext-link></comment> <object-id pub-id-type="pmid">29844010</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Perets</surname> <given-names>O</given-names></name>, <name><surname>Stagno</surname> <given-names>E</given-names></name>, <name><surname>Yehuda</surname> <given-names>EB</given-names></name>, <name><surname>McNichol</surname> <given-names>M</given-names></name>, <name><surname>Anthony Celi</surname> <given-names>L</given-names></name>, <name><surname>Rappoport</surname> <given-names>N</given-names></name>, <etal>et al</etal>. <article-title>Inherent bias in electronic health records: a scoping review of sources of bias</article-title>. <source>medRxiv</source>. <year>2024</year>;2024.04.09.24305594. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/2024.04.09.24305594" xlink:type="simple">10.1101/2024.04.09.24305594</ext-link></comment> <object-id pub-id-type="pmid">38680842</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref027"><label>27</label><mixed-citation publication-type="conf" xlink:type="simple"><name><surname>Zając</surname> <given-names>HD</given-names></name>, <name><surname>Avlona</surname> <given-names>NR</given-names></name>, <name><surname>Kensing</surname> <given-names>F</given-names></name>, <name><surname>Andersen</surname> <given-names>TO</given-names></name>, <name><surname>Shklovski</surname> <given-names>I</given-names></name>. <article-title>Ground truth or dare: factors affecting the creation of medical datasets for training AI</article-title>. In: <conf-name>Proceedings of the 2023 AAAI/ACM Conference on AI, Ethics, and Society</conf-name>. <publisher-loc>Montréal, QC, Canada</publisher-loc>: <publisher-name>ACM</publisher-name>; <year>2023</year>. p. <fpage>351</fpage>–<lpage>62</lpage>. Available from: <ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.1145/3600211.3604766" xlink:type="simple">https://dl.acm.org/doi/10.1145/3600211.3604766</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Sylolypavan</surname> <given-names>A</given-names></name>, <name><surname>Sleeman</surname> <given-names>D</given-names></name>, <name><surname>Wu</surname> <given-names>H</given-names></name>, <name><surname>Sim</surname> <given-names>M</given-names></name>. <article-title>The impact of inconsistent human annotations on AI driven clinical decision making</article-title>. <source>NPJ Digit Med</source>. <year>2023</year>;<volume>6</volume>(<issue>1</issue>):<fpage>26</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41746-023-00773-3" xlink:type="simple">10.1038/s41746-023-00773-3</ext-link></comment> <object-id pub-id-type="pmid">36810915</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bower</surname> <given-names>JK</given-names></name>, <name><surname>Patel</surname> <given-names>S</given-names></name>, <name><surname>Rudy</surname> <given-names>JE</given-names></name>, <name><surname>Felix</surname> <given-names>AS</given-names></name>. <article-title>Addressing bias in electronic health record-based surveillance of cardiovascular disease risk: finding the signal through the noise</article-title>. <source>Curr Epidemiol Rep</source>. <year>2017</year>;<volume>4</volume>(<issue>4</issue>):<fpage>346</fpage>–<lpage>52</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s40471-017-0130-z" xlink:type="simple">10.1007/s40471-017-0130-z</ext-link></comment> <object-id pub-id-type="pmid">31223556</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Sterling</surname> <given-names>TD</given-names></name>. <article-title>Publication decisions and their possible effects on inferences drawn from tests of significance—or vice versa</article-title>. <source>J Am Statist Assoc</source>. <year>1959</year>;<volume>54</volume>(<issue>285</issue>):<fpage>30</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2307/2282137" xlink:type="simple">10.2307/2282137</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref031"><label>31</label><mixed-citation publication-type="conf" xlink:type="simple"><name><surname>Bennett</surname> <given-names>S</given-names></name>, <name><surname>Claisse</surname> <given-names>C</given-names></name>, <name><surname>Luger</surname> <given-names>E</given-names></name>, <name><surname>Durrant</surname> <given-names>AC</given-names></name>. <article-title>Unpicking epistemic injustices in digital health: on the implications of designing data-driven technologies for the management of long-term conditions</article-title>. In: <conf-name>Proceedings of the 2023 AAAI/ACM Conference on AI, Ethics, and Society</conf-name>. <publisher-loc>Montreal, QC, Canada</publisher-loc>: <publisher-name>ACM</publisher-name>; <year>2023</year>. p. <fpage>322</fpage>–<lpage>32</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3600211.3604684" xlink:type="simple">10.1145/3600211.3604684</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bernhardt</surname> <given-names>M</given-names></name>, <name><surname>Castro</surname> <given-names>DC</given-names></name>, <name><surname>Tanno</surname> <given-names>R</given-names></name>, <name><surname>Schwaighofer</surname> <given-names>A</given-names></name>, <name><surname>Tezcan</surname> <given-names>KC</given-names></name>, <name><surname>Monteiro</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Active label cleaning for improved dataset quality under resource constraints</article-title>. <source>Nat Commun</source>. <year>2022</year>;<volume>13</volume>(<issue>1</issue>):<fpage>1161</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41467-022-28818-3" xlink:type="simple">10.1038/s41467-022-28818-3</ext-link></comment> <object-id pub-id-type="pmid">35246539</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref033"><label>33</label><mixed-citation publication-type="conf" xlink:type="simple"><name><surname>Rostamzadeh</surname> <given-names>N</given-names></name>, <name><surname>Mincu</surname> <given-names>D</given-names></name>, <name><surname>Roy</surname> <given-names>S</given-names></name>, <name><surname>Smart</surname> <given-names>A</given-names></name>, <name><surname>Wilcox</surname> <given-names>L</given-names></name>, <name><surname>Pushkarna</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Healthsheet: development of a transparency artifact for health datasets</article-title>. In: <conf-name>Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency. FAccT ’22</conf-name>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>; <year>2022</year>. p. <fpage>1943</fpage>–<lpage>61</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3531146.3533239" xlink:type="simple">10.1145/3531146.3533239</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref034"><label>34</label><mixed-citation publication-type="other" xlink:type="simple">Smit A, Jain S, Rajpurkar P, Pareek A, Ng AY, Lungren MP. <article-title>CheXbert: combining automatic labelers and expert annotations for accurate radiology report labeling using BERT</article-title>; <year>2020</year>. Available from: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2004.09167" xlink:type="simple">http://arxiv.org/abs/2004.09167</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Aljabri</surname> <given-names>M</given-names></name>, <name><surname>AlAmir</surname> <given-names>M</given-names></name>, <name><surname>AlGhamdi</surname> <given-names>M</given-names></name>, <name><surname>Abdel-Mottaleb</surname> <given-names>M</given-names></name>, <name><surname>Collado-Mesa</surname> <given-names>F</given-names></name>. <article-title>Towards a better understanding of annotation tools for medical imaging: a survey</article-title>. <source>Multimed Tools Appl</source>. <year>2022</year>;<volume>81</volume>(<issue>18</issue>):<fpage>25877</fpage>–<lpage>911</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s11042-022-12100-1" xlink:type="simple">10.1007/s11042-022-12100-1</ext-link></comment> <object-id pub-id-type="pmid">35350630</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Sangariyavanich</surname> <given-names>E</given-names></name>, <name><surname>Ponthongmak</surname> <given-names>W</given-names></name>, <name><surname>Tansawet</surname> <given-names>A</given-names></name>, <name><surname>Theera-Ampornpunt</surname> <given-names>N</given-names></name>, <name><surname>Numthavaj</surname> <given-names>P</given-names></name>, <name><surname>McKay</surname> <given-names>GJ</given-names></name>, <etal>et al</etal>. <article-title>Systematic review of natural language processing for recurrent cancer detection from electronic medical records</article-title>. <source>Inf Med Unlock</source>. <year>2023</year>;<volume>41</volume>:<fpage>101326</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.imu.2023.101326" xlink:type="simple">10.1016/j.imu.2023.101326</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref037"><label>37</label><mixed-citation publication-type="other" xlink:type="simple">Liao W, Liu Z, Dai H, Xu S, Wu Z, Zhang Y, <etal>et al</etal>. <article-title>Differentiate ChatGPT-generated and human-written medical texts</article-title>; <year>2023</year>. Available from: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2304.11567" xlink:type="simple">http://arxiv.org/abs/2304.11567</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref038"><label>38</label><mixed-citation publication-type="other" xlink:type="simple">Nori H, King N, McKinney SM, Carignan D, Horvitz E. <article-title>Capabilities of GPT-4 on medical challenge problems</article-title>; <year>2023</year>. Available from: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2303.13375" xlink:type="simple">http://arxiv.org/abs/2303.13375</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Dennstädt</surname> <given-names>F</given-names></name>, <name><surname>Hastings</surname> <given-names>J</given-names></name>, <name><surname>Putora</surname> <given-names>PM</given-names></name>, <name><surname>Vu</surname> <given-names>E</given-names></name>, <name><surname>Fischer</surname> <given-names>GF</given-names></name>, <name><surname>Süveg</surname> <given-names>K</given-names></name>, <etal>et al</etal>. <article-title>Exploring capabilities of large language models such as ChatGPT in radiation oncology</article-title>. <source>Adv Radiat Oncol</source>. <year>2023</year>;<volume>9</volume>(<issue>3</issue>):<fpage>101400</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.adro.2023.101400" xlink:type="simple">10.1016/j.adro.2023.101400</ext-link></comment> <object-id pub-id-type="pmid">38304112</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Davis</surname> <given-names>MF</given-names></name>, <name><surname>Sriram</surname> <given-names>S</given-names></name>, <name><surname>Bush</surname> <given-names>WS</given-names></name>, <name><surname>Denny</surname> <given-names>JC</given-names></name>, <name><surname>Haines</surname> <given-names>JL</given-names></name>. <article-title>Automated extraction of clinical traits of multiple sclerosis in electronic medical records</article-title>. <source>J Am Med Inform Assoc</source>. <year>2013</year>;<volume>20</volume>(<issue>e2</issue>):<fpage>e334</fpage>–<lpage>40</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2013-001999" xlink:type="simple">10.1136/amiajnl-2013-001999</ext-link></comment> <object-id pub-id-type="pmid">24148554</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref041"><label>41</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Sheikhalishahi</surname> <given-names>S</given-names></name>, <name><surname>Miotto</surname> <given-names>R</given-names></name>, <name><surname>Dudley</surname> <given-names>JT</given-names></name>, <name><surname>Lavelli</surname> <given-names>A</given-names></name>, <name><surname>Rinaldi</surname> <given-names>F</given-names></name>, <name><surname>Osmani</surname> <given-names>V</given-names></name>. <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>. <source>JMIR Med Inform</source>. <year>2019</year>;<volume>7</volume>(<issue>2</issue>):e12239. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2196/12239" xlink:type="simple">10.2196/12239</ext-link></comment> <object-id pub-id-type="pmid">31066697</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref042"><label>42</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Wieland-Jorna</surname> <given-names>Y</given-names></name>, <name><surname>van Kooten</surname> <given-names>D</given-names></name>, <name><surname>Verheij</surname> <given-names>RA</given-names></name>, <name><surname>de Man</surname> <given-names>Y</given-names></name>, <name><surname>Francke</surname> <given-names>AL</given-names></name>, <name><surname>Oosterveld-Vlug</surname> <given-names>MG</given-names></name>. <article-title>Natural language processing systems for extracting information from electronic health records about activities of daily living. A systematic review</article-title>. <source>JAMIA Open</source>. <year>2024</year>;<volume>7</volume>(<issue>2</issue>):ooae044. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamiaopen/ooae044" xlink:type="simple">10.1093/jamiaopen/ooae044</ext-link></comment> <object-id pub-id-type="pmid">38798774</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref043"><label>43</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Patra</surname> <given-names>BG</given-names></name>, <name><surname>Sharma</surname> <given-names>MM</given-names></name>, <name><surname>Vekaria</surname> <given-names>V</given-names></name>, <name><surname>Adekkanattu</surname> <given-names>P</given-names></name>, <name><surname>Patterson</surname> <given-names>OV</given-names></name>, <name><surname>Glicksberg</surname> <given-names>B</given-names></name>, <etal>et al</etal>. <article-title>Extracting social determinants of health from electronic health records using natural language processing: a systematic review</article-title>. <source>J Am Med Inform Assoc</source>. <year>2021</year>;<volume>28</volume>(<issue>12</issue>):<fpage>2716</fpage>–<lpage>27</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocab170" xlink:type="simple">10.1093/jamia/ocab170</ext-link></comment> <object-id pub-id-type="pmid">34613399</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Caccamisi</surname> <given-names>A</given-names></name>, <name><surname>Jørgensen</surname> <given-names>L</given-names></name>, <name><surname>Dalianis</surname> <given-names>H</given-names></name>, <name><surname>Rosenlund</surname> <given-names>M</given-names></name>. <article-title>Natural language processing and machine learning to enable automatic extraction and classification of patients’ smoking status from electronic medical records</article-title>. <source>Ups J Med Sci</source>. <year>2020</year>;<volume>125</volume>(<issue>4</issue>):<fpage>316</fpage>–<lpage>24</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/03009734.2020.1792010" xlink:type="simple">10.1080/03009734.2020.1792010</ext-link></comment> <object-id pub-id-type="pmid">32696698</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref045"><label>45</label><mixed-citation publication-type="other" xlink:type="simple">Agrawal M, Hegselmann S, Lang H, Kim Y, Sontag D. <article-title>Large language models are few-shot clinical information extractors</article-title>. In: <conf-name>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</conf-name>. <publisher-loc>Abu Dhabi, United Arab Emirates</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>; <year>2022</year>. p. <fpage>1998</fpage>–<lpage>2022</lpage>. Available from: <ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2022.emnlp-main.130" xlink:type="simple">https://aclanthology.org/2022.emnlp-main.130</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hossain</surname> <given-names>E</given-names></name>, <name><surname>Rana</surname> <given-names>R</given-names></name>, <name><surname>Higgins</surname> <given-names>N</given-names></name>, <name><surname>Soar</surname> <given-names>J</given-names></name>, <name><surname>Barua</surname> <given-names>PD</given-names></name>, <name><surname>Pisani</surname> <given-names>AR</given-names></name>, <etal>et al</etal>. <article-title>Natural language processing in electronic health records in relation to healthcare decision-making: a systematic review</article-title>. <source>Comput Biol Med</source>. <year>2023</year>;<volume>155</volume>:<fpage>106649</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.compbiomed.2023.106649" xlink:type="simple">10.1016/j.compbiomed.2023.106649</ext-link></comment> <object-id pub-id-type="pmid">36805219</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Li</surname> <given-names>I</given-names></name>, <name><surname>Pan</surname> <given-names>J</given-names></name>, <name><surname>Goldwasser</surname> <given-names>J</given-names></name>, <name><surname>Verma</surname> <given-names>N</given-names></name>, <name><surname>Wong</surname> <given-names>WP</given-names></name>, <name><surname>Nuzumlalı</surname> <given-names>MY</given-names></name>, <etal>et al</etal>. <article-title>Neural natural language processing for unstructured data in electronic health records: a review</article-title>. <source>Comput Sci Rev</source>. <year>2022</year>;<volume>46</volume>:<fpage>100511</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cosrev.2022.100511" xlink:type="simple">10.1016/j.cosrev.2022.100511</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref048"><label>48</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Koleck</surname> <given-names>TA</given-names></name>, <name><surname>Dreisbach</surname> <given-names>C</given-names></name>, <name><surname>Bourne</surname> <given-names>PE</given-names></name>, <name><surname>Bakken</surname> <given-names>S</given-names></name>. <article-title>Natural language processing of symptoms documented in free-text narratives of electronic health records: a systematic review</article-title>. <source>J Am Med Inform Assoc</source>. <year>2019</year>;<volume>26</volume>(<issue>4</issue>):<fpage>364</fpage>–<lpage>79</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocy173" xlink:type="simple">10.1093/jamia/ocy173</ext-link></comment> <object-id pub-id-type="pmid">30726935</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Leyh-Bannurah</surname> <given-names>S-R</given-names></name>, <name><surname>Tian</surname> <given-names>Z</given-names></name>, <name><surname>Karakiewicz</surname> <given-names>PI</given-names></name>, <name><surname>Wolffgang</surname> <given-names>U</given-names></name>, <name><surname>Sauter</surname> <given-names>G</given-names></name>, <name><surname>Fisch</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Deep learning for natural language processing in urology: state-of-the-art automated extraction of detailed pathologic prostate cancer data from narratively written electronic health records</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2018</year>;<volume>2</volume>:<fpage>1</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.18.00080" xlink:type="simple">10.1200/CCI.18.00080</ext-link></comment> <object-id pub-id-type="pmid">30652616</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref050"><label>50</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Castro</surname> <given-names>SM</given-names></name>, <name><surname>Tseytlin</surname> <given-names>E</given-names></name>, <name><surname>Medvedeva</surname> <given-names>O</given-names></name>, <name><surname>Mitchell</surname> <given-names>K</given-names></name>, <name><surname>Visweswaran</surname> <given-names>S</given-names></name>, <name><surname>Bekhuis</surname> <given-names>T</given-names></name>, <etal>et al</etal>. <article-title>Automated annotation and classification of BI-RADS assessment from radiology reports</article-title>. <source>J Biomed Inform</source>. <year>2017</year>;<volume>69</volume>:<fpage>177</fpage>–<lpage>87</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2017.04.011" xlink:type="simple">10.1016/j.jbi.2017.04.011</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Zeng</surname> <given-names>J</given-names></name>, <name><surname>Banerjee</surname> <given-names>I</given-names></name>, <name><surname>Henry</surname> <given-names>AS</given-names></name>, <name><surname>Wood</surname> <given-names>DJ</given-names></name>, <name><surname>Shachter</surname> <given-names>RD</given-names></name>, <name><surname>Gensheimer</surname> <given-names>MF</given-names></name>, <etal>et al</etal>. <article-title>Natural language processing to identify cancer treatments with electronic medical records</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2021</year>;<volume>5</volume>:<fpage>379</fpage>–<lpage>93</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.20.00173" xlink:type="simple">10.1200/CCI.20.00173</ext-link></comment> <object-id pub-id-type="pmid">33822653</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Savova</surname> <given-names>GK</given-names></name>, <name><surname>Tseytlin</surname> <given-names>E</given-names></name>, <name><surname>Finan</surname> <given-names>S</given-names></name>, <name><surname>Castine</surname> <given-names>M</given-names></name>, <name><surname>Miller</surname> <given-names>T</given-names></name>, <name><surname>Medvedeva</surname> <given-names>O</given-names></name>, <etal>et al</etal>. <article-title>DeepPhe: a natural language processing system for extracting cancer phenotypes from clinical records</article-title>. <source>Cancer Res</source>. <year>2017</year>;<volume>77</volume>(<issue>21</issue>):<fpage>e115</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1158/0008-5472.CAN-17-0615" xlink:type="simple">10.1158/0008-5472.CAN-17-0615</ext-link></comment> <object-id pub-id-type="pmid">29092954</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Schiappa</surname> <given-names>R</given-names></name>, <name><surname>Contu</surname> <given-names>S</given-names></name>, <name><surname>Culie</surname> <given-names>D</given-names></name>, <name><surname>Thamphya</surname> <given-names>B</given-names></name>, <name><surname>Chateau</surname> <given-names>Y</given-names></name>, <name><surname>Gal</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>RUBY: natural language processing of French electronic medical records for breast cancer research</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2022</year>;<volume>6</volume>:e2100199. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.21.00199" xlink:type="simple">10.1200/CCI.21.00199</ext-link></comment> <object-id pub-id-type="pmid">35960900</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref054"><label>54</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Wang</surname> <given-names>L</given-names></name>, <name><surname>Fu</surname> <given-names>S</given-names></name>, <name><surname>Wen</surname> <given-names>A</given-names></name>, <name><surname>Ruan</surname> <given-names>X</given-names></name>, <name><surname>He</surname> <given-names>H</given-names></name>, <name><surname>Liu</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Assessment of electronic health record for cancer research and patient care through a scoping review of cancer natural language processing</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2022</year>;<volume>6</volume>:e2200006. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.22.00006" xlink:type="simple">10.1200/CCI.22.00006</ext-link></comment> <object-id pub-id-type="pmid">35917480</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref055"><label>55</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Karimi</surname> <given-names>YH</given-names></name>, <name><surname>Blayney</surname> <given-names>DW</given-names></name>, <name><surname>Kurian</surname> <given-names>AW</given-names></name>, <name><surname>Shen</surname> <given-names>J</given-names></name>, <name><surname>Yamashita</surname> <given-names>R</given-names></name>, <name><surname>Rubin</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Development and use of natural language processing for identification of distant cancer recurrence and sites of distant recurrence using unstructured electronic health record data</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2021</year>;<volume>5</volume>:<fpage>469</fpage>–<lpage>78</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.20.00165" xlink:type="simple">10.1200/CCI.20.00165</ext-link></comment> <object-id pub-id-type="pmid">33929889</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref056"><label>56</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Carrell</surname> <given-names>DS</given-names></name>, <name><surname>Halgrim</surname> <given-names>S</given-names></name>, <name><surname>Tran</surname> <given-names>D-T</given-names></name>, <name><surname>Buist</surname> <given-names>DSM</given-names></name>, <name><surname>Chubak</surname> <given-names>J</given-names></name>, <name><surname>Chapman</surname> <given-names>WW</given-names></name>, <etal>et al</etal>. <article-title>Using natural language processing to improve efficiency of manual chart abstraction in research: the case of breast cancer recurrence</article-title>. <source>Am J Epidemiol</source>. <year>2014</year>;<volume>179</volume>(<issue>6</issue>):<fpage>749</fpage>–<lpage>58</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/aje/kwt441" xlink:type="simple">10.1093/aje/kwt441</ext-link></comment> <object-id pub-id-type="pmid">24488511</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref057"><label>57</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kehl</surname> <given-names>KL</given-names></name>, <name><surname>Xu</surname> <given-names>W</given-names></name>, <name><surname>Lepisto</surname> <given-names>E</given-names></name>, <name><surname>Elmarakeby</surname> <given-names>H</given-names></name>, <name><surname>Hassett</surname> <given-names>MJ</given-names></name>, <name><surname>Van Allen</surname> <given-names>EM</given-names></name>, <etal>et al</etal>. <article-title>Natural language processing to ascertain cancer outcomes from medical oncologist notes</article-title>. <source>JCO Clin Cancer Inform</source>. <year>2020</year>;<volume>4</volume>:<fpage>680</fpage>–<lpage>90</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/CCI.20.00020" xlink:type="simple">10.1200/CCI.20.00020</ext-link></comment> <object-id pub-id-type="pmid">32755459</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref058"><label>58</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Wang</surname> <given-names>SY</given-names></name>, <name><surname>Tseng</surname> <given-names>B</given-names></name>, <name><surname>Hernandez-Boussard</surname> <given-names>T</given-names></name>. <article-title>Deep learning approaches for predicting glaucoma progression using electronic health records and natural language processing</article-title>. <source>Ophthalmol Sci</source>. <year>2022</year>;<volume>2</volume>(<issue>2</issue>):<fpage>100127</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.xops.2022.100127" xlink:type="simple">10.1016/j.xops.2022.100127</ext-link></comment> <object-id pub-id-type="pmid">36249690</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref059"><label>59</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Morin</surname> <given-names>O</given-names></name>, <name><surname>Vallières</surname> <given-names>M</given-names></name>, <name><surname>Braunstein</surname> <given-names>S</given-names></name>, <name><surname>Ginart</surname> <given-names>JB</given-names></name>, <name><surname>Upadhaya</surname> <given-names>T</given-names></name>, <name><surname>Woodruff</surname> <given-names>HC</given-names></name>, <etal>et al</etal>. <article-title>An artificial intelligence framework integrating longitudinal electronic health records with real-world data enables continuous pan-cancer prognostication</article-title>. <source>Nat Cancer</source>. <year>2021</year>;<volume>2</volume>(<issue>7</issue>):<fpage>709</fpage>–<lpage>22</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s43018-021-00236-2" xlink:type="simple">10.1038/s43018-021-00236-2</ext-link></comment> <object-id pub-id-type="pmid">35121948</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref060"><label>60</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tan</surname> <given-names>WK</given-names></name>, <name><surname>Hassanpour</surname> <given-names>S</given-names></name>, <name><surname>Heagerty</surname> <given-names>PJ</given-names></name>, <name><surname>Rundell</surname> <given-names>SD</given-names></name>, <name><surname>Suri</surname> <given-names>P</given-names></name>, <name><surname>Huhdanpaa</surname> <given-names>HT</given-names></name>, <etal>et al</etal>. <article-title>Comparison of natural language processing rules-based and machine-learning systems to identify lumbar spine imaging findings related to low back pain</article-title>. <source>Acad Radiol</source>. <year>2018</year>;<volume>25</volume>(<issue>11</issue>):<fpage>1422</fpage>–<lpage>32</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.acra.2018.03.008" xlink:type="simple">10.1016/j.acra.2018.03.008</ext-link></comment> <object-id pub-id-type="pmid">29605561</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref061"><label>61</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Berge</surname> <given-names>GT</given-names></name>, <name><surname>Granmo</surname> <given-names>O-C</given-names></name>, <name><surname>Tveit</surname> <given-names>TO</given-names></name>, <name><surname>Ruthjersen</surname> <given-names>AL</given-names></name>, <name><surname>Sharma</surname> <given-names>J</given-names></name>. <article-title>Combining unsupervised, supervised and rule-based learning: the case of detecting patient allergies in electronic health records</article-title>. <source>BMC Med Inform Decis Mak</source>. <year>2023</year>;<volume>23</volume>(<issue>1</issue>):<fpage>188</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12911-023-02271-8" xlink:type="simple">10.1186/s12911-023-02271-8</ext-link></comment> <object-id pub-id-type="pmid">37723446</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref062"><label>62</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bhattarai</surname> <given-names>K</given-names></name>, <name><surname>Oh</surname> <given-names>IY</given-names></name>, <name><surname>Sierra</surname> <given-names>JM</given-names></name>, <name><surname>Tang</surname> <given-names>J</given-names></name>, <name><surname>Payne</surname> <given-names>PRO</given-names></name>, <name><surname>Abrams</surname> <given-names>Z</given-names></name>, <etal>et al</etal>. <article-title>Leveraging GPT-4 for identifying cancer phenotypes in electronic health records: a performance comparison between GPT-4, GPT-3.5-turbo, Flan-T5, Llama-3-8B, and spaCy’s rule-based and machine learning-based methods</article-title>. <source>JAMIA Open</source>. <year>2024</year>;<volume>7</volume>(<issue>3</issue>):ooae060. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamiaopen/ooae060" xlink:type="simple">10.1093/jamiaopen/ooae060</ext-link></comment> <object-id pub-id-type="pmid">38962662</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref063"><label>63</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Esteva</surname> <given-names>A</given-names></name>, <name><surname>Chou</surname> <given-names>K</given-names></name>, <name><surname>Yeung</surname> <given-names>S</given-names></name>, <name><surname>Naik</surname> <given-names>N</given-names></name>, <name><surname>Madani</surname> <given-names>A</given-names></name>, <name><surname>Mottaghi</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>Deep learning-enabled medical computer vision</article-title>. <source>NPJ Digit Med</source>. <year>2021</year>;<volume>4</volume>(<issue>1</issue>):<fpage>5</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41746-020-00376-2" xlink:type="simple">10.1038/s41746-020-00376-2</ext-link></comment> <object-id pub-id-type="pmid">33420381</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref064"><label>64</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>White</surname> <given-names>T</given-names></name>, <name><surname>Blok</surname> <given-names>E</given-names></name>, <name><surname>Calhoun</surname> <given-names>VD</given-names></name>. <article-title>Data sharing and privacy issues in neuroimaging research: opportunities, obstacles, challenges and monsters under the bed</article-title>. <source>Hum Brain Mapp</source>. <year>2022</year>;<volume>43</volume>(<issue>1</issue>):<fpage>278</fpage>–<lpage>91</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/hbm.25120" xlink:type="simple">10.1002/hbm.25120</ext-link></comment> <object-id pub-id-type="pmid">32621651</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref065"><label>65</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bakas</surname> <given-names>S</given-names></name>, <name><surname>Sako</surname> <given-names>C</given-names></name>, <name><surname>Akbari</surname> <given-names>H</given-names></name>, <name><surname>Bilello</surname> <given-names>M</given-names></name>, <name><surname>Sotiras</surname> <given-names>A</given-names></name>, <name><surname>Shukla</surname> <given-names>G</given-names></name>, <etal>et al</etal>. <article-title>The University of Pennsylvania glioblastoma (UPenn-GBM) cohort: advanced MRI, clinical, genomics, &amp; radiomics</article-title>. <source>Sci Data</source>. <year>2022</year>;<volume>9</volume>(<issue>1</issue>):<fpage>453</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41597-022-01560-7" xlink:type="simple">10.1038/s41597-022-01560-7</ext-link></comment> <object-id pub-id-type="pmid">35906241</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref066"><label>66</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kelly</surname> <given-names>CJ</given-names></name>, <name><surname>Karthikesalingam</surname> <given-names>A</given-names></name>, <name><surname>Suleyman</surname> <given-names>M</given-names></name>, <name><surname>Corrado</surname> <given-names>G</given-names></name>, <name><surname>King</surname> <given-names>D</given-names></name>. <article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>. <source>BMC Med</source>. <year>2019</year>;<volume>17</volume>(<issue>1</issue>):<fpage>195</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12916-019-1426-2" xlink:type="simple">10.1186/s12916-019-1426-2</ext-link></comment> <object-id pub-id-type="pmid">31665002</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref067"><label>67</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Maleki</surname> <given-names>F</given-names></name>, <name><surname>Ovens</surname> <given-names>K</given-names></name>, <name><surname>Gupta</surname> <given-names>R</given-names></name>, <name><surname>Reinhold</surname> <given-names>C</given-names></name>, <name><surname>Spatz</surname> <given-names>A</given-names></name>, <name><surname>Forghani</surname> <given-names>R</given-names></name>. <article-title>Generalizability of machine learning models: quantitative evaluation of three methodological pitfalls</article-title>. <source>Radiol Artif Intell</source>. <year>2022</year>;<volume>5</volume>(<issue>1</issue>):e220028. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1148/ryai.220028" xlink:type="simple">10.1148/ryai.220028</ext-link></comment> <object-id pub-id-type="pmid">36721408</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref068"><label>68</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Tizi</surname> <given-names>W</given-names></name>, <name><surname>Berrado</surname> <given-names>A</given-names></name>. <chapter-title>Assessing the generalizability of cancer prognosis models: breast and colon cancer case studies</chapter-title>. In: <name><surname>Chen</surname> <given-names>H</given-names></name>, <name><surname>Zhou</surname> <given-names>Y</given-names></name>, <name><surname>Xu</surname> <given-names>D</given-names></name>, <name><surname>Vardhanabhuti</surname> <given-names>VV</given-names></name>, editors. <source>Trustworthy artificial intelligence for healthcare</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer Nature Switzerland</publisher-name>; <year>2024</year>. p. <fpage>123</fpage>–<lpage>33</lpage>.</mixed-citation></ref>
<ref id="pdig.0000755.ref069"><label>69</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Lin</surname> <given-names>L</given-names></name>, <name><surname>Dou</surname> <given-names>Q</given-names></name>, <name><surname>Jin</surname> <given-names>Y-M</given-names></name>, <name><surname>Zhou</surname> <given-names>G-Q</given-names></name>, <name><surname>Tang</surname> <given-names>Y-Q</given-names></name>, <name><surname>Chen</surname> <given-names>W-L</given-names></name>, <etal>et al</etal>. <article-title>Deep learning for automated contouring of primary tumor volumes by MRI for nasopharyngeal carcinoma</article-title>. <source>Radiology</source>. <year>2019</year>;<volume>291</volume>(<issue>3</issue>):<fpage>677</fpage>–<lpage>86</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1148/radiol.2019182012" xlink:type="simple">10.1148/radiol.2019182012</ext-link></comment> <object-id pub-id-type="pmid">30912722</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref070"><label>70</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Long</surname> <given-names>H</given-names></name>, <name><surname>Zhang</surname> <given-names>P</given-names></name>, <name><surname>Bi</surname> <given-names>Y</given-names></name>, <name><surname>Yang</surname> <given-names>C</given-names></name>, <name><surname>Wu</surname> <given-names>M</given-names></name>, <name><surname>He</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>MRI radiomic features of peritumoral edema may predict the recurrence sites of glioblastoma multiforme</article-title>. <source>Front Oncol</source>. <year>2023</year>;<volume>12</volume>:<fpage>1042498</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fonc.2022.1042498" xlink:type="simple">10.3389/fonc.2022.1042498</ext-link></comment> <object-id pub-id-type="pmid">36686829</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref071"><label>71</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Zanfardino</surname> <given-names>M</given-names></name>, <name><surname>Pane</surname> <given-names>K</given-names></name>, <name><surname>Mirabelli</surname> <given-names>P</given-names></name>, <name><surname>Salvatore</surname> <given-names>M</given-names></name>, <name><surname>Franzese</surname> <given-names>M</given-names></name>. <article-title>TCGA-TCIA impact on radiogenomics cancer research: a systematic review</article-title>. <source>Int J Mol Sci</source>. <year>2019</year>;<volume>20</volume>(<issue>23</issue>):<fpage>6033</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/ijms20236033" xlink:type="simple">10.3390/ijms20236033</ext-link></comment> <object-id pub-id-type="pmid">31795520</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref072"><label>72</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Gutman</surname> <given-names>DA</given-names></name>, <name><surname>Dunn</surname> <given-names>WD</given-names> <suffix>Jr</suffix></name>, <name><surname>Grossmann</surname> <given-names>P</given-names></name>, <name><surname>Cooper</surname> <given-names>LAD</given-names></name>, <name><surname>Holder</surname> <given-names>CA</given-names></name>, <name><surname>Ligon</surname> <given-names>KL</given-names></name>, <etal>et al</etal>. <article-title>Somatic mutations associated with MRI-derived volumetric features in glioblastoma</article-title>. <source>Neuroradiology</source>. <year>2015</year>;<volume>57</volume>(<issue>12</issue>):<fpage>1227</fpage>–<lpage>37</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00234-015-1576-7" xlink:type="simple">10.1007/s00234-015-1576-7</ext-link></comment> <object-id pub-id-type="pmid">26337765</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref073"><label>73</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Colen</surname> <given-names>RR</given-names></name>, <name><surname>Wang</surname> <given-names>J</given-names></name>, <name><surname>Singh</surname> <given-names>SK</given-names></name>, <name><surname>Gutman</surname> <given-names>DA</given-names></name>, <name><surname>Zinn</surname> <given-names>PO</given-names></name>. <article-title>Glioblastoma: imaging genomic mapping reveals sex-specific oncogenic associations of cell death</article-title>. <source>Radiology</source>. <year>2015</year>;<volume>275</volume>(<issue>1</issue>):<fpage>215</fpage>–<lpage>27</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1148/radiol.14141800" xlink:type="simple">10.1148/radiol.14141800</ext-link></comment> <object-id pub-id-type="pmid">25490189</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref074"><label>74</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Claus</surname> <given-names>EB</given-names></name>, <name><surname>Walsh</surname> <given-names>KM</given-names></name>, <name><surname>Wiencke</surname> <given-names>JK</given-names></name>, <name><surname>Molinaro</surname> <given-names>AM</given-names></name>, <name><surname>Wiemels</surname> <given-names>JL</given-names></name>, <name><surname>Schildkraut</surname> <given-names>JM</given-names></name>, <etal>et al</etal>. <article-title>Survival and low-grade glioma: the emergence of genetic information</article-title>. <source>Neurosurg Focus</source>. <year>2015</year>;<volume>38</volume>(<issue>1</issue>):E6. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3171/2014.10.FOCUS12367" xlink:type="simple">10.3171/2014.10.FOCUS12367</ext-link></comment> <object-id pub-id-type="pmid">25552286</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref075"><label>75</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kidd</surname> <given-names>AC</given-names></name>, <name><surname>Anderson</surname> <given-names>O</given-names></name>, <name><surname>Cowell</surname> <given-names>GW</given-names></name>, <name><surname>Weir</surname> <given-names>AJ</given-names></name>, <name><surname>Voisey</surname> <given-names>JP</given-names></name>, <name><surname>Evison</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Fully automated volumetric measurement of malignant pleural mesothelioma by deep learning AI: validation and comparison with modified RECIST response criteria</article-title>. <source>Thorax</source>. <year>2022</year>;<volume>77</volume>(<issue>12</issue>):<fpage>1251</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/thoraxjnl-2021-217808" xlink:type="simple">10.1136/thoraxjnl-2021-217808</ext-link></comment> <object-id pub-id-type="pmid">35110367</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref076"><label>76</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Ruchalski</surname> <given-names>K</given-names></name>, <name><surname>Braschi-Amirfarzan</surname> <given-names>M</given-names></name>, <name><surname>Douek</surname> <given-names>M</given-names></name>, <name><surname>Sai</surname> <given-names>V</given-names></name>, <name><surname>Gutierrez</surname> <given-names>A</given-names></name>, <name><surname>Dewan</surname> <given-names>R</given-names></name>, <etal>et al</etal>. <article-title>A primer on RECIST 1.1 for oncologic imaging in clinical drug trials</article-title>. <source>Radiol Imaging Cancer</source>. <year>2021</year>;<volume>3</volume>(<issue>3</issue>):e210008. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1148/rycan.2021210008" xlink:type="simple">10.1148/rycan.2021210008</ext-link></comment> <object-id pub-id-type="pmid">33988475</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref077"><label>77</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Barash</surname> <given-names>Y</given-names></name>, <name><surname>Klang</surname> <given-names>E</given-names></name>. <article-title>Automated quantitative assessment of oncological disease progression using deep learning</article-title>. <source>Ann Transl Med</source>. <year>2019</year>;<volume>7</volume>(Suppl 8):S379. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.21037/atm.2019.12.101" xlink:type="simple">10.21037/atm.2019.12.101</ext-link></comment> <object-id pub-id-type="pmid">32016097</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref078"><label>78</label><mixed-citation publication-type="other" xlink:type="simple">Eyre H, Chapman AB, Peterson KS, Shi J, Alba PR, Jones MM, <etal>et al</etal>. Launching into clinical space with medspaCy: a new clinical text processing toolkit in Python; 2021. Available from: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2106.07799" xlink:type="simple">http://arxiv.org/abs/2106.07799</ext-link></mixed-citation></ref>
<ref id="pdig.0000755.ref079"><label>79</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Chappidi</surname> <given-names>S</given-names></name>, <name><surname>Lee</surname> <given-names>H</given-names></name>, <name><surname>Jagasia</surname> <given-names>S</given-names></name>, <name><surname>Syal</surname> <given-names>C</given-names></name>, <name><surname>Zaki</surname> <given-names>G</given-names></name>, <name><surname>Junkin</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Abstract 6199: defining and capturing progression in glioma by harnessing NLP in unstructured electronic health records</article-title>. <source>Cancer Res</source>. <year>2024</year>;<volume>84</volume>(6_Supplement):<fpage>6199</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1158/1538-7445.am2024-6199" xlink:type="simple">10.1158/1538-7445.am2024-6199</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref080"><label>80</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Do</surname> <given-names>RKG</given-names></name>, <name><surname>Lupton</surname> <given-names>K</given-names></name>, <name><surname>Causa Andrieu</surname> <given-names>PI</given-names></name>, <name><surname>Luthra</surname> <given-names>A</given-names></name>, <name><surname>Taya</surname> <given-names>M</given-names></name>, <name><surname>Batch</surname> <given-names>K</given-names></name>, <etal>et al</etal>. <article-title>Patterns of metastatic disease in patients with cancer derived from natural language processing of structured CT radiology reports over a 10-year period</article-title>. <source>Radiology</source>. <year>2021</year>;<volume>301</volume>(<issue>1</issue>):<fpage>115</fpage>–<lpage>22</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1148/radiol.2021210043" xlink:type="simple">10.1148/radiol.2021210043</ext-link></comment> <object-id pub-id-type="pmid">34342503</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref081"><label>81</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Belue</surname> <given-names>MJ</given-names></name>, <name><surname>Harmon</surname> <given-names>SA</given-names></name>, <name><surname>Chappidi</surname> <given-names>S</given-names></name>, <name><surname>Zhuge</surname> <given-names>Y</given-names></name>, <name><surname>Tasci</surname> <given-names>E</given-names></name>, <name><surname>Jagasia</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Diagnosing progression in glioblastoma—tackling a neuro-oncology problem using artificial-intelligence-derived volumetric change over time on magnetic resonance imaging to examine progression-free survival in glioblastoma</article-title>. <source>Diagnostics (Basel)</source>. <year>2024</year>;<volume>14</volume>(<issue>13</issue>):<fpage>1374</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/diagnostics14131374" xlink:type="simple">10.3390/diagnostics14131374</ext-link></comment> <object-id pub-id-type="pmid">39001264</object-id></mixed-citation></ref>
<ref id="pdig.0000755.ref082"><label>82</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Williams</surname> <given-names>LH</given-names></name>, <name><surname>Drew</surname> <given-names>T</given-names></name>. <article-title>What do we know about volumetric medical image interpretation?: a review of the basic science and medical image perception literatures</article-title>. <source>Cognit Res: Principles Implicat</source>. <year>2019</year>;<volume>4</volume>:<fpage>21</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s41235-019-0171-6" xlink:type="simple">10.1186/s41235-019-0171-6</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0000755.ref083"><label>83</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Sarwar</surname> <given-names>T</given-names></name>, <name><surname>Seifollahi</surname> <given-names>S</given-names></name>, <name><surname>Chan</surname> <given-names>J</given-names></name>, <name><surname>Zhang</surname> <given-names>X</given-names></name>, <name><surname>Aksakalli</surname> <given-names>V</given-names></name>, <name><surname>Hudson</surname> <given-names>I</given-names></name>, <etal>et al</etal>. <article-title>The secondary use of electronic health records for data mining: data characteristics and challenges</article-title>. <source>ACM Comput Surv</source>. <year>2022</year>;<volume>55</volume>(<issue>2</issue>):<fpage>1</fpage>–<lpage>40</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3490234" xlink:type="simple">10.1145/3490234</ext-link></comment></mixed-citation></ref>
</ref-list>
</back>
</article>