<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0238290</article-id>
<article-id pub-id-type="publisher-id">PONE-D-19-14927</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Clinical medicine</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Pharmacology</subject><subj-group><subject>Drug research and development</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Database searching</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Chemistry</subject><subj-group><subject>Chemical elements</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Information retrieval</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Chemistry</subject><subj-group><subject>Chemical compounds</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Semantics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Computer applications</subject><subj-group><subject>Web-based applications</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>An interactive retrieval system for clinical trial studies with context-dependent protocol elements</article-title>
<alt-title alt-title-type="running-head">Clinical trial protocol database system</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-2312-9440</contrib-id>
<name name-style="western">
<surname>Park</surname>
<given-names>Junseok</given-names>
</name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<role content-type="https://casrai.org/credit/">Data curation</role>
<role content-type="https://casrai.org/credit/">Investigation</role>
<role content-type="https://casrai.org/credit/">Methodology</role>
<role content-type="https://casrai.org/credit/">Project administration</role>
<role content-type="https://casrai.org/credit/">Resources</role>
<role content-type="https://casrai.org/credit/">Software</role>
<role content-type="https://casrai.org/credit/">Validation</role>
<role content-type="https://casrai.org/credit/">Visualization</role>
<role content-type="https://casrai.org/credit/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Park</surname>
<given-names>Seongkuk</given-names>
</name>
<role content-type="https://casrai.org/credit/">Software</role>
<role content-type="https://casrai.org/credit/">Visualization</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Kim</surname>
<given-names>Kwangmin</given-names>
</name>
<role content-type="https://casrai.org/credit/">Data curation</role>
<role content-type="https://casrai.org/credit/">Resources</role>
<role content-type="https://casrai.org/credit/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Hwang</surname>
<given-names>Woochang</given-names>
</name>
<role content-type="https://casrai.org/credit/">Validation</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-0925-1853</contrib-id>
<name name-style="western">
<surname>Yoo</surname>
<given-names>Sunyong</given-names>
</name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Yi</surname>
<given-names>Gwan-su</given-names>
</name>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<name name-style="western">
<surname>Lee</surname>
<given-names>Doheon</given-names>
</name>
<role content-type="https://casrai.org/credit/">Funding acquisition</role>
<role content-type="https://casrai.org/credit/">Project administration</role>
<role content-type="https://casrai.org/credit/">Supervision</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Bio and Brain Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, Republic of Korea</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Bio-Synergy Research Center, KAIST, Daejeon, Republic of Korea</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Information &amp; Electronics Research Institute, Daejeon, Republic of Korea</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>The Milner Institute, University of Cambridge, Cambridge, United Kingdom</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>School of Electronics and Computer Engineering, Chonnam National University, Gwangju, Republic of Korea</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Idrees</surname>
<given-names>Amira M.</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Fayoum University Faculty of Computers and Information, EGYPT</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">dhlee@kaist.ac.kr</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>9</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>15</volume>
<issue>9</issue>
<elocation-id>e0238290</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>5</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>8</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Park et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="pone.0238290.pdf"/>
<abstract>
<p>A well-defined protocol for a clinical trial guarantees a successful outcome report. When designing the protocol, most researchers refer to electronic databases and extract protocol elements using a keyword search. However, state-of-the-art database systems only offer text-based searches for user-entered keywords. In this study, we present a database system with a context-dependent and protocol-element-selection function for successfully designing a clinical trial protocol. To do this, we first introduce a database for a protocol retrieval system constructed from individual protocol data extracted from 184,634 clinical trials and 13,210 frame structures of clinical trial protocols. The database contains a variety of semantic information that allows the filtering of protocols during the search operation. Based on the database, we developed a web application called the clinical trial protocol database system (CLIPS; available at <ext-link ext-link-type="uri" xlink:href="https://corus.kaist.edu/clips" xlink:type="simple">https://corus.kaist.edu/clips</ext-link>). This system enables an interactive search by utilizing protocol elements. To enable an interactive search for combinations of protocol elements, CLIPS provides optional next element selection according to the previous element in the form of a connected tree. The validation results show that our method achieves better performance than that of existing databases in predicting phenotypic features.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>Ministry of Science ICT and Future Planning (KR)</institution>
</funding-source>
<award-id>NRF-2012M3A9C4048758</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Lee</surname>
<given-names>Doheon</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was supported by the Bio-Synergy Research Project (NRF-2012M3A9C4048758, 2012M3A9C4048759) of the Ministry of Science and ICT through the National Research Foundation of the Republic of Korea. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="5"/>
<page-count count="21"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All relevant data are within the manuscript and its Supporting Information files.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>According to a recent report, the clinical cost per approved new drug has increased to $2.9 billion [<xref ref-type="bibr" rid="pone.0238290.ref001">1</xref>]. Another report also indicated that the global clinical trial market is estimated to be worth $68.9 billion and grow at a compound annual growth rate of 5.7% by 2027, owing to high demand for treatment of chronic diseases [<xref ref-type="bibr" rid="pone.0238290.ref002">2</xref>]. Clinical trial protocols are essential for the ultimate success of trials [<xref ref-type="bibr" rid="pone.0238290.ref003">3</xref>]. Well-established and optimized protocols are warranted to improve adherence to clinical procedures, help avoid unnecessary protocol amendments, facilitate preliminary assessments of latent issues, and reduce the risk of trial failure, thereby contributing to the success of clinical trials [<xref ref-type="bibr" rid="pone.0238290.ref004">4</xref>, <xref ref-type="bibr" rid="pone.0238290.ref005">5</xref>]. Poor study design may lead to constraints on subject participation, dropout, and early termination, resulting in increased cost burdens and loss of efficiency [<xref ref-type="bibr" rid="pone.0238290.ref006">6</xref>, <xref ref-type="bibr" rid="pone.0238290.ref007">7</xref>]. Moreover, it results in protocol amendments, some of which may be avoidable [<xref ref-type="bibr" rid="pone.0238290.ref008">8</xref>]. This has brought about an unprecedented demand for computerized systems designed to assist many stakeholders (i.e., pharmaceutical companies, clinical research organizations, health authorities, ethical committees or institutional review boards, courier vendors, and academic medical centers), aimed at making specific and deliberated choices and developing standardized protocols [<xref ref-type="bibr" rid="pone.0238290.ref009">9</xref>, <xref ref-type="bibr" rid="pone.0238290.ref010">10</xref>]. 
Although it is well known that clinical trials should be supported by rigorously developed protocols, there remains limited published evidence or guidance on the development of clinical trial protocols [<xref ref-type="bibr" rid="pone.0238290.ref011">11</xref>]. The majority of protocols still adhere to expert guidelines combined with scientific evidence and expert clinicians' opinions, thus limiting the amount of available knowledge that can be used to underpin the development of cost-effective and efficient protocols.</p>
<p>The advantage of using computerized systems is that they are quick and allow iterated searches for previous protocols related to the study of interest. In this regard, computerized systems used to retrieve reliable protocols should have a database system as the baseline feature set and an advanced information retrieval method. The database system is a set of systematically managed data repositories [<xref ref-type="bibr" rid="pone.0238290.ref012">12</xref>]. It could include information analysis to find features for accurate search. The information retrieval method consists of query analysis, information analysis, and relevance calculation between query and information [<xref ref-type="bibr" rid="pone.0238290.ref013">13</xref>]. The method we propose in this study addresses only the database system; the information retrieval method is the next step of our study.</p>
<p>From the database system perspective, the previous approaches can be grouped into two categories: expert guidelines and computerized systems. Computerized systems can be subdivided further into database and automated systems.</p>
<p>First, existing expert guidelines help researchers design their own trial protocols [<xref ref-type="bibr" rid="pone.0238290.ref014">14</xref>]. Although referring to expert opinions guarantees the credibility of the protocol design, there are two obvious limitations: (i) the protocol can only be applied if credible guidelines exist in that particular clinical field, and (ii) not all guidelines offer specific values for all elements of trial design. Consequently, the determination of these specific elements relies on the subjective intuition of individual researchers.</p>
<p>Second, computerized system approaches offer an automated method for designing a protocol. For example, a context-aware architecture has been developed for clinical trial protocol design composed of a decision support module and semantic search engine [<xref ref-type="bibr" rid="pone.0238290.ref015">15</xref>]. Although the idea of constructing an automated system was an innovative approach when it was proposed, this system offers only limited performance. For instance, the idea focuses on creating scientific queries for finding information about a clinical trial protocol and retrieving only related papers through queries. Furthermore, the web-based service is no longer available.</p>
<p>Current state-of-the-art systems are computerized and use a database of clinical trial protocols. These databases contain extensive information on previous clinical trials and clinical experiences covering a wide range of clinical fields [<xref ref-type="bibr" rid="pone.0238290.ref016">16</xref>–<xref ref-type="bibr" rid="pone.0238290.ref018">18</xref>]. Researchers can retrieve information from specific clinical trials according to their purpose. However, current clinical trial databases offer only limited support in searching for clinical trial protocols. The clinical trial database system uses medical subject heading (MeSH) terms but does not cover all of the text in the protocols, preventing a truly semantic search [<xref ref-type="bibr" rid="pone.0238290.ref016">16</xref>]. Moreover, the current databases do not allow structural protocol searches to retrieve context-dependent protocol elements. A biomedical literature database system (PubMed) could be used to search for clinical trial protocols [<xref ref-type="bibr" rid="pone.0238290.ref019">19</xref>]. However, this would not be efficient because additional work would be required to extract the necessary information from the retrieved literature.</p>
<p>To overcome the limitations of the current database systems, we present a clinical trial protocol database system (CLIPS) that enables a semantic search for the core content of clinical trial protocols, along with filterable semantic features and frame structures for the protocols. In detail, we collected essential data and defined the structure of protocols from a public database of clinical trials [<xref ref-type="bibr" rid="pone.0238290.ref020">20</xref>]. We used a text mining pipeline based on Metamap, Moara, and Chemspot to understand the contextual meaning of texts in clinical trial protocols and to increase search accuracy [<xref ref-type="bibr" rid="pone.0238290.ref021">21</xref>–<xref ref-type="bibr" rid="pone.0238290.ref024">24</xref>]. To resolve the difficulty of retrieving specific protocols from a database of complex structures, we developed a graph-based querying system (<xref ref-type="fig" rid="pone.0238290.g001">Fig 1</xref>).</p>
<fig id="pone.0238290.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Overview of CLIPS development process.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g001.tif" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec002">
<title>Definitions</title>
<p>Clinical trial protocols consist of several elements that can be grouped into factors according to their characteristics [<xref ref-type="bibr" rid="pone.0238290.ref025">25</xref>]. We define the terms “element” and “factor” as follows.</p>
<list list-type="bullet">
<list-item><p>Element: individual items constituting the clinical trial protocol. An element has a value that defines the protocol. For example, “model” is an element, and the value of this element can be “crossover.”</p></list-item>
<list-item><p>Factor: a common characteristic of grouped elements. A factor can have multiple elements. For instance, “model” and “allocation” elements are used to design a protocol. Thus, they belong to the “design” factor. Another example is the “enrollment type” and “gender” elements which determine the subject of a protocol and are part of the “subject” factor.</p></list-item>
</list>
<p>For instance, the “design” factor of a protocol includes 14 elements, and among these elements, the “model” element contains 12 values. The values range from “crossover assignment” to “case-only”, and the “model” element of the “design” factor takes one of these values. <xref ref-type="supplementary-material" rid="pone.0238290.s005">S1 Fig</xref> illustrates this example.</p>
</sec>
<sec id="sec003">
<title>Related work</title>
<sec id="sec004">
<title>Guidelines</title>
<p>The retrieval of documents containing the contents of a protocol design is one method of determining a clinical research protocol. The document containing the guidelines covers the overall information on clinical research protocols. This is the most basic approach for gathering information to develop a protocol. Chan et al. [<xref ref-type="bibr" rid="pone.0238290.ref003">3</xref>] proposed SPIRIT, which is a high-quality guideline containing 33 checklist items for the development of a clinical trial protocol. Meeker-O’Connell et al. [<xref ref-type="bibr" rid="pone.0238290.ref026">26</xref>] developed a principle document that defines the factors needed to assure patient safety and reliability in a trial. Moreover, some guidelines specify protocols for examining the efficacy of food or food components for specific diseases [<xref ref-type="bibr" rid="pone.0238290.ref027">27</xref>]. For instance, documents describing gut health and immunity, diet-related cancer, and atherosclerosis are included [<xref ref-type="bibr" rid="pone.0238290.ref028">28</xref>–<xref ref-type="bibr" rid="pone.0238290.ref030">30</xref>]. However, guideline-based approaches use subjective judgment in determining the information to be included [<xref ref-type="bibr" rid="pone.0238290.ref031">31</xref>]. This limitation can result in different outcomes depending on the user.</p>
</sec>
<sec id="sec005">
<title>Database systems</title>
<p>Database-based information retrieval systems can be utilized to retrieve clinical protocols. Zarin et al. developed clinicaltrials.gov, the largest database retrieval system, by collecting all published clinical trial documents, including regulatory mandates and a broad group of trial sponsors [<xref ref-type="bibr" rid="pone.0238290.ref018">18</xref>]. Tasneem et al. established and operated a relational database containing all clinical trials registered with clinicaltrials.gov [<xref ref-type="bibr" rid="pone.0238290.ref016">16</xref>]. Furthermore, systems for protocol retrieval use general document retrieval technologies, e.g., PubMed, Scopus, Web of Science, and Google Scholar [<xref ref-type="bibr" rid="pone.0238290.ref032">32</xref>–<xref ref-type="bibr" rid="pone.0238290.ref035">35</xref>]. However, current database-based retrieval systems have limited ability for protocol-specific search objectives, such as retrieving the protocol structure or sequentially selecting context-dependent protocol elements.</p>
</sec>
<sec id="sec006">
<title>Intelligent systems</title>
<p>Intelligent systems are an effective approach for retrieving clinical trial protocols. Tsatsaronis et al. developed an intelligent system based on a context-aware approach for automated protocol design [<xref ref-type="bibr" rid="pone.0238290.ref015">15</xref>]. Their system supports study- and domain-driven searches. Study-driven searches use the parameters (i.e., condition, intervention) of a particular trial as provided by a researcher. In domain-driven searches, a researcher selects options according to the study domain; the system then automatically searches and categorizes the retrieved information. However, this system is currently not accessible. We assume that the authors terminated the system.</p>
</sec>
</sec>
<sec id="sec007" sec-type="materials|methods">
<title>Methods</title>
<p>We developed a clinical trial protocol database system. To accomplish the objective, we proposed step-by-step methods including database development, semantic feature generation, and a web-based retrieval system. We constructed the database from a public database of clinical trials and organized essential data to reflect the structure of protocols. Semantic feature generation is a core part of the clinical trial protocol retrieval system. We generated filterable semantic features to offer context-specific searches for the protocols from the original text based on named entity recognition tools. The semantic features consist of phenotypes, genes, and chemical compounds. Finally, we made a web-based protocol retrieval application. Text-based search cannot scrutinize the complex structure of the protocols. Therefore, we devised a graph-based search interface as a query refinement method.</p>
<sec id="sec008">
<title>Database development for a protocol retrieval system</title>
<p>A clinical trial protocol presents the structure of a clinical trial and is composed of various elements that can be clustered into key factors. In this study, we defined five key factors based on a previous baseline research project [<xref ref-type="bibr" rid="pone.0238290.ref025">25</xref>]: design, subject, variables, statistical issues, and descriptions. The design factor determines how the trial is structured and modeled to measure data generated during the trial. The subject factor determines who is eligible to participate in the trial and how they are treated to ensure the generalizability of the target population. The variables are the parameters to be measured to evaluate the efficacy or safety of a drug or treatment. The statistical issues describe how the clinical trial will be analyzed, specifying sampling procedures or statistical significance. Finally, the description factor covers additional information such as the organization, different phases, and additional explanations of the protocol or trial itself.</p>
<p>We selected and clustered elements from Aggregate Analysis of ClinicalTrials.gov (AACT), which was released on March 27, 2015 [<xref ref-type="bibr" rid="pone.0238290.ref016">16</xref>]. To do this, we first downloaded a dump file of the AACT database and completely overhauled the loaded database to obtain 42 tables of 270 columns. Then, we classified the elements into four data types: categorical, value, description, and not union (N/U). Categorical-type elements contain categorical variables; the sequential selection of these elements can determine the protocol structure (<xref ref-type="supplementary-material" rid="pone.0238290.s001">S1 File</xref>). Value-type elements include interval and ratio data that are important values in key factors. Description-type elements contain additional explanatory text, numeric values, and abbreviated words or dates for the description factor. The N/U-type element consists of primary keys, foreign keys, and database management values.</p>
<p>We clustered the categorical and value types into the pre-defined five key factors according to the above-mentioned criteria, including design, subject, variable, and statistical issue factors (<xref ref-type="table" rid="pone.0238290.t001">Table 1</xref>).</p>
<table-wrap id="pone.0238290.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.t001</object-id>
<label>Table 1</label> <caption><title>Key factors and their elements in CLIPS.</title></caption>
<alternatives>
<graphic id="pone.0238290.t001g" mimetype="image" position="float" xlink:href="pone.0238290.t001.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Key factors</th>
<th align="left">Categorical Type Elements</th>
<th align="left">Value Type Elements</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><bold>Design</bold></td>
<td align="left">type, model, allocation, time perspective, masking, masked role, primary purpose, endpoint classification, group type, intervention type</td>
<td align="left">number of groups, design group, group label, description, intervention name, intervention other name, intervention desc</td>
</tr>
<tr>
<td align="left"><bold>Subject</bold></td>
<td align="left">enrollment type, gender, is health volunteers, minimum age unit, maximum age unit</td>
<td align="left">enrollment, minimum age, maximum age, study population, target population, criteria</td>
</tr>
<tr>
<td align="left"><bold>Variable</bold></td>
<td align="left">variable group, safety issue, measure type, dispersion, measure type</td>
<td align="left">biospec descr, biospec retention, measure, time frame, description, unit of measure, category title</td>
</tr>
<tr>
<td align="left"><bold>Statistical Issue</bold></td>
<td align="left">sampling method, variable group, dispersion type</td>
<td align="left">population, measure, param type, dispersion type, dispersion value, statistical method, ci percent, ci lower limit, ci upper limit, ci n sides, categorical title, baseline value, spread, lower limit, upper limit</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>We designed a table schema and then carried out the data compilation process. The N/U elements were eliminated because we constructed a relational table with key-value attributes, discarding unnecessary keys and values for database management. The use of key-value attributes makes it possible to search the skeleton structure of a clinical trial protocol efficiently and effectively manage the data storage for deploying inconsistent data [<xref ref-type="bibr" rid="pone.0238290.ref036">36</xref>]. We designed a table schema accounting for these aspects (<xref ref-type="table" rid="pone.0238290.t002">Table 2</xref>). The next step was data compilation. We organized the element values by resolving typographical errors and reflecting dependency structures. Then, we removed some control characters and performed type conversions. Furthermore, we removed ambiguous design types, which are null, and expanded the access and observations (patient registry) to search for specific clinical trial protocols. As a result, we collected 184,634 clinical trial protocols and their detailed information. The resulting database can be used to optimize query refinement for retrieving protocol information.</p>
<table-wrap id="pone.0238290.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.t002</object-id>
<label>Table 2</label> <caption><title>Table schema for clinical trial protocol.</title></caption>
<alternatives>
<graphic id="pone.0238290.t002g" mimetype="image" position="float" xlink:href="pone.0238290.t002.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Column Name</th>
<th align="left">Column Type</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">ClinicalTrialID</td>
<td align="left">Char(10)</td>
</tr>
<tr>
<td align="left">Design</td>
<td align="left">JSON</td>
</tr>
<tr>
<td align="left">Subject</td>
<td align="left">JSON</td>
</tr>
<tr>
<td align="left">Variable</td>
<td align="left">JSON</td>
</tr>
<tr>
<td align="left">Statistical Issue</td>
<td align="left">JSON</td>
</tr>
<tr>
<td align="left">Description</td>
<td align="left">JSON</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec009">
<title>Semantic filtering feature generation</title>
<p>Although we developed the clinical trial protocol database using a frame structure so that all protocols share a similar structure, frame structure similarity does not guarantee similarity of the detailed protocol content. MeSH offers a potential solution and is used for indexing and cataloging clinical trials in ClinicalTrials.gov and AACT [<xref ref-type="bibr" rid="pone.0238290.ref016">16</xref>, <xref ref-type="bibr" rid="pone.0238290.ref027">27</xref>]. However, MeSH has limited coverage that does not extend across the spectrum of various biomedical terminologies [<xref ref-type="bibr" rid="pone.0238290.ref037">37</xref>]. To address this limitation, various biomedical semantic features were extracted to find or filter similar clinical trials in the structure being searched.</p>
<p>We generated filterable semantic features related to the conditions and interventions that are considered significant in clinical trials, resulting in a subdivided semantic similarity search. The condition was a phenotype, including any diseases and disorders, observed during clinical trials as well as reported symptoms. The disease-specific phenotype is a set of observable characteristics. Drugs commonly refer to interventions that are the focus of clinical trials; they can involve chemical compounds [<xref ref-type="bibr" rid="pone.0238290.ref038">38</xref>]. Similar clinical trials can be searched for or filtered using each of the corresponding elements. In addition, the identification of similar target genes or proteins is a potential method of searching for similarities among chemical compounds and phenotypes as they are a molecular proxy that links them [<xref ref-type="bibr" rid="pone.0238290.ref039">39</xref>, <xref ref-type="bibr" rid="pone.0238290.ref040">40</xref>]. Thus, we applied named entity recognition (NER) to the phenotypes, chemical compounds, and genes to enable a semantic search, for which semantic filters were employed for the following description elements: brief title, official title, brief summary, detailed description, keywords, and conditions (<xref ref-type="fig" rid="pone.0238290.g002">Fig 2</xref>).</p>
<fig id="pone.0238290.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Generation of data for semantic search.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g002.tif" xlink:type="simple"/>
</fig>
<sec id="sec010">
<title>Phenotype</title>
<p>We extracted semantic features to represent disease-specific phenotype terminology. The Unified Medical Language System (UMLS) is a repository of integrated biomedical terminologies, and thus, we used UMLS2015AB to process phenotype words [<xref ref-type="bibr" rid="pone.0238290.ref037">37</xref>]. To employ NER on descriptive values, we applied MetaMap 2016 and cTAKES 3.2.2 [<xref ref-type="bibr" rid="pone.0238290.ref041">41</xref>, <xref ref-type="bibr" rid="pone.0238290.ref042">42</xref>]. We combined the results of the two tools and removed duplicates to leverage the advantages of both [<xref ref-type="bibr" rid="pone.0238290.ref043">43</xref>]. Next, we selected 15 semantic types, which are considered disease phenotypic types, and removed the other types from the results (<xref ref-type="table" rid="pone.0238290.t003">Table 3</xref>). As a result, disease phenotypic features with unique concept IDs were generated for each clinical trial.</p>
<table-wrap id="pone.0238290.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.t003</object-id>
<label>Table 3</label> <caption><title>Selected phenotypic types from UMLS.</title></caption>
<alternatives>
<graphic id="pone.0238290.t003g" mimetype="image" position="float" xlink:href="pone.0238290.t003.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Entity Type ID (TUI)</th>
<th align="left">Entity Type Name</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">T038</td>
<td align="left">Biologic Function</td>
</tr>
<tr>
<td align="left">T039</td>
<td align="left">Physiologic Function</td>
</tr>
<tr>
<td align="left">T041</td>
<td align="left">Mental Process</td>
</tr>
<tr>
<td align="left">T019</td>
<td align="left">Congenital Abnormality</td>
</tr>
<tr>
<td align="left">T020</td>
<td align="left">Acquired Abnormality</td>
</tr>
<tr>
<td align="left">T033</td>
<td align="left">Finding</td>
</tr>
<tr>
<td align="left">T034</td>
<td align="left">Laboratory or Test Result</td>
</tr>
<tr>
<td align="left">T046</td>
<td align="left">Pathologic Function</td>
</tr>
<tr>
<td align="left">T047</td>
<td align="left">Disease or Syndrome</td>
</tr>
<tr>
<td align="left">T048</td>
<td align="left">Mental or Behavioral Dysfunction</td>
</tr>
<tr>
<td align="left">T049</td>
<td align="left">Cell or Molecular Dysfunction</td>
</tr>
<tr>
<td align="left">T184</td>
<td align="left">Sign or Symptom</td>
</tr>
<tr>
<td align="left">T190</td>
<td align="left">Anatomical Abnormality</td>
</tr>
<tr>
<td align="left">T191</td>
<td align="left">Neoplastic Process</td>
</tr>
<tr>
<td align="left">T037</td>
<td align="left">Injury or Poisoning</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec011">
<title>Chemical compound</title>
<p>We used ChemSpot to apply NER to chemical compound entities in the descriptions [<xref ref-type="bibr" rid="pone.0238290.ref044">44</xref>]. ChemSpot provides Chemical Abstracts Service (CAS) IDs and International Chemical Identifiers (InChI) but does not provide standard InChIKeys. The InChIKey is the compacted version of InChI, and the standard InChIKey is a stable identifier for reflecting the identifier version designation [<xref ref-type="bibr" rid="pone.0238290.ref045">45</xref>]. Moreover, standard InChIKeys are considered to provide equivalent descriptions between compounds in drug discovery [<xref ref-type="bibr" rid="pone.0238290.ref046">46</xref>]. To take advantage of the standard InChIKeys for chemical compound entities, we examined the original words of the NER-processed entities by using ChemSpider [<xref ref-type="bibr" rid="pone.0238290.ref023">23</xref>]. ChemSpider’s simple application programming interface (API) was an advantage that allowed us to generate chemical compound entities with standard InChIKeys, InChIs, and the simplified molecular-input line-entry system (SMILES) notation.</p>
</sec>
<sec id="sec012">
<title>Gene</title>
<p>We appended gene entities in the elements of semantic filters. The gene annotation tool, Moara, was used for gene NER, considering that Moara can perform both recognition and normalization of gene entities, recognizing entities and their positions in the input text, and linking the entities to gene IDs in a known gene database [<xref ref-type="bibr" rid="pone.0238290.ref047">47</xref>]. Moara provides various preconstructed machine learning models for certain organism species. For our task, we adopted the human-oriented model. For gene normalization, we obtained lists of gene IDs corresponding to each gene entity. The gene ID with the highest score was selected and mapped to the recognized gene term.</p>
</sec>
</sec>
<sec id="sec013">
<title>Web application development for query refinement</title>
<p>The structures of clinical trial protocols have become increasingly complex [<xref ref-type="bibr" rid="pone.0238290.ref048">48</xref>]. The complexity of the protocol level is inversely related to clinical trial performance, as complex protocols negatively impact factors such as protocol amendment rates, patient recruitment, and retention rates [<xref ref-type="bibr" rid="pone.0238290.ref049">49</xref>]. In addition, the increasing complexity of the protocols hinders the design of new protocols because clinicians referring to previous clinical trials to design a protocol inevitably face difficulties in searching for suitable examples. Thus, from a query refinement standpoint, we developed the CLIPS web application to provide a graph querying interface for retrieving information about reliable clinical trial protocols, rather than a text querying interface that cannot visualize the dependency among prior elements affecting protocol structure [<xref ref-type="bibr" rid="pone.0238290.ref050">50</xref>, <xref ref-type="bibr" rid="pone.0238290.ref051">51</xref>].</p>
<p>We defined categorical-type elements as the frame structures of protocols. Although we have provided default orders of the elements, the user is free to choose the order. Once a decision about the order has been made, the user can find varying combinations of protocol elements via the graph-based search interface. Dependent elements are retrieved from the database in real-time, and the user can confirm the number of existing protocols corresponding to selected elements. The user also can search for other protocol frame structures after clipping the selected structure to the user clip pane. To search for complete information about the selected protocol structures, the user must click on a clipped protocol to examine the details of the selected protocol and trial information. Furthermore, the user can add semantic filters when searching for complete information to reduce the search scope or focus on certain areas of clinical trials. The data flow of the interface is illustrated in <xref ref-type="fig" rid="pone.0238290.g003">Fig 3</xref>.</p>
<fig id="pone.0238290.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Example of background data-flow on CLIPS from a research question in a clinical trial.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g003.tif" xlink:type="simple"/>
</fig>
<p>The backend of the interface was developed using Node.js [<xref ref-type="bibr" rid="pone.0238290.ref052">52</xref>], and the visualization of the interface was manipulated by d3.js [<xref ref-type="bibr" rid="pone.0238290.ref053">53</xref>]. We developed custom functions on d3.js to present each element title and protocol count for the user’s selection. To implement semantic filtering of the user’s free-text input, a backend engine was connected to the representational state transfer (REST) NER API. This provided NER of processed entities and types, allowing relevant entities to be searched for in the database. We designed the system architecture to combine the interface application, APIs, and database for stable operation in a cloud-computing environment (<xref ref-type="supplementary-material" rid="pone.0238290.s005">S1 Fig</xref>).</p>
</sec>
</sec>
<sec id="sec014" sec-type="results">
<title>Results</title>
<sec id="sec015">
<title>Database</title>
<p>We collected 184,634 clinical trial protocols and the frame structures of 13,210 clinical trial protocols from which we extracted 5,765,054 phenotypes, 1,151,053 chemical compounds, and 222,966 gene features for semantic filtering (<xref ref-type="table" rid="pone.0238290.t004">Table 4</xref>). Furthermore, we designed a continuous process of data update so that the protocol methods could evolve naturally, thus enhancing the quality of the database (<xref ref-type="supplementary-material" rid="pone.0238290.s006">S2 Fig</xref>). In conclusion, we developed a database system that efficiently retrieves information about existing clinical trial protocols for use in designing new clinical trial protocols.</p>
<table-wrap id="pone.0238290.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.t004</object-id>
<label>Table 4</label> <caption><title>Summary of the data collected in the CLIPS database.</title></caption>
<alternatives>
<graphic id="pone.0238290.t004g" mimetype="image" position="float" xlink:href="pone.0238290.t004.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Entity</th>
<th align="left">Rows (Unique)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Clinical Research Protocol</td>
<td align="left">184,634 (184,634)</td>
</tr>
<tr>
<td align="left">Frame Structures</td>
<td align="left">13,210 (13,210)</td>
</tr>
<tr>
<td align="left">Phenotype</td>
<td align="left">5,765,054 (18,438)</td>
</tr>
<tr>
<td align="left">Chemical Compounds</td>
<td align="left">1,151,053 (12,792)</td>
</tr>
<tr>
<td align="left">Genes</td>
<td align="left">222,966 (4,705)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec016">
<title>Application</title>
<p>The web application provides a service for retrieving protocol structures and inquiring about protocol information. The user traverses four stages when using this service: (1) setting the order of the protocols; (2) designing the protocol structure by selecting the elements that correspond to each sequence; (3) setting various functions required to search for the desired protocol information; and (4) providing the desired protocol information to explore the contents in detail. We developed the necessary interfaces to perform all of these processes (<xref ref-type="fig" rid="pone.0238290.g004">Fig 4</xref>).</p>
<fig id="pone.0238290.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g004</object-id>
<label>Fig 4</label>
<caption>
<title>System overview.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g004.tif" xlink:type="simple"/>
</fig>
<p>Before retrieving the protocol structure, the user needs to define the protocol sequence. This process uses a drag-and-drop interface to sort the list into one box and set the order; it allows users to work more intuitively [<xref ref-type="bibr" rid="pone.0238290.ref054">54</xref>]. After determining the protocol sequence, the vector-based collapsible and zoomable tree diagram visualization interface, which is an uncomplicated tool, is used for navigating the protocol structures with the selected elements [<xref ref-type="bibr" rid="pone.0238290.ref055">55</xref>, <xref ref-type="bibr" rid="pone.0238290.ref056">56</xref>]. The loaded protocol data are assembled into a hierarchical data structure. This visualization is rendered as a relation tree with a parent/child structure. The user clicks on the edge of the tree to add the next-step protocol. Conversely, to remove protocol edges from the current stage, the user must click on the parent edge of the previous step. The entire data structure is synchronized and updated every time the process occurs [<xref ref-type="bibr" rid="pone.0238290.ref057">57</xref>].</p>
<p>After the protocol structure has been retrieved, the user obtains protocol information based on the selected protocol structure. We developed a function called Clip to back up the protocol design. This allows the user to reuse previously selected protocol structures and receive corresponding study information. In addition, a protocol-information-filter function allows the user to retrieve the study information. The user searches for a disease and generates a label that contains the disease code. The protocol information is then filtered according to the set label. The resulting data are rendered as a table, which can be sorted with respect to the column entities to focus on and export specific data. When exploring detailed protocol information, our system transforms the data into a collapsible interface instead of providing raw text.</p>
<p>Although protocol design concepts have evolved globally, the development of tools to design clinical trial protocols has been limited [<xref ref-type="bibr" rid="pone.0238290.ref058">58</xref>–<xref ref-type="bibr" rid="pone.0238290.ref060">60</xref>]. Our aim was to simplify clinical trial retrieval and the design stage by developing a dedicated interface. We expect this to be the starting point for the creation, sharing, and development of more clinical trial protocols.</p>
</sec>
</sec>
<sec id="sec017">
<title>Validation</title>
<sec id="sec018">
<title>Technical validation</title>
<p>The goal of CLIPS was to provide a database system and an information retrieval method that can search complex clinical trial protocols. To achieve this, we developed a search tool that can build and utilize a database suitable for protocol structures. Furthermore, we created semantic features in CLIPS using text-mining methods. As a result, it was possible to perform accurate searches using the protocol’s contextual meaning. To evaluate the performance of CLIPS, we attempted to verify whether the semantic filters perform better than a keyword search.</p>
<p>To technically validate the semantic filter of CLIPS, we used the relational information between clinical trial protocols and corresponding disease conditions as collected from clinicaltrials.gov [<xref ref-type="bibr" rid="pone.0238290.ref018">18</xref>]. As this disease condition assignment is manually curated by experts and does not originate from the protocol itself, it can be used as a gold standard to evaluate the semantic filters of CLIPS.</p>
<p>The gold standard set of disease conditions and corresponding trial protocols was obtained by crawling the topic page of clinicaltrials.gov [<xref ref-type="bibr" rid="pone.0238290.ref018">18</xref>]. Among 25 conditional categories provided by clinicaltrials.gov, the “Cancers and Other Neoplasms” condition category was selected as the first set because it covers 44.74% of the total protocol set. Consequently, the corresponding trial protocols and corresponding disease conditions were identified. For instance, in our gold standard set, 353 distinct protocols were associated with the disease condition “Abdominal neoplasm.” As a result, a set of 82,584 distinct protocols corresponding to 520 disease conditions was compiled (as of July 12, 2017) and used as a gold standard set for technical validation. In addition, we expanded the gold standard set to the remaining 24 categories for sufficient validation. The expanded set, compiled as of June 21, 2020, has 289,956 distinct protocols corresponding to 6,172 disease conditions. However, the collected set showed a 2.4% protocol loss from clinicaltrials.gov. This was caused by server overload during data crawling, and we excluded the lost protocols.</p>
<p>The semantic search performance of CLIPS was validated using the following procedure. In CLIPS, search keywords that contained the disease condition’s nomenclature were supplied as input queries to the system. The semantic entities were translated from the search keyword through the text-mining-based models described in the previous section. The results were obtained by conducting a search using the translated semantic entities from the CLIPS database. Exact matching with the AACT database was used as a baseline. CLIPS and AACT databases were configured on a single local server.</p>
<p>We used a condition name (e.g., Adrenocortical Carcinoma) as a search keyword to retrieve the condition field of the source database and semantic entities (e.g., C0206686) from CLIPS. We then validated our retrievals by comparing the number of retrieved identifiers (NCTID) of protocols (<xref ref-type="supplementary-material" rid="pone.0238290.s002">S2 File</xref>) [<xref ref-type="bibr" rid="pone.0238290.ref061">61</xref>] and calculated the precision, recall, and F1-score of the results. Precision refers to how accurately the model categorizes the data and is calculated as the ratio of properly categorized data to the total retrieved data (1). Recall is the number of positively classified data divided by the original number of positive data (2). The F1-Score is a harmonic mean that considers the complementary characteristics of precision and recall (3); it is commonly used to compare the performance of different information retrieval systems [<xref ref-type="bibr" rid="pone.0238290.ref062">62</xref>]. True positive + false positive is the count of all NCTIDs retrieved from each database. True positive is the count of the retrieved NCTIDs of each database intersected with the gold standard. True positive + false negative is the total number of NCTIDs in the gold standard corresponding to each input condition name.</p>
<disp-formula id="pone.0238290.e001">
<alternatives>
<graphic id="pone.0238290.e001g" mimetype="image" position="anchor" xlink:href="pone.0238290.e001.tif" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow></mml:mfrac>
</mml:math>
</alternatives>
<label>(1)</label>
</disp-formula>
<disp-formula id="pone.0238290.e002">
<alternatives>
<graphic id="pone.0238290.e002g" mimetype="image" position="anchor" xlink:href="pone.0238290.e002.tif" xlink:type="simple"/>
<mml:math display="block" id="M2">
<mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="normal">f</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">v</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow></mml:mfrac>
</mml:math>
</alternatives>
<label>(2)</label>
</disp-formula>
<disp-formula id="pone.0238290.e003">
<alternatives>
<graphic id="pone.0238290.e003g" mimetype="image" position="anchor" xlink:href="pone.0238290.e003.tif" xlink:type="simple"/>
<mml:math display="block" id="M3">
<mml:mi mathvariant="normal">F</mml:mi><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>*</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mo>*</mml:mo><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mfrac>
</mml:math>
</alternatives>
<label>(3)</label>
</disp-formula>
<p>The F1-Score of CLIPS (0.515) was higher than that of the keyword search (0.38) (<xref ref-type="fig" rid="pone.0238290.g005">Fig 5A</xref>). The precision of CLIPS was 0.437, which was slightly lower than that of the keyword search (0.668), but it outperformed the keyword search by more than a factor of two in terms of recall (0.63 and 0.26, respectively). In the expanded conditional categories, the F1-Score of CLIPS (0.55) was higher than that of the keyword search (0.12) (<xref ref-type="fig" rid="pone.0238290.g005">Fig 5B</xref>). The precision of CLIPS was 0.44, which was slightly higher than that of the keyword search (0.39), but it also outperformed the keyword search by more than a factor of two in terms of recall (0.38 and 0.08, respectively). As higher recall values are a positive factor in clinical trial design, CLIPS can retrieve more protocols that provide more suitable references for protocol design.</p>
<fig id="pone.0238290.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g005</object-id>
<label>Fig 5</label>
<caption>
<title/>
<p>Evaluation results of the keyword search and the semantic filter of CLIPS. (A) Cancers and Other Neoplasms. (B) Average values of the expanded 24 conditional categories.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g005.tif" xlink:type="simple"/>
</fig>
<p><xref ref-type="fig" rid="pone.0238290.g006">Fig 6</xref> shows the detailed scores of each expanded conditional category. It also shows the recall scores of each disease condition belonging to a conditional category, depicted as an area graph. Labels of the x-axis on the area graph are the initial character of each disease condition.</p>
<fig id="pone.0238290.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g006</object-id>
<label>Fig 6</label>
<caption>
<title>The detailed results of the keyword search and using the semantic filter of CLIPS on the expanded conditional categories.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g006.tif" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec019">
<title>User experience</title>
<p>As described earlier, the CLIPS search system was developed for different purposes than those of the existing clinical trial search systems. CLIPS is intended to assist in the effective design of a specific trial protocol, whereas existing search systems are generally used to process a variety of information on clinical trials. Therefore, to evaluate the performance of CLIPS, an evaluation method that reflects this purpose should be constructed. As the ultimate purpose of a search system is to help users collect the information they require, user satisfaction is a significant, if subjective, measure of performance. Thus, the evaluation of a search system should be able to quantify the subjective impressions of users as well as objective indicators. By considering these factors, we conducted an evaluation trial that compared CLIPS with the conventional search system provided by clinicaltrials.gov.</p>
<p>Ten participants aged between 24 and 33 were recruited from a group of experts in the field of bioinformatics (Bio and Brain Engineering Department of Korea Advanced Institute of Science and Technology, Republic of Korea). They included two undergraduates, three master’s students, four Ph.D. candidates, and one postdoctoral researcher. All participants provided informed consent before conducting the evaluation trial. The participants were assigned the task of finding a suitable clinical trial set for a simulated problem. Two separate tasks were given to the participants, who were asked to construct the most common previous protocol design under the given trial conditions and research questions and perform each task using each of the two search systems within the time limit (5 min each). To construct the most common trial design, participants had to collect information about the various elements of the trial protocol (<xref ref-type="fig" rid="pone.0238290.g007">Fig 7</xref>). After the task, participants completed a questionnaire on their subjective satisfaction regarding the system. The questionnaire consisted of six questions that inquired about the participants’ satisfaction using a 7-point Likert scale [<xref ref-type="bibr" rid="pone.0238290.ref063">63</xref>].</p>
<fig id="pone.0238290.g007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.g007</object-id>
<label>Fig 7</label>
<caption>
<title>Tasks given to participants for evaluation.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="pone.0238290.g007.tif" xlink:type="simple"/>
</fig>
<p>Participants were observed to perform better when using CLIPS than when using the clinicaltrials.gov search system. By using CLIPS, participants obtained more answers within the time limit, and the average time required to perform the task was shorter than that with clinicaltrials.gov. The number of clinical trials retrieved from CLIPS was less than that from clinicaltrials.gov because it was possible to apply more detailed search filters to narrow the search scope. Participants were more satisfied with CLIPS than with the existing search systems, as evidenced by the average score of the questionnaire responses (<xref ref-type="table" rid="pone.0238290.t005">Table 5</xref>). These results show that CLIPS can be effectively used to retrieve certain types of trial protocols.</p>
<table-wrap id="pone.0238290.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0238290.t005</object-id>
<label>Table 5</label> <caption><title>Evaluation results.</title></caption>
<alternatives>
<graphic id="pone.0238290.t005g" mimetype="image" position="float" xlink:href="pone.0238290.t005.tif" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" rowspan="2">Measure (n = 10)</th>
<th align="center" colspan="2">CLIPS</th>
<th align="center" colspan="2">clinicaltrials.gov</th>
</tr>
<tr>
<th align="center">Task1</th>
<th align="center">Task2</th>
<th align="center">Task1</th>
<th align="center">Task2</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">Answer submitted, %</td>
<td align="center">100.0</td>
<td align="center">100.0</td>
<td align="center">12.0</td>
<td align="center">17.14</td>
</tr>
<tr>
<td align="center">Elapsed time (minutes), mean (SD)</td>
<td align="center">4.05 (0.86)</td>
<td align="center">5.0 (0)</td>
<td align="center">3.95 (0.89)</td>
<td align="center">5.0 (0)</td>
</tr>
<tr>
<td align="center">Count of retrieved trials by search, mean</td>
<td align="center">137</td>
<td align="center">179</td>
<td align="center">1,591</td>
<td align="center">8,484</td>
</tr>
<tr>
<td align="center">How much do you think the retrieved result is suitable for the task? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">6.8 (0.18)</td>
<td align="center" colspan="2">2.2 (1.07)</td>
</tr>
<tr>
<td align="center">How much do you think you have had enough time to perform the task? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">6.6 (0.27)</td>
<td align="center" colspan="2">1.2 (0.18)</td>
</tr>
<tr>
<td align="center">How difficult do you think it was to perform the task? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">1.6 (0.93)</td>
<td align="center" colspan="2">6.2 (1.07)</td>
</tr>
<tr>
<td align="center">How much do you trust the search result? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">6.2 (0.18)</td>
<td align="center" colspan="2">3.6 (3.6)</td>
</tr>
<tr>
<td align="center">How satisfied are you with your answers based on your search results? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">6.4 (0.27)</td>
<td align="center" colspan="2">1.8 (0.84)</td>
</tr>
<tr>
<td align="center">How satisfied are you with the search system? (range 1–7), mean (SD)</td>
<td align="center" colspan="2">6.5 (0.5)</td>
<td align="center" colspan="2">2.3 (0.68)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
</sec>
<sec id="sec020" sec-type="conclusions">
<title>Discussion</title>
<p>Clinical trial protocols are the foundation for planning, approving, conducting, and reporting clinical trials [<xref ref-type="bibr" rid="pone.0238290.ref003">3</xref>]. They include general information, objectives, trial design, the selection and withdrawal of subjects, treatment, safety assessments, quality control procedures, and record-keeping processes [<xref ref-type="bibr" rid="pone.0238290.ref064">64</xref>]. This study aimed to develop an efficient system of providing the information necessary for clinical trial protocol development. In particular, we have made it possible to find previous protocols of the desired type using the structural features of the protocol composed of context-dependent protocol elements. Furthermore, semantic filtering was included to ensure the retrieval of relevant protocol context information.</p>
<p>CLIPS can search for protocols or specific disease names and structures. In addition, CLIPS can be used for a combination of structural searches, structural order searches, semantic searches, or searches including both structure and semantic context. For instance, our system can perform the following functions:</p>
<p>We suppose that a user has a plan to develop a clinical trial protocol about cardiovascular infections, and the user will perform an observational study and should decide the sampling method.</p>
<list list-type="order">
<list-item><p>Input 'Cardiovascular infections' to the semantic filtering input box.</p></list-item>
<list-item><p>Order elements as follows: 1. Type, 2. Sampling method. The system orders the rest of the elements automatically.</p></list-item>
<list-item><p>Click 'complete' on the protocol order interface.</p></list-item>
<list-item><p>Select 'node' on the graph-based search interface.</p></list-item>
<list-item><p>Click 'type' and then wait until the next element result is provided.</p></list-item>
<list-item><p>Click 'observational'; the user can then see that 'Non-probability sample' is the sampling method for the specific disease.</p></list-item>
<list-item><p>Click 'Clips' below the graph-based search interface.</p></list-item>
<list-item><p>Click the clipped protocol structure; the system then provides detailed information on the 6 retrieved protocols.</p></list-item>
<list-item><p>The user can follow or download more detailed protocol information based on a detailed description of the searched protocols from the explanation interface, such as model, enrollment type, time perspective, and outcome variables.</p></list-item>
</list>
<p>In accordance with the above results, we believe that many clinicians will be able to utilize our system to design more reliable clinical trial protocols. The developed semantic filter can be used to search for protocols and can be used for drug discovery using the retrieved protocols. CLIPS provides search results as a downloadable file containing the semantic filters as well as protocol structure information. This ensures wide protocol search coverage. For example, we used UMLS as a phenotype semantic filter. UMLS is an integrated terminology system that combines biomedical terminologies including SNOMED CT, MeSH, and MedDRA [<xref ref-type="bibr" rid="pone.0238290.ref065">65</xref>]. Clinical terms in SNOMED CT have been integrated into the UMLS metathesaurus since 2003 [<xref ref-type="bibr" rid="pone.0238290.ref066">66</xref>]. For instance, the coverage of HPO terms is higher in UMLS than in SNOMED CT [<xref ref-type="bibr" rid="pone.0238290.ref067">67</xref>]. According to Bodenreider’s study, UMLS covered 54% of the HPO phenotype terms, whereas SNOMED CT covered only 30% [<xref ref-type="bibr" rid="pone.0238290.ref037">37</xref>]. Based on the semantic filter, researchers can screen chemical compounds of drug candidate substances, regardless of whether they are known to be effective, from the previously retrieved protocols [<xref ref-type="bibr" rid="pone.0238290.ref068">68</xref>]. Gene and phenotype can also be used for drug efficacy screening or efficacious drug combinations in massive biological networks using a similar approach [<xref ref-type="bibr" rid="pone.0238290.ref069">69</xref>–<xref ref-type="bibr" rid="pone.0238290.ref071">71</xref>]. Furthermore, users can download the entire database. The efficacy weights of edges can then be predicted, and the predicted pathways can be validated in large-scale biological networks and utilized to find their maximum therapeutic benefits [<xref ref-type="bibr" rid="pone.0238290.ref072">72</xref>, <xref ref-type="bibr" rid="pone.0238290.ref073">73</xref>].</p>
<p>The present study is limited in terms of user-experience validation. This is because clinicaltrials.gov and CLIPS have different objectives. Clinicaltrials.gov was developed to register clinical trials [<xref ref-type="bibr" rid="pone.0238290.ref018">18</xref>]. The registered information can be retrieved by clinical researchers, patients, and families of patients. This is a different objective from that of CLIPS, which is specially designed for the protocol search task. As the objective is different, the method is different. Therefore, it cannot be claimed that clinicaltrials.gov offers worse performance than CLIPS, although the user-experience validation experiment showed a low score for use of clinicaltrials.gov. The satisfaction score of CLIPS is high in terms of the objective of protocol searching. If clinicaltrials.gov were to include the search function of CLIPS, it would offer clinical researchers the ability to search comprehensively for specific information.</p>
<p>Moreover, this study is the baseline step of a computerized clinical protocol development system. In future work, it would be promising to define the comprehensive similarity among the protocols and rank them based on semantic vector representations of each data feature of the protocols [<xref ref-type="bibr" rid="pone.0238290.ref074">74</xref>]. To validate the result, ranking evaluation models on information retrieval, such as MRR (mean reciprocal rank), nDCG (normalized discounted cumulative gain), MAP (mean average precision), or Top-K recall methods, should be used based on a consensus on the rank criteria [<xref ref-type="bibr" rid="pone.0238290.ref075">75</xref>].</p>
</sec>
<sec id="sec021" sec-type="conclusions">
<title>Conclusion</title>
<p>Clinical trial protocols are crucial for clinical trials to achieve their primary purposes. However, clinical researchers tend to design clinical trials according to their expertise. This type of bias or ambiguity may lead to inconsistencies and objectivity problems when clinical trial protocols are designed. To solve these problems, an information retrieval system for clinical trial protocols is needed. In this study, we developed a clinical trial protocol database system and a web application. The database contains design, subject, variable, statistical issue, description, and structure of clinical trial protocols. The web-based system provides a graph-based search interface based on the structure; users can then find relevant information on a protocol from the database. Furthermore, the database also includes semantic features to assist the context-specific protocol search. Unlike the previous clinical trial database system, our system has the following two main strengths: (i) it provides structural information to present simplified element-wise selection and (ii) it extends the search field based on filterable semantic features to do a context-specific search for clinical trial protocols. We believe that CLIPS will be a major resource for clinical trials and will be of interest to clinicians and pharmaceutical companies or even regulatory agencies by conveniently providing information about clinical trial protocols. This study has described the formulation of the CLIPS database system and explained its implementation and advantages over existing keyword-based search systems.</p>
<p>The whole database is available for download (<ext-link ext-link-type="uri" xlink:href="http://corus.kaist.edu/clips" xlink:type="simple">http://corus.kaist.edu/clips</ext-link>).</p>
</sec>
<sec id="sec022">
<title>Supporting information</title>
<supplementary-material id="pone.0238290.s001" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="pone.0238290.s001.xlsx" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Supplementary Data 1.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s002" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="pone.0238290.s002.xlsx" xlink:type="simple">
<label>S2 File</label>
<caption>
<title>Supplementary Data 2.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s003" mimetype="text/csv" position="float" xlink:href="pone.0238290.s003.csv" xlink:type="simple">
<label>S3 File</label>
<caption>
<title>Supplementary Data 3.</title>
<p>(CSV)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s004" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="pone.0238290.s004.docx" xlink:type="simple">
<label>S4 File</label>
<caption>
<title>Supplementary File.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s005" mimetype="application/pdf" position="float" xlink:href="pone.0238290.s005.pdf" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Example of factor, element, and value in the definition section.</title>
<p>The “design” factor of a protocol includes elements, and among the elements, the “model” contains values. <italic>E</italic><sub><italic>n</italic></sub> is the number of elements in a factor; <italic>V</italic><sub><italic>n</italic></sub> is the number of values in an element. For example, the values consist of “Crossover Assignment” to “Case-only”, and the “model” element of the “design” factor has one of the values.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s006" mimetype="application/pdf" position="float" xlink:href="pone.0238290.s006.pdf" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>CLIPS service architecture on Amazon web service.</title>
<p>Domain name service (DNS) uses the KAIST domain server to use kaist.edu. A user accesses the CLIPS service through the DNS. When accessing CLIPS through the DNS, the interface elastic compute cloud (EC2, <ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/ec2/" xlink:type="simple">https://aws.amazon.com/ec2/</ext-link>) is called, and it displays a screen to the user. Interface EC2 connects to API Engine EC2 to process the data requested by the user. If the user uses a semantic filter, API engine EC2 transfers the input value of the user to text mining EC2, and then, it receives the result. Particularly, text mining EC2 is composed of Metamap<sup>1</sup>, Moara<sup>2</sup>, and Chemspot<sup>3</sup>, Dockers<sup>4</sup>, which we customize for our service in the elastic container service (ECS, <ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/ecs/" xlink:type="simple">https://aws.amazon.com/ecs/</ext-link>) group. To provide the data requested by the user, API engine EC2 receives the searched result from the CLIPS relational database service (RDS, <ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/rds/" xlink:type="simple">https://aws.amazon.com/rds/</ext-link>) in which the clinical trial protocol data are stored, and it transfers the result to interface EC2. Furthermore, we use elastic load balancing (ELB, <ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/elasticloadbalancing/" xlink:type="simple">https://aws.amazon.com/elasticloadbalancing/</ext-link>) for stable service traffic control, and ELB is required to make requests for the EC2 groups that are grouped into the auto scaling group (<ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/ec2/autoscaling/" xlink:type="simple">https://aws.amazon.com/ec2/autoscaling/</ext-link>).</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s007" mimetype="application/eps" position="float" xlink:href="pone.0238290.s007.eps" xlink:type="simple">
<label>S3 Fig</label>
<caption>
<title>Continuous data update flowchart of CLIPS.</title>
<p>The DB Checker, a database change detection function based on the Quartz job scheduler (<ext-link ext-link-type="uri" xlink:href="http://www.quartz-scheduler.org/" xlink:type="simple">http://www.quartz-scheduler.org</ext-link>) built in the CLIPS API engine, operates as follows: (1) DB Checker detects whether the source database is changed or not. (2) DB Checker detects whether schema of the database is changed or not. (3) Schema changed. (3–1) In case of a schema change, the program cannot process it automatically. Therefore, it needs to analyze the data manually. The DB Checker sends an update notification email to the CLIPS developers using the AWS simple notification service (<ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/sns/" xlink:type="simple">https://aws.amazon.com/sns/</ext-link>). (4) Schema not changed. Only new data are added. (4–1) The modified dataset is stored in the CLIPS temporary data storage table. (4–2) Protocol structure information is extracted from a temporary data table. (4–3) Semantic features are extracted from the texts of the data using the CLIPS text mining engine. (4–4) The result obtained in the previous step is stored in the CLIPS database, and the update is completed.</p>
<p>(EPS)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0238290.s008" mimetype="image/png" position="float" xlink:href="pone.0238290.s008.png" xlink:type="simple">
<label>S4 Fig</label>
<caption>
<title>CLIPS example of protocol structure retrieval by the selection of a categorical type element.</title>
<p>A user selects the type as the first element in the categorical type element and then chooses the intervention category. The second categorical type element is of primary purpose, and the diagnostic is decided among the nine categories included in the element.</p>
<p>(PNG)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We would like to thank David Sharpe for his cooperation in using ChemSpider APIs and Seyol Yoon for helping us select phenotypic TUIs from UMLS.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0238290.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tay-Teo</surname> <given-names>K.</given-names></name>, <name><surname>Ilbawi</surname> <given-names>A.</given-names></name>, and <name><surname>Hill</surname> <given-names>S. R.</given-names></name>, "<article-title>Comparison of sales income and research and development costs for fda-approved cancer drugs sold by originator drug companies</article-title>," <source><italic>JAMA network open</italic></source>, vol. <volume>2</volume>, no. <issue>1</issue>, pp. <fpage>e186875</fpage>–<lpage>e186875</lpage>, <year>2019</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1001/jamanetworkopen.2018.6875" xlink:type="simple">10.1001/jamanetworkopen.2018.6875</ext-link></comment> <object-id pub-id-type="pmid">30644967</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref002"><label>2</label><mixed-citation publication-type="other" xlink:type="simple">G. V. Research. "Clinical Trials Market Size Worth $68.9 Billion By 2026 | CAGR: 5.7%." <ext-link ext-link-type="uri" xlink:href="https://www.grandviewresearch.com/press-release/clinical-trials-market" xlink:type="simple">https://www.grandviewresearch.com/press-release/clinical-trials-market</ext-link> (accessed 2019).</mixed-citation></ref>
<ref id="pone.0238290.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Chan</surname> <given-names>A.-W.</given-names></name> <etal>et al</etal>., "<article-title>SPIRIT 2013 statement: defining standard protocol items for clinical trials</article-title>," vol. <volume>158</volume>, no. <issue>3</issue>, pp. <fpage>200</fpage>–<lpage>207</lpage>, <year>2013</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7326/0003-4819-158-3-201302050-00583" xlink:type="simple">10.7326/0003-4819-158-3-201302050-00583</ext-link></comment> <object-id pub-id-type="pmid">23295957</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Getz</surname> <given-names>K. A.</given-names></name>, <name><surname>Wenger</surname> <given-names>J.</given-names></name>, <name><surname>Campo</surname> <given-names>R. A.</given-names></name>, <name><surname>Seguine</surname> <given-names>E. S.</given-names></name>, and <name><surname>Kaitin</surname> <given-names>K. I. J. A. j. o. t.</given-names></name>, "<article-title>Assessing the impact of protocol design changes on clinical trial performance</article-title>," vol. <volume>15</volume>, no. <issue>5</issue>, pp. <fpage>450</fpage>–<lpage>457</lpage>, <year>2008</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1097/MJT.0b013e31816b9027" xlink:type="simple">10.1097/MJT.0b013e31816b9027</ext-link></comment> <object-id pub-id-type="pmid">18806521</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Claerhout</surname> <given-names>B.</given-names></name> <etal>et al</etal>., "<article-title>Federated electronic health records research technology to support clinical trial protocol optimization: Evidence from EHR4CR and the InSite platform</article-title>," <source><italic>Journal of Biomedical Informatics</italic></source>, vol. <volume>90</volume>, p. <fpage>103090</fpage>, 2019/02/01/ <year>2019</year>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2018.12.004" xlink:type="simple">10.1016/j.jbi.2018.12.004</ext-link></comment> <object-id pub-id-type="pmid">30611012</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Getz</surname> <given-names>K. A.</given-names></name>, <name><surname>Zuckerman</surname> <given-names>R.</given-names></name>, <name><surname>Cropp</surname> <given-names>A. B.</given-names></name>, <name><surname>Hindle</surname> <given-names>A. L.</given-names></name>, <name><surname>Krauss</surname> <given-names>R.</given-names></name>, and <name><surname>Kaitin</surname> <given-names>K. I. J. D. I. J.</given-names></name>, "<article-title>Measuring the incidence, causes, and repercussions of protocol amendments</article-title>," vol. <volume>45</volume>, no. <issue>3</issue>, pp. <fpage>265</fpage>–<lpage>275</lpage>, <year>2011</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Malikova</surname> <given-names>M. A.</given-names></name>, "<article-title>Optimization of protocol design: a path to efficient, lower cost clinical trial execution</article-title>," <source><italic>Future science OA</italic></source>, vol. <volume>2</volume>, no. <issue>1</issue>, <year>2016</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Getz</surname> <given-names>K. A.</given-names></name> <etal>et al</etal>., "<article-title>The impact of protocol amendments on clinical trial performance and cost</article-title>," vol. <volume>50</volume>, no. <issue>4</issue>, pp. <fpage>436</fpage>–<lpage>441</lpage>, <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/2168479016632271" xlink:type="simple">10.1177/2168479016632271</ext-link></comment> <object-id pub-id-type="pmid">30227022</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Rodrigues</surname> <given-names>L. M. L.</given-names></name> <etal>et al</etal>., "<article-title>Towards a standardized protocol for conducting randomized clinical trial for software</article-title>," <source><italic>Procedia computer science</italic></source>, vol. <volume>138</volume>, pp. <fpage>125</fpage>–<lpage>130</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Park</surname> <given-names>Y. R.</given-names></name> <etal>et al</etal>., "<article-title>Utilization of a Clinical Trial Management System for the Whole Clinical Trial Process as an Integrated Database: System Development</article-title>," <source><italic>J Med Internet Res</italic></source>, vol. <volume>20</volume>, no. <issue>4</issue>, p. <fpage>e103</fpage>, <month>Apr</month> <day>24</day> <year>2018</year>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2196/jmir.9312" xlink:type="simple">10.2196/jmir.9312</ext-link></comment> <object-id pub-id-type="pmid">29691212</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tetzlaff</surname> <given-names>J. M.</given-names></name>, <name><surname>Moher</surname> <given-names>D.</given-names></name>, and <name><surname>Chan</surname> <given-names>A. W.</given-names></name>, "<article-title>Developing a guideline for clinical trial protocol content: Delphi consensus survey</article-title>," <source><italic>Trials</italic></source>, vol. <volume>13</volume>, p. <fpage>176</fpage>, <month>Sep</month> <day>24</day> <year>2012</year>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1745-6215-13-176" xlink:type="simple">10.1186/1745-6215-13-176</ext-link></comment> <object-id pub-id-type="pmid">23006145</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref012"><label>12</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Silberschatz</surname> <given-names>A.</given-names></name>, <name><surname>Korth</surname> <given-names>H. F.</given-names></name>, and <name><surname>Sudarshan</surname> <given-names>S.</given-names></name>, <source><italic>Database system concepts</italic></source>. <publisher-name>McGraw-Hill</publisher-name> <publisher-loc>New York</publisher-loc>, <year>1997</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Cambridge</surname> <given-names>U.</given-names></name>, "<article-title>Introduction to information retrieval</article-title>," <year>2009</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Steeves</surname> <given-names>J.</given-names></name> <etal>et al</etal>., <article-title>"Guidelines for the conduct of clinical trials for spinal cord injury (SCI) as developed by the ICCP panel: clinical trial outcome measures,"</article-title> vol. <volume>45</volume>, no. <issue>3</issue>, p. <fpage>206</fpage>, <year>2007</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/sj.sc.3102008" xlink:type="simple">10.1038/sj.sc.3102008</ext-link></comment> <object-id pub-id-type="pmid">17179972</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tsatsaronis</surname> <given-names>G.</given-names></name> <etal>et al</etal>., <source>"PONTE: a context-aware approach for automated clinical trial protocol design," in <italic>proceedings of the 6th International Workshop on Personalized Access</italic>, <italic>Profile Management</italic>, <italic>and Context Awareness in Databases in conjunction with VLDB</italic></source>, <year>2012</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tasneem</surname> <given-names>A.</given-names></name> <etal>et al</etal>., <article-title>"The database for aggregate analysis of ClinicalTrials. gov (AACT) and subsequent regrouping by clinical specialty,"</article-title> vol. <volume>7</volume>, no. <issue>3</issue>, p. <fpage>e33677</fpage>, <year>2012</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0033677" xlink:type="simple">10.1371/journal.pone.0033677</ext-link></comment> <object-id pub-id-type="pmid">22438982</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Griesenauer</surname> <given-names>R. H.</given-names></name>, <name><surname>Schillebeeckx</surname> <given-names>C.</given-names></name>, and <name><surname>Kinch</surname> <given-names>M. S.</given-names></name>, "<article-title>CDEK: Clinical Drug Experience Knowledgebase</article-title>," <source><italic>Database</italic></source>, vol. <volume>2019</volume>, <year>2019</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Zarin</surname> <given-names>D. A.</given-names></name> and <name><surname>Keselman</surname> <given-names>A. J. C.</given-names></name>, "<article-title>Registering a clinical trial in ClinicalTrials. gov</article-title>," vol. <volume>131</volume>, no. <issue>3</issue>, pp. <fpage>909</fpage>–<lpage>912</lpage>, <year>2007</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1378/chest.06-2450" xlink:type="simple">10.1378/chest.06-2450</ext-link></comment> <object-id pub-id-type="pmid">17303677</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref019"><label>19</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Canese</surname> <given-names>K.</given-names></name> and <name><surname>Weis</surname> <given-names>S.</given-names></name>, "<chapter-title>PubMed: the bibliographic database</chapter-title>," in <source>The NCBI Handbook</source> <italic>[Internet]</italic>. <edition designator="2">2nd edition</edition>: <publisher-name>National Center for Biotechnology Information (US)</publisher-name>, <year>2013</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tasneem</surname> <given-names>A.</given-names></name> <etal>et al</etal>., <article-title>"The database for aggregate analysis of ClinicalTrials. gov (AACT) and subsequent regrouping by clinical specialty,"</article-title> <source><italic>PloS one</italic></source>, vol. <volume>7</volume>, no. <issue>3</issue>, p. <fpage>e33677</fpage>, <year>2012</year>. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3306288/pdf/pone.0033677.pdf" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3306288/pdf/pone.0033677.pdf</ext-link>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0033677" xlink:type="simple">10.1371/journal.pone.0033677</ext-link></comment> <object-id pub-id-type="pmid">22438982</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>De Gruttola</surname> <given-names>V. G.</given-names></name> <etal>et al</etal>., "<article-title>Considerations in the Evaluation of Surrogate Endpoints in Clinical Trials</article-title>," <source><italic>Controlled Clinical Trials</italic></source>, vol. <volume>22</volume>, no. <issue>5</issue>, pp. <fpage>485</fpage>–<lpage>502</lpage>, 2001/10/01/ <year>2001</year>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/s0197-2456%2801%2900153-2" xlink:type="simple">10.1016/s0197-2456(01)00153-2</ext-link></comment> <object-id pub-id-type="pmid">11578783</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Neves</surname> <given-names>M. L.</given-names></name>, <name><surname>Carazo</surname> <given-names>J.-M.</given-names></name>, and <name><surname>Pascual-Montano</surname> <given-names>A.</given-names></name>, "<article-title>Moara: a Java library for extracting and normalizing gene and protein mentions</article-title>," <source><italic>BMC bioinformatics</italic></source>, vol. <volume>11</volume>, no. <issue>1</issue>, p. <fpage>157</fpage>, <year>2010</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref023"><label>23</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Pence</surname> <given-names>H. E.</given-names></name> and <name><surname>Williams</surname> <given-names>A.</given-names></name>, "<chapter-title>ChemSpider: an online chemical information resource</chapter-title>," ed: <publisher-name>ACS Publications</publisher-name>, <year>2010</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Savova</surname> <given-names>G. K.</given-names></name> <etal>et al</etal>., <article-title>"Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications,"</article-title> <source><italic>Journal of the American Medical Informatics Association</italic></source>, vol. <volume>17</volume>, no. <issue>5</issue>, pp. <fpage>507</fpage>–<lpage>513</lpage>, <year>2010</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/jamia.2009.001560" xlink:type="simple">10.1136/jamia.2009.001560</ext-link></comment> <object-id pub-id-type="pmid">20819853</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref025"><label>25</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Hulley</surname> <given-names>S. B.</given-names></name>, <chapter-title><italic>Designing clinical research</italic></chapter-title>. <publisher-name>Lippincott Williams &amp; Wilkins</publisher-name>, <year>2007</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Meeker-O’Connell</surname> <given-names>A.</given-names></name> <etal>et al</etal>., "<article-title>Enhancing clinical evidence by proactively building quality into clinical trials</article-title>," <source><italic>Clinical Trials</italic></source>, vol. <volume>13</volume>, no. <issue>4</issue>, pp. <fpage>439</fpage>–<lpage>444</lpage>, <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/1740774516643491" xlink:type="simple">10.1177/1740774516643491</ext-link></comment> <object-id pub-id-type="pmid">27098014</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Aggett</surname> <given-names>P. J.</given-names></name> <etal>et al</etal>., "<article-title>Passclaim</article-title>," <source><italic>European Journal of Nutrition</italic></source>, vol. <volume>44</volume>, pp. <fpage>i5</fpage>–<lpage>i30</lpage>, <year>2005</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00394-005-1104-3" xlink:type="simple">10.1007/s00394-005-1104-3</ext-link></comment> <object-id pub-id-type="pmid">15933809</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Kok</surname> <given-names>F. J.</given-names></name> and <name><surname>Kromhout</surname> <given-names>D.</given-names></name>, "<article-title>Atherosclerosis</article-title>," <source><italic>European Journal of Nutrition</italic></source>, vol. <volume>43</volume>, no. <issue>1</issue>, pp. <fpage>i2</fpage>–<lpage>i5</lpage>, <year>2004</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Mensink</surname> <given-names>R. P.</given-names></name> <etal>et al</etal>., "<article-title>PASSCLAIM–Diet-related cardiovascular disease</article-title>," <source><italic>European Journal of Nutrition</italic></source>, vol. <volume>42</volume>, no. <issue>1</issue>, pp. <fpage>i6</fpage>–<lpage>i27</lpage>, <year>2003</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Cummings</surname> <given-names>J. H.</given-names></name> <etal>et al</etal>., "<article-title>PASSCLAIM 1—gut health and immunity</article-title>," <source><italic>European Journal of Nutrition</italic></source>, vol. <volume>43</volume>, no. <issue>2</issue>, pp. <fpage>ii118</fpage>–<lpage>ii173</lpage>, <year>2004</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Adams</surname> <given-names>A. S.</given-names></name>, <name><surname>Soumerai</surname> <given-names>S. B.</given-names></name>, <name><surname>Lomas</surname> <given-names>J.</given-names></name>, and <name><surname>Ross-Degnan</surname> <given-names>D.</given-names></name>, "<article-title>Evidence of self-report bias in assessing adherence to guidelines</article-title>," <source><italic>International Journal for Quality in Health Care</italic></source>, vol. <volume>11</volume>, no. <issue>3</issue>, pp. <fpage>187</fpage>–<lpage>192</lpage>, <year>1999</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/intqhc/11.3.187" xlink:type="simple">10.1093/intqhc/11.3.187</ext-link></comment> <object-id pub-id-type="pmid">10435838</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref032"><label>32</label><mixed-citation publication-type="other" xlink:type="simple">Google, "Google scholar," 2019. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/" xlink:type="simple">https://scholar.google.com/</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref033"><label>33</label><mixed-citation publication-type="other" xlink:type="simple">PMC - NCBI, "National Center for Biotechnology Information," 2019. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref034"><label>34</label><mixed-citation publication-type="other" xlink:type="simple">Clarivate, "Web of Science," 2019. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://apps.webofknowledge.com" xlink:type="simple">https://apps.webofknowledge.com</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref035"><label>35</label><mixed-citation publication-type="other" xlink:type="simple">Elsevier, "Scopus," 2019. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://www.scopus.com/search/form.uri" xlink:type="simple">https://www.scopus.com/search/form.uri</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>DeCandia</surname> <given-names>G.</given-names></name> <etal>et al</etal>., "<article-title>Dynamo: amazon's highly available key-value store</article-title>," in <source><italic>ACM SIGOPS operating systems review</italic></source>, <year>2007</year>, vol. <volume>41</volume>, no. <issue>6</issue>: ACM, pp. <fpage>205</fpage>–<lpage>220</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <article-title>"The unified medical language system (UMLS): integrating biomedical terminology,"</article-title> <source><italic>Nucleic Acids Research</italic></source>, vol. <volume>32</volume>, no. <issue>suppl_1</issue>, pp. <fpage>D267</fpage>–<lpage>D270</lpage>, <year>2004</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref038"><label>38</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Krallinger</surname> <given-names>M.</given-names></name>, <name><surname>Leitner</surname> <given-names>F.</given-names></name>, <name><surname>Rabal</surname> <given-names>O.</given-names></name>, <name><surname>Vazquez</surname> <given-names>M.</given-names></name>, <name><surname>Oyarzabal</surname> <given-names>J.</given-names></name>, and <name><surname>Valencia</surname> <given-names>A.</given-names></name>, "<article-title>CHEMDNER: The drugs and chemical names extraction challenge</article-title>," <source><italic>Journal of Cheminformatics</italic></source>, vol. <volume>7</volume>, no. <issue>1</issue>, p. <fpage>S1</fpage>, <year>2015</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Li</surname> <given-names>J.</given-names></name> <etal>et al</etal>., "<article-title>DOSim: an R package for similarity between diseases based on disease ontology</article-title>," <source><italic>BMC bioinformatics</italic></source>, vol. <volume>12</volume>, no. <issue>1</issue>, p. <fpage>266</fpage>, <year>2011</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Mathur</surname> <given-names>S.</given-names></name> and <name><surname>Dinakarpandian</surname> <given-names>D.</given-names></name>, "<article-title>Automated ontological gene annotation for computing disease similarity</article-title>," <source><italic>Summit on Translational Bioinformatics</italic></source>, vol. <volume>2010</volume>, p. <fpage>12</fpage>, <year>2010</year>. <object-id pub-id-type="pmid">21347137</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref041"><label>41</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Savova</surname> <given-names>G. K.</given-names></name> <etal>et al</etal>., <article-title>"Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications,"</article-title> <source><italic>Journal of the American Medical Informatics Association</italic></source>, vol. <volume>17</volume>, no. <issue>5</issue>, pp. <fpage>507</fpage>–<lpage>513</lpage>, <year>2010</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/jamia.2009.001560" xlink:type="simple">10.1136/jamia.2009.001560</ext-link></comment> <object-id pub-id-type="pmid">20819853</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref042"><label>42</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>De Gruttola</surname> <given-names>V. G.</given-names></name> <etal>et al</etal>., "<article-title>Considerations in the evaluation of surrogate endpoints in clinical trials: summary of a National Institutes of Health workshop</article-title>," <source><italic>Controlled Clinical Trials</italic></source>, vol. <volume>22</volume>, no. <issue>5</issue>, pp. <fpage>485</fpage>–<lpage>502</lpage>, <year>2001</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/s0197-2456%2801%2900153-2" xlink:type="simple">10.1016/s0197-2456(01)00153-2</ext-link></comment> <object-id pub-id-type="pmid">11578783</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref043"><label>43</label><mixed-citation publication-type="other" xlink:type="simple">Y. Xia et al., "Combining MetaMap and cTAKES in Disorder Recognition: THCIB at CLEF eHealth Lab 2013 Task 1," in <italic>CLEF (Working Notes)</italic>, 2013.</mixed-citation></ref>
<ref id="pone.0238290.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Rocktäschel</surname> <given-names>T.</given-names></name>, <name><surname>Weidlich</surname> <given-names>M.</given-names></name>, and <name><surname>Leser</surname> <given-names>U.</given-names></name>, "<article-title>ChemSpot: a hybrid system for chemical named entity recognition</article-title>," <source><italic>Bioinformatics</italic></source>, vol. <volume>28</volume>, no. <issue>12</issue>, pp. <fpage>1633</fpage>–<lpage>1640</lpage>, <year>2012</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/bts183" xlink:type="simple">10.1093/bioinformatics/bts183</ext-link></comment> <object-id pub-id-type="pmid">22500000</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Heller</surname> <given-names>S.</given-names></name>, <name><surname>McNaught</surname> <given-names>A.</given-names></name>, <name><surname>Stein</surname> <given-names>S.</given-names></name>, <name><surname>Tchekhovskoi</surname> <given-names>D.</given-names></name>, and <name><surname>Pletnev</surname> <given-names>I.</given-names></name>, "<article-title>InChI-the worldwide chemical structure identifier standard</article-title>," <source><italic>Journal of Cheminformatics</italic></source>, vol. <volume>5</volume>, no. <issue>1</issue>, p. <fpage>7</fpage>, <year>2013</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1758-2946-5-7" xlink:type="simple">10.1186/1758-2946-5-7</ext-link></comment> <object-id pub-id-type="pmid">23343401</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Chambers</surname> <given-names>J.</given-names></name> <etal>et al</etal>., "<article-title>UniChem: a unified chemical structure cross-referencing and identifier tracking system</article-title>," <source><italic>Journal of Cheminformatics</italic></source>, vol. <volume>5</volume>, no. <issue>1</issue>, p. <fpage>3</fpage>, <year>2013</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1758-2946-5-3" xlink:type="simple">10.1186/1758-2946-5-3</ext-link></comment> <object-id pub-id-type="pmid">23317286</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Neves</surname> <given-names>M. L.</given-names></name>, <name><surname>Carazo</surname> <given-names>J.-M.</given-names></name>, and <name><surname>Pascual-Montano</surname> <given-names>A.</given-names></name>, "<article-title>Moara: a Java library for extracting and normalizing gene and protein mentions</article-title>," <source><italic>BMC bioinformatics</italic></source>, vol. <volume>11</volume>, no. <issue>1</issue>, p. <fpage>157</fpage>, <year>2010</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref048"><label>48</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Getz</surname> <given-names>K. A.</given-names></name> and <name><surname>Campo</surname> <given-names>R. A.</given-names></name>, "<article-title>Trial watch: trends in clinical trial design complexity</article-title>," ed: <publisher-name>Nature Publishing Group</publisher-name>, <year>2017</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Getz</surname> <given-names>K. A.</given-names></name>, <name><surname>Kim</surname> <given-names>J.</given-names></name>, <name><surname>Stergiopoulos</surname> <given-names>S.</given-names></name>, and <name><surname>Kaitin</surname> <given-names>K. I.</given-names></name>, "<article-title>New governance mechanisms to optimize protocol design</article-title>," <source><italic>Therapeutic Innovation &amp; Regulatory Science</italic></source>, vol. <volume>47</volume>, no. <issue>6</issue>, pp. <fpage>651</fpage>–<lpage>655</lpage>, <year>2013</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref050"><label>50</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Tian</surname> <given-names>Y.</given-names></name> and <name><surname>Patel</surname> <given-names>J. M.</given-names></name>, "<chapter-title>Tale: A tool for approximate large graph matching</chapter-title>," in <source><italic>2008 IEEE 24th International Conference on Data Engineering</italic></source>, <year>2008</year>: <publisher-name>IEEE</publisher-name>, pp. <fpage>963</fpage>–<lpage>972</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref051"><label>51</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Tong</surname> <given-names>H.</given-names></name>, <name><surname>Faloutsos</surname> <given-names>C.</given-names></name>, <name><surname>Gallagher</surname> <given-names>B.</given-names></name>, and <name><surname>Eliassi-Rad</surname> <given-names>T.</given-names></name>, "<chapter-title>Fast best-effort pattern matching in large attributed graphs</chapter-title>," in <source><italic>Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining</italic></source>, <year>2007</year>: <publisher-name>ACM</publisher-name>, pp. <fpage>737</fpage>–<lpage>746</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Tilkov</surname> <given-names>S.</given-names></name> and <name><surname>Vinoski</surname> <given-names>S.</given-names></name>, "<article-title>Node.js: Using JavaScript to build high-performance network programs</article-title>," <source><italic>IEEE Internet Computing</italic></source>, vol. <volume>14</volume>, no. <issue>6</issue>, pp. <fpage>80</fpage>–<lpage>83</lpage>, <year>2010</year>.</mixed-citation></ref>
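<ref id="pone.0238290.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Bostock</surname> <given-names>M.</given-names></name>, "<article-title>D3.js-data-driven documents (2016)</article-title>," <year>2016</year>. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://d3js.org/" xlink:type="simple">https://d3js.org/</ext-link>.</mixed-citation></ref>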
<ref id="pone.0238290.ref054"><label>54</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hausler</surname> <given-names>J.</given-names></name>, "<article-title>4 Major Patterns for Accessible Drag and Drop</article-title>," <year>2018</year>. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://medium.com/salesforce-ux/4-major-patterns-for-accessible-drag-and-drop-1d43f64ebf09" xlink:type="simple">https://medium.com/salesforce-ux/4-major-patterns-for-accessible-drag-and-drop-1d43f64ebf09</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref055"><label>55</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Andrews</surname> <given-names>S.</given-names></name> and <name><surname>Hirsch</surname> <given-names>L.</given-names></name>, "<chapter-title>A tool for creating and visualising formal concept trees</chapter-title>," in <source><italic>CEUR Workshop Proceedings</italic></source>, <year>2016</year>, vol. <volume>1637</volume>: <publisher-name>Tilburg University</publisher-name>, pp. <fpage>1</fpage>–<lpage>9</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref056"><label>56</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Morisset</surname> <given-names>C.</given-names></name> and <name><surname>Sanchez</surname> <given-names>D.</given-names></name>, "<chapter-title>VisABAC: A Tool for Visualising ABAC Policies</chapter-title>," in <source><italic>ICISSP</italic></source>, <year>2018</year>, pp. <fpage>117</fpage>–<lpage>126</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref057"><label>57</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Maarek</surname> <given-names>Y. S.</given-names></name>, <name><surname>Vortman</surname> <given-names>P.</given-names></name>, and <name><surname>Wecker</surname> <given-names>A. J.</given-names></name>, "<article-title>Interactive, tree structured, graphical visualization aid</article-title>," ed: <source>Google Patents</source>, <year>1999</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref058"><label>58</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Chow</surname> <given-names>S.-C.</given-names></name> and <name><surname>Liu</surname> <given-names>J.-p.</given-names></name>, <chapter-title><italic>Design and analysis of clinical trials: concepts and methodologies</italic></chapter-title>. <publisher-name>John Wiley &amp; Sons</publisher-name>, <year>2008</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref059"><label>59</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Storer</surname> <given-names>B. E.</given-names></name>, "<article-title>Design and analysis of phase I clinical trials</article-title>," <source><italic>Biometrics</italic></source>, vol. <volume>45</volume>, no. <issue>3</issue>, pp. <fpage>925</fpage>–<lpage>937</lpage>, <year>1989</year>. <object-id pub-id-type="pmid">2790129</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref060"><label>60</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Pocock</surname> <given-names>S. J.</given-names></name>, "<article-title>Current issues in the design and interpretation of clinical trials</article-title>," <source><italic>British Medical Journal</italic></source>, vol. <volume>290</volume>, no. <issue>6461</issue>, pp. <fpage>39</fpage>–<lpage>42</lpage>, <year>1985</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/bmj.290.6461.39" xlink:type="simple">10.1136/bmj.290.6461.39</ext-link></comment> <object-id pub-id-type="pmid">3917328</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref061"><label>61</label><mixed-citation publication-type="other" xlink:type="simple">U.S. National Library of Medicine, "Clinical Trial Registry Numbers in MEDLINE/PubMed Records," 2019. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://www.nlm.nih.gov/bsd/policy/clin_trials.html" xlink:type="simple">https://www.nlm.nih.gov/bsd/policy/clin_trials.html</ext-link>.</mixed-citation></ref>
<ref id="pone.0238290.ref062"><label>62</label><mixed-citation publication-type="book" xlink:type="simple"><name><surname>Goutte</surname> <given-names>C.</given-names></name> and <name><surname>Gaussier</surname> <given-names>E.</given-names></name>, "<chapter-title>A probabilistic interpretation of precision, recall and F-score, with implication for evaluation</chapter-title>," in <source><italic>European Conference on Information Retrieval</italic></source>, <year>2005</year>: <publisher-name>Springer</publisher-name>, pp. <fpage>345</fpage>–<lpage>359</lpage>.</mixed-citation></ref>
<ref id="pone.0238290.ref063"><label>63</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Likert</surname> <given-names>R.</given-names></name>, "<article-title>A technique for the measurement of attitudes</article-title>," <source><italic>Archives of Psychology</italic></source>, <year>1932</year>.</mixed-citation></ref>
<ref id="pone.0238290.ref064"><label>64</label><mixed-citation publication-type="journal" xlink:type="simple"><collab>ICH Harmonised Tripartite Guideline</collab>, "<article-title>Guideline for good clinical practice</article-title>," <source><italic>Journal of Postgraduate Medicine</italic></source>, vol. <volume>47</volume>, no. <issue>3</issue>, pp. <fpage>199</fpage>–<lpage>203</lpage>, <year>2001</year>. <object-id pub-id-type="pmid">11832625</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref065"><label>65</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Brown</surname> <given-names>E. G.</given-names></name>, <name><surname>Wood</surname> <given-names>L.</given-names></name>, and <name><surname>Wood</surname> <given-names>S.</given-names></name>, "<article-title>The medical dictionary for regulatory activities (MedDRA)</article-title>," <source><italic>Drug Safety</italic></source>, vol. <volume>20</volume>, no. <issue>2</issue>, pp. <fpage>109</fpage>–<lpage>117</lpage>, <year>1999</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2165/00002018-199920020-00002" xlink:type="simple">10.2165/00002018-199920020-00002</ext-link></comment> <object-id pub-id-type="pmid">10082069</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref066"><label>66</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Fung</surname> <given-names>K. W.</given-names></name>, <name><surname>Hole</surname> <given-names>W. T.</given-names></name>, <name><surname>Nelson</surname> <given-names>S. J.</given-names></name>, <name><surname>Srinivasan</surname> <given-names>S.</given-names></name>, <name><surname>Powell</surname> <given-names>T.</given-names></name>, and <name><surname>Roth</surname> <given-names>L.</given-names></name>, "<article-title>Integrating SNOMED CT into the UMLS: an exploration of different views of synonymy and quality of editing</article-title>," <source><italic>Journal of the American Medical Informatics Association</italic></source>, vol. <volume>12</volume>, no. <issue>4</issue>, pp. <fpage>486</fpage>–<lpage>494</lpage>, <year>2005</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1197/jamia.M1767" xlink:type="simple">10.1197/jamia.M1767</ext-link></comment> <object-id pub-id-type="pmid">15802483</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref067"><label>67</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Robinson</surname> <given-names>P. N.</given-names></name>, <name><surname>Köhler</surname> <given-names>S.</given-names></name>, <name><surname>Bauer</surname> <given-names>S.</given-names></name>, <name><surname>Seelow</surname> <given-names>D.</given-names></name>, <name><surname>Horn</surname> <given-names>D.</given-names></name>, and <name><surname>Mundlos</surname> <given-names>S.</given-names></name>, "<article-title>The Human Phenotype Ontology: a tool for annotating and analyzing human hereditary disease</article-title>," <source><italic>The American Journal of Human Genetics</italic></source>, vol. <volume>83</volume>, no. <issue>5</issue>, pp. <fpage>610</fpage>–<lpage>615</lpage>, <year>2008</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2008.09.017" xlink:type="simple">10.1016/j.ajhg.2008.09.017</ext-link></comment> <object-id pub-id-type="pmid">18950739</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref068"><label>68</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Schirle</surname> <given-names>M.</given-names></name> and <name><surname>Jenkins</surname> <given-names>J. L.</given-names></name>, "<article-title>Identifying compound efficacy targets in phenotypic drug discovery</article-title>," <source><italic>Drug Discovery Today</italic></source>, vol. <volume>21</volume>, no. <issue>1</issue>, pp. <fpage>82</fpage>–<lpage>89</lpage>, <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.drudis.2015.08.001" xlink:type="simple">10.1016/j.drudis.2015.08.001</ext-link></comment> <object-id pub-id-type="pmid">26272035</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref069"><label>69</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Iskar</surname> <given-names>M.</given-names></name>, <name><surname>Zeller</surname> <given-names>G.</given-names></name>, <name><surname>Zhao</surname> <given-names>X.-M.</given-names></name>, <name><surname>van Noort</surname> <given-names>V.</given-names></name>, and <name><surname>Bork</surname> <given-names>P.</given-names></name>, "<article-title>Drug discovery in the age of systems biology: the rise of computational approaches for data integration</article-title>," <source><italic>Current Opinion in Biotechnology</italic></source>, vol. <volume>23</volume>, no. <issue>4</issue>, pp. <fpage>609</fpage>–<lpage>616</lpage>, <year>2012</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.copbio.2011.11.010" xlink:type="simple">10.1016/j.copbio.2011.11.010</ext-link></comment> <object-id pub-id-type="pmid">22153034</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref070"><label>70</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Cheng</surname> <given-names>F.</given-names></name>, <name><surname>Kovács</surname> <given-names>I. A.</given-names></name>, and <name><surname>Barabási</surname> <given-names>A.-L.</given-names></name>, "<article-title>Network-based prediction of drug combinations</article-title>," <source><italic>Nature Communications</italic></source>, vol. <volume>10</volume>, no. <issue>1</issue>, p. <fpage>1197</fpage>, 2019/03/13 <year>2019</year>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41467-019-09186-x" xlink:type="simple">10.1038/s41467-019-09186-x</ext-link></comment> <object-id pub-id-type="pmid">30867426</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref071"><label>71</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Guney</surname> <given-names>E.</given-names></name>, <name><surname>Menche</surname> <given-names>J.</given-names></name>, <name><surname>Vidal</surname> <given-names>M.</given-names></name>, and <name><surname>Barabási</surname> <given-names>A.-L.</given-names></name>, "<article-title>Network-based in silico drug efficacy screening</article-title>," <source><italic>Nature Communications</italic></source>, vol. <volume>7</volume>, p. <fpage>10331</fpage>, <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ncomms10331" xlink:type="simple">10.1038/ncomms10331</ext-link></comment> <object-id pub-id-type="pmid">26831545</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref072"><label>72</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Hopkins</surname> <given-names>A. L.</given-names></name>, "<article-title>Network pharmacology: the next paradigm in drug discovery</article-title>," <source><italic>Nature Chemical Biology</italic></source>, vol. <volume>4</volume>, no. <issue>11</issue>, p. <fpage>682</fpage>, <year>2008</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nchembio.118" xlink:type="simple">10.1038/nchembio.118</ext-link></comment> <object-id pub-id-type="pmid">18936753</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref073"><label>73</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>van Hasselt</surname> <given-names>J. C.</given-names></name> and <name><surname>Iyengar</surname> <given-names>R.</given-names></name>, "<article-title>Systems pharmacology: defining the interactions of drug combinations</article-title>," <source><italic>Annual review of pharmacology and toxicology</italic></source>, vol. <volume>59</volume>, pp. <fpage>21</fpage>–<lpage>40</lpage>, <year>2019</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1146/annurev-pharmtox-010818-021511" xlink:type="simple">10.1146/annurev-pharmtox-010818-021511</ext-link></comment> <object-id pub-id-type="pmid">30260737</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref074"><label>74</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>Park</surname> <given-names>J.</given-names></name>, <name><surname>Kim</surname> <given-names>K.</given-names></name>, <name><surname>Hwang</surname> <given-names>W.</given-names></name>, and <name><surname>Lee</surname> <given-names>D.</given-names></name>, "<article-title>Concept embedding to measure semantic relatedness for biomedical information ontologies</article-title>," <source><italic>Journal of biomedical informatics</italic></source>, vol. <volume>94</volume>, p. <fpage>103182</fpage>, <year>2019</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2019.103182" xlink:type="simple">10.1016/j.jbi.2019.103182</ext-link></comment> <object-id pub-id-type="pmid">31009761</object-id></mixed-citation></ref>
<ref id="pone.0238290.ref075"><label>75</label><mixed-citation publication-type="journal" xlink:type="simple"><name><surname>McFee</surname> <given-names>B.</given-names></name> and <name><surname>Lanckriet</surname> <given-names>G. R.</given-names></name>, "<article-title>Metric learning to rank</article-title>," in <source><italic>Proceedings of the 27th International Conference on Machine Learning (ICML-10)</italic></source>, <year>2010</year>, pp. <fpage>775</fpage>–<lpage>782</lpage>.</mixed-citation></ref>
</ref-list>
</back>
</article>