<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "JATS-journalpublishing1.dtd">

<!--

File produced by pipelineRunner package (for JATS 2 SCJATS with pipeline SCJATS)
At: 2024-12-11T14:17:11.726Z

Version        : 1.16.1
Last update    : 2024-08-27
Modified by    : dunnm

-->
<article  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  article-type="research-article" xml:lang="en">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">rasti</journal-id>
      <journal-title-group>
        <journal-title>RAS Techniques and Instruments</journal-title>
        <abbrev-journal-title abbrev-type="pubmed">RAS Tech. Instrum.</abbrev-journal-title>
        <abbrev-journal-title abbrev-type="publisher">RASTI</abbrev-journal-title>
      </journal-title-group>
      <issn pub-type="epub">2752-8200</issn>
      <publisher>
        <publisher-name>Oxford University Press</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.1093/rasti/rzae025</article-id>
      <article-id pub-id-type="publisher-id">rzae025</article-id>
      <article-categories>
        <subj-group subj-group-type="category-toc-heading">
          <subject>Article</subject>
          <subj-group subj-group-type="category-toc-heading">
            <subject>Instrumentation, Detectors and Telescopes</subject>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="category-taxonomy-collection">
          <subject>rasti/6</subject>
        </subj-group>
        <subj-group subj-group-type="category-taxonomy-collection">
          <subject>AcademicSubjects/SCI00010</subject>
          <subject>AcademicSubjects/SCI01970</subject>
          <subject>AcademicSubjects/SCI00020</subject>
          <subject>AcademicSubjects/SCI02275</subject>
          <subject>AcademicSubjects/SCI00380</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Enhanced Bayesian RFI mitigation and transient flagging using likelihood reweighting</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1742-7417</contrib-id>
          <name>
            <surname>Anstey</surname>
            <given-names>Dominic</given-names>
          </name>
          <email xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple">da401@cam.ac.uk</email>
          <aff>
            <institution>Astrophysics Group, Cavendish Laboratory</institution>, <addr-line>J. J. Thomson Avenue, Cambridge CB3 0HE</addr-line>, <country country="GB">UK</country></aff>
          <aff>
            <institution>Kavli Institute for Cosmology</institution>, <addr-line>Madingley Road, Cambridge CB3 0HA</addr-line>, <country country="GB">UK</country></aff>
          <xref ref-type="corresp" rid="cor1" />
        </contrib>
        <contrib contrib-type="author">
          <contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4366-1119</contrib-id>
          <name>
            <surname>Leeney</surname>
            <given-names>Samuel A K</given-names>
          </name>
          <aff>
            <institution>Astrophysics Group, Cavendish Laboratory</institution>, <addr-line>J. J. Thomson Avenue, Cambridge CB3 0HE</addr-line>, <country country="GB">UK</country></aff>
          <aff>
            <institution>Kavli Institute for Cosmology</institution>, <addr-line>Madingley Road, Cambridge CB3 0HA</addr-line>, <country country="GB">UK</country></aff>
        </contrib>
      </contrib-group>
      <author-notes>
        <corresp id="cor1">E-mail: <email xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple">da401@cam.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="cover">
        <month>January</month>
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="collection" iso-8601-date="2024-01-05">
        <day>05</day>
        <month>01</month>
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub" iso-8601-date="2024-07-05">
        <day>05</day>
        <month>07</month>
        <year>2024</year>
      </pub-date>
      <volume>3</volume>
      <issue>1</issue>
      <fpage>372</fpage>
      <lpage>384</lpage>
      <supplementary-material xmlns:xlink="http://www.w3.org/1999/xlink" id="sup1" content-type="data-supplement" xlink:href="rzae025_supplemental_files.zip" mimetype="text">
        <label>rzae025_Supplemental_Files</label>
      </supplementary-material>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>05</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>03</day>
          <month>07</month>
          <year>2024</year>
        </date>
        <date date-type="corrected-typeset">
          <day>30</day>
          <month>07</month>
          <year>2024</year>
        </date>
      </history>
      <permissions>
        <copyright-statement>© 2024 The Author(s). Published by Oxford University Press on behalf of Royal Astronomical Society.</copyright-statement>
        <copyright-year>2024</copyright-year>
        <license xmlns:xlink="http://www.w3.org/1999/xlink" license-type="cc-by" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
        </license>
      </permissions>
      <self-uri xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025.pdf" />
      <abstract abstract-type="abstract">
        <title>Abstract</title>
        <p>Contamination by radio frequency interference (RFI) is a ubiquitous challenge for radio astronomy. In particular, transient RFI is difficult to detect and avoid, especially in large data sets with many time bins. In this work, we present a Bayesian methodology for time-dependent, transient anomaly mitigation performed jointly with model fitting. The computation time for correcting transient anomalies in this manner in time-separated data sets grows proportionally with the number of time bins. We demonstrate that utilizing likelihood reweighting can allow our Bayesian anomaly mitigation method to be performed with a computation time close to independent of the number of time bins. In particular, we identify a factor of 44 improvement in computation time for a test case with 2000 time bins. We also demonstrate how this method enables the flagging threshold to be fit as a free parameter, fully automating the mitigation process. We find that this threshold fitting also prevents overcorrecting of the data in the case of wide priors. Finally, we investigate the potential of the methodology as a transient detector. We demonstrate that the method is able to reliably flag an individual anomalous data point out of 302 000 provided the Signal to Noise Ratio is <inline-formula><tex-math id="TM0001" notation="LaTeX"><![CDATA[$\ge\!{10}$]]></tex-math></inline-formula>.</p>
      </abstract>
      <kwd-group>
        <kwd>Data Methods</kwd>
        <kwd>Bayesian</kwd>
        <kwd>Transients</kwd>
      </kwd-group>
      <funding-group>
        <award-group award-type="grant">
          <funding-source>
            <institution-wrap>
              <institution>Science and Technology Facilities Council</institution>
              <institution-id institution-id-type="DOI">10.13039/501100000271</institution-id>
            </institution-wrap>
          </funding-source>
        </award-group>
        <award-group award-type="grant">
          <funding-source>
            <institution-wrap>
              <institution>European Research Council</institution>
              <institution-id institution-id-type="DOI">10.13039/100010663</institution-id>
            </institution-wrap>
          </funding-source>
        </award-group>
        <award-group award-type="grant">
          <funding-source>
            <institution-wrap>
              <institution>UKRI</institution>
              <institution-id institution-id-type="DOI">10.13039/100014013</institution-id>
            </institution-wrap>
          </funding-source>
        </award-group>
      </funding-group>
      <counts>
        <page-count count="13" />
      </counts>
    </article-meta>
  </front>
  <body>
    <sec id="sec1" sec-type="intro">
      <label>1.</label>
      <title>INTRODUCTION</title>
      <p>The field of radio astronomy has been rapidly growing in terms of both reach and data complexity. The number of known radio sources has been exponentially increasing, and will increase further in the future with the development of the Square Kilometre Array (SKA) (Braun et al. <xref ref-type="bibr" rid="bib6">2015</xref>) and the next-generation Very Large Array (ngVLA) (McKinnon et al. <xref ref-type="bibr" rid="bib18">2019</xref>). As the volume of information on the sky increases, astronomers also seek increasingly faint signals requiring more sensitive instruments and advanced data analysis techniques.</p>
      <p>With the development of modern telecommunications devices, data from radio telescopes are becoming increasingly contaminated with interfering, anomalous signals such as radio frequency interference (RFI; Arrubarrena et al. <xref ref-type="bibr" rid="bib3">2024</xref>), which has become very difficult to avoid entirely (Fridman &amp; Baan <xref ref-type="bibr" rid="bib13">2001</xref>; Pritchard et al. <xref ref-type="bibr" rid="bib25">2024</xref>), except in observations from extremely remote locations (Monsalve et al. <xref ref-type="bibr" rid="bib20">2024</xref>). This problem is worsened by the fact that current signals of interest, such as the Global 21-cm signals (Bowman et al. <xref ref-type="bibr" rid="bib5">2018</xref>; de Lera Acedo et al. <xref ref-type="bibr" rid="bib10">2022</xref>; Singh et al. <xref ref-type="bibr" rid="bib31">2022</xref>; Razavi-Ghods et al. <xref ref-type="bibr" rid="bib26">2023</xref>; Monsalve et al. <xref ref-type="bibr" rid="bib20">2024</xref>), lie in the same unprotected frequency bands as said devices.</p>
      <p>RFI can emanate from a range of human-made sources, such as communication devices, satellites, and radar systems. It poses a significant challenge for radio astronomy, as it can obscure or mimic genuine celestial signals. RFI can be constant in time or transient (Czech, Mishra &amp; Inggs <xref ref-type="bibr" rid="bib8">2018a</xref>). Transient RFI is particularly problematic because it is hard to detect and thus much more difficult to avoid. The SKA will gather up to 1 TB of data per second (Scaife <xref ref-type="bibr" rid="bib29">2020</xref>). With the volume of data to be analysed so large and the level of complex contaminants so high, there is a serious need for new data analysis techniques that are highly efficient and sensitive to such transient events.</p>
      <p>There are various proposed ways to mitigate RFI. Offringa et al. (<xref ref-type="bibr" rid="bib23">2010</xref>) use post-correlation classification methods in <sc>aoflagger</sc>, which is used by the LOFAR (Röttgering <xref ref-type="bibr" rid="bib28">2003</xref>). The <italic>FAST</italic> (Nan <xref ref-type="bibr" rid="bib21">2006</xref>) telescope uses spatial filtering techniques (Wang et al. <xref ref-type="bibr" rid="bib33">2022</xref>). More recently, deep learning methods have been utilized (Kerrigan et al. <xref ref-type="bibr" rid="bib16">2019</xref>), latent nearest neighbours used to distinguish RFI by learning uncontaminated data (Mesarcik et al. <xref ref-type="bibr" rid="bib19">2022</xref>), and Bayesian methods used to calibrate satellite RFI based on trajectories (Finlay et al. <xref ref-type="bibr" rid="bib11">2023</xref>). For a more in-depth review of the current literature, we recommend Ford &amp; Buch (<xref ref-type="bibr" rid="bib12">2014</xref>) or Baan (<xref ref-type="bibr" rid="bib4">2019</xref>). There are few methods designed specifically for transient RFI detection, as noted by Czech, Mishra &amp; Inggs (<xref ref-type="bibr" rid="bib9">2018b</xref>), who propose a dictionary-based approach to transient RFI detection.</p>
      <p>Transient RFI is exceptionally problematic when the signal of interest is itself transient. For example, a transient RFI burst could not only obscure a signal [such as a fast radio burst (FRB)] but also mimic it, leading to a false detection (Cendes et al. <xref ref-type="bibr" rid="bib7">2018</xref>). A satellite passing over the telescope, for example, could lead to such a transient anomaly (Finlay et al. <xref ref-type="bibr" rid="bib11">2023</xref>). This problem can be partially addressed using spectral kurtosis (Nita, Keimpema &amp; Paragi <xref ref-type="bibr" rid="bib22">2019</xref>). However, spectral kurtosis is inadequate in various cases as described in Smith, Lynch &amp; Pisano (<xref ref-type="bibr" rid="bib32">2022</xref>). Furthermore, with many modern projects utilizing Bayesian methods in their data analysis pipelines, there is an urgent need for Bayesian RFI mitigation techniques.</p>
      <p>In this paper, we present a novel Bayesian anomaly detection methodology that is efficient and sensitive to transient anomalies as well as time constant anomalies. In Section <xref ref-type="sec" rid="sec2">2</xref>, we define the method. In Section <xref ref-type="sec" rid="sec3">3</xref>, we test our methods on a simple toy model. In Section <xref ref-type="sec" rid="sec4">4</xref>, we evaluate these methods when used to locate transient signals themselves, as well as mitigate against them. Finally, in Section <xref ref-type="sec" rid="sec5">5</xref>, we present our conclusions.</p>
    </sec>
    <sec id="sec2" sec-type="materials|methods">
      <label>2.</label>
      <title>METHODS</title>
      <sec id="sec2-1">
        <label>2.1</label>
        <title>Bayesian anomaly mitigation</title>
        <p>Leeney, Handley &amp; Acedo (<xref ref-type="bibr" rid="bib17">2023</xref>) proposed a fully Bayesian methodology for simultaneous anomaly flagging and excision, which is performed jointly with the primary model fit to the data, and can be readily folded into Bayesian analysis pipelines via a simple modification to their likelihood. The term <italic>likelihood</italic>, in the context of a single data point, defines the probability of observing that data point <inline-formula><tex-math id="TM0002" notation="LaTeX"><![CDATA[$\mathcal {D}_i$]]></tex-math></inline-formula> given some model and its constituent parameters <inline-formula><tex-math id="TM0003" notation="LaTeX"><![CDATA[$\mathcal {M}_i\left(\theta \right)$]]></tex-math></inline-formula>. This assumes a predefined probability distribution for the noise. For instance, assuming uncorrelated Gaussian noise results in a likelihood of the form</p>
        <disp-formula id="update1720681453889">
          <label>(1)</label>
          <tex-math id="TM0004" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \mathcal {L}_i\left(\theta \right) = -\frac{1}{2}\log \left(2\pi \sigma _\mathrm{n}^{2}\right) - \frac{1}{2}\left(\frac{\mathcal {D}_i - \mathcal {M}_i\left(\theta \right) }{\sigma _\mathrm{n}}\right)^{2},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0005" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula> is the parameter(s) and <inline-formula><tex-math id="TM0006" notation="LaTeX"><![CDATA[$\sigma _\mathrm{n}$]]></tex-math></inline-formula> is the noise amplitude. Alternatively, for a data set containing <inline-formula><tex-math id="TM0007" notation="LaTeX"><![CDATA[$N_x$]]></tex-math></inline-formula> data points, and a corresponding model with a single parameter set <inline-formula><tex-math id="TM0008" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula> for all points, assuming no correlations, the overall likelihood is the product of the probabilities of each datum (or alternatively, the sum of log probabilities), giving</p>
        <disp-formula id="update1720681549014">
          <label>(2)</label>
          <tex-math id="TM0009" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \mathcal {L}\left(\theta \right) = \sum _i^{N_x} \log \mathcal {L}_i\left(\theta \right),
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <italic>i</italic> indexes over the data points. In many cases, this will be an index over a variable <italic>x</italic>. For the examples in this work, this is taken to be observation frequency.</p>
        <p>However, this likelihood cannot account for isolated data points that do not match the probability distribution of the rest of the data set. Throughout this work, we refer to such data points as ‘anomalous’. Contamination by RFI is a common source of such anomalous data points. Typically, anomalies are flagged and excised prior to the Bayesian fitting procedure. This can be problematic, as it leads to potentially useful information being thrown away. Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) showed that anomalies can be modelled simultaneously in a Bayesian fashion by using a piece-wise likelihood capable of modelling both the probability of abnormality and the probability that each datum fits to the model of interest.</p>
        <p>This was achieved in Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) by first defining a model in which, if a data point is contaminated, the probability of observing that data becomes independent of the parameters. It is instead assumed to be uniform over a defined scale <inline-formula><tex-math id="TM0012" notation="LaTeX"><![CDATA[$[0-\Delta ]$]]></tex-math></inline-formula>, where <inline-formula><tex-math id="TM0013" notation="LaTeX"><![CDATA[$\Delta$]]></tex-math></inline-formula> describes the scale of the contamination, in the same units as those of the data, and can typically be set to <inline-formula><tex-math id="TM0014" notation="LaTeX"><![CDATA[$\Delta \approx {\small MAX}$]]></tex-math></inline-formula> (data). The probability of observing a given data point given the model then depends on whether that point is contaminated or not, as</p>
        <disp-formula id="update1721908713556">
          <label>(3)</label>
          <tex-math id="TM0015" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i|\theta ) = \left\lbrace \begin{array}{@{}l@{\quad }l@{}}\mathcal {L}_i(\theta ), & \text{uncontaminated}\\
\\
\Delta ^{-1}, & \text{contaminated}. \end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>This can be expressed with a Boolean mask to identify each data point as contaminated or not:</p>
        <disp-formula id="update1720524892058">
          <label>(4)</label>
          <tex-math id="TM0016" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i|\theta , \epsilon ) = \prod _i {\mathcal {L}_i(\theta )}^{\epsilon _i} \left(\Delta ^{-1}\right)^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Here, <inline-formula><tex-math id="TM0017" notation="LaTeX"><![CDATA[$\epsilon _i=1$]]></tex-math></inline-formula> indicates uncontaminated data and <inline-formula><tex-math id="TM0018" notation="LaTeX"><![CDATA[$\epsilon _i=0$]]></tex-math></inline-formula> indicates contaminated data.</p>
        <p>As it is not necessarily known which data points are contaminated a priori, a probability can be assigned to each given epsilon mask. This gives the probability of observing the data point, given the model, as</p>
        <disp-formula id="equ5">
          <label>(5)</label>
          <tex-math id="TM0019" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i, \epsilon |\theta ) = \prod _i [\mathcal {L}_i(\theta )P(\epsilon _i=1)]^{\epsilon _i} [\Delta ^{-1}P(\epsilon _i=0)]^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) then makes the assumption that the probability of data points being contaminated are uncorrelated, with a probability <inline-formula><tex-math id="TM0020" notation="LaTeX"><![CDATA[$p_i$]]></tex-math></inline-formula> that any individual point is contaminated, such that</p>
        <disp-formula id="equ6">
          <label>(6)</label>
          <tex-math id="TM0021" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\epsilon _i=0) = p_i
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>and</p>
        <disp-formula id="equ7">
          <label>(7)</label>
          <tex-math id="TM0022" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\epsilon _i=1) = 1-p_i.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Substituting in these values into equation (<xref ref-type="disp-formula" rid="equ5">5</xref>) gives</p>
        <disp-formula id="equ8">
          <label>(8)</label>
          <tex-math id="TM0023" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i, \epsilon |\theta ) = \prod _i [\mathcal {L}_i(\theta )(1-p_i)]^{\epsilon _i} [p_i/\Delta ]^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Ideally, <inline-formula><tex-math id="TM0024" notation="LaTeX"><![CDATA[$\epsilon _i$]]></tex-math></inline-formula> should be marginalized over. However, this is computationally impractical in most cases. Therefore, Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) makes the approximation that the <inline-formula><tex-math id="TM0025" notation="LaTeX"><![CDATA[$\epsilon _i$]]></tex-math></inline-formula> that returns the highest likelihood dominates in the marginalization. This highest likelihood mask is given by</p>
        <disp-formula id="equ9">
          <label>(9)</label>
          <tex-math id="TM0026" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\epsilon _{i} = \left\lbrace \begin{array}{@{}l@{\quad }l@{}}1, & \mathcal {L}_{i}(1-p_{i}) \gt p_{i}/ \Delta \\
\\
0, & \text{otherwise}. \end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Therefore, the final likelihood can be expressed as</p>
        <disp-formula id="equ10">
          <label>(10)</label>
          <tex-math id="TM0027" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log {\mathcal{L}}(\theta ) = \sum _i \left\lbrace \begin{array}{@{}l@{\quad }l@{}}
\log {\mathcal{L}}_{i} + \log (1-p_{i}), & \log {\mathcal{L}}_{i} + \log (1-p_{i}) \\
& \gt \log p_{i} -\log \Delta \\
\log p_{i} -\log \Delta , & \text{otherwise}.
\end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>In practice, this likelihood operates by removing very low likelihood data points from the overall likelihood and replacing them with an Occam’s penalty. This is because the Bayesian evidence favours the simplest solution that best describes the data, so without the penalty the ‘most likely’ solution would be to flag all of the data as anomalous.</p>
        <p>Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) showed that this likelihood enabled anomalous data points to be efficiently flagged and corrected for automatically in a Bayesian model fit of a one-dimensional (1D) data set. The focus of this work is extending this methodology for application to 2D, time-varying data sets.</p>
      </sec>
      <sec id="sec2-2">
        <label>2.2</label>
        <title>Time-binned modelling</title>
        <p>Anstey, de Lera Acedo &amp; Handley (<xref ref-type="bibr" rid="bib2">2023</xref>) proposed a methodology for efficient inclusion of time-dependent data sets in Bayesian modelling. Typically, in the case of time-varying data, modelling each time bin separately is unfeasible as it requires a complete model with its own set of parameters for each bin, which can quickly result in the dimensionality of the fit becoming very large for more than a few time bins. As a result, time-varying data are typically modelled by fitting a model to the time averaged data set. For example, in the case of the uncorrelated Gaussian likelihood described in equations (<xref ref-type="disp-formula" rid="update1720681453889">1</xref>) and (<xref ref-type="disp-formula" rid="update1720681549014">2</xref>), a time-averaged likelihood would take the form</p>
        <disp-formula id="equ11">
          <label>(11)</label>
          <tex-math id="TM0028" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \mathcal {L} \left(\theta \right) = \sum _i -\frac{1}{2}\log \left(2\pi \sigma _\mathrm{n}^{2}\right) - \frac{1}{2}\left(\frac{\frac{1}{N_t}\sum _j \mathcal {D}_{ij} - \mathcal {M}_i\left(\theta \right) }{\sigma _\mathrm{n}}\right)^{2}, \\
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <italic>j</italic> indexes time bins and <inline-formula><tex-math id="TM0030" notation="LaTeX"><![CDATA[$N_t$]]></tex-math></inline-formula> is the total number of time bins.</p>
        <p>However, in Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>), the case was considered where a time varying model can be defined as a product of a parameter-dependent component and a parameter-independent component, where only the parameter-independent component has time dependence, of the form</p>
        <disp-formula id="equ12">
          <label>(12)</label>
          <tex-math id="TM0031" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {M}_{ij} \left(\theta \right) = \mathcal {F}_i \left(\theta \right) \mathcal {G}_{ij}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>If it is the case that the time-dependent component <inline-formula><tex-math id="TM0032" notation="LaTeX"><![CDATA[$\mathcal {G}_{ij}$]]></tex-math></inline-formula>, despite being independent of the specific value of the parameters, is required to be different for the different parameters in <inline-formula><tex-math id="TM0033" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula>, which will be the case if different parameters are describing different effects, this can be expressed more fully as</p>
        <disp-formula id="equ13">
          <label>(13)</label>
          <tex-math id="TM0034" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {M}_{ij} \left(\theta _k\right) = \mathcal {F}_i \left(\theta _k\right) \mathcal {G}_{ijk},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <italic>k</italic> indexes over the parameters and <inline-formula><tex-math id="TM0036" notation="LaTeX"><![CDATA[$\theta _k$]]></tex-math></inline-formula> expresses the individual elements of the vector <inline-formula><tex-math id="TM0037" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula>.</p>
        <p>An example of a case where a model of this form can be defined is for a set of parameters defining an astrophysical observable that is constant on the time-scales of the experiment, with time variance only being introduced by the rotation of the Earth. Another possibility would be a data set of just noise with transient signals, which would simply have a model of <inline-formula><tex-math id="TM0038" notation="LaTeX"><![CDATA[$\mathcal {M}_{ij}\left(\theta \right) = 0$]]></tex-math></inline-formula>, which still satisfies this condition.</p>
        <p>It should be noted that time dependence is not the only way a model can be factorized in the manner shown in equation (<xref ref-type="disp-formula" rid="equ13">13</xref>). For example, if the frequency component of the model were parameter independent, it could be factorized instead. Provided their effect on the model is not dependent on the parameters being fit for, it may also be possible to factorize polarization, or even different instruments in this fashion. The fast anomaly mitigation techniques described in Sections <xref ref-type="sec" rid="sec2-3">2.3</xref> and <xref ref-type="sec" rid="sec2-4">2.4</xref> could then be equivalently applied in such cases. However, for the purposes of this work, we will focus on time-dependence as a test case.</p>
        <p>If such a model can be defined, it becomes possible to implement simultaneous fitting of separate time bins of data to corresponding models without the dimensionality of the parameters increasing, as each time bin fits for the same parameter set. Such a fitting process can be implemented by modifying the likelihood shown in equation (<xref ref-type="disp-formula" rid="equ11">11</xref>) to</p>
        <disp-formula id="update1720681604823">
          <label>(14)</label>
          <tex-math id="TM0039" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \mathcal {L} \left(\theta \right) = \sum _{ij} -\frac{1}{2}\log \left(2\pi \sigma _\mathrm{n}^{2}\right) - \frac{1}{2}\left(\frac{\mathcal {D}_{ij} - \mathcal {M}_{ij}\left(\theta \right) }{\sigma _\mathrm{n}}\right)^{2}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>It was demonstrated in Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>), in the context of global 21-cm experiments, that using this full time-dependent likelihood in a Bayesian model fit enabled the time variance of the model to be exploited to constrain the model parameters more tightly than could be achieved for a time-averaged model.</p>
      </sec>
      <sec id="sec2-3">
        <label>2.3</label>
        <title>Time-binned anomaly mitigation</title>
        <p>Applying the Bayesian anomaly mitigation technique described in Section <xref ref-type="sec" rid="sec2-1">2.1</xref> to the time-dependent likelihood described in Section <xref ref-type="sec" rid="sec2-2">2.2</xref> is straightforward, requiring only that the flagged likelihood shown in equation (<xref ref-type="disp-formula" rid="equ10">10</xref>) be extended into two dimensions as</p>
        <disp-formula id="update1720681653667">
          <label>(15)</label>
          <tex-math id="TM0040" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log {\mathcal{L}}(\theta ) = \sum _{ij} \left\lbrace \begin{array}{@{}l@{\quad }l@{}}
\log {\mathcal{L}}_{ij} + \log (1-p_{ij}), & \log {\mathcal{L}}_{ij} + \log (1-p_{ij}) \\
& \gt \log p_{ij} -\log \Delta \\
\log p_{ij} -\log \Delta , & \text{otherwise},
\end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0041" notation="LaTeX"><![CDATA[$\log {\mathcal {L}_{ij}}$]]></tex-math></inline-formula> is the likelihood of a single data point in a single time bin</p>
        <disp-formula id="equ16">
          <label>(16)</label>
          <tex-math id="TM0042" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \mathcal {L}_{ij} \left(\theta \right) = -\frac{1}{2}\log \left(2\pi \sigma _\mathrm{n}^{2}\right) - \frac{1}{2}\left(\frac{\mathcal {D}_{ij} - \mathcal {M}_{ij}\left(\theta \right) }{\sigma _\mathrm{n}}\right)^{2}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>However, implementing this method in practice faces a challenge. The likelihood shown in equation (<xref ref-type="disp-formula" rid="update1720681604823">14</xref>) requires a summation over time bins. This means the computation time of the likelihood grows linearly with the number of time bins used in the data set. As a result, larger numbers of time bins can greatly slow the fitting procedure.</p>
        <p>In Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>), this issue was resolved by reformatting the likelihood such that all summations over time could be calculated once, outside of the likelihood, removing the dependence of the calculation time on the number of time bins. However, this solution is not possible to implement when the anomaly correcting procedure is also implemented. This is because the value of <inline-formula><tex-math id="TM0043" notation="LaTeX"><![CDATA[$\log {\mathcal {L}_{ij}}$]]></tex-math></inline-formula> must be calculated for every time bin within the likelihood, in order to evaluate <inline-formula><tex-math id="TM0044" notation="LaTeX"><![CDATA[$\log \mathcal {L}(\theta )$]]></tex-math></inline-formula>, as shown in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>). Therefore, an alternative method of speeding the likelihood evaluation is needed to make this process viable in practice. This can be achieved using likelihood reweighting.</p>
      </sec>
      <sec id="sec2-4">
        <label>2.4</label>
        <title>Likelihood reweighting</title>
        <p>The process of likelihood reweighting is an extension to importance sampling, pioneered in the context of gravitational waves (Payne, Talbot &amp; Thrane <xref ref-type="bibr" rid="bib24">2019</xref>; Romero-Shaw, Lasky &amp; Thrane <xref ref-type="bibr" rid="bib27">2019</xref>). It is a method for speeding the evaluation of a posterior and evidence in a Bayesian fit for the case of a complex model that is otherwise slow to evaluate. This process relies on several key criteria.</p>
        <p>Firstly, two models, <inline-formula><tex-math id="TM0045" notation="LaTeX"><![CDATA[$\mathcal {M}_\mathrm{F}\left(\theta \right)$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0046" notation="LaTeX"><![CDATA[$\mathcal {M}_\mathrm{S}\left(\theta \right)$]]></tex-math></inline-formula>, are required. One of these must be quick to evaluate, which will henceforth be assumed to be model F, and one is slower to evaluate, which we define as model S. These two models must be parametrized by the same parameter vector <inline-formula><tex-math id="TM0047" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula>, with the same prior distribution <inline-formula><tex-math id="TM0048" notation="LaTeX"><![CDATA[$\mathcal {\pi }\left(\theta \right)$]]></tex-math></inline-formula>. They must also have their posterior peak in approximately the same region of parameter space.</p>
        <p>By definition, the posteriors of the two models can be expressed as</p>
        <disp-formula id="equ17">
          <label>(17)</label>
          <tex-math id="TM0049" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {P}_\mathrm{F} \left(\theta | \mathcal {D}, \mathcal {M}_\mathrm{F}\right) = \frac{\mathcal {L}_\mathrm{F}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{F}\right)\mathcal {\pi }\left(\theta \right)}{\mathcal {Z}_\mathrm{F}}
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>and</p>
        <disp-formula id="equ18">
          <label>(18)</label>
          <tex-math id="TM0050" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {P}_\mathrm{S} \left(\theta | \mathcal {D}, \mathcal {M}_\mathrm{S}\right) = \frac{\mathcal {L}_\mathrm{S}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{S}\right)\mathcal {\pi }\left(\theta \right)}{\mathcal {Z}_\mathrm{S}},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0051" notation="LaTeX"><![CDATA[$\mathcal {L}_\mathrm{F}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{F}\right)$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0052" notation="LaTeX"><![CDATA[$\mathcal {L}_\mathrm{S}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{S}\right)$]]></tex-math></inline-formula> are the likelihoods calculated from the two models and <inline-formula><tex-math id="TM0053" notation="LaTeX"><![CDATA[$\mathcal {Z}_\mathrm{F}$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0054" notation="LaTeX"><![CDATA[$\mathcal {Z}_\mathrm{S}$]]></tex-math></inline-formula> are the respective evidences.</p>
        <p>However, given the aforementioned criterion that the two models have the same priors, the prior can be expressed in terms of model F, as</p>
        <disp-formula id="equ19">
          <label>(19)</label>
          <tex-math id="TM0055" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {\pi }\left(\theta \right) = \frac{\mathcal {Z}_\mathrm{F} \mathcal {P}_\mathrm{F}\left(\theta | \mathcal {D}, \mathcal {M}_\mathrm{F}\right)}{\mathcal {L}_\mathrm{F}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{F}\right)}
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>and substituted into equation (<xref ref-type="disp-formula" rid="equ18">18</xref>) to give</p>
        <disp-formula id="equ20">
          <label>(20)</label>
          <tex-math id="TM0056" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {P}_\mathrm{S} \left(\theta | \mathcal {D}, \mathcal {M}_\mathrm{S}\right) = \mathcal {P}_\mathrm{F}\left(\theta | \mathcal {D}, \mathcal {M}_\mathrm{F}\right) \frac{\mathcal {L}_\mathrm{S}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{S}\right)}{\mathcal {L}_\mathrm{F}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{F}\right)} \frac{\mathcal {Z}_\mathrm{F}}{\mathcal {Z}_\mathrm{S}}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Thus, the posterior of model S can be evaluated without having to perform a full Bayesian model fit with the slow-to-calculate likelihood. This is achieved by instead performing a model fit of the much faster evaluated model F. Given the criterion that the bulk of the two models’ posteriors occupy similar regions of the parameter space, the samples of this fast evaluated posterior will cover the same parameter volume as a hypothetical posterior of the slower model. Therefore, reweighting the posterior samples of model F by a factor of the ratio of the likelihoods</p>
        <disp-formula id="equ21">
          <label>(21)</label>
          <tex-math id="TM0057" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
w\left(\theta \right) = \frac{\mathcal {L}_\mathrm{S}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{S}\right)}{\mathcal {L}_\mathrm{F}\left(\mathcal {D} | \theta , \mathcal {M}_\mathrm{F}\right)},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>will convert the samples to samples of the posterior of model S, to within a constant factor given by the ratio of the evidences. By this method, a posterior distribution for model S can be evaluated with the slow likelihood only needing to be computed for the relatively small number of posterior samples. This can be significantly faster than sampling the complex likelihood across the entire prior volume. For a particularly complex and thus slow-to-calculate likelihood, this can make the fitting procedure significantly faster. This effect will be demonstrated in Section <xref ref-type="sec" rid="sec3-3-3">3.3.3</xref>.</p>
        <p>Therefore, the process of likelihood reweighting is well suited to achieving fast and efficient time-dependent anomaly mitigation. By taking an anomaly correcting likelihood that acts on precomputed time-summed likelihoods as the quickly evaluated model F and the full time separated likelihood as the slowly evaluated model S, the full time-dependent likelihood posterior can be evaluated quickly, allowing the Bayesian anomaly mitigation procedure to be implemented practically on time-dependent data sets as follows. For the slow likelihood, the likelihood of each data point is calculated according to equations (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>) and (<xref ref-type="disp-formula" rid="equ16">16</xref>).</p>
        <p>For the fast likelihood, however, in order to preserve as much of the time-dependent data as possible, the following process is implemented.</p>
        <p>First, the methodology described in Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>) to evaluate a time-dependent likelihood in a time-independent fashion by expanding the likelihood given in equation (<xref ref-type="disp-formula" rid="update1720681604823">14</xref>) and precomputing the summations over time is implemented. As previously noted, precomputing time summations in this fashion, while greatly accelerating the likelihood evaluation time, prevents the evaluation of <inline-formula><tex-math id="TM0058" notation="LaTeX"><![CDATA[$\log \mathcal {L}_{ij}$]]></tex-math></inline-formula> directly and thus prevents the implementation of the full time-dependent flagging method defined in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>). However, from equation (7) of Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>), it is possible to evaluate the product of likelihoods (or equivalently the sum of log likelihoods) over all time bins in each data channel</p>
        <disp-formula id="equ22">
          <label>(22)</label>
          <tex-math id="TM0059" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log \widetilde{\mathcal {L}_i} = \sum _j\log \mathcal {L}_{ij},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>from precomputed time summations.</p>
        <p>Therefore, it is possible to perform the anomaly mitigation methodology in a time-independent fashion by flagging out entire channels where contamination occurs, based on the summed log likelihoods, rather than only the contaminated time bins of those channels.</p>
        <p>In order to determine the appropriate flagging thresholds for this process, we repeat the derivation of Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>), described in Section <xref ref-type="sec" rid="sec2-1">2.1</xref>, applied to the summed log likelihood.</p>
        <p>We first make the assumption that if any time bin in a particular channel is contaminated, that contamination will dominate, and the resulting probability of the product of all time bins becomes independent of the model, and is instead uniform over a defined scale. In this case, the resulting probability of a contaminated channel depends on how many contaminated points it contains:</p>
        <disp-formula id="equ23">
          <label>(23)</label>
          <tex-math id="TM0060" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P({\mathcal{D}}_i|\theta ) = \left\lbrace \begin{array}{@{}l@{\quad }l@{}}
\widetilde{{\mathcal{L}}_i(\theta )}, & \text{all uncontaminated} \\
\Delta ^{-k_i}, & k_i \text{ contaminated},
\end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0061" notation="LaTeX"><![CDATA[$\Delta$]]></tex-math></inline-formula> again describes the scale of the contamination, <inline-formula><tex-math id="TM0062" notation="LaTeX"><![CDATA[$k_i$]]></tex-math></inline-formula> is the number of contaminated points in channel <italic>i</italic>, and <inline-formula><tex-math id="TM0064" notation="LaTeX"><![CDATA[$\widetilde{\mathcal {L}_i(\theta )} = \prod _j \mathcal {L}_{ij}(\theta )$]]></tex-math></inline-formula>.</p>
        <p>This can again be expressed using a Boolean mask, <inline-formula><tex-math id="TM0065" notation="LaTeX"><![CDATA[$\epsilon _i$]]></tex-math></inline-formula>, as</p>
        <disp-formula id="equ24">
          <label>(24)</label>
          <tex-math id="TM0066" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i|\theta , \epsilon ) = \prod _i {\widetilde{\mathcal {L}_i(\theta )}}^{\epsilon _i} (\Delta ^{-k_i})^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Here, <inline-formula><tex-math id="TM0067" notation="LaTeX"><![CDATA[$\epsilon _i=1$]]></tex-math></inline-formula> refers to the case where no time bins in channel <italic>i</italic> are contaminated, and <inline-formula><tex-math id="TM0069" notation="LaTeX"><![CDATA[$\epsilon _i=0$]]></tex-math></inline-formula> to the case where at least one time bin is contaminated.</p>
        <p>As before, the probability of observing the data channel, given the model, can then be expressed as</p>
        <disp-formula id="equ25">
          <label>(25)</label>
          <tex-math id="TM0070" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\mathcal {D}_i, \epsilon |\theta ) &=& \prod _i \left[\widetilde{\mathcal {L}_i(\theta )}P(\epsilon _i=1)\right]^{\epsilon _i} \\
&&\left[\sum _{k_i=1}^{N_t} \Delta ^{-k_i} P(k_i)P(\epsilon _i=0 |k_i)\right]^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>It must be noted here that, as the value of <inline-formula><tex-math id="TM0071" notation="LaTeX"><![CDATA[$k_i$]]></tex-math></inline-formula> in each case is not known, we sum over all possible values, weighted by their probabilities.</p>
        <p>If, again, the probability that any given point is contaminated is <inline-formula><tex-math id="TM0072" notation="LaTeX"><![CDATA[$p_i$]]></tex-math></inline-formula>, assuming again that there are no correlations, the probability that a channel contains no contaminated points is</p>
        <disp-formula id="equ26">
          <label>(26)</label>
          <tex-math id="TM0073" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\epsilon _i=1) = (1-p_i)^{N_t},
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0074" notation="LaTeX"><![CDATA[$N_t$]]></tex-math></inline-formula> is the number of time bins.</p>
        <p>In the case of <inline-formula><tex-math id="TM0075" notation="LaTeX"><![CDATA[$\epsilon =0$]]></tex-math></inline-formula>, the probability of <inline-formula><tex-math id="TM0076" notation="LaTeX"><![CDATA[$k_i$]]></tex-math></inline-formula> points being contaminated is a simple binomial distribution:</p>
        <disp-formula id="update1720529436270">
          <label>(27)</label>
          <tex-math id="TM0077" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(k_i) = {N_t \choose k_i} (p_i)^{k_i}(1-p_i)^{N_t - k_i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>As the case of <inline-formula><tex-math id="TM0078" notation="LaTeX"><![CDATA[$\epsilon _i=0$]]></tex-math></inline-formula> is defined to be where at least one point in the channel is contaminated, so <inline-formula><tex-math id="TM0079" notation="LaTeX"><![CDATA[$k_i\ge 1$]]></tex-math></inline-formula>, it can be seen that</p>
        <disp-formula id="equ28">
          <label>(28)</label>
          <tex-math id="TM0080" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
P(\epsilon _i=0|k_i) = 1
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>for all <inline-formula><tex-math id="TM0081" notation="LaTeX"><![CDATA[$k_i \ne 0$]]></tex-math></inline-formula>.</p>
        <p>Substituting equations (<xref ref-type="disp-formula" rid="equ26">26</xref>)–(<xref ref-type="disp-formula" rid="equ28">28</xref>) into equation (<xref ref-type="disp-formula" rid="equ25">25</xref>) and noting that</p>
        <disp-formula id="update1720681753846">
          <label>(29)</label>
          <tex-math id="TM0082" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\sum _{k_i=0}^{N_t} {N_t \choose k_i} x^{k_i} y^{N_t - k_i} = (x+y)^{N_t}
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>gives</p>
        <disp-formula id="equ30">
          <label>(30)</label>
          <tex-math id="TM0083" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
&&P(\mathcal {D}_i, \epsilon |\theta ) = \prod _i \left[\widetilde{\mathcal {L}_i(\theta )}(1-p_i)^{N_t}\right]^{\epsilon _i} \\
&&\left[(1 + p_i(\Delta ^{-1} - 1))^{N_t} - (1-p_i)^{N_t}\right]^{1-\epsilon _i}.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>Making, as before, the assumption that the most likely model dominates the marginalization of the <inline-formula><tex-math id="TM0084" notation="LaTeX"><![CDATA[$\epsilon _i$]]></tex-math></inline-formula> masks, and given that the most likely mask is given by</p>
        <disp-formula id="equ31">
          <label>(31)</label>
          <tex-math id="TM0085" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\epsilon _{i} = \left\lbrace \begin{array}{@{}l@{\quad }l@{}}
1, & \widetilde{{\mathcal{L}}_i(\theta )}(1-p_i)^{N_t}\\
& \gt (1 + p_i(\Delta ^{-1} - 1))^{N_t} - (1-p_i)^{N_t} \\
0, & \text{otherwise},
\end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>the final likelihood can be expressed as</p>
        <disp-formula id="equ32">
          <label>(32)</label>
          <tex-math id="TM0086" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\log {\mathcal{L}}(\theta ) = \sum _i \left\lbrace \begin{array}{@{}l@{\quad }l@{}}
& \log \widetilde{{\mathcal{L}}_{i}} + N_t \log (1-p_{i}) \\
\log \widetilde{{\mathcal{L}}_{i}} + N_t \log (1-p_{i}), & \gt \log \left[(1 + p_i(\Delta ^{-1} - 1))^{N_t} \right. \\
& - \left. (1-p_i)^{N_t}\right]\\
\log \left[(1 + p_i(\Delta ^{-1} - 1))^{N_t} \right. & \\
\left. - (1-p_i)^{N_t}\right], & \text{otherwise}.
\end{array}\right.
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>It can be seen that in the case of <inline-formula><tex-math id="TM0087" notation="LaTeX"><![CDATA[$N_t=1$]]></tex-math></inline-formula>, this reduces to equation (<xref ref-type="disp-formula" rid="equ10">10</xref>), as would be expected.</p>
        <p>Overall, this likelihood uses the methodology described in Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>) to evaluate a time-dependent likelihood in a time-independent fashion, while simultaneously implementing the anomaly mitigation methodology in a time-independent fashion by flagging out entire channels where contamination occurs, rather than only the contaminated time bins of those channels. This results in a likelihood that takes the same parameters as the full flagging likelihood shown in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>), with a posterior peak expected in a similar position, but is much faster to evaluate, albeit with a lower constraining power due to utilizing a smaller fraction of the available data. Therefore, equation (<xref ref-type="disp-formula" rid="equ32">32</xref>) serves as an ideal fast likelihood for a likelihood reweighting process, as required.</p>
        <p>In the following sections, this complete process will be tested on simulated data to evaluate its efficacy.</p>
      </sec>
    </sec>
    <sec id="sec3">
      <label>3.</label>
      <title>TIME-DEPENDENT RFI MITIGATION</title>
      <sec id="sec3-1">
        <label>3.1</label>
        <title>Toy model</title>
        <p>In order to evaluate the efficacy of this method for correcting transient RFI in time separated data, a toy-simulated time-dependent data set was generated of the form</p>
        <disp-formula id="equ33">
          <label>(33)</label>
          <tex-math id="TM0088" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {D}_{ij} = \left[ \alpha _j \sin \left(\omega _jx_i +\phi _j\right)+ \gamma _j\right]x_i^{-2.55} + \hat{\sigma },
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <italic>x</italic> describes the data channel value, indexed by <italic>i</italic>. For this toy model, this is assumed to be frequency in MHz and runs from 50 to 200. <inline-formula><tex-math id="TM0090" notation="LaTeX"><![CDATA[$\alpha _j$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0091" notation="LaTeX"><![CDATA[$\omega _j$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0092" notation="LaTeX"><![CDATA[$\phi _j$]]></tex-math></inline-formula>, and <inline-formula><tex-math id="TM0093" notation="LaTeX"><![CDATA[$\gamma _j$]]></tex-math></inline-formula> are time-dependent variables that are chosen to vary gradually over the time bins. For each of the four values, a start point is randomly chosen uniformly from the range [0–5] for <inline-formula><tex-math id="TM0094" notation="LaTeX"><![CDATA[$\alpha _j$]]></tex-math></inline-formula>, [0–1] for <inline-formula><tex-math id="TM0095" notation="LaTeX"><![CDATA[$\omega _j$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0096" notation="LaTeX"><![CDATA[$\phi _j$]]></tex-math></inline-formula>, and [110–130] for <inline-formula><tex-math id="TM0097" notation="LaTeX"><![CDATA[$\gamma _j$]]></tex-math></inline-formula>. The four variables are then iteratively defined according to</p>
        <disp-formula id="equ34">
          <label>(34)</label>
          <tex-math id="TM0098" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathrm{variable}_j = \mathrm{variable}_{j-1} + \mathcal {N}(\mu _\mathrm{variable}, \sigma _\mathrm{variable}),
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>where <inline-formula><tex-math id="TM0099" notation="LaTeX"><![CDATA[$\mathcal {N}(\mu _\mathrm{variable}, \sigma _\mathrm{variable})$]]></tex-math></inline-formula> indicates a value randomly drawn from a normal distribution of mean <inline-formula><tex-math id="TM0100" notation="LaTeX"><![CDATA[$\mu _\mathrm{variable}$]]></tex-math></inline-formula> and standard deviation <inline-formula><tex-math id="TM0101" notation="LaTeX"><![CDATA[$\sigma _\mathrm{variable}$]]></tex-math></inline-formula>. <inline-formula><tex-math id="TM0102" notation="LaTeX"><![CDATA[$\mu _\alpha$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0103" notation="LaTeX"><![CDATA[$\mu _\gamma$]]></tex-math></inline-formula> were both set to 0, to ensure that the absolute scale of the toy model did not significantly vary from time bin to time bin, with standard deviations of <inline-formula><tex-math id="TM0104" notation="LaTeX"><![CDATA[$\sigma _\alpha =0.1$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0105" notation="LaTeX"><![CDATA[$\sigma _\gamma = 1$]]></tex-math></inline-formula>. <inline-formula><tex-math id="TM0106" notation="LaTeX"><![CDATA[$\mu _\omega$]]></tex-math></inline-formula> and <inline-formula><tex-math id="TM0107" notation="LaTeX"><![CDATA[$\mu _\phi$]]></tex-math></inline-formula> were both randomly drawn from the range [−0.05 – 0.05], with <inline-formula><tex-math id="TM0108" notation="LaTeX"><![CDATA[$\sigma _\omega =\sigma _\phi = 0.05$]]></tex-math></inline-formula>, such that the sinusoidal distortion would vary over time.</p>
        <p>
          <inline-formula>
            <tex-math id="TM0109" notation="LaTeX"><![CDATA[$\hat{\sigma }$]]></tex-math>
          </inline-formula> is a realization of random Gaussian white noise added to the data. Unless otherwise specified, this was set to have a standard deviation of 0.25. A different noise realization is added to each time bin.</p>
        <p>A toy model of this form was chosen as it approximates the form of data from a global 21-cm experiment, which is dominated by foregrounds of primarily diffuse synchrotron emission (Shaver et al. <xref ref-type="bibr" rid="bib30">1999</xref>). The power law with a 2.55 spectral index approximates the spectral variation of the diffuse emission from the sky and the time varying sinusoids approximate the convolution of the diffuse emission with a chromatic antenna beam, as the Earth rotates. This is described, e.g. in equation (18) of Anstey et al. (<xref ref-type="bibr" rid="bib1">2021</xref>) and seen in Bowman et al. (<xref ref-type="bibr" rid="bib5">2018</xref>). Global 21-cm experiments are an anticipated use case of this process, so a toy model of this form enables these experiments to be used as a test case of the methodology.</p>
        <p>In addition, a simulated data set of this form has the required structure to apply the time-separated model fitting as specified in equation (<xref ref-type="disp-formula" rid="equ12">12</xref>), with</p>
        <disp-formula id="equ35">
          <label>(35)</label>
          <tex-math id="TM0110" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {G}_{ij} = \alpha _j \sin \left(\omega _jx_i +\phi _j\right)+ \gamma _j
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>and</p>
        <disp-formula id="equ36">
          <label>(36)</label>
          <tex-math id="TM0111" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathcal {F}_i \left(\theta \right) = x_i^{-\theta }
\end{eqnarray}$$]]></tex-math>
        </disp-formula>
        <p>with a ‘true value’ of <inline-formula><tex-math id="TM0112" notation="LaTeX"><![CDATA[$\theta =2.55$]]></tex-math></inline-formula>.</p>
        <p>Fig. <xref ref-type="fig" rid="fig1">1</xref> shows an example simulated data set generated for 20 time bins using this toy model. Once such a data set has been generated, any arrangement of anomalous points can then be added in order to test the proposed Bayesian anomaly mitigator.</p>
        <fig id="fig1">
          <label>Figure 1.</label>
          <caption>
            <p>Example test data set with 20 time bins generated according to the toy model described in Section <xref ref-type="sec" rid="sec3-1">3.1</xref>.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig1.jpeg" mimetype="image" />
        </fig>
      </sec>
      <sec id="sec3-2">
        <label>3.2</label>
        <title>Parametrizing the threshold</title>
        <p>In Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>), the likelihood threshold value for determining if a point should be flagged as an anomaly or not, <italic>p</italic>, was set to a fixed value. However, doing so produces a challenge when applied to time-dependent modelling.</p>
        <p>As shown in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>), the general method of operation for the Bayesian anomaly correction procedure is to calculate the likelihood of each separate data point, compare it with a predefined threshold value and if it exceeds the threshold, include it with an appropriate weighting and if it does not, flag it as an anomaly and instead add a fixed penalty to the total likelihood. This is acceptable under the assumption that only anomalous points will have likelihoods lower than the threshold.</p>
        <p>However, in cases where the model used has relatively large priors and significant variability, it becomes possible for a given parameter sample to produce a model sufficiently different from the data set that significant numbers of data points have likelihoods below the threshold, as demonstrated by Fig. <xref ref-type="fig" rid="fig2">2</xref>.</p>
        <fig id="fig2">
          <label>Figure 2.</label>
          <caption>
            <p>Example, for linear noisy data, of the range of data points that are further from the model than a defined threshold for a model that closely matches the data set (top panel), moderately differs from the data set (middle panel), and significantly differs from the data set (bottom panel).</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig2.jpeg" mimetype="image" />
        </fig>
        <p>This becomes of concern if the priors are wide enough compared with the posterior peak to result in sections of the prior space in which every data point has a likelihood below the defined threshold set by <italic>p</italic>. In this case, the overall likelihood is a sum of only fixed penalty terms, and thus is constant. As a result, all sections of the prior space that satisfy this condition have the same likelihood. This manifests as the outer boundaries of the likelihood surface becoming flat.</p>
        <p>If the majority of the prior space is not flattened in this manner, this has minimal impact on the process of performing a model fit and recovering the parameter posteriors. However, if the likelihood surface is particularly steep, it is possible for the majority of the space to become flat, except for a very narrow region around the posterior peak. In this case, performing a model fit becomes impossible, as there exists no variation to guide an algorithm towards the peak.</p>
        <p>This is a challenge when performing time-dependent model fits, because the additional time information amplifies the effect of model disagreement. A parameter sample that gives a model that matches well to a data set with many time bins will give a high likelihood for each data point of each time bin, and thus give a larger overall likelihood than for an equally well matching data set with fewer time bins, and vice versa. As demonstrated in Fig. <xref ref-type="fig" rid="fig3">3</xref> for the proposed one-parameter toy model, defined by equations (<xref ref-type="disp-formula" rid="equ35">35</xref>) and (<xref ref-type="disp-formula" rid="equ36">36</xref>), with 2, 20, 200, and 2000 time bins, this has the effect of steepening the likelihood surface, the more time bins the data set has.</p>
        <fig id="fig3">
          <label>Figure 3.</label>
          <caption>
            <p>Plot of the total summed log likelihood shown in equation (<xref ref-type="disp-formula" rid="update1720681604823">14</xref>) for the toy model described in Section <xref ref-type="sec" rid="sec3-1">3.1</xref> as a function of parameter value for 2, 20, 200, and 2000 time bins.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig3.jpeg" mimetype="image" />
        </fig>
        <p>Fig. <xref ref-type="fig" rid="fig4">4</xref> shows the regions where the value of <inline-formula><tex-math id="TM0115" notation="LaTeX"><![CDATA[$\mathcal {L}_{i}$]]></tex-math></inline-formula>, averaged over time, falls above or below a threshold of <inline-formula><tex-math id="TM0116" notation="LaTeX"><![CDATA[$p=$]]></tex-math></inline-formula>1e−3, which is a typical value as determined from Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>). It can be seen that as the number of time bins increases, increasingly large portions of the space fall below the threshold, even in absence of anomalies, and so are flattened. By 2000 time bins, only a very small region of the parameter space is not flattened, which makes performing a model fit almost impossible.</p>
        <fig id="fig4">
          <label>Figure 4.</label>
          <caption>
            <p>Plots of the time averaged likelihoods <inline-formula><tex-math id="TM0117" notation="LaTeX"><![CDATA[$\mathcal {L}_{ij}$]]></tex-math></inline-formula> for simulated data sets generated according to equation (<xref ref-type="disp-formula" rid="equ11">11</xref>), excluding the summation, with 2, 20, 200, and 2000 time bins. The parameter and <italic>x</italic> values that give likelihoods above a fixed threshold of <inline-formula><tex-math id="TM0119" notation="LaTeX"><![CDATA[$p=$]]></tex-math></inline-formula>1e−3 are highlighted, demonstrating that larger regions of parameter space fall below a fixed likelihood threshold as the number of time bins increases.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig4.jpeg" mimetype="image" />
        </fig>
        <p>This demonstrates that, especially in the case of time separated data, having an externally fixed threshold value can impede model fitting. Therefore, ideally the value of <italic>p</italic> should be dynamic, allowing it to be low in suboptimal regions of the parameter space to avoid overflagging and flattening the likelihood surface, and higher around optimal regions to avoid missing genuine anomalous points.</p>
        <p>Therefore, as was suggested in Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>), this issue can be resolved by fitting the value of <italic>p</italic> as a free parameter, estimated simultaneously alongside the other parameters of interest by the chosen Bayesian numerical solver, rather than assigning it a fixed value. It is assigned a wide prior from its theoretical maximum of one down to effectively zero. This results in there being sections of the parameter volume where the threshold is low and so the variations in the likelihood surface with the other parameters are visible. As a result, the model fit can progress, optimizing the parameter values towards their posteriors while simultaneously optimizing the threshold towards the optimal posterior value of the probability that a point is anomalous for that data set. This resolves the issue described above and allows the fit to proceed while still accurately flagging anomalous points. The effect this additional nuisance parameter may have on the likelihood reweighting process can then be mitigated by marginalizing over the parameter before reweighting.</p>
        <p>In the next section, tests of the entire process will be performed to demonstrate its efficacy. Throughout the rest of this work, <italic>p</italic> is fit as a free parameter with a logarithmically uniform prior in the range [1e−30 – 1]. The next section will demonstrate the functionality of this method.</p>
      </sec>
      <sec id="sec3-3">
        <label>3.3</label>
        <title>Results</title>
        <p>In order to demonstrate the performance of this anomaly mitigation procedure, four test data sets were generated according to the model described in Section <xref ref-type="sec" rid="sec3-1">3.1</xref>, with <inline-formula><tex-math id="TM0123" notation="LaTeX"><![CDATA[$N_\mathrm{t}$]]></tex-math></inline-formula> = 2, 20, 200, and 2000 time bins, respectively. These data sets will henceforth be referred to as the <italic>uncontaminated</italic> data. To each of these data sets, a random selection of anomalous peaks is added. In each case, the number of spikes added was equal to <inline-formula><tex-math id="TM0124" notation="LaTeX"><![CDATA[$N_\mathrm{t}\times 5$]]></tex-math></inline-formula>, such that every data set is contaminated in equal proportion. The amplitudes were uniformly sampled from the range 10–50. This scale was chosen to provide a trial case for the process, in which the anomalies are sufficiently above the noise to be large enough to impact the model fit, but still relatively small compared with the overall scale of the data, so they cannot be trivially removed. The time bins in which each anomaly was placed were uniformly randomly chosen and the <italic>x</italic> bin was uniformly set to one of 40 randomly chosen channels. This constrained the contamination to a subset of the data channels, in a manner more resembling RFI. This will be discussed in more detail in Section <xref ref-type="sec" rid="sec3-3-4">3.3.4</xref>. Fig. <xref ref-type="fig" rid="fig5">5</xref> shows an example of the anomalous points added to the data for <inline-formula><tex-math id="TM0126" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula>. These data sets will henceforth be referred to as <italic>contaminated</italic>. As the primary objective of this work is to present a method of fast correction for transient RFI, the contamination we test here is transient. 
However, this method will also compensate for time-constant and wideband RFI, which was demonstrated in Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>).</p>
        <fig id="fig5">
          <label>Figure 5.</label>
          <caption>
            <p>Plot of the anomalous points added to the <inline-formula><tex-math id="TM0127" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula> uncontaminated data set in order to produce the corresponding contaminated data set. The amplitudes were uniformly randomly chosen from the range 10–50. The time bin locations were uniformly randomly chosen and the <italic>x</italic> bin locations were uniformly set to one of 40 randomly chosen channels.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig5.jpeg" mimetype="image" />
        </fig>
        <p>In all model fits performed in this paper, <inline-formula><tex-math id="TM0129" notation="LaTeX"><![CDATA[$\Delta$]]></tex-math></inline-formula> is taken to be 7000, which is the maximum scale of the toy model data in all cases, as can be seen in Fig. <xref ref-type="fig" rid="fig1">1</xref>. Furthermore, all fits were performed using the <monospace>PolyChord</monospace> nested sampling algorithm (Handley, Hobson &amp; Lasenby <xref ref-type="bibr" rid="bib14">2015a</xref>, <xref ref-type="bibr" rid="bib15">b</xref>), implemented in Python and run on a laptop on one CPU. As the core of this method is a modification to the likelihood and model of a Bayesian model fit, it can be implemented with any Bayesian fitting algorithm and will scale and parallelize as that algorithm does.</p>
        <sec id="sec3-3-1">
          <label>3.3.1</label>
          <title>Anomaly correction</title>
          <p>Two models were fit to each of the contaminated and uncontaminated data sets described in the previous section. The first was a direct fit of the model given in equation (<xref ref-type="disp-formula" rid="equ35">35</xref>) and equation (<xref ref-type="disp-formula" rid="equ36">36</xref>), with no attempt to correct for any anomalies. The second was a full fit of this model together with the time-dependent Bayesian anomaly flagging method described in the previous section, including the likelihood reweighting process and fitting for the threshold value as a parameter. Fig. <xref ref-type="fig" rid="fig6">6</xref> shows the posteriors on the parameter <inline-formula><tex-math id="TM0130" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula> for each of these fits. Fig. <xref ref-type="fig" rid="fig7">7</xref> summarizes the standard deviations, <inline-formula><tex-math id="TM0131" notation="LaTeX"><![CDATA[$\sigma _\theta$]]></tex-math></inline-formula>, of each of these posterior distributions and Fig. <xref ref-type="fig" rid="fig8">8</xref> shows the biases between the posterior means and the true parameter value of 2.55, expressed as a number of standard deviations.</p>
          <fig id="fig6">
            <label>Figure 6.</label>
            <caption>
              <p>Plots of the posteriors on the toy model parameter <inline-formula><tex-math id="TM0132" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula>, defined in equation (<xref ref-type="disp-formula" rid="equ36">36</xref>), when fitting both uncontaminated and contaminated test data sets with models that include and do not include time-dependent Bayesian anomaly correction. The test data sets used a true value of <inline-formula><tex-math id="TM0133" notation="LaTeX"><![CDATA[$\theta =2.55$]]></tex-math></inline-formula>, marked with a vertical dashed line. Each subfigure shows the results for a simulated data set with a different number of time bins, from 2 to 2000. This demonstrates that uncorrected anomalies lead to parameter biases which are corrected by the application of the anomaly mitigation process.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig6.jpeg" mimetype="image" />
          </fig>
          <fig id="fig7">
            <label>Figure 7.</label>
            <caption>
              <p>Plots of the standard deviations, <inline-formula><tex-math id="TM0134" notation="LaTeX"><![CDATA[$\sigma _\theta$]]></tex-math></inline-formula>, of each of the posterior probability distributions shown in Fig. <xref ref-type="fig" rid="fig6">6</xref>.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig7.jpeg" mimetype="image" />
          </fig>
          <fig id="fig8">
            <label>Figure 8.</label>
            <caption>
              <p>Plots of the biases between the posterior mean and the true fiducial parameter value, expressed as a number of standard deviations, for each of the posterior probability distributions shown in Fig. <xref ref-type="fig" rid="fig6">6</xref>.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig8.jpeg" mimetype="image" />
          </fig>
          <p>For each number of time bins, the results of fitting the uncontaminated data sets without including any anomaly corrections are shown in red in order to provide a benchmark. In all of these benchmark cases, the posterior correctly identifies the true parameter value of 2.55 to within &lt;1σ, as can be seen in Fig. <xref ref-type="fig" rid="fig8">8</xref>, with the standard deviation of those posteriors reducing approximately proportionally to the number of time bins, seen in Fig. <xref ref-type="fig" rid="fig7">7</xref>, as could be expected.</p>
          <p>The posteriors when the correction is applied but the data are uncontaminated are shown in blue. In these cases, it can be seen that the recovered posteriors are highly consistent with those of the uncorrected cases. This demonstrates that including the time-dependent Bayesian anomaly correction method does not bias the fit in absence of any anomalies. This is the expected result.</p>
          <p>The posteriors generated from contaminated data sets but with no correction applied are shown in green. It can be seen that in all cases, the parameter posterior is biased from the true value. For <inline-formula><tex-math id="TM0135" notation="LaTeX"><![CDATA[$N_\mathrm{t}=2$]]></tex-math></inline-formula>, 20, 200, and 2000, Fig. <xref ref-type="fig" rid="fig8">8</xref> shows the true value falls at <inline-formula><tex-math id="TM0136" notation="LaTeX"><![CDATA[$5.8\sigma _\theta$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0137" notation="LaTeX"><![CDATA[$42.7\sigma _\theta$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0138" notation="LaTeX"><![CDATA[$94.1\sigma _\theta$]]></tex-math></inline-formula>, and <inline-formula><tex-math id="TM0139" notation="LaTeX"><![CDATA[$393.7\sigma _\theta$]]></tex-math></inline-formula>, respectively. This is again the expected result, demonstrating that the results will be biased if contamination is present but not accounted for. The offset increases with the number of time bins due to the high time bin cases having narrower posteriors, which makes the bias more apparent. It can also be seen in Fig. <xref ref-type="fig" rid="fig7">7</xref> that, although the posterior standard deviations in these cases still decrease proportionally to the number of time bins, the posteriors are consistently a factor of <inline-formula><tex-math id="TM0140" notation="LaTeX"><![CDATA[$\sim 4$]]></tex-math></inline-formula> larger than the uncontaminated cases.</p>
          <p>The posteriors when the correction is applied to contaminated data are shown in black. It can be seen that the proposed time-dependent Bayesian anomaly mitigation methodology has successfully countered the bias in the posterior seen in the contaminated but uncorrected cases. For <inline-formula><tex-math id="TM0141" notation="LaTeX"><![CDATA[$N_\mathrm{t}=2$]]></tex-math></inline-formula>, 20, 200, and 2000, Fig. <xref ref-type="fig" rid="fig8">8</xref> shows the true value now falls at <inline-formula><tex-math id="TM0142" notation="LaTeX"><![CDATA[$0.59\sigma _\theta$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0143" notation="LaTeX"><![CDATA[$0.03\sigma _\theta$]]></tex-math></inline-formula>, <inline-formula><tex-math id="TM0144" notation="LaTeX"><![CDATA[$0.31\sigma _\theta$]]></tex-math></inline-formula>, and <inline-formula><tex-math id="TM0145" notation="LaTeX"><![CDATA[$0.25\sigma _\theta$]]></tex-math></inline-formula>, respectively. The true value of the parameter is therefore now recovered to within <inline-formula><tex-math id="TM0146" notation="LaTeX"><![CDATA[$1\sigma$]]></tex-math></inline-formula> in all cases, with Fig. <xref ref-type="fig" rid="fig6">6</xref> showing that the posterior closely matches the uncontaminated benchmark in all cases.</p>
          <p>This demonstrates that the proposed anomaly mitigation technique is successfully correcting the added anomalies in the data and enabling the model parameters to be recovered accurately. It is worth noting that in this test case the simulated data are heavily contaminated, with 3.3 per cent of data points featuring an anomaly and 26.5 per cent of <italic>x</italic> channels being contaminated to some extent. Despite this, anomalies are still correctly accounted for.</p>
        </sec>
        <sec id="sec3-3-2">
          <label>3.3.2</label>
          <title>Anomaly recovery</title>
          <p>Figs <xref ref-type="fig" rid="fig6">6</xref>–<xref ref-type="fig" rid="fig8">8</xref> demonstrate that our methods account for the presence of anomalous data points to a level sufficient that the underlying model can be accurately recovered. It is also worth assessing directly whether the points predicted to be anomalies correctly correspond to those added into the data.</p>
          <p>This was investigated for the four corrected fits to contaminated data shown in black in Figs <xref ref-type="fig" rid="fig6">6</xref>–<xref ref-type="fig" rid="fig8">8</xref>. In order to determine the accuracy with which the flagged anomalies correspond to the true anomalies, it is necessary to first determine which data points are being identified as anomalous by the algorithm. This was achieved, for each case, by evaluating the condition given in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>) for every posterior sample. Any data point where <inline-formula><tex-math id="TM0148" notation="LaTeX"><![CDATA[$\log \mathcal {L}_{ij} + \log (1-p_{ij}) \le \log p_{ij} -\log \Delta$]]></tex-math></inline-formula> is considered anomalous for that sample. Evaluating the weighted average of all samples in that posterior then outputs a probability, for each data point, that that point is considered anomalous by the fit.</p>
          <p>A given data point can then be considered flagged as anomalous by the fit if this probability exceeds a threshold. This threshold was set to 0.5, such that a point is considered flagged if it is more probable that it is anomalous than not. The resulting flagged points for each number of time bins can be compared with the anomalies added into the simulated data to determine the accuracy of the flagging process.</p>
          <p>Table <xref ref-type="table" rid="tbl1">1</xref> summarizes the number of points flagged as anomalous by the fitting that do or do not correspond to a real anomaly, true and false positives respectively, and the number of points not flagged that do or do not correspond to uncontaminated data, true and false negatives, respectively. It should be noted that the total number of anomalous data points does not exactly equal <inline-formula><tex-math id="TM0149" notation="LaTeX"><![CDATA[$5\times N_\mathrm{t}$]]></tex-math></inline-formula> due to the random locations of the contaminated points occasionally resulting in overlap. It can be seen that for all four <inline-formula><tex-math id="TM0150" notation="LaTeX"><![CDATA[$N_\mathrm{t}$]]></tex-math></inline-formula> values tested, the rate of both false positives and false negatives was zero. This results in both the precision, true positive/(true positive + false positive), and the recall, true positive/(true positive + false negative), being 1 for all cases. Therefore, the <inline-formula><tex-math id="TM0151" notation="LaTeX"><![CDATA[$\mathrm{F}_2$]]></tex-math></inline-formula> score, defined as the harmonic mean of these two values, giving twice as much weight to recall as precision,</p>
          <disp-formula id="equ37">
            <label>(37)</label>
            <tex-math id="TM0152" notation="LaTeX"><![CDATA[$$\begin{eqnarray}
\mathrm{F}_2 = \frac{\left(1+2^2 \right)\times \mathrm{precision}\times \mathrm{recall}}{2^2\times \mathrm{precision}+\mathrm{recall}},
\end{eqnarray}$$]]></tex-math>
          </disp-formula>
          <p>is 1 for all cases.</p>
          <table-wrap id="tbl1">
            <label>Table 1.</label>
            <caption>
              <p>Summary of the accuracy with which the anomalous data points inserted into each contaminated simulated data set were recovered by the flagging process, quantified by the number of true positives (points flagged as contaminated that were actually contaminated), false positives (points flagged that were not actually contaminated), false negatives (points not flagged as contaminated despite actually being contaminated), and true negatives (uncontaminated points that were correctly not flagged).</p>
            </caption>
            <table>
              <thead>
                <tr>
                  <th align="left">
                    <inline-formula>
                      <tex-math id="TM0153" notation="LaTeX"><![CDATA[$N_\mathrm{t}$]]></tex-math>
                    </inline-formula>
                  </th>
                  <th align="center">True</th>
                  <th align="center">False</th>
                  <th align="center">False</th>
                  <th align="center">True</th>
                </tr>
                <tr>
                  <th />
                  <th align="center">positives</th>
                  <th align="center">positives</th>
                  <th align="center">negatives</th>
                  <th align="center">negatives</th>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td>2</td>
                  <td>9</td>
                  <td>0</td>
                  <td>0</td>
                  <td>293</td>
                </tr>
                <tr>
                  <td>20</td>
                  <td>91</td>
                  <td>0</td>
                  <td>0</td>
                  <td>2929</td>
                </tr>
                <tr>
                  <td>200</td>
                  <td>946</td>
                  <td>0</td>
                  <td>0</td>
                  <td>29 254</td>
                </tr>
                <tr>
                  <td>2000</td>
                  <td>9433</td>
                  <td>0</td>
                  <td>0</td>
                  <td>292 567</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>It should also be noted that this level of precision and recall is maintained even if the probability at which a point is considered anomalous in the fit is dropped from 0.5 to 1e−10. This accuracy of anomaly recovery, even for these heavily contaminated toy models, raises the possibility that this anomaly correction methodology could also be used to detect anomalous points of interest, and thus functions as a transient flagger. This possibility will be explored further in Section <xref ref-type="sec" rid="sec4">4</xref>.</p>
        </sec>
        <sec id="sec3-3-3">
          <label>3.3.3</label>
          <title>Computation time</title>
          <p>The motivation for implementing likelihood reweighting as described in Section <xref ref-type="sec" rid="sec2-4">2.4</xref> was to improve the computational efficiency of the proposed process and significantly reduce the otherwise strong dependency of the total runtime on the number of time bins.</p>
          <p>In order to investigate the effects on runtime, the four contaminated data sets, with <inline-formula><tex-math id="TM0154" notation="LaTeX"><![CDATA[$N_\mathrm{t} = 2$]]></tex-math></inline-formula>, 20, 200, and 2000, were fit to the corresponding model with the anomaly correction method implemented, but without using likelihood reweighting. Instead the full ‘slow’ likelihood defined in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>) was used for the entire fit.</p>
          <p>Fig. <xref ref-type="fig" rid="fig9">9</xref> shows the means and standard deviations of the time, on the laptop CPU used, for a single evaluation of this slow likelihood for each of these fits, calculated from 1000 repeat evaluations of each likelihood. It can be seen that the evaluation time increases proportionally with the increasing number of time bins, as expected.</p>
          <fig id="fig9">
            <label>Figure 9.</label>
            <caption>
              <p>Plot of runtimes as a function of number of time bins in the data set. The time required, on the laptop CPU used, for a single likelihood evaluation of the full time-dependent anomaly mitigation method described in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>), calculated from 1000 likelihood evaluations in each case, is shown by the solid line. The ratio of the total runtime of a model fit implementing time-dependent Bayesian anomaly flagging using the full slow likelihood described in equation (<xref ref-type="disp-formula" rid="update1720681653667">15</xref>), to that of the equivalent fit using likelihood reweighting, calculated from five repeats of each fit, is shown by the dashed line. The horizontal dotted line marks the ratio of 1, above which the likelihood reweighting methodology is faster than the full fit.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig9.jpeg" mimetype="image" />
          </fig>
          <p>Fig. <xref ref-type="fig" rid="fig9">9</xref> also shows the means and standard deviations of the ratio of the total runtime of the fit with no likelihood reweighting to an equivalent fit on the same data set with likelihood reweighting implemented, evaluated from five repeats of each fit. Whilst using the full model fit directly is more efficient for cases with very few time bins, it can be seen that implementing likelihood reweighting reduces the runtime when <inline-formula><tex-math id="TM0155" notation="LaTeX"><![CDATA[$N_\mathrm{t}\gt 20$]]></tex-math></inline-formula>, with the speed up following approximately the same trend as that of the likelihood evaluation time, increasing approximately proportionally to the number of time bins. For example, for 2000 time bins, the runtime improves by a factor of 44. This demonstrates that implementing likelihood reweighting successfully makes the total runtime close to independent of the number of time bins. This enables the underlying model fit to be performed in the efficient fashion described in Anstey et al. (<xref ref-type="bibr" rid="bib2">2023</xref>) and Section <xref ref-type="sec" rid="sec2-3">2.3</xref> without being significantly slowed by the anomaly flagging, and thus enables the flagging process to be implemented efficiently on very large data sets, as was the objective.</p>
          <p>Given that this runtime improvement is obtained from improving the likelihood evaluation time to be independent of the number of time bins, this trend is expected to continue for higher numbers of time bins. It should also be maintained with parallelization of the underlying model fitting algorithm.</p>
        </sec>
        <sec id="sec3-3-4">
          <label>3.3.4</label>
          <title>Overcontamination</title>
          <p>In Section <xref ref-type="sec" rid="sec3-3-3">3.3.3</xref>, it was demonstrated that the process of likelihood reweighting described in Section <xref ref-type="sec" rid="sec2-4">2.4</xref> significantly improves the computation time for the proposed method. However, it also introduces a minor limitation that is not present if the full, slow likelihood is used.</p>
          <p>When likelihood reweighting is implemented for this process, the initial model fit is performed using the ‘fast’ likelihood defined in equation (<xref ref-type="disp-formula" rid="equ32">32</xref>). However, in this fast likelihood, in order for the evaluation time to not be dependent on the number of time bins as required, anomalies are identified in the product of the likelihoods of all data points in a given channel, flagging the entire data channel if the likelihood product is below the specified threshold. However, as a result of this, if the data are contaminated in such a way that all <italic>x</italic> channels are contaminated to some extent, it could result in every channel being flagged, thus giving no constraints on the parameters of interest.</p>
          <p>In order to test this effect, a new set of contaminated data sets was generated, by adding anomalous data points to the four uncontaminated data sets described in Section <xref ref-type="sec" rid="sec3-3">3.3</xref>, in exactly the same manner as previously, except the <italic>x</italic> bin of each contaminated point was chosen entirely randomly, rather than being confined to certain channels. Fig. <xref ref-type="fig" rid="fig10">10</xref> shows an example of the anomalous points added to the data for <inline-formula><tex-math id="TM0158" notation="LaTeX"><![CDATA[$N_\mathrm{t} = 200$]]></tex-math></inline-formula>.</p>
          <fig id="fig10">
            <label>Figure 10.</label>
            <caption>
              <p>Plot of the anomalous points added to the <inline-formula><tex-math id="TM0159" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula> uncontaminated data set in order to produce the corresponding contaminated data set in which most <italic>x</italic> channels are contaminated to some degree. The amplitudes were uniformly randomly chosen from the range 10–50. The time bin and <italic>x</italic> bin locations were uniformly randomly chosen.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig10.jpeg" mimetype="image" />
          </fig>
          <p>The tests described in Section <xref ref-type="sec" rid="sec3-3-1">3.3.1</xref> were repeated on these new randomly contaminated data sets. Fig. <xref ref-type="fig" rid="fig11">11</xref> shows the posteriors for the cases where the contaminated data sets were fit with the anomaly correcting likelihood, in comparison with the equivalent fits where the anomalies were confined to specific channels.</p>
          <fig id="fig11">
            <label>Figure 11.</label>
            <caption>
              <p>Plot of the parameter posteriors recovered when applying the Bayesian anomaly mitigation method with likelihood reweighting to simulated test data sets contaminated with <inline-formula><tex-math id="TM0162" notation="LaTeX"><![CDATA[$5N_\mathrm{ t}$]]></tex-math></inline-formula> anomalous points located at random (solid line), in comparison to the recovered posteriors for performing the same fits on data instead contaminated with the same number of anomalous points but constrained to a maximum of 40 <italic>x</italic> channels (dashed line), previously shown in Fig. <xref ref-type="fig" rid="fig6">6</xref> (by a dash-dotted line). The true parameter value of 2.55 is indicated by the vertical dashed line.</p>
            </caption>
            <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig11.jpeg" mimetype="image" />
          </fig>
          <p>Given that the number of anomalous points injected was <inline-formula><tex-math id="TM0164" notation="LaTeX"><![CDATA[$5 \times N_\mathrm{t}$]]></tex-math></inline-formula> and the number of <italic>x</italic> channels in the simulated data sets is 151, for fully randomly distributed anomalies, the expected number of anomalous points per channel will be 0.07, 0.66, 6.62, and 66.23 for <inline-formula><tex-math id="TM0166" notation="LaTeX"><![CDATA[$N_\mathrm{t}=$]]></tex-math></inline-formula> 2, 20, 200, and 2000, respectively. For all cases except <inline-formula><tex-math id="TM0167" notation="LaTeX"><![CDATA[$N_\mathrm{t}=2$]]></tex-math></inline-formula>, it is more likely that a channel be contaminated than not, with <inline-formula><tex-math id="TM0168" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula> and 2000 expecting one or more contaminated points in every channel. It can therefore be expected that for <inline-formula><tex-math id="TM0169" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula> and 2000, every channel will be flagged in the fast likelihood and thus no constraints on the parameter will be achieved.</p>
          <p>It can be seen in Fig. <xref ref-type="fig" rid="fig11">11</xref> that, for the <inline-formula><tex-math id="TM0170" notation="LaTeX"><![CDATA[$N_\mathrm{t}=2$]]></tex-math></inline-formula> and 20 cases, where the random anomalies are expected to have less than one contaminated point per channel, the parameter recovery for the random contamination is equivalent to the channel constrained contamination. However, for the <inline-formula><tex-math id="TM0171" notation="LaTeX"><![CDATA[$N_\mathrm{t}=200$]]></tex-math></inline-formula> and 2000 cases, where the expected number of contaminated points per channel increases above 1, the expected flagging of every channel is seen in the random contamination case, which returns the prior on the parameter <inline-formula><tex-math id="TM0172" notation="LaTeX"><![CDATA[$\theta$]]></tex-math></inline-formula>.</p>
          <p>This overcontamination failure can be overcome by utilizing the full, slow likelihood instead of implementing likelihood reweighting. However, doing so would forgo the improvement in calculation time the reweighting provides. The degree of contamination required for this limitation to become relevant is very high, particularly in the context of RFI as it requires most or all of the channels to show contamination. However, this does represent a limitation of the proposed methodology to improve flagging speed through likelihood reweighting, which may not be viable on extremely heavily contaminated data sets.</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec4">
      <label>4.</label>
      <title>TRANSIENT DETECTION</title>
      <p>In Section <xref ref-type="sec" rid="sec3-3-2">3.3.2</xref>, it was demonstrated that along with enabling accurate identification of the underlying model from beneath contamination, the proposed method also gives accurate recovery of the anomalous points themselves. This raises the possibility that, in addition to applications in RFI excision, this method could also be applied to detect transient signals of interest. As the implementation of likelihood reweighting enables model fitting to have a computational time almost independent of the number of time bins, this process potentially provides a Bayesian methodology for efficiently searching large data sets for transients such as FRBs and pulsars.</p>
      <sec id="sec4-1">
        <label>4.1</label>
        <title>Anomaly recovery</title>
        <p>In order to test the ability of the method to correctly identify small transients in large data sets, a series of tests were run in which a single anomalous data point was added at random to the previously described 2000 time bin test data set, and the model fit with the flagging process implemented. This was repeated five times each for anomalous points with amplitudes of 0.5, 1.25, 2.5, 5, and 12.5, which correspond to SNRs of 2, 5, 10, 20, and 50 respectively.</p>
        <p>The results of these fits are shown in Fig. <xref ref-type="fig" rid="fig12">12</xref>. It can be seen that when acting as a flagger, this method correctly and reliably identifies the single anomalous point for the cases with SNR <inline-formula><tex-math id="TM0173" notation="LaTeX"><![CDATA[$\ge 10$]]></tex-math></inline-formula>. Furthermore, it never erroneously identifies any data points as anomalous that were not anomalous in the data. However, the flagger does begin to fail to detect the anomalous point at lower SNRs, which can be expected given there is only one anomalous point out of 302 000 in this test case, meaning lower SNR anomalies can become indistinguishable from simple statistical fluctuations and so will not be flagged.</p>
        <fig id="fig12">
          <label>Figure 12.</label>
          <caption>
            <p>Plot of the attempts to identify a single added anomalous point into 2000 time bin data sets using the proposed method as a transient detector. Each panel shows the results for different SNR of the added anomalous point and for different repeats with the anomaly in a different random data bin. Cases where no anomalies were identified at all are shown in black with x hatching. Cases where the single anomaly was correctly flagged are shown in green with + hatching. Cases where multiple points were erroneously flagged and cases where only one point was flagged, but at the wrong location, would be shown in red with / hatching and purple with \ hatching, respectively. However, no such cases were seen.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="rzae025fig12.jpeg" mimetype="image" />
        </fig>
        <p>Although real transients such as FRBs and pulsars will often have additional structure and cover more data points than this simple, single contaminated data point test case, this minimal case demonstrates the potential for the proposed methodology to function as an efficient Bayesian transient detector. This will be explored in greater detail in a future work.</p>
      </sec>
    </sec>
    <sec id="sec5" sec-type="conclusions">
      <label>5.</label>
      <title>CONCLUSIONS</title>
      <p>RFI is a significant challenge in radio astronomy. In this paper, we extend the Bayesian RFI mitigation methodology presented in Leeney et al. (<xref ref-type="bibr" rid="bib17">2023</xref>) into the time domain. This enables transient anomalies to be flagged and properly accounted for in a Bayesian manner when fitting models to time-series data.</p>
      <p>The process of likelihood reweighting was implemented in order to enable this process to be performed in a manner mostly independent of the number of time bins in the data. This was demonstrated to produce significant improvements in the computation time as the number of time bins increases, reaching a 44 times speed increase on a test case with 2000 time bins, by breaking the proportional relation between the number of time bins and the runtime.</p>
      <p>Our methodology was demonstrated to be successful when correcting for contamination in a series of test data sets that approximate global 21-cm experiment data. It accurately corrected the bias in the model parameters that occurs if the contamination was not accounted for, while not affecting the results if no contamination is present.</p>
      <p>Furthermore, it was demonstrated that our methods can correctly locate and extract anomalous points from data. Therefore, the efficacy of our methods as an efficient transient detector was explored. It was demonstrated that they were successfully able to identify a single anomalous point out of 302 000, provided the anomaly had an SNR of 10 or higher. The use of this process as a transient flagger will be explored in greater depth in a future work.</p>
      <p>A potential limitation was also identified, in which the implementation of likelihood reweighting results in a failure to correctly account for anomalies in cases where every data channel is contaminated to some degree. Therefore, this method of improving computation time may not be viable on extremely heavily contaminated data sets, or in cases where most or all channels have a finite probability of showing some contamination and the data set has a very long time series. However, in such cases, the full slow likelihood can still be applied, albeit with a much longer computational time.</p>
      <p>Overall, the methodology presented here represents an efficient and fully Bayesian technique for correcting time-dependent contamination or identifying transients in large data sets.</p>
    </sec>
  </body>
  <back>
    <sec id="sec6" sec-type="data-availability">
      <title>DATA AVAILABILITY</title>
      <p>The data used and generated in this article will be shared on reasonable request to the corresponding author.</p>
    </sec>
    <ack id="ack1">
      <title>ACKNOWLEDGEMENTS</title>
      <p>We would like to thank Will Handley for his work on the development of the original methodology. DA was supported by the Science and Technologies Facilities Council and SL was supported by the European Research Council and the UKRI.</p>
    </ack>
    <ref-list id="ref1">
      <title>REFERENCES</title>
      <ref id="bib1">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Anstey</surname>  <given-names>D.</given-names></string-name>, <string-name><surname>de Lera Acedo</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Handley</surname>  <given-names>W.</given-names></string-name></person-group>, <year>2021</year>, <source>MNRAS</source>, <volume>506</volume>, <fpage>2041</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stab1765</pub-id></mixed-citation>
      </ref>
      <ref id="bib2">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Anstey</surname>  <given-names>D.</given-names></string-name>, <string-name><surname>de Lera Acedo</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Handley</surname>  <given-names>W.</given-names></string-name></person-group>, <year>2023</year>, <source>MNRAS</source>, <volume>520</volume>, <fpage>850</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stad156</pub-id></mixed-citation>
      </ref>
      <ref id="bib3">
        <mixed-citation publication-type="other">
          <person-group person-group-type="author">
            <string-name>
              <surname>Arrubarrena</surname>  <given-names>P.</given-names></string-name>, <string-name><surname>Lemercier</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Nikolic</surname>  <given-names>B.</given-names></string-name>, <string-name><surname>Lyons</surname>  <given-names>T.</given-names></string-name>, <string-name><surname>Cass</surname>  <given-names>T.</given-names></string-name></person-group>, <year>2024</year>, <comment>preprint</comment> (<pub-id pub-id-type="arxiv">arXiv:2402.14892</pub-id>)</mixed-citation>
      </ref>
      <ref id="bib4">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Baan</surname>  <given-names>W. A.</given-names></string-name>
          </person-group>, <year>2019</year>, <source>J. Astron. Instrum.</source>, <volume>8</volume>, <fpage>1940010</fpage>  <pub-id pub-id-type="doi">10.1142/S2251171719400105</pub-id></mixed-citation>
      </ref>
      <ref id="bib5">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Bowman</surname>  <given-names>J. D.</given-names></string-name>, <string-name><surname>Rogers</surname>  <given-names>A. E. E.</given-names></string-name>, <string-name><surname>Monsalve</surname>  <given-names>R. A.</given-names></string-name>, <string-name><surname>Mozdzen</surname>  <given-names>T. J.</given-names></string-name>, <string-name><surname>Mahesh</surname>  <given-names>N.</given-names></string-name></person-group>, <year>2018</year>, <source>Nature</source>, <volume>555</volume>, <fpage>67</fpage>  <pub-id pub-id-type="doi">10.1038/nature25792</pub-id></mixed-citation>
      </ref>
      <ref id="bib6">
        <mixed-citation publication-type="book">
          <person-group person-group-type="author">
            <string-name>
              <surname>Braun</surname>  <given-names>R.</given-names></string-name>, <string-name><surname>Bourke</surname>  <given-names>T.</given-names></string-name>, <string-name><surname>Green</surname>  <given-names>J. A.</given-names></string-name>, <string-name><surname>Keane</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Wagg</surname>  <given-names>J.</given-names></string-name></person-group>, <year>2015</year>, <source>Proc. Sci. Advancing Astrophysics with the Square Kilometre Array</source>, Vol. <volume>215</volume>. <publisher-name>SISSA</publisher-name>, <publisher-loc>Trieste</publisher-loc>, <fpage>PoS#174</fpage>  <pub-id pub-id-type="doi">10.22323/1.215.0174</pub-id></mixed-citation>
      </ref>
      <ref id="bib7">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Cendes</surname>  <given-names>Y.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2018</year>, <source>Astron. Comput.</source>, <volume>23</volume>, <fpage>103</fpage>  <pub-id pub-id-type="doi">10.1016/j.ascom.2018.04.001</pub-id></mixed-citation>
      </ref>
      <ref id="bib8">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Czech</surname>  <given-names>D.</given-names></string-name>, <string-name><surname>Mishra</surname>  <given-names>A.</given-names></string-name>, <string-name><surname>Inggs</surname>  <given-names>M.</given-names></string-name></person-group>, <year>2018a</year>, <source>Astron. Comput.</source>, <volume>25</volume>, <fpage>52</fpage>  <pub-id pub-id-type="doi">10.1016/j.ascom.2018.07.002</pub-id></mixed-citation>
      </ref>
      <ref id="bib9">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Czech</surname>  <given-names>D.</given-names></string-name>, <string-name><surname>Mishra</surname>  <given-names>A.</given-names></string-name>, <string-name><surname>Inggs</surname>  <given-names>M.</given-names></string-name></person-group>, <year>2018b</year>, <source>Radio Sci.</source>, <volume>53</volume>, <fpage>656</fpage>  <pub-id pub-id-type="doi">10.1029/2018RS006538</pub-id></mixed-citation>
      </ref>
      <ref id="bib10">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>de Lera Acedo</surname>  <given-names>E.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2022</year>, <source>Nature Astron.</source>, <volume>6</volume>, <fpage>984</fpage>  <pub-id pub-id-type="doi">10.1038/s41550-022-01709-9</pub-id></mixed-citation>
      </ref>
      <ref id="bib11">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Finlay</surname>  <given-names>C.</given-names></string-name>, <string-name><surname>Bassett</surname>  <given-names>B. A.</given-names></string-name>, <string-name><surname>Kunz</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Oozeer</surname>  <given-names>N.</given-names></string-name></person-group>, <year>2023</year>, <source>MNRAS</source>, <volume>524</volume>, <fpage>3231</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stad1979</pub-id></mixed-citation>
      </ref>
      <ref id="bib12">
        <mixed-citation publication-type="book">
          <person-group person-group-type="author">
            <string-name>
              <surname>Ford</surname>  <given-names>J. M.</given-names></string-name>, <string-name><surname>Buch</surname>  <given-names>K. D.</given-names></string-name></person-group>, <year>2014</year>, <source>Proc. 2014 IEEE Geosci. Remote Sens. Symp., RFI Mitigation Techniques in Radio Astronomy</source>. <publisher-name>IEEE</publisher-name>, <publisher-loc>Quebec City</publisher-loc>, p. <fpage>231</fpage>  <pub-id pub-id-type="doi">10.1109/IGARSS.2014.6946399</pub-id></mixed-citation>
      </ref>
      <ref id="bib13">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Fridman</surname>  <given-names>P.</given-names></string-name>, <string-name><surname>Baan</surname>  <given-names>W.</given-names></string-name></person-group>, <year>2001</year>, <source>A&amp;A</source>, <volume>378</volume>, <fpage>327</fpage>  <pub-id pub-id-type="doi">10.1051/0004-6361:20011166</pub-id></mixed-citation>
      </ref>
      <ref id="bib14">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Handley</surname>  <given-names>W. J.</given-names></string-name>, <string-name><surname>Hobson</surname>  <given-names>M. P.</given-names></string-name>, <string-name><surname>Lasenby</surname>  <given-names>A. N.</given-names></string-name></person-group>, <year>2015a</year>, <source>MNRAS</source>, <volume>450</volume>, <fpage>L61</fpage>  <pub-id pub-id-type="doi">10.1093/mnrasl/slv047</pub-id></mixed-citation>
      </ref>
      <ref id="bib15">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Handley</surname>  <given-names>W. J.</given-names></string-name>, <string-name><surname>Hobson</surname>  <given-names>M. P.</given-names></string-name>, <string-name><surname>Lasenby</surname>  <given-names>A. N.</given-names></string-name></person-group>, <year>2015b</year>, <source>MNRAS</source>, <volume>453</volume>, <fpage>4384</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stv1911</pub-id></mixed-citation>
      </ref>
      <ref id="bib16">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Kerrigan</surname>  <given-names>J.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2019</year>, <source>MNRAS</source>, <volume>488</volume>, <fpage>2605</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stz1865</pub-id></mixed-citation>
      </ref>
      <ref id="bib17">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Leeney</surname>  <given-names>S. A. K.</given-names></string-name>, <string-name><surname>Handley</surname>  <given-names>W. J.</given-names></string-name>, <string-name><surname>de Lera Acedo</surname>  <given-names>E.</given-names></string-name></person-group>, <year>2023</year>, <source>Phys. Rev. D</source>, <volume>108</volume>, <fpage>062006</fpage>  <pub-id pub-id-type="doi">10.1103/PhysRevD.108.062006</pub-id></mixed-citation>
      </ref>
      <ref id="bib18">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>McKinnon</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Beasley</surname>  <given-names>A.</given-names></string-name>, <string-name><surname>Murphy</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Selina</surname>  <given-names>R.</given-names></string-name>, <string-name><surname>Farnsworth</surname>  <given-names>R.</given-names></string-name>, <string-name><surname>Walter</surname>  <given-names>A.</given-names></string-name></person-group>, <year>2019</year>, <source>BAAS</source>, <volume>51</volume>, <fpage>81</fpage></mixed-citation>
      </ref>
      <ref id="bib19">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Mesarcik</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Boonstra</surname>  <given-names>A.-J.</given-names></string-name>, <string-name><surname>Ranguelova</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>van Nieuwpoort</surname>  <given-names>R. V.</given-names></string-name></person-group>, <year>2022</year>, <source>MNRAS</source>, <volume>516</volume>, <fpage>5367</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stac2503</pub-id></mixed-citation>
      </ref>
      <ref id="bib20">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Monsalve</surname>  <given-names>R. A.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2024</year>, <source>MNRAS</source>, <volume>530</volume>, <fpage>4125</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stae1138</pub-id></mixed-citation>
      </ref>
      <ref id="bib21">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Nan</surname>  <given-names>R.</given-names></string-name>
          </person-group>, <year>2006</year>, <source>Sci. China G</source>, <volume>49</volume>, <fpage>129</fpage>  <pub-id pub-id-type="doi">10.1007/s11433-006-0129-9</pub-id></mixed-citation>
      </ref>
      <ref id="bib22">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Nita</surname>  <given-names>G. M.</given-names></string-name>, <string-name><surname>Keimpema</surname>  <given-names>A.</given-names></string-name>, <string-name><surname>Paragi</surname>  <given-names>Z.</given-names></string-name></person-group>, <year>2019</year>, <source>J. Astron. Instrum.</source>, <volume>8</volume>, <fpage>1940008</fpage>  <pub-id pub-id-type="doi">10.1142/S2251171719400087</pub-id></mixed-citation>
      </ref>
      <ref id="bib23">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Offringa</surname>  <given-names>A. R.</given-names></string-name>, <string-name><surname>de Bruyn</surname>  <given-names>A. G.</given-names></string-name>, <string-name><surname>Biehl</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Zaroubi</surname>  <given-names>S.</given-names></string-name>, <string-name><surname>Bernardi</surname>  <given-names>G.</given-names></string-name>, <string-name><surname>Pandey</surname>  <given-names>V. N.</given-names></string-name></person-group>, <year>2010</year>, <source>MNRAS</source>, <volume>405</volume>, <fpage>155</fpage>  <pub-id pub-id-type="doi">10.1111/j.1365-2966.2010.16471.x</pub-id></mixed-citation>
      </ref>
      <ref id="bib24">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Payne</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Talbot</surname>  <given-names>C.</given-names></string-name>, <string-name><surname>Thrane</surname>  <given-names>E.</given-names></string-name></person-group>, <year>2019</year>, <source>Phys. Rev. D</source>, <volume>100</volume>, <fpage>123017</fpage>  <pub-id pub-id-type="doi">10.1103/PhysRevD.100.123017</pub-id></mixed-citation>
      </ref>
      <ref id="bib25">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Pritchard</surname>  <given-names>N. J.</given-names></string-name>, <string-name><surname>Wicenec</surname>  <given-names>A.</given-names></string-name>, <string-name><surname>Bennamoun</surname>  <given-names>M.</given-names></string-name>, <string-name><surname>Dodson</surname>  <given-names>R.</given-names></string-name></person-group>, <year>2024</year>, <source>Publ. Astron. Soc. Aust.</source>, <volume>41</volume>, <fpage>e028</fpage>  <pub-id pub-id-type="doi">10.1017/pasa.2024.27</pub-id></mixed-citation>
      </ref>
      <ref id="bib26">
        <mixed-citation publication-type="other">
          <person-group person-group-type="author">
            <string-name>
              <surname>Razavi-Ghods</surname>  <given-names>N.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2023</year>, <comment>preprint</comment> (<pub-id pub-id-type="arxiv">arXiv:2307.00099</pub-id>)</mixed-citation>
      </ref>
      <ref id="bib27">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Romero-Shaw</surname>  <given-names>I. M.</given-names></string-name>, <string-name><surname>Lasky</surname>  <given-names>P. D.</given-names></string-name>, <string-name><surname>Thrane</surname>  <given-names>E.</given-names></string-name></person-group>, <year>2019</year>, <source>MNRAS</source>, <volume>490</volume>, <fpage>5210</fpage>  <pub-id pub-id-type="doi">10.1093/mnras/stz2996</pub-id></mixed-citation>
      </ref>
      <ref id="bib28">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Röttgering</surname>  <given-names>H.</given-names></string-name>
          </person-group>, <year>2003</year>, <source>New Astron. Rev.</source>, <volume>47</volume>, <fpage>405</fpage>  <pub-id pub-id-type="doi">10.1016/S1387-6473(03)00057-5</pub-id></mixed-citation>
      </ref>
      <ref id="bib29">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Scaife</surname>  <given-names>A.</given-names></string-name>
          </person-group>, <year>2020</year>, <source>Phil. Trans. R. Soc. A</source>, <volume>378</volume>, <fpage>20190060</fpage>  <pub-id pub-id-type="doi">10.1098/rsta.2019.0060</pub-id></mixed-citation>
      </ref>
      <ref id="bib30">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Shaver</surname>  <given-names>P. A.</given-names></string-name>, <string-name><surname>Windhorst</surname>  <given-names>R. A.</given-names></string-name>, <string-name><surname>Madau</surname>  <given-names>P.</given-names></string-name>, <string-name><surname>de Bruyn</surname>  <given-names>A. G.</given-names></string-name></person-group>, <year>1999</year>, <source>A&amp;A</source>, <volume>345</volume>, <fpage>380</fpage></mixed-citation>
      </ref>
      <ref id="bib31">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Singh</surname>  <given-names>S.</given-names></string-name>  <etal>et al.</etal></person-group>, <year>2022</year>, <source>Nature Astron.</source>, <volume>6</volume>, <fpage>607</fpage>  <pub-id pub-id-type="doi">10.1038/s41550-022-01610-5</pub-id></mixed-citation>
      </ref>
      <ref id="bib32">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Smith</surname>  <given-names>E.</given-names></string-name>, <string-name><surname>Lynch</surname>  <given-names>R. S.</given-names></string-name>, <string-name><surname>Pisano</surname>  <given-names>D.</given-names></string-name></person-group>, <year>2022</year>, <source>AJ</source>, <volume>164</volume>, <fpage>123</fpage>  <pub-id pub-id-type="doi">10.3847/1538-3881/ac7e47</pub-id></mixed-citation>
      </ref>
      <ref id="bib33">
        <mixed-citation publication-type="journal">
          <person-group person-group-type="author">
            <string-name>
              <surname>Wang</surname>  <given-names>Y.</given-names></string-name>, <string-name><surname>Zhang</surname>  <given-names>Z.</given-names></string-name>, <string-name><surname>Zhang</surname>  <given-names>H.</given-names></string-name>, <string-name><surname>Zhu</surname>  <given-names>W.</given-names></string-name>, <string-name><surname>Li</surname>  <given-names>D.</given-names></string-name>, <string-name><surname>Wang</surname>  <given-names>P.</given-names></string-name></person-group>, <year>2022</year>, <source>Astron. Comput.</source>, <volume>39</volume>, <fpage>100568</fpage>  <pub-id pub-id-type="doi">10.1016/j.ascom.2022.100568</pub-id></mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>