% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/XcmsExperiment.R,
%   R/functions-Params.R, R/methods-XCMSnExp.R
\name{fillChromPeaks}
\alias{fillChromPeaks}
\alias{fillChromPeaks,XcmsExperiment,ChromPeakAreaParam-method}
\alias{FillChromPeaksParam}
\alias{ChromPeakAreaParam}
\alias{fillChromPeaks,XCMSnExp,FillChromPeaksParam-method}
\alias{fillChromPeaks,XCMSnExp,ChromPeakAreaParam-method}
\alias{fillChromPeaks,XCMSnExp,missing-method}
\title{Gap Filling}
\usage{
fillChromPeaks(object, param, ...)

\S4method{fillChromPeaks}{XcmsExperiment,ChromPeakAreaParam}(
  object,
  param,
  msLevel = 1L,
  chunkSize = 2L,
  BPPARAM = bpparam()
)

FillChromPeaksParam(
  expandMz = 0,
  expandRt = 0,
  ppm = 0,
  fixedMz = 0,
  fixedRt = 0
)

ChromPeakAreaParam(
  mzmin = function(z, na.rm = TRUE) quantile(z, probs = 0.25, names = FALSE, na.rm =
    na.rm),
  mzmax = function(z, na.rm = TRUE) quantile(z, probs = 0.75, names = FALSE, na.rm =
    na.rm),
  rtmin = function(z, na.rm = TRUE) quantile(z, probs = 0.25, names = FALSE, na.rm =
    na.rm),
  rtmax = function(z, na.rm = TRUE) quantile(z, probs = 0.75, names = FALSE, na.rm =
    na.rm),
  minMzWidthPpm = 0
)

\S4method{fillChromPeaks}{XCMSnExp,FillChromPeaksParam}(object, param, msLevel = 1L, BPPARAM = bpparam())

\S4method{fillChromPeaks}{XCMSnExp,ChromPeakAreaParam}(object, param, msLevel = 1L, BPPARAM = bpparam())

\S4method{fillChromPeaks}{XCMSnExp,missing}(object, param, BPPARAM = bpparam(), msLevel = 1L)
}
\arguments{
\item{object}{\code{XcmsExperiment} or \code{XCMSnExp} object with identified and
grouped chromatographic peaks.}

\item{param}{\code{ChromPeakAreaParam} or \code{FillChromPeaksParam} object
defining which approach should be used (see details section).}

\item{...}{currently ignored.}

\item{msLevel}{\code{integer(1)} defining the MS level on which peak filling
should be performed (defaults to \code{msLevel = 1L}). Only peak filling
on one MS level at a time is supported; to fill in peaks for MS
levels 1 and 2, first run with \code{msLevel = 1} and then (on the returned
result object) again with \code{msLevel = 2}.}

\item{chunkSize}{For \code{fillChromPeaks} if \code{object} is an \code{XcmsExperiment}:
\code{integer(1)} defining the number of files (samples)
that should be loaded into memory and processed at the same time.
This setting thus allows balancing memory
demand against speed (due to parallel processing). Because parallel
processing can only be performed on the subset of data currently loaded
into memory in each iteration, the value for \code{chunkSize} should match
the defined parallel processing setup. Using a parallel processing setup
with 4 CPUs (separate processes) but \code{chunkSize = 1} will not perform
any parallel processing, as only the data from one sample is loaded in
memory at a time. On the other hand, setting \code{chunkSize} to
the total number of samples in an experiment will load the full MS data
into memory and will thus in most settings cause an out-of-memory error.}

\item{BPPARAM}{Parallel processing settings.}

\item{expandMz}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining the
value by which the mz width of peaks should be expanded. Each peak is
expanded in mz direction by \verb{expandMz *} their original m/z width.
A value of \code{0} means no expansion, a value of \code{1} grows each peak
by \verb{1 *} the m/z width of the peak resulting in peaks with twice
their original size in m/z direction (expansion by half m/z width
to both sides).}

\item{expandRt}{for \code{FillChromPeaksParam}: \code{numeric(1)}, same as
\code{expandMz} but for the retention time width.}

\item{ppm}{for \code{FillChromPeaksParam}: \code{numeric(1)} optionally specifying
a \emph{ppm} by which the m/z width of the peak region should be expanded.
For peaks with an m/z width smaller than
\code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the \code{mzmin} will be replaced by
\code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} and
\code{mzmax} by
\code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}.
This is applied before any expansion of the m/z width using the
\code{expandMz} parameter.}

\item{fixedMz}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining a constant
factor by which the m/z width of each feature is to be expanded.
The m/z width is expanded on both sides by \code{fixedMz} (i.e. \code{fixedMz}
is subtracted from the lower m/z and added to the upper m/z). This
expansion is applied \emph{after} \code{expandMz} and \code{ppm}.}

\item{fixedRt}{for \code{FillChromPeaksParam}: \code{numeric(1)} defining a constant
factor by which the retention time width of each feature is to be
expanded. The rt width is expanded on both sides by \code{fixedRt} (i.e.
\code{fixedRt} is subtracted from the lower rt and added to the upper rt).
This expansion is applied \emph{after} \code{expandRt}.}

\item{mzmin}{\code{function} to be applied to values in the \code{"mzmin"} column
of all chromatographic peaks of a feature to define the lower m/z
value of the area from which signal for the feature should be
integrated. Defaults to \code{mzmin = function(z) quantile(z, probs = 0.25)}
hence using the 25\% quantile of all values.}

\item{mzmax}{\code{function} to be applied to values in the \code{"mzmax"} column
of all chromatographic peaks of a feature to define the upper m/z
value of the area from which signal for the feature should be
integrated. Defaults to  \code{mzmax = function(z) quantile(z, probs = 0.75)}
hence using the 75\% quantile of all values.}

\item{rtmin}{\code{function} to be applied to values in the \code{"rtmin"} column
of all chromatographic peaks of a feature to define the lower rt
value of the area from which signal for the feature should be
integrated. Defaults to \code{rtmin = function(z) quantile(z, probs = 0.25)}
hence using the 25\% quantile of all values.}

\item{rtmax}{\code{function} to be applied to values in the \code{"rtmax"} column
of all chromatographic peaks of a feature to define the upper rt
value of the area from which signal for the feature should be
integrated. Defaults to \code{rtmax = function(z) quantile(z, probs = 0.75)}
hence using the 75\% quantile of all values.}

\item{minMzWidthPpm}{For \code{ChromPeakAreaParam()}: \code{numeric(1)} defining the
minimal guaranteed m/z width (expressed in ppm of the feature's m/z)
that will be used to integrate signal from (default
\code{minMzWidthPpm = 0.0}). See documentation of \code{ChromPeakAreaParam()}
for more information.}
}
\value{
An \link{XcmsExperiment} or  \code{XCMSnExp} object with previously missing
chromatographic peaks for features filled into its \code{\link[=chromPeaks]{chromPeaks()}} matrix.

The \code{FillChromPeaksParam()} function returns a
\code{FillChromPeaksParam} object.
}
\description{
Gap filling integrates signal in the m/z-rt area of a feature (i.e., a
chromatographic peak group) for samples in which no chromatographic
peak for this feature was identified and adds it to the \code{\link[=chromPeaks]{chromPeaks()}}
matrix. Such \emph{filled-in} peaks are indicated with a \code{TRUE} in column
\code{"is_filled"} in the result object's \code{\link[=chromPeakData]{chromPeakData()}} data frame.

The method for gap filling along with its settings can be defined with
the \code{param} argument. Two different approaches are available:
\itemize{
\item \code{param = FillChromPeaksParam()}: the default of the original \code{xcms}
code. Signal is integrated from the m/z and retention time range as
defined in the \code{\link[=featureDefinitions]{featureDefinitions()}} data frame, i.e. from the
\code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. This method is not
suggested as it underestimates the actual peak area and it is also
not available for \code{object} being an \link{XcmsExperiment} object. See
details below for more information and settings for this method.
\item \code{param = ChromPeakAreaParam()}: the area from which the signal for a
feature is integrated is defined based on the feature's chromatographic
peak areas. The m/z range is by default defined as the lower quartile
of chromatographic peaks' \code{"mzmin"} value to the upper quartile of the
chromatographic peaks' \code{"mzmax"} values.
The retention time range for the area is defined analogously.
Alternatively, by setting \code{mzmin = median},
\code{mzmax = median}, \code{rtmin = median} and \code{rtmax = median} in
\code{ChromPeakAreaParam}, the median \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and
\code{"rtmax"} values from all detected chromatographic peaks of a feature
would be used instead.
Parameter \code{minMzWidthPpm} additionally allows defining a minimal
guaranteed m/z width expressed in ppm of the feature's m/z and centered
around it. The default is \code{minMzWidthPpm = 0.0}. With a
\code{minMzWidthPpm} > 0, the lower m/z boundary for a feature is defined as
the smaller value from the m/z derived from its chromatographic peaks'
\code{"mzmin"}, and the feature's m/z \emph{minus} \code{minMzWidthPpm / 2} ppm of its
m/z. The upper m/z boundary is determined in the same way.
In contrast to the  \code{FillChromPeaksParam} approach this method uses (all)
identified chromatographic peaks of a feature to define the area
from which the signal should be integrated.
}
}
\details{
After correspondence (i.e. grouping of chromatographic peaks across
samples) there will always be features (peak groups) that do not include
peaks from every sample. The \code{fillChromPeaks} method defines
intensity values for such features in the missing samples by integrating
the signal in the m/z-rt region of the feature. Two different approaches
to define this region are available: with \code{ChromPeakAreaParam} the region
is defined based on the detected \strong{chromatographic peaks} of a feature,
while with \code{FillChromPeaksParam} the region is defined based on the m/z and
retention times of the \strong{feature} (which represent the m/z and retention
times of the apex position of the associated chromatographic peaks). For the
latter approach various parameters are available to increase the area from
which signal is to be integrated, either by a constant value (\code{fixedMz} and
\code{fixedRt}) or by a feature-relative amount (\code{expandMz} and \code{expandRt}).

Adjusted retention times will be used if available.

Based on the peak finding algorithm that was used to identify the
(chromatographic) peaks, different internal functions are used to
guarantee that the integrated peak signal matches as much as possible
the peak signal integration used during the peak detection. For peaks
identified with the \code{\link[=matchedFilter]{matchedFilter()}} method, signal
integration is performed on the \emph{profile matrix} generated with
the same settings used also during peak finding (using the same
\code{bin} size for example). For direct injection data and peaks
identified with the \code{MSW} algorithm, signal is integrated
only along the mz dimension. For all other methods the complete (raw)
signal within the area is used.
}
\note{
The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and
\code{"rtmax"} for the filled peaks represent the actual MS area from
which the signal was integrated.

No peak is filled in if no signal was present in a file/sample
in the respective mz-rt area. These samples will still show a \code{NA}
in the matrix returned by the \code{\link[=featureValues]{featureValues()}} method.
}
\examples{

## Load a test data set with identified chromatographic peaks
library(xcms)
library(MsExperiment)
res <- loadXcmsData("faahko_sub2")

## Disable parallel processing for this example
register(SerialParam())

## Perform the correspondence. We assign all samples to the same group.
res <- groupChromPeaks(res,
    param = PeakDensityParam(sampleGroups = rep(1, length(res))))

## For how many features do we lack an integrated peak signal?
sum(is.na(featureValues(res)))

## Filling missing peak data using the peak area from identified
## chromatographic peaks.
res <- fillChromPeaks(res, param = ChromPeakAreaParam())

## Alternatively, force a minimal guaranteed m/z width for the regions
## to integrate signal from.
res <- fillChromPeaks(res, param = ChromPeakAreaParam(minMzWidthPpm = 10))

## How many missing values do we have after peak filling?
sum(is.na(featureValues(res)))

## Get the peaks that have been filled in:
fp <- chromPeaks(res)[chromPeakData(res)$is_filled, ]
head(fp)

## Get the process history step along with the parameters used to perform
## the peak filling:
ph <- processHistory(res, type = "Missing peak filling")[[1]]
ph

## The parameter class:
ph@param

## It is also possible to remove filled-in peaks:
res <- dropFilledChromPeaks(res)

sum(is.na(featureValues(res)))
}
\seealso{
\code{\link[=groupChromPeaks]{groupChromPeaks()}} for methods to perform the correspondence.

\link{featureArea} for the function to define the m/z-retention time
region for each feature.
}
\author{
Johannes Rainer
}
