% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions-binning.R
\name{binYonX}
\alias{binYonX}
\title{Aggregate values in y for bins defined on x}
\usage{
binYonX(
  x,
  y,
  breaks,
  nBins,
  binSize,
  binFromX,
  binToX,
  fromIdx = 1L,
  toIdx = length(x),
  method = "max",
  baseValue,
  sortedX = !is.unsorted(x),
  shiftByHalfBinSize = FALSE,
  returnIndex = FALSE,
  returnX = TRUE
)
}
\arguments{
\item{x}{Numeric vector to be used for binning.}

\item{y}{Numeric vector (same length as \code{x}) whose values should be
aggregated within each bin (by default the maximum value per bin is
reported, see \code{method}). If not provided, \code{x} will be used.}

\item{breaks}{Numeric vector defining the breaks for the bins, i.e. the
lower and upper values for each bin. See examples below.}

\item{nBins}{\code{integer(1)} defining the number of desired bins.}

\item{binSize}{\code{numeric(1)} defining the desired bin size.}

\item{binFromX}{Optional \code{numeric(1)} to manually specify the lower
boundary of the range of x-values to be used for binning.
This will affect only the calculation of the breaks for the bins
(i.e. if \code{nBins} or \code{binSize} is provided).
If not provided the minimal value in the sub-set
\code{fromIdx}-\code{toIdx} in input vector \code{x} will be used.}

\item{binToX}{Same as \code{binFromX}, but defining the maximum x-value to be
used for binning.}

\item{fromIdx}{Integer vector defining the start position of one or multiple
sub-sets of input vector \code{x} that should be used for binning.}

\item{toIdx}{Same as \code{fromIdx}, but defining the maximum index (or indices)
in \code{x} to be used for binning.}

\item{method}{A character string specifying the method that should be used to
aggregate values in \code{y}. Allowed are \code{"max"}, \code{"min"},
\code{"sum"} and \code{"mean"} to identify the maximal or minimal value
or to sum all values within a bin or calculate their mean value.}

\item{baseValue}{The base value for empty bins (i.e. bins into which either
no values in \code{x} did fall, or to which only \code{NA} values in
\code{y} were assigned). By default (i.e. if not specified), \code{NA}
is assigned to such bins.}

\item{sortedX}{Whether \code{x} is sorted.}

\item{shiftByHalfBinSize}{Logical specifying whether the bins should be
shifted by half the bin size to the left. Thus, the first bin will have
its center at \code{fromX} and its lower and upper boundary are
\code{fromX - binSize/2} and \code{fromX + binSize/2}. This argument is
ignored if \code{breaks} are provided.}

\item{returnIndex}{Logical indicating whether the index of the max (if
\code{method = "max"}) or min (if \code{method = "min"}) value within
each bin in input vector \code{x} should also be reported. For methods
other than \code{"max"} or \code{"min"} this argument is ignored.}

\item{returnX}{\code{logical} allowing to avoid returning \verb{$x}, i.e. the
mid-points of the bins. \code{returnX = FALSE} might be useful in cases
where \code{breaks} are pre-defined as it considerably reduces the memory
demand.}
}
\value{
Returns a list of length 2, the first element (named \code{"x"})
contains the bin mid-points, the second element (named \code{"y"}) the
aggregated values from input vector \code{y} within each bin. For
\code{returnIndex = TRUE} the list contains an additional element
\code{"index"} with the index of the max or min (depending on whether
\code{method = "max"} or \code{method = "min"}) value within each bin in
input vector \code{x}.
}
\description{
This function takes two same-sized numeric vectors \code{x}
and \code{y}, bins/cuts \code{x} into bins (either a pre-defined number
of equal-sized bins or bins of a pre-defined size) and aggregates values
in \code{y} corresponding to \code{x} values falling within each bin. By
default (i.e. \code{method = "max"}) the maximal \code{y} value for the
corresponding \code{x} values is identified. \code{x} is expected to be
sorted in increasing order and, if not, it will be internally sorted (in
which case also \code{y} will be ordered according to the order of \code{x}).
}
\details{
The breaks defining the boundary of each bin can be either passed
directly to the function with the argument \code{breaks}, or are
calculated on the data based on arguments \code{nBins} or \code{binSize}
along with \code{fromIdx}, \code{toIdx} and optionally \code{binFromX}
and \code{binToX}.
Arguments \code{fromIdx} and \code{toIdx} allow specifying subset(s) of
the input vector \code{x} on which bins should be calculated. By
default the full \code{x} vector is considered. Also, if not specified
otherwise with arguments \code{binFromX} and \code{binToX}, the range
of the bins within each of the sub-sets will be from \code{x[fromIdx]}
to \code{x[toIdx]}. Arguments \code{binFromX} and \code{binToX} allow
overriding this by manually defining a range on which the breaks
should be calculated. See examples below for more details.

\if{html}{\out{<div class="sourceCode">}}\preformatted{Calculation of breaks: for `nBins` the breaks correspond to
`seq(min(x[fromIdx]), max(x[toIdx]), length.out = (nBins + 1))`.
For `binSize` the breaks correspond to
`seq(min(x[fromIdx]), max(x[toIdx]), by = binSize)` with the
exception that the last break value is forced to be equal to
`max(x[toIdx])`. This ensures that all values from the specified
range are covered by the breaks defining the bins. The last bin could
however in some instances be slightly larger than `binSize`. See
[breaks_on_binSize()] and [breaks_on_nBins()] for
more details.
}\if{html}{\out{</div>}}
}
\note{
The function ensures that all values within the range used to define
the breaks are considered in the binning (and assigned to a bin). This
means that for all bins except the last one values in \code{x} have to be
\verb{>= xlower} and \verb{< xupper} (with \code{xlower}
and \code{xupper} being the lower and upper boundary, respectively). For
the last bin the condition is \code{x >= xlower & x <= xupper}.
Note also that if \code{shiftByHalfBinSize} is \code{TRUE} the range of
values that is used for binning is expanded by \code{binSize} (i.e. the
lower boundary will be \code{fromX - binSize/2}, the upper
\code{toX + binSize/2}). Setting this argument to \code{TRUE} resembles
the binning that is/was used in the \code{profBin} function from
\emph{xcms} < 1.51.

\if{html}{\out{<div class="sourceCode">}}\preformatted{`NA` handling: by default the function ignores `NA` values in
`y` (thus inherently assumes `na.rm = TRUE`). No `NA`
values are allowed in `x`.
}\if{html}{\out{</div>}}
}
\examples{
########
## Simple example illustrating the breaks and the binning.
##
## Define breaks for 5 bins:
brks <- seq(2, 12, length.out = 6)
## The first bin is then [2,4), the second [4,6) and so on.
brks
## Get the max value falling within each bin.
binYonX(x = 1:16, y = 1:16, breaks = brks)
## Thus, the largest value in x = 1:16 falling into the bin [2,4) (i.e. being
## >= 2 and < 4) is 3, the largest one falling into [4,6) is 5 and so on.
## Note however the function ensures that the minimal and maximal x-value
## within the range of the breaks (in this example 2 and 12) fall within a
## bin, i.e. 12 is considered for the last bin.

#######
## Performing the binning on a sub-set of x
##
X <- 1:16
## Bin X from element 4 to 10 into 5 bins.
X[4:10]
binYonX(X, X, nBins = 5L, fromIdx = 4, toIdx = 10)
## This defines breaks for 5 bins on the values from 4 to 10 and bins
## the values into these 5 bins. Alternatively, we could manually specify
## the range for the binning, i.e. the minimal and maximal value for the
## breaks:
binYonX(X, X, nBins = 5L, fromIdx = 4, toIdx = 10, binFromX = 1, binToX = 16)
## In this case the breaks for 5 bins were defined from a value 1 to 16 and
## the values 4 to 10 were binned based on these breaks.

#######
## Bin values within a sub-set of x, second example
##
## This example illustrates how the fromIdx and toIdx parameters can be used.
## x defines 3 times the sequence from 1 to 10, while y is the sequence from
## 1 to 30. In this very simple example x is supposed to represent M/Z values
## from 3 consecutive scans and y the intensities measured for each M/Z in
## each scan. We want to get the maximum intensities for M/Z value bins only
## for the second scan, and thus we use fromIdx = 11 and toIdx = 20. The breaks
## for the bins are defined with the nBins, binFromX and binToX.
X <- rep(1:10, 3)
Y <- 1:30
## Bin the M/Z values in the second scan into 5 bins and get the maximum
## intensity for each bin. Note that we have to specify sortedX = TRUE as
## the x and y vectors would be sorted otherwise.
binYonX(X, Y, nBins = 5L, sortedX = TRUE, fromIdx = 11, toIdx = 20)

#######
## Bin in overlapping sub-sets of X
##
## In this example we define overlapping sub-sets of X and perform the binning
## within these.
X <- 1:30
## Define the start and end indices of the sub-sets.
fIdx <- c(2, 8, 21)
tIdx <- c(10, 25, 30)
binYonX(X, nBins = 5L, fromIdx = fIdx, toIdx = tIdx)
## The same, but pre-defining also the desired range of the bins.
binYonX(X, nBins = 5L, fromIdx = fIdx, toIdx = tIdx, binFromX = 4, binToX = 28)
## The same bins are thus used for each sub-set.
}
\seealso{
\code{\link[=imputeLinInterpol]{imputeLinInterpol()}}
}
\author{
Johannes Rainer
}
