% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset.R
\name{dataset}
\alias{dataset}
\title{Build \link[SummarizedExperiment]{SummarizedExperiment} using local annotation and count matrix R objects}
\usage{
dataset(
  annotation,
  count_matrix = NULL,
  tpm_matrix = NULL,
  name = "SimBu_dataset",
  spike_in_col = NULL,
  additional_cols = NULL,
  filter_genes = TRUE,
  variance_cutoff = 0,
  type_abundance_cutoff = 0,
  scale_tpm = TRUE
)
}
\arguments{
\item{annotation}{(mandatory) dataframe; needs columns 'ID' and 'cell_type'; the values in 'ID' must match the cell names (column names) of count_matrix}

\item{count_matrix}{(mandatory) sparse count matrix; raw count data is expected with genes in rows, cells in columns}

\item{tpm_matrix}{sparse count matrix; TPM-like count data is expected with genes in rows, cells in columns}

\item{name}{name of the dataset; will be used for new unique IDs of cells}

\item{spike_in_col}{which column in annotation contains information on spike_in counts, which can be used to re-scale counts; mandatory for spike_in scaling factor in simulation}

\item{additional_cols}{list of column names in annotation that should also be stored in the dataset object}

\item{filter_genes}{boolean, if TRUE, removes all genes with 0 expression over all samples and all genes with variance below \code{variance_cutoff}}

\item{variance_cutoff}{numeric, is only applied if \code{filter_genes} is TRUE: removes all genes with variance below the chosen cutoff (default = 0)}

\item{type_abundance_cutoff}{numeric, removes all cells whose cell type appears fewer times than the given value. This removes low-abundance cell types}

\item{scale_tpm}{boolean, if TRUE (default) the cells in tpm_matrix will be scaled to sum up to 1e6}
}
\value{
Return a \link[SummarizedExperiment]{SummarizedExperiment} object
}
\description{
Build \link[SummarizedExperiment]{SummarizedExperiment} using local annotation and count matrix R objects
}
\examples{

# simulate raw counts and TPM-like values for 1000 genes (rows) x 300 cells (columns)
counts <- Matrix::Matrix(matrix(stats::rpois(3e5, 5), ncol = 300), sparse = TRUE)
tpm <- Matrix::Matrix(matrix(stats::rpois(3e5, 5), ncol = 300), sparse = TRUE)
# rescale every cell (column) of the TPM matrix so that it sums up to 1e6
tpm <- Matrix::t(1e6 * Matrix::t(tpm) / Matrix::colSums(tpm))

# both matrices share the same cell (column) and gene (row) names
cell_ids <- paste0("cell_", 1:300)
gene_ids <- paste0("gene_", 1:1000)
colnames(counts) <- cell_ids
colnames(tpm) <- cell_ids
rownames(counts) <- gene_ids
rownames(tpm) <- gene_ids

# one annotation row per cell; the ID column has to match the matrix column names
annotation <- data.frame(
  "ID" = cell_ids,
  "cell_type" = rep("T cells CD4", 300)
)

# build the dataset from the annotation and both matrices
ds <- SimBu::dataset(
  annotation = annotation,
  count_matrix = counts,
  tpm_matrix = tpm,
  name = "test_dataset"
)

}
