% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ensemble_fs.R
\name{ensemble_fs}
\alias{ensemble_fs}
\title{Ensemble Feature Selection}
\usage{
ensemble_fs(data, classnumber, NA_threshold = 0.2, cor_threshold = 0.7,
  runs = 100, selection = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
  FALSE))
}
\arguments{
\item{data}{the name of the dataset, which should already be
loaded in the environment.}

\item{classnumber}{number of the column in the dataset that holds the nominal,
dichotomous classification variable, i.e., the dependent variable for classification.}

\item{NA_threshold}{(optional) decimal number in range of [0,1]. Threshold for deletion
of features with a greater proportion of NAs than \code{NA_threshold}.}

\item{cor_threshold}{(optional) used only for Spearman and Pearson
correlation. The correlation within features is tested.
If the correlation of 2 features is greater than
\code{cor_threshold}, the dependent feature is deleted.}

\item{runs}{(optional) number of runs for randomForest and cforest to gain higher robustness.}

\item{selection}{(optional) vector of length eight with TRUE or FALSE values. Selection of feature selection methods to be conducted.}
}
\value{
table of normalized importance values of class matrix
 (used methods as rows and features of the imported file as columns).
}
\description{
Uses an ensemble of feature selection methods
 to create a normalized quantitative
 ranking of all relevant features. Irrelevant features
 (e.g. too many NA or variance = 1) will be deleted. See
 Details for a list of tests used in this function.
}
\details{
The following methods are provided in \code{ensemble_fs}:
  \itemize{
 \item Median: p-values from Wilcoxon signed-rank
   test (\link{wilcox.test})
 \item Spearman: Spearman's rank correlation test
   according to Yu et al. (2004) (\link{cor})
 \item Pearson: Pearson's product moment correlation
   test according to Yu et al. (2004) (\link{cor})
 \item LogReg: beta-Values of logistic regression
   (\link{glm})
 \item Accuracy/Error-rate randomForest: Error-rate-based
   variable importance measure embedded in randomForest
   according to Breiman (2001) (\link{randomForest})
 \item Gini randomForest: Gini-index-based variable
   importance measure embedded in randomForest according
   to Breiman (2001) (\link{randomForest})
 \item Error-rate cforest: Error-rate-based variable
   importance measure embedded in cforest according to
   Strobl et al. (2009) (\link{cforest})
 \item AUC cforest: AUC-based variable importance measure
   embedded in cforest according to Janitza et al. (2013)
   (\link{cforest})}
   By the argument \code{selection} the user decides which feature selection methods are used in \code{ensemble_fs}. 
   Default value is \code{selection = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE)},
   i.e., the function does not use either of the cforest variable importance measures.
   The maximum score for features depends on the input of \code{selection}.
   The scores are always divided by the number of selected feature selection methods, i.e., the number of TRUEs.
}
\examples{
 ##loading dataset in Environment
 data(efsdata)
 ##Generate a ranking based on importance (with default NA_threshold = 0.2,
 ##cor_threshold = 0.7, selection = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE))
 efs<-ensemble_fs(efsdata,5,runs=2)
}
\author{
Ursula Neumann
}
\references{
\itemize{
 \item Yu, L. and Liu, H.: Efficient feature selection via
   analysis of relevance and redundancy. J. Mach. Learn.
   Res. 2004, 5:1205-1224. \cr
\item Breiman, L.: Random Forests, Machine Learning.
   2001, 45(1): 5-32. \cr
\item Strobl, C., Malley, J. and Tutz, G.: An
   Introduction to Recursive Partitioning: Rationale,
   Application, and Characteristics of Classification and
   Regression Trees, Bagging, and Random forests.
   Psychological Methods. 2009, 14(4), 323-348. \cr
 \item	Janitza, S., Strobl, C. and Boulesteix AL.: An
   AUC-based Permutation Variable Importance Measure for
   Random Forests. BMC Bioinformatics. 2013, 14, 119. \cr
}
}
\seealso{
\link{wilcox.test},
 \link[randomForest]{randomForest},
 \link[party]{cforest},
 \link[stats]{cor},
 \link[stats]{glm}
}

