% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_recode.R
\name{check_recode}
\alias{check_recode}
\title{Check accurate recoding of variables}
\usage{
check_recode(.data, dependent = NULL, explanatory = NULL,
  include_numerics = TRUE)
}
\arguments{
\item{.data}{Data frame or tibble.}

\item{dependent}{Optional character vector: name(s) of dependent
variable(s).}

\item{explanatory}{Optional character vector: name(s) of explanatory
variable(s).}

\item{include_numerics}{Logical. Should numeric variables be included in the
check? Defaults to \code{TRUE}.}
}
\value{
List of length two. The first is an index of variable combinations.
  The second is a nested list of crosstables as tibbles.
}
\description{
This function was written a few days after the retraction of a paper in JAMA
due to an error in recoding the treatment variable
(\url{https://jamanetwork.com/journals/jama/fullarticle/2752474}). It
takes a data frame or tibble, fuzzy matches variable names, and produces
crosstables of all matched variables. A visual inspection should reveal any
miscoding.
}
\examples{
# dplyr supplies select() and mutate(), which are used below.
library(dplyr)
# Load the colon_s example dataset.
data(colon_s)
# Build a reduced copy of colon_s: drop id, rx and rx.factor, then add two
# derived factors so that there are recoded variables to compare against
# their originals.
colon_s_small = colon_s \%>\%
  select(-id, -rx, -rx.factor) \%>\%
  mutate(
    # Merge the <40 years and 40-59 years levels into a single <60 years level.
    age.factor2 = forcats::fct_collapse(age.factor,
      "<60 years" = c("<40 years", "40-59 years")),
    sex.factor2 = forcats::fct_recode(sex.factor,
      # Intentional miscode: Male is relabelled F and Female is relabelled M,
      # so the crosstable of sex.factor against sex.factor2 should expose it.
      "F" = "Male",
      "M" = "Female")
  )

# Check all matched variables, leaving numeric variables out
# (include_numerics = FALSE). The result is printed directly.
colon_s_small \%>\%
  check_recode(include_numerics = FALSE)

# Drop four more columns, run the check with default arguments
# (numeric variables included), and keep the result for inspection.
out = colon_s_small \%>\%
  select(-extent, -extent.factor,-time, -time.years) \%>\%
  check_recode()
out

# Select a single tibble from out$counts (here the ninth) and print every
# row of it (n = Inf) rather than the truncated default.
out$counts[[9]] \%>\%
  print(n = Inf)
# Note this variable (node4) appears miscoded in original dataset survival::colon.

# Choose to only include variables that you actually use.
# This uses standard finalfit grammar: a dependent variable name and a
# character vector of explanatory variable names, passed by position.
dependent = "mort_5yr"
explanatory = c("age.factor2", "sex.factor2")
colon_s_small \%>\% 
  check_recode(dependent, explanatory)
}
