## ----echo = FALSE-------------------------------------------------------------
# Code chunks extracted from the auk vignette. Each `## ----label, opts----`
# header marks one chunk; chunks flagged `eval = FALSE` are kept as commented
# code because they are shown to the reader but not run.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  error = FALSE,
  message = FALSE
)
suppressPackageStartupMessages(library(auk))
suppressPackageStartupMessages(library(dplyr))

## ----quickstart, eval = FALSE-------------------------------------------------
# library(auk)
# # path to the ebird data file, here a sample included in the package
# # in practice, provide path to ebd, e.g. input_file <- "data/ebd_relFeb-2018.txt"
# input_file <- system.file("extdata/ebd-sample.txt", package = "auk")
# # output text file
# output_file <- "ebd_filtered_grja.txt"
# ebird_data <- input_file %>%
#   # 1. reference file
#   auk_ebd() %>%
#   # 2. define filters
#   auk_species(species = "Canada Jay") %>%
#   auk_country(country = "Canada") %>%
#   # 3. run filtering
#   auk_filter(file = output_file) %>%
#   # 4. read text file into R data frame
#   read_ebd()

## ----quickstart-nopipes, eval = FALSE-----------------------------------------
# input_file <- system.file("extdata/ebd-sample.txt", package = "auk")
# output_file <- "ebd_filtered_grja.txt"
# ebd <- auk_ebd(input_file)
# ebd_filters <- auk_species(ebd, species = "Canada Jay")
# ebd_filters <- auk_country(ebd_filters, country = "Canada")
# ebd_filtered <- auk_filter(ebd_filters, file = output_file)
# ebd_df <- read_ebd(ebd_filtered)

## ----example-data-1, eval = FALSE---------------------------------------------
# library(auk)
# library(dplyr)
# system.file("extdata/ebd-sample.txt", package = "auk")

## ----example-data-2, eval = FALSE---------------------------------------------
# # ebd
# system.file("extdata/zerofill-ex_ebd.txt", package = "auk")
# # sampling event data
# system.file("extdata/zerofill-ex_sampling.txt", package = "auk")

## ----auk-ebd------------------------------------------------------------------
ebd <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
  auk_ebd()
ebd

## ----auk-filter---------------------------------------------------------------
ebd_filters <- ebd %>%
  # species: common and scientific names can be mixed
  auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
  # country: codes and names can be mixed; case insensitive
  auk_country(country = c("US", "Canada", "mexico")) %>%
  # bbox: long and lat in decimal degrees
  # formatted as `c(lng_min, lat_min, lng_max, lat_max)`
  auk_bbox(bbox = c(-100, 37, -80, 52)) %>%
  # date: use standard ISO date format `"YYYY-MM-DD"`
  auk_date(date = c("2012-01-01", "2012-12-31")) %>%
  # time: 24h format
  auk_time(start_time = c("06:00", "09:00")) %>%
  # duration: length in minutes of checklists
  auk_duration(duration = c(0, 60)) %>%
  # complete: all species seen or heard are recorded
  auk_complete()
ebd_filters

## ----auk-complete, eval = FALSE-----------------------------------------------
# output_file <- "ebd_filtered_blja-grja.txt"
# ebd_jays <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
#   auk_ebd() %>%
#   auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
#   auk_country(country = "Canada") %>%
#   auk_filter(file = output_file)

## ----read---------------------------------------------------------------------
system.file("extdata/ebd-sample.txt", package = "auk") %>%
  read_ebd() %>%
  glimpse()

## ----read-auk-ebd, eval = FALSE-----------------------------------------------
# output_file <- "ebd_filtered_blja-grja.txt"
# ebd_df <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
#   auk_ebd() %>%
#   auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
#   auk_country(country = "Canada") %>%
#   auk_filter(file = output_file) %>%
#   read_ebd()

## ----awk-script---------------------------------------------------------------
awk_script <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
  auk_ebd() %>%
  auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
  auk_country(country = "Canada") %>%
  auk_filter(awk_file = "awk-script.txt", execute = FALSE)
# read back in and prepare for printing
awk_file <- readLines(awk_script)
# remove the script file once its contents are in memory
unlink("awk-script.txt")
# drop blank / whitespace-only lines before printing
awk_file[!grepl("^[[:space:]]*$", awk_file)] %>%
  paste0(collapse = "\n") %>%
  cat()

## ----auk-unique---------------------------------------------------------------
# read in an ebd file and don't automatically remove duplicates
ebd_dupes <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
  read_ebd(unique = FALSE)
# remove duplicates
ebd_unique <- auk_unique(ebd_dupes)
# compare number of rows
nrow(ebd_dupes)
nrow(ebd_unique)

## ----auk-rollup---------------------------------------------------------------
# read in sample data without rolling up
ebd <- system.file("extdata/ebd-rollup-ex.txt", package = "auk") %>%
  read_ebd(rollup = FALSE)
# apply roll up
ebd_ru <- auk_rollup(ebd)
# all taxa not identifiable to species are dropped
# taxa below species have been rolled up to species
unique(ebd$category)
unique(ebd_ru$category)

# Yellow-rumped Warbler subspecies rollup
# without rollup, there are three observations
ebd %>%
  filter(common_name == "Yellow-rumped Warbler") %>%
  select(checklist_id, category, common_name, subspecies_common_name,
         observation_count)
# with rollup, they have been combined
ebd_ru %>%
  filter(common_name == "Yellow-rumped Warbler") %>%
  select(checklist_id, category, common_name, observation_count)

## ----ebd-zf-------------------------------------------------------------------
# to produce zero-filled data, provide an EBD and sampling event data file
f_ebd <- system.file("extdata/zerofill-ex_ebd.txt", package = "auk")
f_smp <- system.file("extdata/zerofill-ex_sampling.txt", package = "auk")
filters <- auk_ebd(f_ebd, file_sampling = f_smp) %>%
  auk_species("Collared Kingfisher") %>%
  auk_time(c("06:00", "10:00")) %>%
  auk_complete()
filters

## ----zf-filter-fake, echo = FALSE---------------------------------------------
# needed to allow building vignette on machines without awk
ebd_sed_filtered <- filters
ebd_sed_filtered$output <- "ebd-filtered.txt"
ebd_sed_filtered$output_sampling <- "sampling-filtered.txt"

## ----zf-filter, eval = -1-----------------------------------------------------
ebd_sed_filtered <- auk_filter(filters,
                               file = "ebd-filtered.txt",
                               file_sampling = "sampling-filtered.txt")
ebd_sed_filtered

## ----auk-zf-fake, echo = FALSE------------------------------------------------
# needed to allow building vignette on machines without awk
fake_ebd <- read_ebd(f_ebd)
fake_smp <- read_sampling(f_smp)
# filter in R to fake AWK call
fake_ebd <- subset(
  fake_ebd,
  all_species_reported &
    scientific_name %in% filters$filters$species &
    time_observations_started >= filters$filters$time[1] &
    time_observations_started <= filters$filters$time[2])
fake_smp <- subset(
  fake_smp,
  all_species_reported &
    time_observations_started >= filters$filters$time[1] &
    time_observations_started <= filters$filters$time[2])
ebd_zf <- auk_zerofill(fake_ebd, fake_smp)

## ----auk-zf, eval = -1--------------------------------------------------------
ebd_zf <- auk_zerofill(ebd_sed_filtered)
ebd_zf

## ----zf-components------------------------------------------------------------
head(ebd_zf$observations)
glimpse(ebd_zf$sampling_events)

## ----zf-collapse, eval = -1---------------------------------------------------
# zero-fill and collapse in one step; uses the filtered object from the
# zf-filter chunk (`ebd_sed_filtered`), the same input as the auk-zf chunk
ebd_zf_df <- auk_zerofill(ebd_sed_filtered, collapse = TRUE)
# equivalent result from the already zero-filled object
ebd_zf_df <- collapse_zerofill(ebd_zf)
class(ebd_zf_df)
ebd_zf_df