% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_openalex.R
\name{read_openalex}
\alias{read_openalex}
\title{Read and Process OpenAlex Data}
\usage{
read_openalex(file, format = "csv")
}
\arguments{
\item{file}{For \code{format = "csv"}, a character string with a local path or an
HTTP(S) URL to a CSV export. For \code{format = "api"}, a data frame produced by
\code{{openalexR}} for the \strong{works} entity.}

\item{format}{Either \code{"csv"} (CSV export; the default) or \code{"api"} (data frame from \code{{openalexR}}).}
}
\value{
A tibble with standardized bibliographic columns. Typical output includes:
\code{id_short}, \code{AU}, \code{TI}, \code{DI}, \code{CR}, \code{SO}, \code{DT}, \code{DE}, \code{AB}, \code{C1}, \code{TC}, \code{SC}, \code{SR},
\code{PY}, and \code{DB} (source flag: \code{"openalex_csv"} or \code{"openalex_api"}). See \strong{Details}.
}
\description{
Parse \strong{OpenAlex} data supplied in one of two forms:
(1) a CSV file exported in the browser, or
(2) a data frame obtained via the \code{{openalexR}} API helpers.
The function standardizes fields to common bibliographic tags (e.g., \code{AU},
\code{SO}, \code{CR}, \code{PY}, \code{DI}) and returns a tidy tibble.
}
\details{
\strong{CSV mode (\code{format = "csv"}):}
\itemize{
\item If \code{file} is a URL, it is downloaded to a temporary file before parsing
(a progress message is printed).
\item Selected fields are mapped to standardized tags:
\code{id_short} (short OpenAlex ID), \code{SR} (= \code{id_short}), \code{PY} (= \code{publication_year}),
\code{TI} (= \code{title}), \code{DI} (= \code{doi}), \code{DT} (= \code{type}), \code{DE} (= \code{keywords.display_name}),
\code{AB} (= \code{abstract}), \code{AU} (= \code{authorships.author.display_name}),
\code{SO} (= \code{locations.source.display_name}),
\code{C1} (= \code{authorships.countries}), \code{TC} (= \code{cited_by_count}),
\code{SC} (= \code{primary_topic.field.display_name}), \code{CR} (= \code{referenced_works},
with the \verb{https://openalex.org/} prefix stripped),
and \code{DB = "openalex_csv"}.
\item \code{PY} is coerced to numeric; a helper column \code{DI2} (uppercase, punctuation-stripped
variant of \code{DI}) is added; columns with all-caps tags are placed first and
\code{DI2} is relocated after \code{DI}.
}

\strong{API mode (\code{format = "api"}):}
\itemize{
\item \code{file} must be a data frame containing at least an \code{id} column; typically this
is the result of \code{openalexR::oa_request()} followed by \code{openalexR::oa2df()}, or similar.
\item Records are filtered to \code{type \%in\% c("article","review")} and deduplicated by \code{id}.
\item The function derives:
\itemize{
\item \code{id_short} (= \code{id} without the \verb{https://openalex.org/} prefix) and \code{SR} (= \code{id_short});
\item \code{CR}: concatenated short IDs from \code{referenced_works} (semicolon-separated);
\item \code{DE}: concatenated keyword names (lower case) from \code{keywords};
\item \code{AU}: concatenated author names (upper case) from \code{authorships};
\item plus core fields \code{PY} (= \code{publication_year}), \code{TC} (= \code{cited_by_count}),
\code{TI} (= \code{title}), \code{AB} (= \code{abstract}), \code{DI} (= \code{doi}),
and \code{DB = "openalex_api"}.
}
\item The result keeps one row per \code{id} and may include original columns from the
input (via a right join), after constructing the standardized fields above.
}
}
\section{Supported inputs}{

\itemize{
\item \code{format = "csv"} — a local path or an HTTP(S) URL to an OpenAlex \strong{CSV} export.
\item \code{format = "api"} — a \strong{data frame} produced by \code{{openalexR}} for the
\strong{works} entity (with the usual OpenAlex columns, including list-columns
such as \code{keywords}, \code{authorships}, and \code{referenced_works}).
}
}

\examples{
\dontrun{
## CSV export (local path)
x <- read_openalex("openalex-works.csv", format = "csv")

## Using the API with openalexR
library(openalexR)
url_api <- "https://api.openalex.org/works?page=1&filter=primary_location.source.id:s121026525"
df_api  <- openalexR::oa_request(query_url = url_api) |>
  openalexR::oa2df(entity = "works")
y <- read_openalex(df_api, format = "api")
}

}
\seealso{
OpenAlex R client: \code{\link[openalexR]{oa_request}}, \code{\link[openalexR]{oa2df}}.
Importer for Web of Science: \code{\link{read_wos}}.
}
