---
title: "Updating `USGDPpresidents`"
author: "Spencer Graves"
date: "2022-02-22"
output: html_document
vignette: >
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteIndexEntry{Updating USGDPpresidents}
  %\VignetteKeyword{USGDPpresidents}
  %\SweaveUTF8
  %\usepackage[utf8]{inputenc}
---

```{r setup, include=FALSE, echo=FALSE}
# Make `echo = TRUE` the default option for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
```

## Purpose of this document 

This document describes the process for updating [`Ecdat::USGDPpresidents`](https://www.rdocumentation.org/packages/Ecdat/versions/0.3-1/topics/USGDPpresidents).  

## Set working directory 

First decide the directory in which we want to work and copy this vignette (`*.Rmd` file) into that directory.  (`RStudio` does not allow `setwd` inside code chunks to work as one might naively expect.  Therefore, it's best NOT to try to change the working directory but instead to copy this vignette into the desired working directory.)  

## Are there new data?  

Start by checking the span of years in `USGDPpresidents`:  

```{r yrSpan}
library(Ecdat)

# First and last year covered by the currently loaded USGDPpresidents;
# later chunks compare these against the downloaded files.
rngYrs <- range(USGDPpresidents$Year)
print(rngYrs)
```

Next download "GDP - US" and "CPI - US" from [Measuring Worth](https://www.measuringworth.com/).  On 2022-02-16 this produced two csv files, which I downloaded and copied into a directory in which we wish to work.  

```{r csv}
# Show the directory being searched, then every csv file found in it.
getwd()
(csv2 <- dir(pattern = '\\.csv$'))

# Files whose names start with "USCPI"; keep only the last one listed.
(CPIcsvs <- grep('^USCPI', csv2, value = TRUE))
(CPIcsv <- tail(CPIcsvs, 1))

# Same selection for files whose names start with "USGDP".
(GDPcsvs <- grep('^USGDP', csv2, value = TRUE))
(GDPcsv <- tail(GDPcsvs, 1))

# Update0 is TRUE only when exactly one file of each kind was selected.
# `&&` is the scalar logical operator; the elementwise `&` is meant for
# building vector masks, not for a single yes/no condition.
Update0 <- (length(CPIcsv) == 1) && (length(GDPcsv) == 1)
```

We must verify by visual inspection that `CPIcsv` and `GDPcsv` are both of length 1 and are the files we want.  

Read them:  

```{r read.csv}
# Stay at "no update" unless the downloads reach past the current data.
Update <- FALSE
if (Update0) {
  # `skip` drops the leading lines of each download (presumably title
  # lines above the header row -- confirm against the actual files).
  USCPI <- read.csv(CPIcsv, skip = 2)
  str(USCPI)
  USGDP. <- read.csv(GDPcsv, skip = 1)
  str(USGDP.)

  library(Ecfun)
  USGDP <- asNumericDF(USGDP.)

  rngCPIyrs <- range(USCPI$Year)
  print(rngCPIyrs)
  rngGDPyrs <- range(USGDP$Year)
  print(rngGDPyrs)

  # Latest year present in either download.
  endYr <- max(rngCPIyrs, rngGDPyrs)
  if (endYr > rngYrs[2]) {
    Update <- TRUE
    print(Update)
  }
}
```

## Update 

If Update, create a local copy of `USGDPpresidents` with the additional rows required to hold the new data:  

```{r cy}
if (Update) {
  # One additional row per year between the old and the new final year.
  rowsNeeded <- endYr - rngYrs[2]
  Nold <- nrow(USGDPpresidents)

  # Keep every existing row once, then repeat the last row `rowsNeeded`
  # times as placeholders for the new years.
  iRep <- c(1:Nold, rep(Nold, times = rowsNeeded))
  USGDPp2 <- USGDPpresidents[iRep, ]
}
```

Fix the Year and insert NAs for all other columns for the new rows:   

```{r Year}
if (Update) {
  # Row positions of the placeholder rows appended after the old data.
  iNew <- Nold + (1:rowsNeeded)

  # Number the new rows consecutively after the old final year and use
  # Year as the row names of the whole table.
  USGDPp2$Year[iNew] <- (rngYrs[2] + 1):endYr
  rownames(USGDPp2) <- USGDPp2$Year

  # Blank every column except the first in the new rows
  # (assumes Year is column 1 -- confirm).
  USGDPp2[iNew, -1] <- NA
}
```

Now replace CPI by the new numbers:  

```{r CPI}
if (Update) {
  # Rows of the working copy whose Year also appears in the CPI download.
  selCPI <- USGDPp2$Year %in% USCPI$Year

  # Refuse to continue if a year missing from the download already holds
  # a value in column 2 of the working copy (assumed to be CPI -- confirm).
  if (!all(is.na(USGDPp2[!selCPI, 2]))) {
    stop("ERROR:  There are CPI numbers ",
         "in the current USGDPpresidents ",
         "that are not in the new.  ",
         "Manual review required.")
  }

  # NOTE(review): assumes column 2 of USCPI has exactly one value per
  # selected row, in the same year order -- confirm.
  USGDPp2$CPI[selCPI] <- USCPI[, 2]
}
```

Does `USGDPpresidents.Rd` need to be updated
to reflect the proper reference years for the 
CPI?  

```{r CPIref}
# Show the first four lines of the CPI download so the stated reference
# period can be read off by eye.
if (Update) {
  print(readLines(CPIcsv, n = 4))
}
```

If this says "Average 1982-84 = 100", it should be good.  Otherwise that (and this) should be updated.  

Now let's update `GDPdeflator`:  

```{r GDPdeflator}
if (Update) {
  # Rows of the working copy whose Year also appears in the GDP download;
  # reused by later chunks.
  selGDP <- USGDPp2$Year %in% USGDP$Year

  # Refuse to continue if a year missing from the download already holds
  # a GDPdeflator value.
  if (!all(is.na(USGDPp2[!selGDP, "GDPdeflator"]))) {
    stop("ERROR:  There are GDPdeflator numbers ",
         "in the current USGDPpresidents ",
         "that are not in the new.  ",
         "Manual review required.")
  }

  # Locate the download column by name and copy it across.
  selDefl <- grep("Deflator", names(USGDP))
  USGDPp2$GDPdeflator[selGDP] <- USGDP[, selDefl]

  # The printed column name is compared by hand with USGDPpresidents.Rd.
  print(names(USGDP)[selDefl])
}
```  

Compare the index year of "GDP.Deflator" with that in `USGDPpresidents.Rd`:  If they are different, fix `USGDPpresidents.Rd`.  

Now update population:  

```{r pop}
if (Update) {
  # Locate the population column of the download by name and copy it
  # into the rows selected by `selGDP` (set in the GDPdeflator chunk).
  selPop <- grep("Population", names(USGDP))
  USGDPp2$population.K[selGDP] <- USGDP[, selPop]

  # Show which download column was used.
  print(names(USGDP)[selPop])
}
```  

Now `realGDPperCapita`.  This also has a reference year, so we need to make sure we get them all:  

```{r GDPperCap}
if(Update){
  # Refuse to continue if a year missing from the GDP download already
  # holds a realGDPperCapita value.  The column name must match the one
  # assigned below: the previous spelling 'readGDPperCapita' names no
  # column, so the guard could not test the intended values.
  if(any(!is.na(USGDPp2[!selGDP, 'realGDPperCapita']))){
    stop('ERROR:  There are realGDPperCapita numbers ', 
         'in the current USGDPpresidents ', 
         'that are not in the new.  ', 
         'Manual review required.')
  }
  # Locate the download column by name and copy it into the rows
  # selected by `selGDP` (set in the GDPdeflator chunk).
  selGDPperC <- grep('Real.GDP.per.c', names(USGDP))
  USGDPp2$realGDPperCapita[selGDP] <- USGDP[,selGDPperC]
  # The printed column name is compared by hand with USGDPpresidents.Rd.
  print(names(USGDP)[selGDPperC])
}
```  

Compare the index year of `Real.GDP.per.capita` with that in `USGDPpresidents.Rd`:  If they are different, fix `USGDPpresidents.Rd`.  

Next:  executive:  

NOTE:  THIS MAY NEED TO BE CHANGED MANUALLY HERE BEFORE EXECUTING, BECAUSE IT IS NOT IN `USGDP`.  CHECK BOTH: 

* WHO WAS PRESIDENT SINCE THE PREVIOUS VERSION?
* WAS THAT PERSON NOT IN THE PREVIOUS VERSION?  

```{r executive}
if(Update){
  exec <- as.character(USGDPp2$executive)

  # EDIT BY HAND FOR EACH UPDATE: one name for every row that still
  # lacks an executive, in row order.
  newExec <- c('Trump', 'Trump', 'Biden')

  # A length mismatch would otherwise be recycled or truncated with only
  # a warning, silently assigning names to the wrong years.
  nMissing <- sum(is.na(exec))
  if(nMissing != length(newExec)){
    stop('ERROR:  ', nMissing, ' rows have no executive but ', 
         length(newExec), ' names were supplied.  ', 
         'Manual review required.')
  }
  exec[is.na(exec)] <- newExec

  # Keep the existing level order and append only names that are not yet
  # levels.  unique() prevents the duplicated-level error when a name is
  # already present, and including all of newExec prevents a new name
  # from becoming NA.
  lvlexec <- unique(c(levels(USGDPp2$executive), newExec))
  USGDPp2$executive <- ordered(exec, lvlexec)
}
```  

Similarly:  war

NOTE:  IF THERE HAS BEEN A MAJOR WAR SINCE THE LAST VERSION, 
THEN THIS TEXT NEEDS TO BE CHANGED, 
BECAUSE IT ASSUMES THERE HAS NOT BEEN A MAJOR WAR.  

```{r war}
if (Update) {
  # Keep the existing level set; no new war is introduced here.
  lvlwar <- levels(USGDPp2$war)

  # Rows without a war entry get the empty string
  # (assumes "" is already one of the levels -- confirm).
  war <- as.character(USGDPp2$war)
  war[is.na(war)] <- ""

  USGDPp2$war <- ordered(war, lvlwar)
}
```  

Next:  `battleDeaths` and `battleDeathsPMP`:  

NOTE:  `battleDeaths` ARE ONLY BATTLE DEATHS
IN MAJOR WARS as defined in `help(USGDPpresidents)`.  
Otherwise, they are 0.  

```{r battleDeaths}
if (Update) {
  # The new years are recorded with zero battle deaths.
  USGDPp2$battleDeaths[iNew] <- 0

  # Recompute the rate for every row: 1000 * deaths / population.K.
  USGDPp2$battleDeathsPMP <-
    1000 * USGDPp2$battleDeaths / USGDPp2$population.K
}
```

Keynes (per `help(USGDPpresidents)`):

```{r Keynes}
# Set the Keynes column to 0 in the newly added rows.
if (Update) {
  USGDPp2[["Keynes"]][iNew] <- 0
}
```

## Unemployment?  

Unemployment figures came from different 
sources for different years.  Since 1940 
the source has been the Bureau of Labor 
Statistics (BLS), series `LNS14000000` from 
the Current Population Survey.  These data 
are available as a monthly series from 
the [Current Population Survey of the Bureau 
of Labor Statistics](https://www.bls.gov/cps/).  
Download the most recent years as an Excel 
file, compute row averages, and transfer the 
numbers for the most recent years here.  

NOTE:  When I did that on 2022-02-22 I found 
minor discrepancies in earlier years.  Pushing 
this further I found that I could download data 
back to 1948.  The average unemployment per this
BLS computation for 1948 and 1947 were 3.75 and 
6.05 percent, respectively, vs. 0.038 and 0.059
in the previous version of `USGDPpresidents` for
those years.  I therefore decided to read that 
`*.xlsx` file and replace all those numbers.

```{r xlsx}
if (Update) {
  # All Excel files in the working directory; `xls` is reused later when
  # looking for the budget file.
  xls <- dir(pattern = "\\.xlsx$")

  # Files whose names start with "Series".
  BLSxls <- grep("^Series", xls, value = TRUE)
  print(BLSxls)
}
```

```{r readBLS}
library(readxl)

if (Update) {
  # Skip the first 11 lines of the sheet (presumably descriptive header
  # text above the column names -- confirm against the download).
  BLS <- read_excel(BLSxls, skip = 11)
  str(BLS)
}
```

Compute the average unemployment here, so I don't have to do this separately.  

```{r AnnUnemp}
if (Update) {
  # Columns 2 through 13 of the BLS sheet as a matrix
  # (presumably the twelve monthly columns -- confirm).
  UNEMP <- as.matrix(BLS[2:13])

  # Row-wise mean; a row containing an NA yields NA.
  unemp <- apply(UNEMP, MARGIN = 1, FUN = mean)
  str(unemp)
}
```

Store these `unemp` numbers   
  
```{r unemp}
if (Update) {
  # Years common to both tables, expressed as a mask on each side.
  selU4GDP <- USGDPp2$Year %in% BLS$Year
  selBLS <- BLS$Year %in% USGDPp2$Year

  # NOTE(review): assumes both tables list the shared years in the same
  # order -- confirm.
  USGDPp2[selU4GDP, "unemployment"] <- unemp[selBLS]

  # Earlier hand-entered values, kept for reference:
  #  USGDPp2$unemployment[iNew] <- c(4.875,
  #                    4.35, 3.89166666666667)

  # The new rows take the source label of the last pre-existing row.
  USGDPp2$unempSource[iNew] <- USGDPp2$unempSource[iNew[1] - 1]

  tail(USGDPp2)
}
```

## `fedReceipts`, `fedOutlays`

We get `fedReceipts` and `fedOutlays` from two different sources.  Let's start with the historical data first.  

We manually copied the historical data from series Y 335 and 336 in [United States Census Bureau (1975) Bicentennial Edition: Historical Statistics of the United States, Colonial Times to 1970, Part 2.  Chapter Y. Government](https://www.census.gov/library/publications/1975/compendia/hist_stats_colonial-1970.html) into a LibreOffice `*.ods` file.  We need to read that once and add it to `USGDPp2`:  

```{r ods, eval=FALSE}
if(Update){
  # Files ending in ".ods".  The pattern is anchored with `$`, like the
  # csv and xlsx searches elsewhere in this document, so names that merely
  # contain ".ods" (e.g. "x.ods.bak") are not picked up.
  (odsFile <- dir(pattern='\\.ods$'))
  # Of those, the files whose names start with "hstat".
  (odsF <- grep('^hstat', odsFile, value=TRUE))
}
```

```{r readods, eval=FALSE}
if (Update) {
  library(readODS)

  # Read the "Receipts" sheet, skipping its first two lines.
  hstat <- read_ods(odsF, sheet = "Receipts", skip = 2)
  str(hstat)
}
```

```{r sortOld, eval=FALSE}
if (Update) {
  # Keep rows that have a Year, and only the first three columns.
  Hstat <- hstat[!is.na(hstat$Year), 1:3]

  # Sort by ascending Year and preview the result.
  oOld <- order(Hstat$Year)
  Hst <- Hstat[oOld, ]
  head(Hst)
}
```

Add as new variables to `USGDPp2`: 

```{r addNewVars, eval=FALSE}
if (Update) {
  # Start both columns as all-NA.
  USGDPp2$fedReceipts <- NA
  USGDPp2$fedOutlays <- NA

  # Fill the years present in the historical table from its columns 2
  # and 3, divided by 1000 (presumably a change of units -- confirm).
  # NOTE(review): assumes Hst rows line up one-to-one with the selected
  # years -- confirm.
  selGDP4Hst <- USGDPp2$Year %in% Hst$Year
  USGDPp2[selGDP4Hst, c("fedReceipts", "fedOutlays")] <- Hst[2:3] / 1000

  # Display the three columns for a visual check.
  USGDPp2[c("Year", "fedReceipts", "fedOutlays")]
}
```

Now let's add the new data.  

```{r BudgetFile}
if (Update) {
  # From the Excel files found earlier (`xls`), keep names starting with
  # "BUDGET", then those containing "2-1", then the last one remaining.
  BudgetFiles <- grep("^BUDGET", xls, value = TRUE)
  BudgetF2_1 <- grep("2-1", BudgetFiles, value = TRUE)
  BudgetFile <- tail(BudgetF2_1, 1)
  print(BudgetFile)
}
```

```{r readBudget}
if (Update) {
  # Read the selected budget workbook, skipping its first three lines.
  Budget <- read_excel(BudgetFile, skip = 3)
  str(Budget)
}
```

```{r drop2}
if (Update) {
  library(Ecfun)

  # Drop the first two rows, keep the first three columns, and convert
  # them with asNumericDF.
  Budg <- asNumericDF(Budget[-(1:2), 1:3])
  str(Budg)
}
```

```{r updateBudget}
if (Update) {
  # Years common to the working copy and column 1 of the budget table,
  # expressed as a mask on each side.
  selGDP4budg <- USGDPp2$Year %in% Budg[, 1]
  selBudg <- Budg[, 1] %in% USGDPp2$Year

  # Copy budget columns 2 and 3 into the matching rows.
  # NOTE(review): assumes both tables list the shared years in the same
  # order -- confirm.
  USGDPp2[selGDP4budg, c("fedReceipts", "fedOutlays")] <-
    Budg[selBudg, 2:3]
}
```

Finally:  `fedOutlays_pGDP`

```{r fedOutlays_pGDP}
if (Update) {
  # Mask for the 1843 row(s) of the GDP download.
  i1843 <- USGDP$Year == 1843
  # NOTE(review): this count is computed but discarded inside the braces,
  # so nothing is printed -- confirm whether a printout was intended.
  sum(i1843)

  # Dividing by (1 + mask) halves the 1843 value and leaves other years
  # unchanged (reason not stated here -- confirm).
  GDPnom <- USGDP$Nominal.GDP..million.of.Dollars. / (1 + i1843)
  plot(USGDP$Year, GDPnom, type = "l", log = "y")
  abline(v = 1843)

  # Outlays divided by nominal GDP, for the rows selected by `selGDP`
  # (set in the GDPdeflator chunk).
  fedOp <- USGDPp2$fedOutlays[selGDP] / GDPnom
  plot(USGDP$Year, fedOp, type = "l", log = "y")

  # Rebuild the column from scratch so unmatched years are NA.
  USGDPp2$fedOutlays_pGDP <- NA
  USGDPp2$fedOutlays_pGDP[selGDP] <- fedOp
}
```

## Plot US federal outlays 

```{r USGDPpresNew}
if (Update) {
  # From here on the updated table goes by the dataset's own name.
  USGDPpresidents <- USGDPp2

  # Plot outlays as a percentage of GDP for rows where it is available.
  sel <- !is.na(USGDPpresidents$fedOutlays_pGDP)
  plot(100 * fedOutlays_pGDP ~ Year,
       data = USGDPpresidents[sel, ],
       type = "l", log = "y",
       xlab = "", ylab = "US federal outlays, % of GDP")
  abline(h = 2:3)

  # Dotted gray vertical line for every year with a non-empty war entry.
  war <- USGDPpresidents$war != ""
  abline(v = USGDPpresidents$Year[war],
         lty = "dotted", col = "light gray")

  # Red dotted lines at 1929 and 1933, with a rotated "Hoover" label.
  abline(v = c(1929, 1933), col = "red", lty = "dotted")
  text(1931, 22, "Hoover", srt = 90, col = "red")
}
```
## Done:  Save

```{r save}
if (Update) {
  # Write the updated object under a relative file name, then show the
  # working directory so the file can be located.
  save(USGDPpresidents, file = "USGDPpresidents.rda")
  getwd()
}
```

Now copy this file from the current working directory 
to `~Ecdat\data`, overwriting the previous version.