knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

Read data

Let’s load the data prepared in the previous section and proceed with preparation for analysis.

library(tidyverse)
fhb_dat <- read.csv("data/fhb_data_new.csv") %>%
  group_by(trial)

Trial-level linear regression

Let’s now fit a regression model for each study with variable number of pairs of observations, but splitting the dataset according to the baseline yield, which will give 19 and 18 trials in each condition.

High yield scenario

library(broom)

high_lm <- fhb_dat %>%
  filter(yield_class != "low") %>%
  group_by(trial) %>%
  do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
  select(c(1:3)) %>%
  spread(term, estimate)

high_lm1 <- data.frame(high_lm)
colnames(high_lm1) <- c("trial", "intercept", "slope")
summary(high_lm1)

##      trial         intercept           slope     
##  Min.   : 1.00   Min.   :-220.18   Min.   :3000  
##  1st Qu.: 6.50   1st Qu.: -75.40   1st Qu.:4124  
##  Median :17.50   Median : -53.35   Median :4433  
##  Mean   :16.44   Mean   : -60.86   Mean   :4473  
##  3rd Qu.:21.75   3rd Qu.: -18.57   3rd Qu.:4952  
##  Max.   :37.00   Max.   :  52.88   Max.   :6097

Low yield scenario

low_lm <- fhb_dat %>%
  filter(yield_class != "high") %>%
  group_by(trial) %>%
  do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
  select(c(1:3)) %>%
  spread(term, estimate)

low_lm1 <- data.frame(low_lm)
colnames(low_lm1) <- c("trial", "intercept", "slope")
head(low_lm1)

summary(low_lm1)

##      trial         intercept           slope     
##  Min.   : 6.00   Min.   :-381.87   Min.   :1708  
##  1st Qu.:11.00   1st Qu.: -82.11   1st Qu.:2579  
##  Median :24.00   Median : -54.02   Median :3102  
##  Mean   :21.42   Mean   : -71.76   Mean   :2977  
##  3rd Qu.:30.50   3rd Qu.: -17.10   3rd Qu.:3391  
##  Max.   :35.00   Max.   :  43.48   Max.   :4299

Population-average mixed model estimates

We will use the lmer function of the lme4 to fit three different kinds of mixed models: random intercepts and slopes, random intercepts only and random slopes only. We also included baseline yield categorical variable in the model to check whether variance could be reduced, thus explaining portion of the variation in the slopes or intercepts.

library(lme4)

# null model
mix_yld <- lmer(yield ~ 1 + (1 | trial), data = fhb_dat, REML = F)

# random intercept and slopes
mix_yld1 <- lmer(yield ~ sev + (sev | trial), data = fhb_dat, REML = F)

# random slopes
mix_yld2 <- lmer(yield ~ sev + (1 | sev), data = fhb_dat, REML = F)

# random intercepts
mix_yld3 <- lmer(yield ~ sev + (1 | trial), data = fhb_dat, REML = F)

Here we can check which model best fitted the data based on the lowest AIC, which was the one with both intercepts and slopes as random effects.

AIC(mix_yld, mix_yld1, mix_yld2, mix_yld3)

Effect of baseline yield

Let’s include an interaction term and test whether variance was significantly reduced based on likelihood ratio test.

mix_yld4 <- lmer(yield ~ sev * yield_class + (sev | trial), data = fhb_dat, REML = F)

anova(mix_yld1, mix_yld4, test = "Chisq")

summary(mix_yld4)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: yield ~ sev * yield_class + (sev | trial)
##    Data: fhb_dat
## 
##      AIC      BIC   logLik deviance df.resid 
##   4595.1   4625.2  -2289.5   4579.1      309 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1436 -0.3431  0.0492  0.4179  3.9299 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  trial    (Intercept) 372287   610.2        
##           sev           1129    33.6    0.02
##  Residual              57284   239.3        
## Number of obs: 317, groups:  trial, 37
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)         4419.504    148.390  29.783
## sev                  -46.316      9.604  -4.823
## yield_classlow     -1535.949    209.460  -7.333
## sev:yield_classlow    -3.475     14.309  -0.243
## 
## Correlation of Fixed Effects:
##             (Intr) sev    yld_cl
## sev         -0.087              
## yild_clsslw -0.708  0.062       
## sv:yld_clss  0.059 -0.671 -0.120
## convergence code: 0
## Model failed to converge with max|grad| = 0.0056379 (tol = 0.002, component 1)

library(emmeans)
CLD(emmeans(mix_yld4, ~ sev * yield_class))

Extract the random coefficients (BLUES).

blup <- coef(mix_yld1)$trial
colnames(blup) <- c("Intercept", "Slope")

summary(blup)

##    Intercept        Slope         
##  Min.   :1620   Min.   :-112.534  
##  1st Qu.:2998   1st Qu.: -59.085  
##  Median :3560   Median : -52.881  
##  Mean   :3645   Mean   : -49.068  
##  3rd Qu.:4277   3rd Qu.: -23.605  
##  Max.   :5786   Max.   :  -4.544

Calculate the interdecile range for the BLUEs of the slopes and intercepts

# Intercept
dec90_i <- quantile(blup$Intercept, probs = c(.9))
dec10_i <- quantile(blup$Intercept, probs = c(.1))
dec90_i - dec10_i

##      90% 
## 2470.569

# Slopes
dec90_s <- quantile(blup$Slope, probs = c(.9))
dec10_s <- quantile(blup$Slope, probs = c(.1))
dec90_s - dec10_s

##      90% 
## 76.80005