• Read data
    • Trial-level linear regression
      • High yield scenario
      • Low yield scenario
    • Population-average mixed model estimates
      • Effect of baseline yield
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

Read data

Let’s load the data prepared in the previous section and proceed with preparation for analysis.

library(tidyverse)
fhb_dat <- read.csv("data/fhb_data_new.csv") %>%
  group_by(trial)

Trial-level linear regression

Let’s now fit a regression model for each study with variable number of pairs of observations, but splitting the dataset according to the baseline yield, which will give 19 and 18 trials in each condition.

High yield scenario

library(broom)

high_lm <- fhb_dat %>%
  filter(yield_class != "low") %>%
  group_by(trial) %>%
  do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
  select(c(1:3)) %>%
  spread(term, estimate)

high_lm1 <- data.frame(high_lm)
colnames(high_lm1) <- c("trial", "intercept", "slope")
summary(high_lm1)
##      trial         intercept           slope     
##  Min.   : 1.00   Min.   :-220.18   Min.   :3000  
##  1st Qu.: 6.50   1st Qu.: -75.40   1st Qu.:4124  
##  Median :17.50   Median : -53.35   Median :4433  
##  Mean   :16.44   Mean   : -60.86   Mean   :4473  
##  3rd Qu.:21.75   3rd Qu.: -18.57   3rd Qu.:4952  
##  Max.   :37.00   Max.   :  52.88   Max.   :6097

Low yield scenario

low_lm <- fhb_dat %>%
  filter(yield_class != "high") %>%
  group_by(trial) %>%
  do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
  select(c(1:3)) %>%
  spread(term, estimate)

low_lm1 <- data.frame(low_lm)
colnames(low_lm1) <- c("trial", "intercept", "slope")
head(low_lm1)
ABCDEFGHIJ0123456789
 
 
trial
<int>
intercept
<dbl>
slope
<dbl>
16-49.6437092463.242
2743.4771982752.433
38-381.8668413317.410
49-13.8492511768.866
510-0.1892952777.906
612-55.0842173157.509
summary(low_lm1)
##      trial         intercept           slope     
##  Min.   : 6.00   Min.   :-381.87   Min.   :1708  
##  1st Qu.:11.00   1st Qu.: -82.11   1st Qu.:2579  
##  Median :24.00   Median : -54.02   Median :3102  
##  Mean   :21.42   Mean   : -71.76   Mean   :2977  
##  3rd Qu.:30.50   3rd Qu.: -17.10   3rd Qu.:3391  
##  Max.   :35.00   Max.   :  43.48   Max.   :4299

Population-average mixed model estimates

We will use the lmer function of the lme4 to fit three different kinds of mixed models: random intercepts and slopes, random intercepts only and random slopes only. We also included baseline yield categorical variable in the model to check whether variance could be reduced, thus explaining portion of the variation in the slopes or intercepts.

library(lme4)

# null model
mix_yld <- lmer(yield ~ 1 + (1 | trial), data = fhb_dat, REML = F)

# random intercept and slopes
mix_yld1 <- lmer(yield ~ sev + (sev | trial), data = fhb_dat, REML = F)

# random slopes
mix_yld2 <- lmer(yield ~ sev + (1 | sev), data = fhb_dat, REML = F)

# random intercepts
mix_yld3 <- lmer(yield ~ sev + (1 | trial), data = fhb_dat, REML = F)

Here we can check which model best fitted the data based on the lowest AIC, which was the one with both intercepts and slopes as random effects.

AIC(mix_yld, mix_yld1, mix_yld2, mix_yld3)
ABCDEFGHIJ0123456789
 
 
df
<dbl>
AIC
<dbl>
mix_yld34817.058
mix_yld164624.178
mix_yld245291.585
mix_yld344683.390

Effect of baseline yield

Let’s include an interaction term and test whether variance was significantly reduced based on likelihood ratio test.

mix_yld4 <- lmer(yield ~ sev * yield_class + (sev | trial), data = fhb_dat, REML = F)
anova(mix_yld1, mix_yld4, test = "Chisq")
ABCDEFGHIJ0123456789
 
 
Df
<dbl>
AIC
<dbl>
BIC
<dbl>
logLik
<dbl>
deviance
<dbl>
Chisq
<dbl>
Chi Df
<dbl>
Pr(>Chisq)
<dbl>
mix_yld164624.1784646.732-2306.0894612.178NANANA
mix_yld484595.0924625.163-2289.5464579.09233.0864126.536987e-08
summary(mix_yld4)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: yield ~ sev * yield_class + (sev | trial)
##    Data: fhb_dat
## 
##      AIC      BIC   logLik deviance df.resid 
##   4595.1   4625.2  -2289.5   4579.1      309 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1436 -0.3431  0.0492  0.4179  3.9299 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  trial    (Intercept) 372287   610.2        
##           sev           1129    33.6    0.02
##  Residual              57284   239.3        
## Number of obs: 317, groups:  trial, 37
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)         4419.504    148.390  29.783
## sev                  -46.316      9.604  -4.823
## yield_classlow     -1535.949    209.460  -7.333
## sev:yield_classlow    -3.475     14.309  -0.243
## 
## Correlation of Fixed Effects:
##             (Intr) sev    yld_cl
## sev         -0.087              
## yild_clsslw -0.708  0.062       
## sv:yld_clss  0.059 -0.671 -0.120
## convergence code: 0
## Model failed to converge with max|grad| = 0.0056379 (tol = 0.002, component 1)
library(emmeans)
CLD(emmeans(mix_yld4, ~ sev * yield_class))
ABCDEFGHIJ0123456789
 
 
sev
<dbl>
yield_class
<fctr>
emmean
<dbl>
SE
<dbl>
df
<dbl>
lower.CL
<dbl>
upper.CL
<dbl>
.group
<chr>
29.271626low2421.917171.688241.780312075.3822768.4521
19.271626high3990.081173.115639.322403640.0144340.1492

Extract the random coefficients (BLUES).

blup <- coef(mix_yld1)$trial
colnames(blup) <- c("Intercept", "Slope")

summary(blup)
##    Intercept        Slope         
##  Min.   :1620   Min.   :-112.534  
##  1st Qu.:2998   1st Qu.: -59.085  
##  Median :3560   Median : -52.881  
##  Mean   :3645   Mean   : -49.068  
##  3rd Qu.:4277   3rd Qu.: -23.605  
##  Max.   :5786   Max.   :  -4.544

Calculate the interdecile range for the BLUEs of the slopes and intercepts

# Intercept
dec90_i <- quantile(blup$Intercept, probs = c(.9))
dec10_i <- quantile(blup$Intercept, probs = c(.1))
dec90_i - dec10_i
##      90% 
## 2470.569
# Slopes
dec90_s <- quantile(blup$Slope, probs = c(.9))
dec10_s <- quantile(blup$Slope, probs = c(.1))
dec90_s - dec10_s
##      90% 
## 76.80005
Copyright 2017 Emerson Del Ponte