knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
Let’s load the data prepared in the previous section and proceed with preparation for analysis.
library(tidyverse)
fhb_dat <- read.csv("data/fhb_data_new.csv") %>%
group_by(trial)
Let’s now fit a regression model for each study with variable number of pairs of observations, but splitting the dataset according to the baseline yield, which will give 19 and 18 trials in each condition.
library(broom)
high_lm <- fhb_dat %>%
filter(yield_class != "low") %>%
group_by(trial) %>%
do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
select(c(1:3)) %>%
spread(term, estimate)
high_lm1 <- data.frame(high_lm)
colnames(high_lm1) <- c("trial", "intercept", "slope")
summary(high_lm1)
## trial intercept slope
## Min. : 1.00 Min. :-220.18 Min. :3000
## 1st Qu.: 6.50 1st Qu.: -75.40 1st Qu.:4124
## Median :17.50 Median : -53.35 Median :4433
## Mean :16.44 Mean : -60.86 Mean :4473
## 3rd Qu.:21.75 3rd Qu.: -18.57 3rd Qu.:4952
## Max. :37.00 Max. : 52.88 Max. :6097
low_lm <- fhb_dat %>%
filter(yield_class != "high") %>%
group_by(trial) %>%
do(tidy(lm(.$yield ~ .$sev), conf.int = TRUE)) %>%
select(c(1:3)) %>%
spread(term, estimate)
low_lm1 <- data.frame(low_lm)
colnames(low_lm1) <- c("trial", "intercept", "slope")
head(low_lm1)
summary(low_lm1)
## trial intercept slope
## Min. : 6.00 Min. :-381.87 Min. :1708
## 1st Qu.:11.00 1st Qu.: -82.11 1st Qu.:2579
## Median :24.00 Median : -54.02 Median :3102
## Mean :21.42 Mean : -71.76 Mean :2977
## 3rd Qu.:30.50 3rd Qu.: -17.10 3rd Qu.:3391
## Max. :35.00 Max. : 43.48 Max. :4299
We will use the lmer
function of the lme4
to fit three different kinds of mixed models: random intercepts and slopes, random intercepts only and random slopes only. We also included baseline yield categorical variable in the model to check whether variance could be reduced, thus explaining portion of the variation in the slopes or intercepts.
library(lme4)
# null model
mix_yld <- lmer(yield ~ 1 + (1 | trial), data = fhb_dat, REML = F)
# random intercept and slopes
mix_yld1 <- lmer(yield ~ sev + (sev | trial), data = fhb_dat, REML = F)
# random slopes
mix_yld2 <- lmer(yield ~ sev + (1 | sev), data = fhb_dat, REML = F)
# random intercepts
mix_yld3 <- lmer(yield ~ sev + (1 | trial), data = fhb_dat, REML = F)
Here we can check which model best fitted the data based on the lowest AIC, which was the one with both intercepts and slopes as random effects.
AIC(mix_yld, mix_yld1, mix_yld2, mix_yld3)
Let’s include an interaction term and test whether variance was significantly reduced based on likelihood ratio test.
mix_yld4 <- lmer(yield ~ sev * yield_class + (sev | trial), data = fhb_dat, REML = F)
anova(mix_yld1, mix_yld4, test = "Chisq")
summary(mix_yld4)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: yield ~ sev * yield_class + (sev | trial)
## Data: fhb_dat
##
## AIC BIC logLik deviance df.resid
## 4595.1 4625.2 -2289.5 4579.1 309
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1436 -0.3431 0.0492 0.4179 3.9299
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## trial (Intercept) 372287 610.2
## sev 1129 33.6 0.02
## Residual 57284 239.3
## Number of obs: 317, groups: trial, 37
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4419.504 148.390 29.783
## sev -46.316 9.604 -4.823
## yield_classlow -1535.949 209.460 -7.333
## sev:yield_classlow -3.475 14.309 -0.243
##
## Correlation of Fixed Effects:
## (Intr) sev yld_cl
## sev -0.087
## yild_clsslw -0.708 0.062
## sv:yld_clss 0.059 -0.671 -0.120
## convergence code: 0
## Model failed to converge with max|grad| = 0.0056379 (tol = 0.002, component 1)
library(emmeans)
CLD(emmeans(mix_yld4, ~ sev * yield_class))
Extract the random coefficients (BLUES).
blup <- coef(mix_yld1)$trial
colnames(blup) <- c("Intercept", "Slope")
summary(blup)
## Intercept Slope
## Min. :1620 Min. :-112.534
## 1st Qu.:2998 1st Qu.: -59.085
## Median :3560 Median : -52.881
## Mean :3645 Mean : -49.068
## 3rd Qu.:4277 3rd Qu.: -23.605
## Max. :5786 Max. : -4.544
Calculate the interdecile range for the BLUEs of the slopes and intercepts
# Intercept
dec90_i <- quantile(blup$Intercept, probs = c(.9))
dec10_i <- quantile(blup$Intercept, probs = c(.1))
dec90_i - dec10_i
## 90%
## 2470.569
# Slopes
dec90_s <- quantile(blup$Slope, probs = c(.9))
dec10_s <- quantile(blup$Slope, probs = c(.1))
dec90_s - dec10_s
## 90%
## 76.80005