3.1 Predictors affecting multiple responses
If we look a little at what simulate_population()
assumes underneath with the formulation above (just random effects), we can understand more how we simulate predictors that affect multiple response variable. In the above code, we are essentially simulating a predictor for each trait (individual_effect1
and individual_effect2
) with some covariance between them.
We can expand the equation above:
\[\boldsymbol{y}_{ij} = \boldsymbol{\beta}_0 + \boldsymbol{u}_j B_u + \boldsymbol{\epsilon}_{ij}B_{\epsilon}\] \[ B_u = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} , B_{\epsilon} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} \]
to include matrices of \(\beta\)s, \(B\), the columns of which refer to the response variable, and the rows predictors. In this case they are all identity matrices, which essentially controlling which predictors affects which response (\(u_1\) affects \(y_1\) but not \(y_2\) and vice versa). Internally, simulate_population()
does the same thing, and assigns beta
as an identity matrix.
<- simulate_population(
squid_data data_structure= make_structure(structure = "individual(100)",repeat_obs=10),
n_response=2,
response_name = c("body_mass","behaviour"),
parameters=list(
individual = list(
vcov =matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE),
beta= diag(2)
),residual = list(
vcov =matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE),
beta= diag(2)
)
) )
How is this helpful? Well now we could simulate one (or many) predictor(s) that both responses. In the form of an equation we could have
\[\boldsymbol{y}_{ij} = \boldsymbol{\beta}_0 + \boldsymbol{x}_{i} B_x + \boldsymbol{u}_j + \boldsymbol{\epsilon}_{ij}\]
\[\boldsymbol{x}_i \sim \mathcal{N}(\boldsymbol{\mu}_x, \Sigma_x)\]
\[\boldsymbol{u}_i \sim \mathcal{N}(0, \Sigma_u)\]
\[\boldsymbol{\epsilon}_{i} \sim \mathcal{N}(0, \Sigma_{\epsilon})\]
where \(B\) is a \(p*q\) matrix, where \(p\) is number of predictors. So if we returned to our original example in Section 1, we have three predictors at the observation level - temperature, rainfall and wind. So \(B_x\) would be a 3*2
matrix, with 3 predictors and two responses.
<- matrix(c(
Beta 0.5, -0.1,
0.2, -0.2,
0.3, -0.1
nrow=3,ncol=2,byrow=TRUE)
), Beta
## [,1] [,2]
## [1,] 0.5 -0.1
## [2,] 0.2 -0.2
## [3,] 0.3 -0.1
So here, the environment variables all positively affect body mass (response 1) and negatively affect behaviour (response 2). This then slots easily into our code.
<- simulate_population(
squid_data data_structure= make_structure(structure = "individual(100)",repeat_obs=20),
n_response=2,
parameters= list(
individual = list(
vcov =matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE)
),observation = list(
names = c("temperature", "rainfall", "wind"),
beta= Beta
),residual = list(
vcov= matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE)
)
)
)
<- get_population_data(squid_data)
data head(data)
## y1 y2 individual_effect1 individual_effect2 temperature
## 1 0.2629858 -0.0780353 -0.005446858 -0.2658786 0.3738621
## 2 1.0829243 1.1550551 -0.005446858 -0.2658786 -0.8473002
## 3 -0.2310409 0.3175489 -0.005446858 -0.2658786 -0.6371071
## 4 1.7982021 -0.2832260 -0.005446858 -0.2658786 0.5207047
## 5 -0.1813950 0.1731702 -0.005446858 -0.2658786 1.1298068
## 6 1.5139838 0.8386942 -0.005446858 -0.2658786 1.5284323
## rainfall wind residual1 residual2 individual squid_pop
## 1 0.1456369 -0.6502334 0.2474442 0.1893336 1 1
## 2 -0.9514215 0.9920109 1.4047023 1.2451206 1 1
## 3 -1.2060802 -0.5203187 0.4902712 0.2264689 1 1
## 4 0.3514403 0.1582997 1.4255186 0.1208411 1 1
## 5 -0.5530579 -0.2232949 -0.5632515 0.4190884 1 1
## 6 -0.6243242 -0.1365409 0.9210416 1.1188972 1 1
# library(MCMCglmm)
# mod <- MCMCglmm(cbind(y1,y2)~1,random=~us(trait):individual, rcov=~us(trait):units,data=data,family=rep("gaussian",2),verbose=FALSE)
# summary(mod)
Equally if we want an interaction, we now have to expand the size of what we give to beta
, with one \(\beta\) for each response, in a matrix, with \(q\) columns.
<- simulate_population(
squid_data data_structure= make_structure(structure = "individual(100)",repeat_obs=20),
n_response=2,
parameters= list(
individual = list(
vcov =matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE)
),observation = list(
names = c("temperature", "rainfall", "wind"),
beta= Beta
),interactions = list(
names = c("temperature:rainfall"),
beta=matrix(c(0.1,-0.3),ncol=2,byrow=TRUE)
),residual = list(
vcov= matrix(c(1,0.5,0.5,1),nrow=2,ncol=2,byrow=TRUE)
)
)
)
<- get_population_data(squid_data)
data head(data)
## y1 y2 individual_effect1 individual_effect2 temperature
## 1 2.8739089 2.7223659 1.274479 0.806482 2.5349210
## 2 -0.2195532 0.4901810 1.274479 0.806482 -0.3163310
## 3 2.1650067 0.2206076 1.274479 0.806482 0.3833268
## 4 -0.7894193 -1.4047362 1.274479 0.806482 -0.9681842
## 5 2.8398480 0.9040718 1.274479 0.806482 -0.1220015
## 6 1.0100711 0.5755157 1.274479 0.806482 -0.9008461
## rainfall wind residual1 residual2 temperature:rainfall individual
## 1 -0.49522796 2.29401469 -0.1316535 1.9231227 -1.25536379 1
## 2 -1.19507319 -0.80430268 -0.8933655 -0.5539674 0.37803876 1
## 3 -0.60617083 0.09328987 0.8153473 -0.7291554 -0.23236153 1
## 4 0.05970686 0.66797804 -1.7863607 -2.2466396 -0.05780724 1
## 5 1.32784401 1.37169437 0.9654921 0.4395282 -0.16199893 1
## 6 2.65030643 0.06492591 -0.1247725 -0.5007525 -2.38751816 1
## squid_pop
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# library(MCMCglmm)
# mod <- MCMCglmm(cbind(y1,y2)~1,random=~us(trait):individual, rcov=~us(trait):units,data=data,family=rep("gaussian",2),verbose=FALSE)
# summary(mod)