# Christian Kleiner
# 12.02.2026

# load packages
library(lmerTest)   ## to fit linear mixed models (loads lme4 dependency)
library(dplyr)      ## for data manipulation (using pipes with %>%)
library(ggplot2)    ## for graphic functions
library(MuMIn)      ## to calculate conditional and marginal R2
library(gridExtra)  ## for grid.arrange()  
library(car)	  ## adds vif()
library(ggpubr)	  ## for stat_compare_means()

# -------------------------
# load data
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
data <- read.csv("analysis_result.txt", header = TRUE)

# aggregate data
data <- data %>%
  # drop rows whose v2 is "ei", "au" or "eu" and every row with rate "slow"
  filter(!(v2 %in% c("ei", "au", "eu")), rate != "slow") %>%
  mutate(
    # two-valued phase: negative phi_rad -> -pi/2, everything else -> +pi/2
    phi = ifelse(phi_rad < 0, -pi / 2, pi / 2),
    v1_v2 = paste(v1, v2, sep = "_"),
    # "slow" stays a declared level although its rows were removed above
    rate = factor(rate, levels = c("slow", "normal", "fast")),
    # values that are not listed in `levels` become NA
    subject = factor(subject, levels = c("vp01", "vp02", "vp04", "vp05")),
    c = factor(c, levels = c("glott", "m", "f", "p", "pf")),
    t_trans_s = c_duration_s + dt_offset_s - dt_onset_s
  ) %>%
  # Classify every row. `.` is the data frame piped into this step, so a row
  # with an NA in any column (including NAs created by the factor conversions
  # above) is labelled "no model". The remaining checks are applied in order:
  # error_pct above 10, onset ratio <= -1, offset ratio >= 1.
  mutate(
    usability = ifelse(
      !complete.cases(.), "no model",
      ifelse(
        error_pct > 10, "bad model (error)",
        ifelse(
          dt_onset_s / v1_duration_s <= -1, "bad model (onset)",
          ifelse(
            dt_offset_s / v2_duration_s >= 1, "bad model (offset)",
            "good model"
          )
        )
      )
    )
  ) %>%
  # reversed level order, i.e. "good model" is the first level
  mutate(
    usability = factor(
      usability,
      levels = rev(c(
        "no model", "bad model (error)", "bad model (onset)",
        "bad model (offset)", "good model"
      ))
    )
  )

# shared appearance of the usability overview: black-and-white theme with
# light grey major and minor grid lines
common_layers <- list(
  theme_bw(),
  theme(
    panel.grid.major = element_line(colour = "#dfdfdf"),
    panel.grid.minor = element_line(colour = "#dfdfdf")
  )
)

# stacked bars scaled to proportions (position = "fill"): composition of the
# usability classes per v1_v2, c, rate and subject
p1 <- ggplot(data = data, mapping = aes(x = v1_v2, fill = usability)) +
  geom_bar(position = "fill") +
  common_layers
p2 <- ggplot(data = data, mapping = aes(x = c, fill = usability)) +
  geom_bar(position = "fill") +
  common_layers
p3 <- ggplot(data = data, mapping = aes(x = rate, fill = usability)) +
  geom_bar(position = "fill") +
  common_layers
p4 <- ggplot(data = data, mapping = aes(x = subject, fill = usability)) +
  geom_bar(position = "fill") +
  common_layers
grid.arrange(p1, p2, p3, p4)

# polish data
# The usability classes are removed one after the other; after every step the
# share of rows still kept is shown in percent of all rows in `data`.
datap <- filter(data, usability != "no model")
100 * nrow(datap) / nrow(data)

datap <- filter(datap, usability != "bad model (error)")
100 * nrow(datap) / nrow(data)

datap <- filter(datap, usability != "bad model (onset)")
100 * nrow(datap) / nrow(data)

datap <- filter(datap, usability != "bad model (offset)")
100 * nrow(datap) / nrow(data)

# -------------------------
# statistical analysis of collinearity

# variance inflation factor of the three duration predictors, taken from a
# plain linear model of dt_onset_s
model <- lm(
  dt_onset_s ~ v1_duration_s + c_duration_s + v2_duration_s,
  data = datap
)
vif(model)

# correlation coefficients between the same three duration columns
print(cor(datap[c("v1_duration_s", "c_duration_s", "v2_duration_s")]))

# -------------------------
# graphical analysis

# Layers shared by every boxplot panel: the boxplot itself, the p-value of a
# group comparison placed in the top-left corner, and the common theme.
common_layers <- list(
  geom_boxplot(),
  stat_compare_means(label.x.npc = "left", label.y.npc = "top", vjust = 1),
  theme_bw(),
  theme(
    panel.grid.major = element_line(colour = "#dfdfdf"),
    panel.grid.minor = element_line(colour = "#dfdfdf")
  )
)

# y axis shared by the three duration rows
y_duration <- ylim(0, 0.4)

# y axis of the phase row. phi_rad is measured in radians (it is thresholded
# to +/- pi/2 during aggregation and plotted on c(-pi, pi) in the scatter
# plots), so limits and breaks are given in radians as well. Degree values
# such as c(-180, 270) would squeeze the whole data range into a thin band.
# The upper limit leaves pi/2 of headroom above pi for the p-value label.
y_phase <- scale_y_continuous(
  limits = c(-pi, 1.5 * pi),
  breaks = seq(-pi, pi, pi / 2)
)

# one row per response variable, one column per grouping variable
# row 1: b_mm
p01 <- ggplot(data, aes(x = v1_v2, y = b_mm)) + common_layers
p02 <- ggplot(data, aes(x = c, y = b_mm)) + common_layers
p03 <- ggplot(data, aes(x = emph, y = b_mm)) + common_layers
p04 <- ggplot(data, aes(x = rate, y = b_mm)) + common_layers
# row 2: v1_duration_s
p05 <- ggplot(data, aes(x = v1_v2, y = v1_duration_s)) + common_layers + y_duration
p06 <- ggplot(data, aes(x = c, y = v1_duration_s)) + common_layers + y_duration
p07 <- ggplot(data, aes(x = emph, y = v1_duration_s)) + common_layers + y_duration
p08 <- ggplot(data, aes(x = rate, y = v1_duration_s)) + common_layers + y_duration
# row 3: c_duration_s
p09 <- ggplot(data, aes(x = v1_v2, y = c_duration_s)) + common_layers + y_duration
p10 <- ggplot(data, aes(x = c, y = c_duration_s)) + common_layers + y_duration
p11 <- ggplot(data, aes(x = emph, y = c_duration_s)) + common_layers + y_duration
p12 <- ggplot(data, aes(x = rate, y = c_duration_s)) + common_layers + y_duration
# row 4: v2_duration_s
p13 <- ggplot(data, aes(x = v1_v2, y = v2_duration_s)) + common_layers + y_duration
p14 <- ggplot(data, aes(x = c, y = v2_duration_s)) + common_layers + y_duration
p15 <- ggplot(data, aes(x = emph, y = v2_duration_s)) + common_layers + y_duration
p16 <- ggplot(data, aes(x = rate, y = v2_duration_s)) + common_layers + y_duration
# row 5: phi_rad
p17 <- ggplot(data, aes(x = v1_v2, y = phi_rad)) + common_layers + y_phase
p18 <- ggplot(data, aes(x = c, y = phi_rad)) + common_layers + y_phase
p19 <- ggplot(data, aes(x = emph, y = phi_rad)) + common_layers + y_phase
p20 <- ggplot(data, aes(x = rate, y = phi_rad)) + common_layers + y_phase

grid.arrange(
  p01, p02, p03, p04,
  p05, p06, p07, p08,
  p09, p10, p11, p12,
  p13, p14, p15, p16,
  p17, p18, p19, p20,
  nrow = 5, widths = c(6, 5, 2, 2)
)

# Layers shared by every scatter panel: points filled and shaped by subject,
# the correlation label from stat_cor(), a straight-line fit without
# confidence band, and the common theme.
# (legend.position = "none" can be added to theme() to hide the legend.)
common_layers <- list(
  geom_point(aes(fill = subject, shape = subject), color = "black", size = 1),
  scale_fill_manual(values = c("white", "yellow", "dodgerblue3", "black")),
  scale_shape_manual(values = c(21, 22, 23, 24)),
  stat_cor(),
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE),
  theme_bw(),
  theme(
    panel.grid.major = element_line(colour = "#dfdfdf"),
    panel.grid.minor = element_line(colour = "#dfdfdf")
  )
)

# Random row order for plotting.
# NOTE(review): no seed is set, so the order differs from run to run.
datas <- datap[sample(nrow(datap)), ]

# row 1: dt_onset_s against the three durations, b_mm and phi_rad
p1.1 <- ggplot(datas, aes(x = v1_duration_s, y = dt_onset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p1.2 <- ggplot(datas, aes(x = v2_duration_s, y = dt_onset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p1.3 <- ggplot(datas, aes(x = c_duration_s, y = dt_onset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p1.4 <- ggplot(datas, aes(x = b_mm, y = dt_onset_s)) +
  common_layers +
  ylim(-0.2, 0.2)
# the phase panel also gets one transparent box per phi group
p1.5 <- ggplot(datas, aes(x = phi_rad, y = dt_onset_s)) +
  common_layers +
  geom_boxplot(aes(x = phi, group = phi), outlier.shape = NA, alpha = 0) +
  ylim(-0.2, 0.2) +
  scale_x_continuous(limits = c(-pi, pi), breaks = seq(-pi, pi, pi / 2))

# row 2: dt_offset_s against the same predictors
p2.1 <- ggplot(datas, aes(x = v1_duration_s, y = dt_offset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p2.2 <- ggplot(datas, aes(x = v2_duration_s, y = dt_offset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p2.3 <- ggplot(datas, aes(x = c_duration_s, y = dt_offset_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(-0.2, 0.2)
p2.4 <- ggplot(datas, aes(x = b_mm, y = dt_offset_s)) +
  common_layers +
  ylim(-0.2, 0.2)
p2.5 <- ggplot(datas, aes(x = phi_rad, y = dt_offset_s)) +
  common_layers +
  geom_boxplot(aes(x = phi, group = phi), outlier.shape = NA, alpha = 0) +
  ylim(-0.2, 0.2) +
  scale_x_continuous(limits = c(-pi, pi), breaks = seq(-pi, pi, pi / 2))

# row 3: t_trans_s against the same predictors
p3.1 <- ggplot(datas, aes(x = v1_duration_s, y = t_trans_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(0, 0.4)
p3.2 <- ggplot(datas, aes(x = v2_duration_s, y = t_trans_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(0, 0.4)
p3.3 <- ggplot(datas, aes(x = c_duration_s, y = t_trans_s)) +
  common_layers +
  xlim(0, 0.3) +
  ylim(0, 0.4)
p3.4 <- ggplot(datas, aes(x = b_mm, y = t_trans_s)) +
  common_layers +
  ylim(0, 0.4)
p3.5 <- ggplot(datas, aes(x = phi_rad, y = t_trans_s)) +
  common_layers +
  geom_boxplot(aes(x = phi, group = phi), outlier.shape = NA, alpha = 0) +
  ylim(0, 0.4) +
  scale_x_continuous(limits = c(-pi, pi), breaks = seq(-pi, pi, pi / 2))

grid.arrange(
  p1.1, p1.2, p1.3, p1.4, p1.5,
  p2.1, p2.2, p2.3, p2.4, p2.5,
  p3.1, p3.2, p3.3, p3.4, p3.5,
  nrow = 3
)

# Wilcoxon tests of each timing measure between the two phi groups;
# only the p-value is shown.
wilcox.test(dt_onset_s ~ phi, data = datap)$p.value
wilcox.test(dt_offset_s ~ phi, data = datap)$p.value
wilcox.test(t_trans_s ~ phi, data = datap)$p.value

# -------------------------
# statistical analysis of dt_onset_s

# from here on phi enters the models as a factor
datap <- mutate(datap, phi = as.factor(phi))

# spatio-temporal model
fit1.1 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + phi + (1 | subject),
  data = datap
)

# Residual check: the horizontal line marks the cut-off; rows whose residual
# is not below it are dropped. The last line shows the share of rows kept.
qqPlot(resid(fit1.1))
cut <- 0.1
abline(h = cut)
datac <- subset(datap, resid(fit1.1) < cut)
nrow(datac) / nrow(datap)

# same formula, refitted on the trimmed data
fit1.1 <- lmer(formula(fit1.1), data = datac)
qqPlot(resid(fit1.1))
summary(fit1.1) # final result
r.squaredGLMM(fit1.1)

# alternative specifications fitted on the same trimmed data:
#   .1 all two-way interactions, .2 plus b_mm, .3 plus v2_duration_s,
#   .4 without phi
fit1.1.1 <- lmer(
  dt_onset_s ~ (v1_duration_s + c_duration_s + phi)^2 + (1 | subject),
  data = datac
)
fit1.1.2 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + phi + b_mm + (1 | subject),
  data = datac
)
fit1.1.3 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + phi + v2_duration_s + (1 | subject),
  data = datac
)
fit1.1.4 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + (1 | subject),
  data = datac
)
anova(fit1.1, fit1.1.1)
anova(fit1.1, fit1.1.2)
anova(fit1.1, fit1.1.3)
anova(fit1.1.4, fit1.1)

summary(fit1.1.4)
r.squaredGLMM(fit1.1.4)

# temporal model
fit1.2 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + (1 | subject) + (1 | v1_v2),
  data = datap
)

# first trimming pass: keep rows of datap with a residual below the cut-off
qqPlot(resid(fit1.2))
cut <- 0.1
abline(h = cut)
datac <- subset(datap, resid(fit1.2) < cut)
nrow(datac) / nrow(datap)

# second trimming pass: refit, then trim the already reduced data once more
fit1.2 <- lmer(formula(fit1.2), data = datac)
qqPlot(resid(fit1.2))
cut <- 0.1
abline(h = cut)
datac <- subset(datac, resid(fit1.2) < cut)
nrow(datac) / nrow(datap)

# refit on the twice-trimmed data
fit1.2 <- lmer(formula(fit1.2), data = datac)
qqPlot(resid(fit1.2))
summary(fit1.2) # final result
r.squaredGLMM(fit1.2)

# alternatives: .1 with the two-way interaction, .2 plus v2_duration_s
fit1.2.1 <- lmer(
  dt_onset_s ~ (v1_duration_s + c_duration_s)^2 + (1 | subject) + (1 | v1_v2),
  data = datac
)
fit1.2.2 <- lmer(
  dt_onset_s ~ v1_duration_s + c_duration_s + v2_duration_s +
    (1 | subject) + (1 | v1_v2),
  data = datac
)
anova(fit1.2, fit1.2.1)
anova(fit1.2, fit1.2.2)

# -------------------------
# statistical analysis of dt_offset_s

# spatio-temporal model
fit2.1 <- lmer(
  dt_offset_s ~ c_duration_s + b_mm + (1 | subject),
  data = datap
)

# first trimming pass: keep rows of datap with a residual below the cut-off
qqPlot(resid(fit2.1))
cut <- 0.1
abline(h = cut)
datac <- subset(datap, resid(fit2.1) < cut)
nrow(datac) / nrow(datap)

# second trimming pass: refit, then trim the already reduced data once more
fit2.1 <- lmer(formula(fit2.1), data = datac)
qqPlot(resid(fit2.1))
cut <- 0.1
abline(h = cut)
datac <- subset(datac, resid(fit2.1) < cut)
nrow(datac) / nrow(datap)

# refit on the twice-trimmed data
fit2.1 <- lmer(formula(fit2.1), data = datac)
qqPlot(resid(fit2.1))
summary(fit2.1) # final result
r.squaredGLMM(fit2.1)

# alternatives: .1 with the two-way interaction, .2 plus v1_duration_s,
# .3 plus v2_duration_s, .4 plus phi
fit2.1.1 <- lmer(
  dt_offset_s ~ (c_duration_s + b_mm)^2 + (1 | subject),
  data = datac
)
fit2.1.2 <- lmer(
  dt_offset_s ~ c_duration_s + b_mm + v1_duration_s + (1 | subject),
  data = datac
)
fit2.1.3 <- lmer(
  dt_offset_s ~ c_duration_s + b_mm + v2_duration_s + (1 | subject),
  data = datac
)
fit2.1.4 <- lmer(
  dt_offset_s ~ c_duration_s + b_mm + phi + (1 | subject),
  data = datac
)
anova(fit2.1, fit2.1.1)
anova(fit2.1, fit2.1.2) # p=0.004
anova(fit2.1, fit2.1.3)
anova(fit2.1, fit2.1.4)

# temporal model
fit2.2 <- lmer(
  dt_offset_s ~ c_duration_s + (1 | subject) + (1 | v1_v2),
  data = datap
)

# single trimming pass: keep rows of datap with a residual below the cut-off
qqPlot(resid(fit2.2))
cut <- 0.1
abline(h = cut)
datac <- subset(datap, resid(fit2.2) < cut)
nrow(datac) / nrow(datap)

# refit on the trimmed data
fit2.2 <- lmer(formula(fit2.2), data = datac)
qqPlot(resid(fit2.2))
summary(fit2.2) # final result
r.squaredGLMM(fit2.2)

# alternatives: .1 plus v1_duration_s, .2 plus v2_duration_s
fit2.2.1 <- lmer(
  dt_offset_s ~ c_duration_s + v1_duration_s + (1 | subject) + (1 | v1_v2),
  data = datac
)
fit2.2.2 <- lmer(
  dt_offset_s ~ c_duration_s + v2_duration_s + (1 | subject) + (1 | v1_v2),
  data = datac
)
anova(fit2.2, fit2.2.1)
anova(fit2.2, fit2.2.2)

