tdata <- read_csv("exp_data.csv")
# demographics
min(tdata$age)
## [1] 18
max(tdata$age)
## [1] 87
mean(tdata$age)
## [1] 37.33117
sd(tdata$age)
## [1] 12.84463
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
##
## 1: male 2: female 3: non-binary
## 197 109 1
## 4: prefer not to say
## 1
1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say
table(tdata$dv_query, tdata$outcome_valence)
##
## negative positive
## probability 50 50
## satisfaction 104 104
# check width of the means 95% cis (none is supposed to be grater than 1)
library(rcompanion)
ci_table <- groupwiseMean(DV_rating ~ dv_query + outcome_valence,
data = tdata,
traditional = FALSE,
percentile = TRUE)
(ci_width <- ci_table$Percentile.upper - ci_table$Percentile.lower)
## [1] 0.52 0.76 0.69 0.96
library(dplyr)
tdata %>%
group_by(dv_query) %>%
summarise_at(vars(DV_rating), list(name=var))
## # A tibble: 2 × 2
## dv_query name
## <chr> <dbl>
## 1 probability 1.42
## 2 satisfaction 4.69
myTheme <- theme(plot.title = element_text(face="bold", size = 22),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.x = element_text(size = 14, angle = 0),
axis.text.y = element_text(size = 16, angle = 0),
legend.text = element_text(size = 18),
legend.title = element_text(face = "bold", size = 18),
strip.text.x = element_text(size = 18),
strip.text.y = element_text(size = 18),
#panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(colour = "black"),
axis.line.y = element_line(colour = "black"),
axis.text = element_text(colour ="black"),
axis.ticks = element_line(colour ="black"))
tdata_long <- tdata
tdata_sub <- tdata_long
library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)
pd <- position_dodge(width = 0.3)
tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)
theme_set(theme_light(base_size = 20, base_family = "Poppins"))
# new labels for the facets
query.labs <- c("test question: probability", "test question: satisfaction")
names(query.labs) <- c("probability", "satisfaction")
g <- ggplot(tdata_sub, aes(x = outcome_valence, y = valueJitter)) +
#guides(fill=FALSE)+
facet_grid( ~ dv_query, labeller = labeller(dv_query = query.labs))+
#ggtitle("Subjects' causal srength ratings") +
scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
#scale_x_discrete(labels=c("no \ninformation", "'You don't \nknow'")) +
#stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
geom_violinhalf(aes(y = rating_rec, group = outcome_valence, fill = outcome_valence), color = NA,
position=position_dodge(1), alpha = 0.4)+
#geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
geom_hline(yintercept=0, linetype="dashed", color = "black")+
geom_jitter(aes(color = outcome_valence), alpha = 0.5, width = 0.15, height = 0.2) +
stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot,
geom = "errorbar", width = 0, size = 1) +
stat_summary(aes(y = rating_rec, group=1, color = outcome_valence), fun.y=mean, geom="line",
color = "black", shape = 22, size = 1, alpha = .7)+
stat_summary(aes(y = rating_rec, group=1, fill = outcome_valence), fun.y=mean, geom="point",
color = "black", shape = 22, size = 2, group=1, alpha = 1)+
stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4,
group=1, alpha = 1, position = position_dodge(width = 0.5))+
labs(x = "Feature valence", y = "Explanation rating") +
scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
theme(legend.position = "none")+
myTheme+
theme(panel.grid.major = element_line(color = "lightgrey",
size = 0.5,
linetype = 'dotted'))+
stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
vjust = -6)
g
ggsave("results_means_mainDV.svg",width=8,height=5)
ggsave("results_means_mainDV.pdf",width=8,height=5)
tdata_sub_g2 <- tdata_sub
tdata_sub_g2$cond_combined <- str_c(tdata_sub_g2$dv_query, ' ', tdata_sub_g2$outcome_valence)
library(ggridges)
g2 <- ggplot(tdata_sub_g2, aes(x = rating_rec, y = cond_combined, fill = after_stat(x))) +
geom_density_ridges_gradient() +
#geom_density_ridges(fill = "lightblue", alpha = 0.5)+
#stat_summary(aes(x = rating_rec), fun.x=mean, geom="point",
# color = "black", shape = 22, size = 2, group=1, alpha = 1)+
scale_fill_viridis_c(name = "Explanation \nRating", option = "C")+
labs(x = "Rating", y = "Category Features") +
myTheme
g2
Get the values of the CIs shown in the first plot:
values <- ggplot_build(g)$data[[4]] # values are shown in the 4th panel
values
## x group y ymin ymax PANEL flipped_aes xmin xmax colour
## 1 1 1 -0.2200000 -0.4805000 0.0000000 1 FALSE 1 1 black
## 2 2 1 0.1200000 -0.2800000 0.5400000 1 FALSE 2 2 black
## 3 1 1 -0.5961538 -0.9423077 -0.2980769 2 FALSE 1 1 black
## 4 2 1 0.3653846 -0.1250000 0.8079327 2 FALSE 2 2 black
## linewidth linetype width alpha
## 1 1 1 0 NA
## 2 1 1 0 NA
## 3 1 1 0 NA
## 4 1 1 0 NA
get group medians:
# groupwiseMean(rating_rec ~ Features + Knowledge,
# data = tdata_long,
# traditional = FALSE,
# percentile = TRUE)
groupwiseMedian(rating_rec ~ dv_query + outcome_valence,
data = tdata_long,
bca = FALSE,
percentile = TRUE,
R = 1000)
## dv_query outcome_valence n Median Conf.level Percentile.lower
## 1 probability negative 50 0 0.95 0
## 2 probability positive 50 0 0.95 0
## 3 satisfaction negative 104 0 0.95 0
## 4 satisfaction positive 104 0 0.95 0
## Percentile.upper
## 1 0
## 2 0
## 3 0
## 4 0
counts <- tdata_long %>%
group_by(dv_query, outcome_valence, rating_rec) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts
## # A tibble: 30 × 6
## # Groups: dv_query, outcome_valence [4]
## dv_query outcome_valence rating_rec n pct lbl
## <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 probability negative -3 4 0.08 8%
## 2 probability negative -1 1 0.02 2%
## 3 probability negative 0 44 0.88 88%
## 4 probability negative 2 1 0.02 2%
## 5 probability positive -3 3 0.06 6%
## 6 probability positive -2 1 0.02 2%
## 7 probability positive -1 1 0.02 2%
## 8 probability positive 0 39 0.78 78%
## 9 probability positive 1 2 0.04 4%
## 10 probability positive 3 1 0.02 2%
## # ℹ 20 more rows
# shows that 0 is the mode in all conditions
tdata_long$category[tdata_long$rating_rec < 0] <- "narrow"
tdata_long$category[tdata_long$rating_rec == 0] <- "unbiased"
tdata_long$category[tdata_long$rating_rec > 0] <- "broad"
counts2 <- tdata_long %>%
group_by(dv_query, outcome_valence, category) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts2
## # A tibble: 12 × 6
## # Groups: dv_query, outcome_valence [4]
## dv_query outcome_valence category n pct lbl
## <chr> <chr> <chr> <int> <dbl> <chr>
## 1 probability negative broad 1 0.02 2%
## 2 probability negative narrow 5 0.1 10%
## 3 probability negative unbiased 44 0.88 88%
## 4 probability positive broad 6 0.12 12%
## 5 probability positive narrow 5 0.1 10%
## 6 probability positive unbiased 39 0.78 78%
## 7 satisfaction negative broad 6 0.0577 6%
## 8 satisfaction negative narrow 27 0.260 26%
## 9 satisfaction negative unbiased 71 0.683 68%
## 10 satisfaction positive broad 25 0.240 24.0%
## 11 satisfaction positive narrow 18 0.173 17.3%
## 12 satisfaction positive unbiased 61 0.587 58.7%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
Get proportion CIs for different categories in each diagnosability condition:
library(PropCIs)
library(DescTools)
library(purrr)
counts_prob_neg <- subset(counts2, dv_query == "probability" & outcome_valence == "negative")
counts_prob_pos <- subset(counts2, dv_query == "probability" & outcome_valence == "positive")
counts_sat_neg <- subset(counts2, dv_query == "satisfaction" & outcome_valence == "negative")
counts_sat_pos <- subset(counts2, dv_query == "satisfaction" & outcome_valence == "positive")
(MultinomCI(counts_prob_neg$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_1)
## est lwr.ci upr.ci
## [1,] 0.02 0.00 0.1151582
## [2,] 0.10 0.04 0.1951582
## [3,] 0.88 0.82 0.9751582
(MultinomCI(counts_sat_neg$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_3)
## est lwr.ci upr.ci
## [1,] 0.05769231 0.0000000 0.1455385
## [2,] 0.25961538 0.1730769 0.3474616
## [3,] 0.68269231 0.5961538 0.7705385
(MultinomCI(counts_prob_pos$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_2)
## est lwr.ci upr.ci
## [1,] 0.12 0.02 0.2271097
## [2,] 0.10 0.00 0.2071097
## [3,] 0.78 0.68 0.8871097
(MultinomCI(counts_sat_pos$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_4)
## est lwr.ci upr.ci
## [1,] 0.2403846 0.15384615 0.3433476
## [2,] 0.1730769 0.08653846 0.2760399
## [3,] 0.5865385 0.50000000 0.6895015
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2], selection_ci_3[,2], selection_ci_4[,2])
ci_up <- c(selection_ci_1[,3], selection_ci_2[,3], selection_ci_3[,3], selection_ci_4[,3])
plotdata <- counts2
plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up
Plot:
library(scales)
theme_set(theme_light(base_size = 13, base_family = "Poppins"))
library(grid)
# new labels for the facets
query.labs <- c("test question: probability", "test question: satisfaction")
names(query.labs) <- c("probability", "satisfaction")
# new labels for the facets
valence.labs <- c("feature valence: negative", "feature valence: positive")
names(valence.labs) <- c("negative", "positive")
g<- ggplot(plotdata,
aes(x = category,
y = pct,
fill = outcome_valence)) +
facet_grid(dv_query ~ outcome_valence, labeller = labeller(outcome_valence = valence.labs, dv_query = query.labs))+
geom_bar(stat = "identity",
position = "dodge") +
scale_y_continuous(limits = seq(0, 1.2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 3.5,
position = position_dodge(width = 1),
vjust = -3) +
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
#scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Explanatory preference",
x = "Explanatory preference")+
geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct,
# ymin = plotdata$ci_low,
# ymax = plotdata$ci_up,
# colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
theme(panel.spacing.y = unit(1, "lines"))
g
#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories.svg",width=7.5,height=6)
ggsave("categories.pdf",width=7.5,height=6)
library(afex)
library(emmeans)
a1 <- aov_car(rating_rec ~ dv_query*outcome_valence + Error(subj_code), tdata_long, anova_table = list(es = "pes"))
a1
## Anova Table (Type 3 tests)
##
## Response: rating_rec
## Effect df MSE F pes p.value
## 1 dv_query 1, 304 3.48 0.08 <.001 .774
## 2 outcome_valence 1, 304 3.48 8.21 ** .026 .004
## 3 dv_query:outcome_valence 1, 304 3.48 1.87 .006 .172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
more digits for the p.values can be obtained by running model comparisons manually.
null <- lme(rating_rec ~ 1, random = ~1|subj_code, data = tdata_long, method = "ML")
main_query <- lme(rating_rec ~ dv_query, random = ~1|subj_code, data = tdata_long, method = "ML")
anova(null, main_query)
## Model df AIC BIC logLik Test L.Ratio p-value
## null 1 3 1275.002 1286.192 -634.5008
## main_query 2 4 1276.922 1291.842 -634.4608 1 vs 2 0.08010185 0.7772
main_valence <- lme(rating_rec ~ dv_query + outcome_valence, random = ~1|subj_code, data = tdata_long, method = "ML")
anova(null, main_query, main_valence)
## Model df AIC BIC logLik Test L.Ratio p-value
## null 1 3 1275.002 1286.192 -634.5008
## main_query 2 4 1276.922 1291.842 -634.4608 1 vs 2 0.080102 0.7772
## main_valence 3 5 1266.335 1284.986 -628.1676 2 vs 3 12.586348 0.0004
interaction <- lme(rating_rec ~ dv_query*outcome_valence, random = ~1|subj_code, data = tdata_long, method = "ML")
anova(null, main_query, main_valence, interaction)
## Model df AIC BIC logLik Test L.Ratio p-value
## null 1 3 1275.002 1286.192 -634.5008
## main_query 2 4 1276.922 1291.842 -634.4608 1 vs 2 0.080102 0.7772
## main_valence 3 5 1266.335 1284.986 -628.1676 2 vs 3 12.586348 0.0004
## interaction 4 6 1266.444 1288.825 -627.2221 3 vs 4 1.891053 0.1691
###############
# a follow-up analysis
library(lsmeans)
# means
ls2 <- lsmeans(a1, c("dv_query", "outcome_valence")) # group means by between-condition
ls2
## dv_query outcome_valence lsmean SE df lower.CL upper.CL
## probability negative -0.220 0.264 304 -0.73941 0.299
## satisfaction negative -0.596 0.183 304 -0.95630 -0.236
## probability positive 0.120 0.264 304 -0.39941 0.639
## satisfaction positive 0.365 0.183 304 0.00524 0.726
##
## Confidence level used: 0.95
# contrast the strength levels (main effect; averaging over decision level, as there was no sig. interaction)
contrasts <- emmeans(a1, ~ dv_query*outcome_valence)
s <- pairs(contrasts, adjust = "none") # no need to adjust because one single contrast was planned a priori (the interaction contrast)
s
## contrast estimate SE df t.ratio
## probability negative - satisfaction negative 0.376 0.321 304 1.171
## probability negative - probability positive -0.340 0.373 304 -0.911
## probability negative - satisfaction positive -0.585 0.321 304 -1.823
## satisfaction negative - probability positive -0.716 0.321 304 -2.230
## satisfaction negative - satisfaction positive -0.962 0.259 304 -3.715
## probability positive - satisfaction positive -0.245 0.321 304 -0.764
## p.value
## 0.2425
## 0.3631
## 0.0694
## 0.0265
## 0.0002
## 0.4455
confint(s, level = 0.95)
## contrast estimate SE df lower.CL
## probability negative - satisfaction negative 0.376 0.321 304 -0.256
## probability negative - probability positive -0.340 0.373 304 -1.075
## probability negative - satisfaction positive -0.585 0.321 304 -1.217
## satisfaction negative - probability positive -0.716 0.321 304 -1.348
## satisfaction negative - satisfaction positive -0.962 0.259 304 -1.471
## probability positive - satisfaction positive -0.245 0.321 304 -0.877
## upper.CL
## 1.0082
## 0.3946
## 0.0467
## -0.0841
## -0.4522
## 0.3867
##
## Confidence level used: 0.95
###############
# a follow-up analysis
library(lsmeans)
# means
ls2 <- lsmeans(a1, c("dv_query", "outcome_valence")) # group means by between-condition
ls2
## dv_query outcome_valence lsmean SE df lower.CL upper.CL
## probability negative -0.220 0.264 304 -0.73941 0.299
## satisfaction negative -0.596 0.183 304 -0.95630 -0.236
## probability positive 0.120 0.264 304 -0.39941 0.639
## satisfaction positive 0.365 0.183 304 0.00524 0.726
##
## Confidence level used: 0.95
# contrast the strength levels (main effect; averaging over decision level, as there was no sig. interaction)
contrasts <- emmeans(a1, ~ dv_query*outcome_valence)
s <- pairs(pairs(contrasts), adjust = "none") # no need to adjust because one single contrast was planned a priori (the interaction contrast)
s
## contrast
## (probability negative - satisfaction negative) - (probability negative - probability positive)
## (probability negative - satisfaction negative) - (probability negative - satisfaction positive)
## (probability negative - satisfaction negative) - (satisfaction negative - probability positive)
## (probability negative - satisfaction negative) - (satisfaction negative - satisfaction positive)
## (probability negative - satisfaction negative) - (probability positive - satisfaction positive)
## (probability negative - probability positive) - (probability negative - satisfaction positive)
## (probability negative - probability positive) - (satisfaction negative - probability positive)
## (probability negative - probability positive) - (satisfaction negative - satisfaction positive)
## (probability negative - probability positive) - (probability positive - satisfaction positive)
## (probability negative - satisfaction positive) - (satisfaction negative - probability positive)
## (probability negative - satisfaction positive) - (satisfaction negative - satisfaction positive)
## (probability negative - satisfaction positive) - (probability positive - satisfaction positive)
## (satisfaction negative - probability positive) - (satisfaction negative - satisfaction positive)
## (satisfaction negative - probability positive) - (probability positive - satisfaction positive)
## (satisfaction negative - satisfaction positive) - (probability positive - satisfaction positive)
## estimate SE df t.ratio p.value
## 0.7162 0.321 304 2.230 0.0265
## 0.9615 0.259 304 3.715 0.0002
## 1.0923 0.523 304 2.089 0.0375
## 1.3377 0.487 304 2.747 0.0064
## 0.6215 0.454 304 1.368 0.1722
## 0.2454 0.321 304 0.764 0.4455
## 0.3762 0.321 304 1.171 0.2425
## 0.6215 0.454 304 1.368 0.1722
## -0.0946 0.618 304 -0.153 0.8784
## 0.1308 0.454 304 0.288 0.7736
## 0.3762 0.321 304 1.171 0.2425
## -0.3400 0.373 304 -0.911 0.3631
## 0.2454 0.321 304 0.764 0.4455
## -0.4708 0.588 304 -0.801 0.4239
## -0.7162 0.321 304 -2.230 0.0265
confint(s, level = 0.95)
## contrast
## (probability negative - satisfaction negative) - (probability negative - probability positive)
## (probability negative - satisfaction negative) - (probability negative - satisfaction positive)
## (probability negative - satisfaction negative) - (satisfaction negative - probability positive)
## (probability negative - satisfaction negative) - (satisfaction negative - satisfaction positive)
## (probability negative - satisfaction negative) - (probability positive - satisfaction positive)
## (probability negative - probability positive) - (probability negative - satisfaction positive)
## (probability negative - probability positive) - (satisfaction negative - probability positive)
## (probability negative - probability positive) - (satisfaction negative - satisfaction positive)
## (probability negative - probability positive) - (probability positive - satisfaction positive)
## (probability negative - satisfaction positive) - (satisfaction negative - probability positive)
## (probability negative - satisfaction positive) - (satisfaction negative - satisfaction positive)
## (probability negative - satisfaction positive) - (probability positive - satisfaction positive)
## (satisfaction negative - probability positive) - (satisfaction negative - satisfaction positive)
## (satisfaction negative - probability positive) - (probability positive - satisfaction positive)
## (satisfaction negative - satisfaction positive) - (probability positive - satisfaction positive)
## estimate SE df lower.CL upper.CL
## 0.7162 0.321 304 0.0841 1.3482
## 0.9615 0.259 304 0.4522 1.4709
## 1.0923 0.523 304 0.0635 2.1211
## 1.3377 0.487 304 0.3794 2.2960
## 0.6215 0.454 304 -0.2723 1.5154
## 0.2454 0.321 304 -0.3867 0.8774
## 0.3762 0.321 304 -0.2559 1.0082
## 0.6215 0.454 304 -0.2723 1.5154
## -0.0946 0.618 304 -1.3106 1.1214
## 0.1308 0.454 304 -0.7631 1.0246
## 0.3762 0.321 304 -0.2559 1.0082
## -0.3400 0.373 304 -1.0746 0.3946
## 0.2454 0.321 304 -0.3867 0.8774
## -0.4708 0.588 304 -1.6277 0.6862
## -0.7162 0.321 304 -1.3482 -0.0841
##
## Confidence level used: 0.95
neg_prob_vs_sat <- prop.test(x = c(plotdata$n[2], plotdata$n[8]), n = c(50, 104), alternative = "less", correct = F)
neg_prob_vs_sat
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[8]) out of c(50, 104)
## X-squared = 5.2259, df = 1, p-value = 0.01113
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.06026536
## sample estimates:
## prop 1 prop 2
## 0.1000000 0.2596154
# Note: Yates' correction is turned off, because it is recommended only if a cell contains less than 5 observations see: https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity)
pos_prob_vs_sat <- prop.test(x = c(plotdata$n[4], plotdata$n[10]), n = c(50, 104), alternative = "less", correct = F)
pos_prob_vs_sat
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[4], plotdata$n[10]) out of c(50, 104)
## X-squared = 3.0437, df = 1, p-value = 0.04053
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.0000000 -0.0180889
## sample estimates:
## prop 1 prop 2
## 0.1200000 0.2403846