tdata <- read_csv("exp_data.csv")
# demographics
min(tdata$age)
## [1] 18
max(tdata$age)
## [1] 74
mean(tdata$age)
## [1] 38.84921
sd(tdata$age)
## [1] 13.65282
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
##
## 1: male 2: female 3: non-binary
## 147 101 2
## 4: prefer not to say
## 2
1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say
Data handling:
n per condition:
table(tdata_long$Story, tdata_long$LatentF)
##
## feature diagnosability: \nboth similar
## Daisy 42
## Stamp 42
## Watch 42
##
## feature diagnosability: \nlatent feature harder
## Daisy 42
## Stamp 42
## Watch 42
Get CI width of mean in each condition:
library(rcompanion)
ci_table <- groupwiseMean(rating_rec ~ Story + LatentF,
data = tdata_long,
traditional = FALSE,
percentile = TRUE)
(ci_width <- ci_table$Percentile.upper - ci_table$Percentile.lower)
## [1] 0.7640 0.5720 1.0010 0.9044 0.7620 0.4996
Get Ci width of mean for theoretically relevant factor (relative feature diagnosability)
ci_table <- groupwiseMean(rating_rec ~ LatentF,
data = tdata_long,
traditional = FALSE,
percentile = TRUE)
(ci_width <- ci_table$Percentile.upper - ci_table$Percentile.lower)
## [1] 0.492 0.389
Means for each scenario:
myTheme <- theme(plot.title = element_text(face="bold", size = 22),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.x = element_text(size = 14, angle = 0),
axis.text.y = element_text(size = 16, angle = 0),
legend.text = element_text(size = 18),
legend.title = element_text(face = "bold", size = 18),
strip.text.x = element_text(size = 18),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(colour = "black"),
axis.line.y = element_line(colour = "black"),
axis.text = element_text(colour ="black"),
axis.ticks = element_line(colour ="black"))
tdata_sub <- tdata_long
library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)
pd <- position_dodge(width = 0.3)
tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)
theme_set(theme_light(base_size = 20, base_family = "Poppins"))
# new labes for the facets
g <- ggplot(tdata_sub, aes(x = LatentF, y = valueJitter)) +
guides(fill=FALSE)+
facet_grid( ~ Story)+
#ggtitle("Subjects' causal srength ratings") +
scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
scale_x_discrete(labels=c("both \nsimilar", "latent feature \nharder")) +
#stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
geom_violinhalf(aes(y = rating_rec, group = LatentF, fill = LatentF), color = NA,
position=position_dodge(1), alpha = 0.4)+
#geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
geom_hline(yintercept=0, linetype="dashed", color = "black")+
geom_jitter(aes(color = LatentF), alpha = 0.5, width = 0.15, height = 0.2) +
stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot,
geom = "errorbar", width = 0, size = 1) +
stat_summary(aes(y = rating_rec, group=1, fill = LatentF), fun.y=mean, geom="point",
color = "black", shape = 22, size = 2, group=1, alpha = 1)+
stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4,
group=1, alpha = 1, position = position_dodge(width = 0.5))+
labs(x = "Relative Feature nDiagnosability", y = "Categorization Rating") +
scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
theme(legend.position = "none")+
myTheme+
theme(panel.grid.major = element_line(color = "lightgrey",
size = 0.5,
linetype = 'dotted'))+
stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
vjust = -6)
g
ggsave("results_scen_means_mainDV.svg",width=10,height=5)
ggsave("results_scen_means_mainDV.pdf",width=10,height=5)
#ggsave("results_means_selection.pdf",width=11,height=5)
Means averaged over scenario:
myTheme <- theme(plot.title = element_text(face="bold", size = 22),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.x = element_text(size = 14, angle = 0),
axis.text.y = element_text(size = 16, angle = 0),
legend.text = element_text(size = 18),
legend.title = element_text(face = "bold", size = 18),
strip.text.x = element_text(size = 18),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(colour = "black"),
axis.line.y = element_line(colour = "black"),
axis.text = element_text(colour ="black"),
axis.ticks = element_line(colour ="black"))
tdata_sub <- tdata_long
library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)
pd <- position_dodge(width = 0.3)
tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)
theme_set(theme_light(base_size = 20, base_family = "Poppins"))
# new labes for the facets
g <- ggplot(tdata_sub, aes(x = LatentF, y = valueJitter)) +
guides(fill=FALSE)+
#facet_grid( ~ Story)+
#ggtitle("Subjects' causal srength ratings") +
scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
scale_x_discrete(labels=c("both \nsimilar", "latent feature \nharder")) +
#stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
geom_violinhalf(aes(y = rating_rec, group = LatentF, fill = LatentF), color = NA,
position=position_dodge(1), alpha = 0.4)+
#geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
geom_hline(yintercept=0, linetype="dashed", color = "black")+
geom_jitter(aes(color = LatentF), alpha = 0.5, width = 0.15, height = 0.2) +
stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot,
geom = "errorbar", width = 0, size = 1) +
stat_summary(aes(y = rating_rec, group=1, color = outcome_valence), fun.y=mean, geom="line",
color = "black", shape = 22, size = 1, alpha = .7)+
stat_summary(aes(y = rating_rec, group=1, fill = LatentF), fun.y=mean, geom="point",
color = "black", shape = 22, size = 2, group=1, alpha = 1)+
stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4,
group=1, alpha = 1, position = position_dodge(width = 0.5))+
labs(x = "Feature diagnosability", y = "Categorization Rating") +
scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
theme(legend.position = "none")+
myTheme+
theme(panel.grid.major = element_line(color = "lightgrey",
size = 0.5,
linetype = 'dotted'))+
stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
vjust = -6)
g
ggsave("results_means_mainDV.svg",width=4,height=5)
ggsave("results_means_mainDV.pdf",width=4,height=5)
#ggsave("results_means_selection.pdf",width=11,height=5)
Use the ggplot_build package to see a table with the means and CI values plotted in the graph:
values <- ggplot_build(g)$data[[4]] # values are shown in the 4th panel
values
## x group y ymin ymax PANEL flipped_aes xmin xmax colour
## 1 1 1 -0.47619048 -0.7065476 -0.2460317 1 FALSE 1 1 black
## 2 2 1 0.05555556 -0.1349206 0.2539683 1 FALSE 2 2 black
## linewidth linetype width alpha
## 1 1 1 0 NA
## 2 1 1 0 NA
get group medians:
library(rcompanion)
# groupwiseMean(rating_rec ~ Features + Knowledge,
# data = tdata_long,
# traditional = FALSE,
# percentile = TRUE)
groupwiseMedian(rating_rec ~ LatentF,
data = tdata_long,
bca = FALSE,
percentile = TRUE,
R = 1000)
## LatentF n Median Conf.level
## 1 feature diagnosability: \nboth similar 126 0 0.95
## 2 feature diagnosability: \nlatent feature harder 126 0 0.95
## Percentile.lower Percentile.upper
## 1 0 0
## 2 0 0
counts <- tdata_long %>%
group_by(LatentF, rating_rec) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts
## # A tibble: 18 × 5
## # Groups: LatentF [2]
## LatentF rating_rec n pct lbl
## <fct> <dbl> <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar" -5 2 0.0159 1.59%
## 2 "feature diagnosability: \nboth similar" -4 3 0.0238 2.38%
## 3 "feature diagnosability: \nboth similar" -3 11 0.0873 8.73%
## 4 "feature diagnosability: \nboth similar" -2 7 0.0556 5.56%
## 5 "feature diagnosability: \nboth similar" -1 4 0.0317 3.17%
## 6 "feature diagnosability: \nboth similar" 0 95 0.754 75.4…
## 7 "feature diagnosability: \nboth similar" 1 1 0.00794 0.79%
## 8 "feature diagnosability: \nboth similar" 3 1 0.00794 0.79%
## 9 "feature diagnosability: \nboth similar" 4 1 0.00794 0.79%
## 10 "feature diagnosability: \nboth similar" 5 1 0.00794 0.79%
## 11 "feature diagnosability: \nlatent feature har… -4 2 0.0159 1.59%
## 12 "feature diagnosability: \nlatent feature har… -3 2 0.0159 1.59%
## 13 "feature diagnosability: \nlatent feature har… -2 2 0.0159 1.59%
## 14 "feature diagnosability: \nlatent feature har… 0 113 0.897 89.6…
## 15 "feature diagnosability: \nlatent feature har… 2 2 0.0159 1.59%
## 16 "feature diagnosability: \nlatent feature har… 3 1 0.00794 0.79%
## 17 "feature diagnosability: \nlatent feature har… 4 2 0.0159 1.59%
## 18 "feature diagnosability: \nlatent feature har… 5 2 0.0159 1.59%
tdata_long$category[tdata_long$rating_rec < 0] <- "narrow"
tdata_long$category[tdata_long$rating_rec == 0] <- "unbiased"
tdata_long$category[tdata_long$rating_rec > 0] <- "broad"
counts2 <- tdata_long %>%
group_by(LatentF, category) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts2
## # A tibble: 6 × 5
## # Groups: LatentF [2]
## LatentF category n pct lbl
## <fct> <chr> <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar" broad 4 0.0317 3%
## 2 "feature diagnosability: \nboth similar" narrow 27 0.214 21%
## 3 "feature diagnosability: \nboth similar" unbiased 95 0.754 75%
## 4 "feature diagnosability: \nlatent feature harder" broad 7 0.0556 5.56%
## 5 "feature diagnosability: \nlatent feature harder" narrow 6 0.0476 4.76%
## 6 "feature diagnosability: \nlatent feature harder" unbiased 113 0.897 89.68%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
Get proportion CIs for different categories in each diagnosability condition:
library(PropCIs)
library(DescTools)
library(purrr)
counts_visible <- subset(counts2, LatentF == "feature diagnosability: \nboth similar")
counts_invisible <- subset(counts2, LatentF == "feature diagnosability: \nlatent feature harder")
(MultinomCI(counts_visible$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_vis)
## est lwr.ci upr.ci
## [1,] 0.03174603 0.0000000 0.1049183
## [2,] 0.21428571 0.1428571 0.2874579
## [3,] 0.75396825 0.6825397 0.8271405
(MultinomCI(counts_invisible$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_invis)
## est lwr.ci upr.ci
## [1,] 0.05555556 0.015873016 0.1093575
## [2,] 0.04761905 0.007936508 0.1014210
## [3,] 0.89682540 0.857142857 0.9506273
ci_low <- c(selection_ci_vis[,2], selection_ci_invis[,2])
ci_up <- c(selection_ci_vis[,3], selection_ci_invis[,3])
plotdata <- counts2
plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up
Plot:
library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))
g<- ggplot(plotdata,
aes(x = category,
y = pct,
fill = LatentF)) +
facet_grid( ~ LatentF)+
geom_bar(stat = "identity",
position = "dodge") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 3.5,
position = position_dodge(width = 1),
vjust = -2.5) +
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
#scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Explanatory preference",
x = "Explanatory preference")+
geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct,
# ymin = plotdata$ci_low,
# ymax = plotdata$ci_up,
# colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories.svg",width=7,height=5)
ggsave("categories.pdf",width=7,height=5)
library(afex)
library(emmeans)
a1 <- aov_car(rating_rec ~ LatentF + Error(subj_code), tdata_long, anova_table = list(es = "pes"))
a1
## Anova Table (Type 3 tests)
##
## Response: rating_rec
## Effect df MSE F pes p.value
## 1 LatentF 1, 250 1.63 10.91 ** .042 .001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
###############
# a follow-up analysis
library(lsmeans)
# means
ls2 <- lsmeans(a1, c("LatentF")) # group means by between-condition
ls2
## LatentF lsmean SE df lower.CL
## feature diagnosability: \nboth similar -0.4762 0.114 250 -0.700
## feature diagnosability: \nlatent feature harder 0.0556 0.114 250 -0.169
## upper.CL
## -0.252
## 0.280
##
## Confidence level used: 0.95
# contrast the strength levels (main effect; averaging over decision level, as there was no sig. interaction)
contrasts <- emmeans(a1, ~ LatentF)
s <- pairs(contrasts, adjust = "none")
s
## contrast
## feature diagnosability: \nboth similar - feature diagnosability: \nlatent feature harder
## estimate SE df t.ratio p.value
## -0.532 0.161 250 -3.304 0.0011
confint(s, level = 0.95)
## contrast
## feature diagnosability: \nboth similar - feature diagnosability: \nlatent feature harder
## estimate SE df lower.CL upper.CL
## -0.532 0.161 250 -0.849 -0.215
##
## Confidence level used: 0.95
t.test(rating_rec ~ LatentF, data = tdata_long, var.equal = F, alternative = "two.sided", paired = F) # two sided is used here to get the full confidence interval of the difference; note, however, that the hypothesis was a directed one (thus, alternative = "less" could have been used to get the one-sided p-value).
##
## Welch Two Sample t-test
##
## data: rating_rec by LatentF
## t = -3.3036, df = 237.19, p-value = 0.001102
## alternative hypothesis: true difference in means between group feature diagnosability:
## both similar and group feature diagnosability:
## latent feature harder is not equal to 0
## 95 percent confidence interval:
## -0.8488346 -0.2146574
## sample estimates:
## mean in group feature diagnosability: \nboth similar
## -0.47619048
## mean in group feature diagnosability: \nlatent feature harder
## 0.05555556