tdata <- read_csv("Supp_Exp_data.csv")
#tdata <- read.table("dummy_dat.csv", sep=';', header = T)
#tdata <- subset(tdata, subj_code != "RhoYo25w8umI") # This person needs to be excluded because they definitely did not understand the task
# demographics
min(tdata$age)
## [1] 19
max(tdata$age)
## [1] 78
mean(tdata$age)
## [1] 35.34028
sd(tdata$age)
## [1] 13.48045
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
##
## 1: male 2: female 3: non-binary
## 35 106 2
## 4: prefer not to say
## 1
1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say
## , , = Lian
##
##
## Vilosa Pylium
## no info 12 12
## 'don't know' 12 12
## unreadable 12 12
##
## , , = Gludon
##
##
## Vilosa Pylium
## no info 12 12
## 'don't know' 12 12
## unreadable 12 12
myTheme <- theme(plot.title = element_text(face="bold", size = 22),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
#axis.text.x = element_text(size = 14, angle = 30, vjust = 0.7),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 16, angle = 0),
legend.text = element_text(size = 18),
legend.title = element_text(face = "bold", size = 18),
strip.text.x = element_text(size = 18),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(colour = "black"),
axis.line.y = element_line(colour = "black"),
axis.text = element_text(colour ="black"),
axis.ticks = element_line(colour ="black"))
tdata_sub <- tdata_long
library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)
pd <- position_dodge(width = 0.3)
tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 1, amount = 0.04)
theme_set(theme_light(base_size = 20, base_family = "Poppins"))
# new labes for the facets
g <- ggplot(tdata_sub, aes(x = source, y = valueJitter)) +
guides(fill=FALSE)+
#facet_grid( ~ Mutation1, Effects1)+
#ggtitle("Subjects' causal srength ratings") +
scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
#scale_x_discrete(labels=c("Single-effect \ncause", "Common \ncause", "No \ncause")) +
#stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
geom_violinhalf(aes(y = rating_rec, group = source, fill = source), color = NA, position=position_dodge(1), alpha = 0.4)+
#geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
geom_hline(yintercept=0, linetype="dashed", color = "black")+
geom_jitter(aes(color = source), alpha = 0.5, width = 0.15, height = 0.2) +
stat_summary(aes(y = rating_rec,group=1), fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 1) +
stat_summary(aes(y = rating_rec,group=1, fill = source), fun.y=mean, geom="point", color = "black", shape = 22, size = 2, group=1, alpha = 1)+
stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4,
group=1, alpha = 1, position = position_dodge(width = 0.5))+
labs(x = "Information about latent feature", y = "Explanation rating") +
scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
theme(legend.position = "none")+
myTheme+
theme(panel.grid.major = element_line(color = "lightgrey",
size = 0.5,
linetype = 'dotted'))+
stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
vjust = -6)
g
ggsave("results_means.svg",width=5.5,height=5)
ggsave("results_means.pdf",width=5.5, height=5)
Use the ggplot_build package to see a table with the means and CI values plotted in the graph:
values <- ggplot_build(g)$data[[4]] # values are shown in the 4th panel
values
## x group y ymin ymax PANEL flipped_aes xmin xmax colour
## 1 1 1 -0.8333333 -1.2916667 -0.37500000 1 FALSE 1 1 black
## 2 2 1 -0.5416667 -1.0416667 0.02083333 1 FALSE 2 2 black
## 3 3 1 -0.1666667 -0.4583333 0.10416667 1 FALSE 3 3 black
## linewidth linetype width alpha
## 1 1 1 0 NA
## 2 1 1 0 NA
## 3 1 1 0 NA
library(rcompanion)
ci_table <- groupwiseMean(rating_rec ~ source,
data = tdata_long,
traditional = FALSE,
percentile = TRUE)
ci_table
## source n Mean Conf.level Percentile.lower Percentile.upper
## 1 no info 48 -0.833 0.95 -1.310 -0.3960
## 2 'don't know' 48 -0.542 0.95 -1.060 -0.0422
## 3 unreadable 48 -0.167 0.95 -0.479 0.1040
(ci_width <- ci_table$Percentile.upper - ci_table$Percentile.lower)
## [1] 0.9140 1.0178 0.5830
get group medians:
library(rcompanion)
# groupwiseMean(rating_rec ~ Features + Knowledge,
# data = tdata_long,
# traditional = FALSE,
# percentile = TRUE)
groupwiseMedian(rating_rec ~ source,
data = tdata_long,
bca = FALSE,
percentile = TRUE,
R = 1000)
## source n Median Conf.level Percentile.lower Percentile.upper
## 1 no info 48 0 0.95 0 0
## 2 'don't know' 48 0 0.95 0 0
## 3 unreadable 48 0 0.95 0 0
Count the different ratings observed:
counts <- tdata_long %>%
group_by(source, rating_rec) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts
## # A tibble: 21 × 5
## # Groups: source [3]
## source rating_rec n pct lbl
## <fct> <dbl> <int> <dbl> <chr>
## 1 no info -5 3 0.0625 6.2%
## 2 no info -4 3 0.0625 6.2%
## 3 no info -3 2 0.0417 4.2%
## 4 no info -2 4 0.0833 8.3%
## 5 no info -1 1 0.0208 2.1%
## 6 no info 0 34 0.708 70.8%
## 7 no info 2 1 0.0208 2.1%
## 8 'don't know' -5 2 0.0417 4.2%
## 9 'don't know' -4 2 0.0417 4.2%
## 10 'don't know' -3 2 0.0417 4.2%
## # … with 11 more rows
Make a linear trend analysis to test if bias becomes smaller across conditions:
contrasts(tdata_long$source) <- "contr.poly" # fits a polynomial contrast
LinearModel <- lm(rating_rec ~ source, data=tdata_long) # runs a linear model with the corresponding contrast coding
summary(LinearModel)
##
## Call:
## lm(formula = rating_rec ~ source, data = tdata_long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8333 0.1667 0.1667 0.8333 5.5417
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.51389 0.12773 -4.023 9.32e-05 ***
## source.L 0.47140 0.22123 2.131 0.0348 *
## source.Q 0.03402 0.22123 0.154 0.8780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.533 on 141 degrees of freedom
## Multiple R-squared: 0.03135, Adjusted R-squared: 0.01761
## F-statistic: 2.282 on 2 and 141 DF, p-value: 0.1058
Make a plot with the different rating categories:
tdata_long$category[tdata_long$rating_rec < 0] <- "narrow"
tdata_long$category[tdata_long$rating_rec == 0] <- "unbiased"
tdata_long$category[tdata_long$rating_rec > 0] <- "broad"
counts2 <- tdata_long %>%
group_by(source, category) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
counts2
## # A tibble: 9 × 5
## # Groups: source [3]
## source category n pct lbl
## <fct> <chr> <int> <dbl> <chr>
## 1 no info broad 1 0.0208 2%
## 2 no info narrow 13 0.271 27%
## 3 no info unbiased 34 0.708 71%
## 4 'don't know' broad 3 0.0625 6%
## 5 'don't know' narrow 14 0.292 29%
## 6 'don't know' unbiased 31 0.646 65%
## 7 unreadable broad 2 0.0417 4.2%
## 8 unreadable narrow 6 0.125 12.5%
## 9 unreadable unbiased 40 0.833 83.3%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
Get proportion CIs for different conditions:
library(PropCIs)
library(DescTools)
library(purrr)
counts_noinfo <- subset(counts2, source == "no info")
counts_dontknow <- subset(counts2, source == "'don't know'")
counts_unreadable <- subset(counts2, source == "unreadable")
(MultinomCI(counts_noinfo$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_noinf)
## est lwr.ci upr.ci
## [1,] 0.02083333 0.0000000 0.1605639
## [2,] 0.27083333 0.1666667 0.4105639
## [3,] 0.70833333 0.6041667 0.8480639
(MultinomCI(counts_dontknow$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_dontknow)
## est lwr.ci upr.ci
## [1,] 0.0625000 0.0000000 0.1985005
## [2,] 0.2916667 0.1666667 0.4276671
## [3,] 0.6458333 0.5208333 0.7818338
(MultinomCI(counts_unreadable$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_unreadable)
## est lwr.ci upr.ci
## [1,] 0.04166667 0.00000000 0.1462200
## [2,] 0.12500000 0.04166667 0.2295533
## [3,] 0.83333333 0.75000000 0.9378867
ci_low <- c(selection_ci_noinf[,2], selection_ci_dontknow[,2], selection_ci_unreadable[,2])
ci_up <- c(selection_ci_noinf[,3], selection_ci_dontknow[,3], selection_ci_unreadable[,3])
plotdata <- counts2
plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up
Plot:
plotdata <- counts2
library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))
g<- ggplot(plotdata,
aes(x = category,
y = pct,
fill = category)) +
facet_grid( ~ source)+
geom_bar(stat = "identity",
position = "dodge") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 3.5,
position = position_dodge(width = 1),
vjust = -3) +
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
#scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Explanatory preference",
x = "Explanatory preference")+
geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct,
# ymin = plotdata$ci_low,
# ymax = plotdata$ci_up,
# colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13), strip.text.x = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories.svg",width=10,height=5)
ggsave("categories.pdf",width=10,height=5)
Test if more subjects show the bias in the “don’t know” as copmared to the “unreadable” condition.
prop.test(x = c(plotdata$n[8], plotdata$n[5]), n = c(48, 48), alternative = "less", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[8], plotdata$n[5]) out of c(48, 48)
## X-squared = 4.0421, df = 1, p-value = 0.02219
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.03321282
## sample estimates:
## prop 1 prop 2
## 0.1250000 0.2916667
As predicted, proportion of subjects committing the narrow latent scope bias is significantly smaller in the “explanation” condition (unreadable).
Also test if more subjects show the bias in the “no information condition” than in the “unreadable condition”.
prop.test(x = c(plotdata$n[8], plotdata$n[2]), n = c(48, 48), alternative = "less", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[8], plotdata$n[2]) out of c(48, 48)
## X-squared = 3.2153, df = 1, p-value = 0.03648
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.01431839
## sample estimates:
## prop 1 prop 2
## 0.1250000 0.2708333