# Load the subjects' coded explanations from a semicolon-delimited file,
# trimming surrounding whitespace from each field.
explan_data <- read_delim(
  "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.
# Convert the explanation-coding columns into factors.
# NOTE(review): this assignment replaces whatever `explan_data` held before
# with `tdata_long`, which is defined outside this section -- confirm that
# `tdata_long` is the intended data source.
explan_data <- tdata_long
coding_cols <- c(
  "Correct explanation",
  "Unclear explanation",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature"
)
explan_data[coding_cols] <- lapply(explan_data[coding_cols], as.factor)
# Summary table for the "Correct explanation" coding: number of rows per
# condition and code (`n`), the share of each code within its condition
# (`pct`), and that share formatted as a percentage label (`lbl`).
plotdata_between <- explan_data %>%
  group_by(prevalence, `Correct explanation`) %>%
  # tally() counts rows per group and leaves the result grouped by
  # `prevalence`, so sum(n) below is the per-condition total.
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

plotdata_between
## # A tibble: 4 × 5
## # Groups: prevalence [2]
## prevalence `Correct explanation` n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 omitted 0 109 0.389 39%
## 2 omitted 1 171 0.611 61%
## 3 presented 0 103 0.368 37%
## 4 presented 1 177 0.632 63%
# Stacked percentage bar chart of the "Correct explanation" coding, one panel
# per `prevalence` condition, with the percentage label centred in each
# bar segment.

# Rows where the explanation was coded as correct (not used by the plot below).
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Correct explanation`
  )
) +
  facet_grid(~ prevalence) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  # `limits` has to be a length-two vector (seq(0, 2) yields three values);
  # the visible range is set by coord_cartesian() below. `labels` is spelled
  # out in full and namespaced instead of relying on partial matching of
  # `label` and on scales being attached.
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Condition"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g
# Two-sided test (without continuity correction) of whether the proportion of
# correct explanations differs between the two `prevalence` conditions.
# Counts are cross-tabulated and looked up by code ("1" = criterion met,
# "0" = not met) instead of by row position, and the group sizes are taken
# from the data rather than hard-coded.
correct_counts <- xtabs(
  n ~ prevalence + `Correct explanation`,
  data = plotdata_between
)

# prop.test() accepts a two-column matrix of successes and failures, one row
# per group; rows follow the order of the `prevalence` values.
normative <- prop.test(
  correct_counts[, c("1", "0")],
  alternative = "two.sided",
  correct = FALSE
)
normative
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata_between$n[2], plotdata_between$n[4]) out of c(280, 280)
## X-squared = 0.27326, df = 1, p-value = 0.6012
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.10175299 0.05889585
## sample estimates:
## prop 1 prop 2
## 0.6107143 0.6321429
# Summary table for the "Inferred absence because of low feature base rate"
# coding: number of rows per condition and code (`n`), the share of each code
# within its condition (`pct`), and that share as a percentage label (`lbl`).
plotdata_between <- explan_data %>%
  group_by(prevalence, `Inferred absence because of low feature base rate`) %>%
  # tally() counts rows per group and leaves the result grouped by
  # `prevalence`, so sum(n) below is the per-condition total.
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

plotdata_between
## # A tibble: 3 × 5
## # Groups: prevalence [2]
## prevalence Inferred absence because of low feature base r…¹ n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 omitted 0 280 1 100%
## 2 presented 0 268 0.957 96%
## 3 presented 1 12 0.0429 4%
## # … with abbreviated variable name
## # ¹`Inferred absence because of low feature base rate`
# Stacked percentage bar chart of the "Inferred absence because of low feature
# base rate" coding, one panel per `prevalence` condition, with the percentage
# label centred in each bar segment.

# Rows where this criterion was coded as met (not used by the plot below).
plotdata_sub <- subset(
  plotdata_between,
  `Inferred absence because of low feature base rate` == 1
)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  facet_grid(~ prevalence) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  # `limits` has to be a length-two vector (seq(0, 2) yields three values);
  # the visible range is set by coord_cartesian() below. `labels` is spelled
  # out in full and namespaced instead of relying on partial matching of
  # `label` and on scales being attached.
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    # The legend title names the variable actually mapped to `fill`.
    fill = "Inferred absence because of low feature base rate",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g
# One-sided test (without continuity correction) of whether the proportion of
# explanations coded as "Inferred absence because of low feature base rate" is
# lower in the first `prevalence` condition than in the second.
# Cross-tabulating fills condition/code combinations that have no row in
# `plotdata_between` with a count of 0, so no success count has to be
# hard-coded or picked by row position, and the group sizes come from the data.
base_rate_counts <- xtabs(
  n ~ prevalence + `Inferred absence because of low feature base rate`,
  data = plotdata_between
)

# prop.test() accepts a two-column matrix of successes ("1") and failures
# ("0"), one row per group; rows follow the order of the `prevalence` values.
rate <- prop.test(
  base_rate_counts[, c("1", "0")],
  alternative = "less",
  correct = FALSE
)
rate
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(0, plotdata_between$n[3]) out of c(280, 280)
## X-squared = 12.263, df = 1, p-value = 0.000231
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.0000000 -0.0229482
## sample estimates:
## prop 1 prop 2
## 0.00000000 0.04285714