# Import the coded subject explanations from a semicolon-delimited file;
# leading/trailing whitespace in fields is trimmed.
# NOTE(review): `explan_data` is reassigned from `tdata_long` a few statements
# further down, which discards this import -- confirm which source is intended.
explan_data <- read_delim(
  file = "subject_explanations.csv",
  delim = ";",
  trim_ws = TRUE,
  escape_double = FALSE
)
##
## negative positive
## probability 50 50
## satisfaction 104 104
Note: Subjects’ explanations were coded in the last columns of the data set (each column name describes the coded criterion). A value of 1 means that the coding criterion is (clearly) met; a value of 0 means that it is not.
# Convert the explanation-coding columns to factors so they can serve as
# discrete grouping / fill variables in the summaries and plots below.
# NOTE(review): this starts from `tdata_long` and replaces whatever
# `explan_data` held before -- confirm that this is intended.
explan_data <- tdata_long
coding_cols <- c(
  "Correct explanation",
  "Unclear explanation",
  "explanation focusing on perspective of patient",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature due to visibility"
)
explan_data[coding_cols] <- lapply(explan_data[coding_cols], as.factor)
# Count subjects per query type and "Correct explanation" code, then add each
# count's share within its query type (`pct`) and a formatted label (`lbl`).
# `tally()` on the two-level grouping leaves the result grouped by `dv_query`,
# so `sum(n)` below is the per-query total.
plotdata_between <- explan_data %>%
  group_by(dv_query, `Correct explanation`) %>%
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 4 × 5
## # Groups: dv_query [2]
## dv_query `Correct explanation` n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 26 0.26 26%
## 2 probability 1 74 0.74 74%
## 3 satisfaction 0 111 0.534 53.4%
## 4 satisfaction 1 97 0.466 46.6%
# Stacked 100% bar chart: share of correct (1) vs. incorrect (0) explanations
# per query type, with the percentage label centred in each bar segment.
# `plotdata_sub` (rows where the criterion is met) is not used by the plot.
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(x = dv_query, y = pct, fill = `Correct explanation`)
) +
  # facet_grid( ~ Features) +
  geom_col(position = "fill") +
  # `limits` must be a length-2 range; the former seq(0, 2) produced
  # c(0, 1, 2). The visible range is set by coord_cartesian() below.
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # NOTE(review): the x axis shows `dv_query`, yet it is labelled "Features"
  # -- confirm the intended axis title.
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
# One-sided two-sample proportion test (no continuity correction): is the share
# of correct explanations larger in the first group than in the second?
# The counts are selected by row position and the group sizes are hard-coded,
# so fail early if the summary table does not have the assumed layout.
stopifnot(
  nrow(plotdata) == 4L,
  all(plotdata$`Correct explanation`[c(2, 4)] == 1),
  sum(plotdata$n[1:2]) == 100,
  sum(plotdata$n[3:4]) == 208
)
prop.test(
  x = c(plotdata$n[2], plotdata$n[4]),
  n = c(100, 208),
  alternative = "greater",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(100, 208)
## X-squared = 20.479, df = 1, p-value = 3.015e-06
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.1817703 1.0000000
## sample estimates:
## prop 1 prop 2
## 0.7400000 0.4663462
# Count subjects per query type and "explanation focusing on perspective of
# patient" code, then add each count's share within its query type (`pct`)
# and a formatted label (`lbl`). `tally()` leaves the result grouped by
# `dv_query`, so `sum(n)` below is the per-query total.
plotdata_between <- explan_data %>%
  group_by(dv_query, `explanation focusing on perspective of patient`) %>%
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 4 × 5
## # Groups: dv_query [2]
## dv_query explanation focusing on perspective of patien…¹ n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 98 0.98 98%
## 2 probability 1 2 0.02 2%
## 3 satisfaction 0 166 0.798 80%
## 4 satisfaction 1 42 0.202 20%
## # … with abbreviated variable name
## # ¹`explanation focusing on perspective of patient`
# Stacked 100% bar chart: share of explanations that do (1) vs. do not (0)
# focus on the patient's perspective, per query type, with the percentage
# label centred in each bar segment.
# `plotdata_sub` (rows where the criterion is met) is not used by the plot.
plotdata_sub <- subset(
  plotdata_between,
  `explanation focusing on perspective of patient` == 1
)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = dv_query,
    y = pct,
    fill = `explanation focusing on perspective of patient`
  )
) +
  # facet_grid( ~ Features) +
  geom_col(position = "fill") +
  # `limits` must be a length-2 range; the former seq(0, 2) produced
  # c(0, 1, 2). The visible range is set by coord_cartesian() below.
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # The legend title previously read "Correct Explanation" (copied from the
  # first plot) although the fill encodes the patient-perspective criterion.
  # NOTE(review): the x axis shows `dv_query`, yet it is labelled "Features"
  # -- confirm the intended axis title.
  labs(
    y = "Percentage",
    fill = "Patient-perspective explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
# One-sided two-sample proportion test (no continuity correction): is the share
# of patient-perspective explanations smaller in the first group than in the
# second?
# The counts are selected by row position and the group sizes are hard-coded,
# so fail early if the summary table does not have the assumed layout.
stopifnot(
  nrow(plotdata) == 4L,
  all(plotdata$`explanation focusing on perspective of patient`[c(2, 4)] == 1),
  sum(plotdata$n[1:2]) == 100,
  sum(plotdata$n[3:4]) == 208
)
prop.test(
  x = c(plotdata$n[2], plotdata$n[4]),
  n = c(100, 208),
  alternative = "less",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(100, 208)
## X-squared = 18.253, df = 1, p-value = 9.671e-06
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.0000000 -0.1306743
## sample estimates:
## prop 1 prop 2
## 0.0200000 0.2019231
# Count subjects per query type and "Inferred absence because of low feature
# base rate" code, then add each count's share within its query type (`pct`)
# and a formatted label (`lbl`). `tally()` leaves the result grouped by
# `dv_query`, so `sum(n)` below is the per-query total.
plotdata_between <- explan_data %>%
  group_by(dv_query, `Inferred absence because of low feature base rate`) %>%
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 3 × 5
## # Groups: dv_query [2]
## dv_query Inferred absence because of low feature base…¹ n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 100 1 100%
## 2 satisfaction 0 202 0.971 97%
## 3 satisfaction 1 6 0.0288 3%
## # … with abbreviated variable name
## # ¹`Inferred absence because of low feature base rate`
# Stacked 100% bar chart: share of explanations that do (1) vs. do not (0)
# infer absence from a low feature base rate, per query type, with the
# percentage label centred in each bar segment.
# `plotdata_sub` (rows where the criterion is met) is not used by the plot.
plotdata_sub <- subset(
  plotdata_between,
  `Inferred absence because of low feature base rate` == 1
)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = dv_query,
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  # facet_grid( ~ Features) +
  geom_col(position = "fill") +
  # `limits` must be a length-2 range; the former seq(0, 2) produced
  # c(0, 1, 2). The visible range is set by coord_cartesian() below.
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # The legend title previously read "Correct Explanation" (copied from the
  # first plot) although the fill encodes the low-base-rate criterion.
  # NOTE(review): the x axis shows `dv_query`, yet it is labelled "Features"
  # -- confirm the intended axis title.
  labs(
    y = "Percentage",
    fill = "Inferred absence (low base rate)",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
# Two-sided two-sample proportion test (no continuity correction) comparing the
# share of low-base-rate explanations between the two groups.
# The first count is hard-coded as 0 because the summary table has no
# "criterion met" row for the first group; the second count is taken from
# row 3. Fail early if the table does not have this assumed layout.
# NOTE(review): with a zero count the chi-squared approximation may be poor;
# consider whether an exact test (e.g. fisher.test) is more appropriate.
stopifnot(
  nrow(plotdata) == 3L,
  all(plotdata$`Inferred absence because of low feature base rate`[1:2] == 0),
  plotdata$`Inferred absence because of low feature base rate`[3] == 1,
  plotdata$n[1] == 100,
  sum(plotdata$n[2:3]) == 208
)
prop.test(
  x = c(0, plotdata$n[3]),
  n = c(100, 208),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(0, plotdata$n[3]) out of c(100, 208)
## X-squared = 2.9419, df = 1, p-value = 0.08631
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.051592121 -0.006100187
## sample estimates:
## prop 1 prop 2
## 0.00000000 0.02884615