# Load the coded subject explanations from the semicolon-delimited file.
explan_data <- read_delim(
  file = "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.
# Turn the explanation coding columns (coded 0/1) into factors.
# NOTE(review): `explan_data` is reassigned from `tdata_long` here, which
# replaces the data frame read from "subject_explanations.csv" -- confirm
# that this is intended.
explan_data <- tdata_long
coding_columns <- c(
  "Correct explanation",
  "Unclear explanation",
  "explanation focusing on perspective of patient",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature due to visibility"
)
for (coding_column in coding_columns) {
  explan_data[[coding_column]] <- as.factor(explan_data[[coding_column]])
}
Please note: The proportion tests reported below were run without Yates's continuity correction. Applying the correction would not turn any of the significant results into non-significant results, or vice versa. Because it is debated whether Yates's correction should be applied, it was decided to report the tests without it.
# Summary table for the "Correct explanation" coding: one row per query type
# and coding value with its count (`n`), its share within the query type
# (`pct`) and that share as a text label (`lbl`).
# The table stays grouped by `dv_query`, so `sum(n)` is the per-query total.
# NOTE: because of that grouping, scales::percent() picks its rounding
# separately for each query type, so labels can mix precisions
# (e.g. "30%" next to "48.8%").
plotdata_between <- explan_data %>%
  group_by(dv_query) %>%
  count(`Correct explanation`, name = "n") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 6 × 5
## # Groups: dv_query [3]
## dv_query `Correct explanation` n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 71 0.296 30%
## 2 probability 1 169 0.704 70%
## 3 satisfaction_would 0 117 0.488 48.8%
## 4 satisfaction_would 1 123 0.512 51.2%
## 5 satisfaction_is 0 101 0.421 42%
## 6 satisfaction_is 1 139 0.579 58%
# Rows for explanations coded as correct (1); not used by the plot below.
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)
plotdata <- plotdata_between

# Stacked 100% bar chart: one bar per query type (`dv_query`), segments filled
# by the "Correct explanation" coding, each segment labelled with its share.
g <- ggplot(
  plotdata,
  aes(x = dv_query, y = pct, fill = `Correct explanation`)
) +
  geom_col(position = "fill") +
  # Scale limits must be a length-2 vector; the visible y range is set by
  # coord_cartesian() below, so no limits are given here.
  scale_y_continuous(
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # Zoom the y axis only: an x zoom of c(1, 2) would crop the third
  # `dv_query` level out of the panel.
  coord_cartesian(ylim = c(0, 1.1)) +
  # position_fill() keeps the labels centred in the filled segments.
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_fill(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # NOTE(review): the x axis shows `dv_query`; confirm that "Features" is the
  # intended axis title.
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
probability vs. satisfaction_would
# Rows 2 and 4 of `plotdata` hold the counts of explanations coded as correct
# (1) for the probability and satisfaction_would queries; each query has 240
# explanations. Two-sided test without continuity correction.
prop.test(
  x = c(plotdata$n[2], plotdata$n[4]),
  n = c(240, 240),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(240, 240)
## X-squared = 18.502, df = 1, p-value = 1.697e-05
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1060317 0.2773016
## sample estimates:
## prop 1 prop 2
## 0.7041667 0.5125000
probability vs. satisfaction_is
# Rows 2 and 6 of `plotdata` hold the counts of explanations coded as correct
# (1) for the probability and satisfaction_is queries; each query has 240
# explanations. Two-sided test without continuity correction.
prop.test(
  x = c(plotdata$n[2], plotdata$n[6]),
  n = c(240, 240),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[6]) out of c(240, 240)
## X-squared = 8.1546, df = 1, p-value = 0.004295
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.03993811 0.21006189
## sample estimates:
## prop 1 prop 2
## 0.7041667 0.5791667
satisfaction_would vs. satisfaction_is
# Rows 4 and 6 of `plotdata` hold the counts of explanations coded as correct
# (1) for the satisfaction_would and satisfaction_is queries (240 explanations
# each). One-sided test: is the satisfaction_would proportion lower than the
# satisfaction_is proportion? No continuity correction.
prop.test(
  x = c(plotdata$n[4], plotdata$n[6]),
  n = c(240, 240),
  alternative = "less",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[4], plotdata$n[6]) out of c(240, 240)
## X-squared = 2.1514, df = 1, p-value = 0.07122
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.000000000 0.007926462
## sample estimates:
## prop 1 prop 2
## 0.5125000 0.5791667
# Summary table for the "explanation focusing on perspective of patient"
# coding: one row per query type and observed coding value with its count
# (`n`), its share within the query type (`pct`) and that share as a text
# label (`lbl`).
# The table stays grouped by `dv_query`, so `sum(n)` is the per-query total.
plotdata_between <- explan_data %>%
  group_by(dv_query) %>%
  count(`explanation focusing on perspective of patient`, name = "n") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 5 × 5
## # Groups: dv_query [3]
## dv_query explanation focusing on perspective of…¹ n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 240 1 100%
## 2 satisfaction_would 0 208 0.867 87%
## 3 satisfaction_would 1 32 0.133 13%
## 4 satisfaction_is 0 227 0.946 95%
## 5 satisfaction_is 1 13 0.0542 5%
## # … with abbreviated variable name
## # ¹`explanation focusing on perspective of patient`
# Rows for explanations coded as focusing on the patient's perspective (1);
# not used by the plot below.
plotdata_sub <- subset(plotdata_between, `explanation focusing on perspective of patient` == 1)
plotdata <- plotdata_between

# Stacked 100% bar chart: one bar per query type (`dv_query`), segments filled
# by the "explanation focusing on perspective of patient" coding, each segment
# labelled with its share.
g <- ggplot(
  plotdata,
  aes(
    x = dv_query,
    y = pct,
    fill = `explanation focusing on perspective of patient`
  )
) +
  geom_col(position = "fill") +
  # Scale limits must be a length-2 vector; the visible y range is set by
  # coord_cartesian() below, so no limits are given here.
  scale_y_continuous(
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # Zoom the y axis only: an x zoom of c(1, 2) would crop the third
  # `dv_query` level out of the panel.
  coord_cartesian(ylim = c(0, 1.1)) +
  # position_fill() keeps the labels centred in the filled segments.
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_fill(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # The legend title names the criterion that is actually mapped to `fill`.
  # NOTE(review): the x axis shows `dv_query`; confirm that "Features" is the
  # intended axis title.
  labs(
    y = "Percentage",
    fill = "Explanation focusing on\nperspective of patient",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
probability vs. satisfaction_would
# The summary has no row for probability explanations coded 1, so that count
# is entered as 0. Row 3 of `plotdata` holds the count coded 1 for the
# satisfaction_would query (240 explanations per query). One-sided test: is
# the probability proportion lower? No continuity correction.
prop.test(
  x = c(0, plotdata$n[3]),
  n = c(240, 240),
  alternative = "less",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(0, plotdata$n[3]) out of c(240, 240)
## X-squared = 34.286, df = 1, p-value = 2.379e-09
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.09724083
## sample estimates:
## prop 1 prop 2
## 0.0000000 0.1333333
probability vs. satisfaction_is
# The summary has no row for probability explanations coded 1, so that count
# is entered as 0. Row 5 of `plotdata` holds the count coded 1 for the
# satisfaction_is query (240 explanations per query). One-sided test: is the
# probability proportion lower? No continuity correction.
prop.test(
  x = c(0, plotdata$n[5]),
  n = c(240, 240),
  alternative = "less",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(0, plotdata$n[5]) out of c(240, 240)
## X-squared = 13.362, df = 1, p-value = 0.0001284
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.03013439
## sample estimates:
## prop 1 prop 2
## 0.00000000 0.05416667
satisfaction_would vs. satisfaction_is
# Rows 3 and 5 of `plotdata` hold the counts coded 1 for the
# satisfaction_would and satisfaction_is queries (240 explanations each).
# One-sided test: is the satisfaction_would proportion greater than the
# satisfaction_is proportion? No continuity correction.
prop.test(
  x = c(plotdata$n[3], plotdata$n[5]),
  n = c(240, 240),
  alternative = "greater",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[3], plotdata$n[5]) out of c(240, 240)
## X-squared = 8.8521, df = 1, p-value = 0.001464
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.03580517 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.13333333 0.05416667
# Summary table for the "Inferred absence because of low feature base rate"
# coding: one row per query type and coding value with its count (`n`), its
# share within the query type (`pct`) and that share as a text label (`lbl`).
# The table stays grouped by `dv_query`, so `sum(n)` is the per-query total.
plotdata_between <- explan_data %>%
  group_by(dv_query) %>%
  count(`Inferred absence because of low feature base rate`, name = "n") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )
plotdata_between
## # A tibble: 6 × 5
## # Groups: dv_query [3]
## dv_query Inferred absence because of low featu…¹ n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 probability 0 238 0.992 99%
## 2 probability 1 2 0.00833 1%
## 3 satisfaction_would 0 236 0.983 98%
## 4 satisfaction_would 1 4 0.0167 2%
## 5 satisfaction_is 0 235 0.979 98%
## 6 satisfaction_is 1 5 0.0208 2%
## # … with abbreviated variable name
## # ¹`Inferred absence because of low feature base rate`
# Rows for explanations coded as inferring absence from a low feature base
# rate (1); not used by the plot below.
plotdata_sub <- subset(plotdata_between, `Inferred absence because of low feature base rate` == 1)
plotdata <- plotdata_between

# Stacked 100% bar chart: one bar per query type (`dv_query`), segments filled
# by the "Inferred absence because of low feature base rate" coding, each
# segment labelled with its share.
g <- ggplot(
  plotdata,
  aes(
    x = dv_query,
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  geom_col(position = "fill") +
  # Scale limits must be a length-2 vector; the visible y range is set by
  # coord_cartesian() below, so no limits are given here.
  scale_y_continuous(
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    labels = scales::percent
  ) +
  # Zoom the y axis only: an x zoom of c(1, 2) would crop the third
  # `dv_query` level out of the panel.
  coord_cartesian(ylim = c(0, 1.1)) +
  # position_fill() keeps the labels centred in the filled segments.
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_fill(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # The legend title names the criterion that is actually mapped to `fill`.
  # NOTE(review): the x axis shows `dv_query`; confirm that "Features" is the
  # intended axis title.
  labs(
    y = "Percentage",
    fill = "Inferred absence because of\nlow feature base rate",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g
probability vs. satisfaction_would
# Rows 2 and 4 of `plotdata` hold the counts coded 1 for the probability and
# satisfaction_would queries; each query has 240 explanations. Two-sided test
# without continuity correction.
prop.test(
  x = c(plotdata$n[2], plotdata$n[4]),
  n = c(240, 240),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(240, 240)
## X-squared = 0.67511, df = 1, p-value = 0.4113
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.02819773 0.01153106
## sample estimates:
## prop 1 prop 2
## 0.008333333 0.016666667
probability vs. satisfaction_is
# Rows 2 and 6 of `plotdata` hold the counts coded 1 for the probability and
# satisfaction_is queries; each query has 240 explanations. Two-sided test
# without continuity correction.
prop.test(
  x = c(plotdata$n[2], plotdata$n[6]),
  n = c(240, 240),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[6]) out of c(240, 240)
## X-squared = 1.3047, df = 1, p-value = 0.2533
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.033919275 0.008919275
## sample estimates:
## prop 1 prop 2
## 0.008333333 0.020833333
satisfaction_would vs. satisfaction_is
# Rows 4 and 6 of `plotdata` hold the counts coded 1 for the
# satisfaction_would and satisfaction_is queries; each query has 240
# explanations. Two-sided test without continuity correction.
prop.test(
  x = c(plotdata$n[4], plotdata$n[6]),
  n = c(240, 240),
  alternative = "two.sided",
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[4], plotdata$n[6]) out of c(240, 240)
## X-squared = 0.11323, df = 1, p-value = 0.7365
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.02843258 0.02009925
## sample estimates:
## prop 1 prop 2
## 0.01666667 0.02083333