# Load the semicolon-delimited file with subjects' explanations.
explan_data <- read_delim(
  file = "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

1 Data preparation

##               
##                negative positive
##   probability        50       50
##   satisfaction      104      104

Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.

# Convert the explanation coding columns to factors.
# NOTE(review): explan_data is reassigned from tdata_long here, replacing its
# previous contents; tdata_long is defined outside this section -- confirm
# that this is intended.
coding_cols <- c(
  "Correct explanation",
  "Unclear explanation",
  "explanation focusing on perspective of patient",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature due to visibility"
)

explan_data <- tdata_long

explan_data[coding_cols] <- lapply(explan_data[coding_cols], as.factor)

2 Explanation Analysis

Proportions of subjects in each dv_query condition who gave (correct) explanations for why the answer is 50:50

# Summary dataset: number of subjects per dv_query x `Correct explanation`
# cell, plus the share (pct) and a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(dv_query, `Correct explanation`) %>%
  # Keep the dv_query grouping explicitly: the mutate() below must compute
  # sum(n) within each dv_query condition, and stating .groups avoids the
  # "summarise() has grouped output by ..." message.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n),
         lbl = scales::percent(pct))


plotdata_between

## # A tibble: 4 × 5
## # Groups:   dv_query [2]
##   dv_query     `Correct explanation`     n   pct lbl  
##   <fct>        <fct>                 <int> <dbl> <chr>
## 1 probability  0                        26 0.26  26%  
## 2 probability  1                        74 0.74  74%  
## 3 satisfaction 0                       111 0.534 53.4%
## 4 satisfaction 1                        97 0.466 46.6%

# Rows where the coding criterion is met (value 1). Not referenced again in
# the visible part of this script.
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)

# Alias used by the plot below and by the proportion test that follows.
plotdata <- plotdata_between

# Stacked bar chart of the share of each `Correct explanation` value per
# dv_query condition, with the percentage label centred in each segment.
g <- ggplot(plotdata,
            aes(x = dv_query,
                y = pct,
                fill = `Correct explanation`)) +
  geom_bar(stat = "identity",
           position = "fill") +
  # `limits` has to be a length-2 range; seq(0, 2) produced c(0, 1, 2).
  # `labels` is spelled out (no partial matching) and namespaced so that the
  # scales package does not need to be attached.
  scale_y_continuous(limits = c(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0, 0),
                     labels = scales::percent) +
  # Visible window: both bars, with some head room above 100%.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(aes(label = lbl),
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  # The x axis shows dv_query, so it is labelled accordingly.
  labs(y = "Percentage",
       fill = "Correct Explanation",
       x = "Query type (dv_query)") +
  theme(legend.position = "top",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),
        legend.title = element_text(size = 13)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

g

# One-sided two-sample test for equality of proportions (no continuity
# correction): share of subjects coded 1 on `Correct explanation`, compared
# between the dv_query conditions (in factor-level order of dv_query).
# Successes and group sizes are derived from plotdata per condition instead of
# relying on row positions and hard-coded totals, so the test stays correct if
# a cell is missing from the summary or the sample size changes.
hits <- tapply(plotdata$n * (plotdata$`Correct explanation` == 1),
               plotdata$dv_query, sum)
totals <- tapply(plotdata$n, plotdata$dv_query, sum)
prop.test(x = as.vector(hits), n = as.vector(totals),
          alternative = "greater", correct = FALSE)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(100, 208)
## X-squared = 20.479, df = 1, p-value = 3.015e-06
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.1817703 1.0000000
## sample estimates:
##    prop 1    prop 2 
## 0.7400000 0.4663462

Proportions of subjects in the different dv_query conditions whose explanation indicated that they were taking the perspective of the patient into account instead of answering which explanation is more likely

# Summary dataset: number of subjects per dv_query x
# `explanation focusing on perspective of patient` cell, plus the share (pct)
# and a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(dv_query, `explanation focusing on perspective of patient`) %>%
  # Keep the dv_query grouping explicitly: the mutate() below must compute
  # sum(n) within each dv_query condition, and stating .groups avoids the
  # "summarise() has grouped output by ..." message.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n),
         lbl = scales::percent(pct))


plotdata_between

## # A tibble: 4 × 5
## # Groups:   dv_query [2]
##   dv_query     explanation focusing on perspective of patien…¹     n   pct lbl  
##   <fct>        <fct>                                           <int> <dbl> <chr>
## 1 probability  0                                                  98 0.98  98%  
## 2 probability  1                                                   2 0.02  2%   
## 3 satisfaction 0                                                 166 0.798 80%  
## 4 satisfaction 1                                                  42 0.202 20%  
## # … with abbreviated variable name
## #   ¹`explanation focusing on perspective of patient`

# Rows where the coding criterion is met (value 1). Not referenced again in
# the visible part of this script.
plotdata_sub <- subset(plotdata_between, `explanation focusing on perspective of patient` == 1)

# Alias used by the plot below and by the proportion test that follows.
plotdata <- plotdata_between

# Stacked bar chart of the share of each
# `explanation focusing on perspective of patient` value per dv_query
# condition, with the percentage label centred in each segment.
g <- ggplot(plotdata,
            aes(x = dv_query,
                y = pct,
                fill = `explanation focusing on perspective of patient`)) +
  geom_bar(stat = "identity",
           position = "fill") +
  # `limits` has to be a length-2 range; seq(0, 2) produced c(0, 1, 2).
  # `labels` is spelled out (no partial matching) and namespaced so that the
  # scales package does not need to be attached.
  scale_y_continuous(limits = c(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0, 0),
                     labels = scales::percent) +
  # Visible window: both bars, with some head room above 100%.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(aes(label = lbl),
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  # Legend and axis titles now name the variables actually plotted (the
  # previous titles were carried over from the "Correct explanation" plot).
  labs(y = "Percentage",
       fill = "Patient-perspective explanation",
       x = "Query type (dv_query)") +
  theme(legend.position = "top",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),
        legend.title = element_text(size = 13)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

g

# One-sided two-sample test for equality of proportions (no continuity
# correction): share of subjects coded 1 on
# `explanation focusing on perspective of patient`, compared between the
# dv_query conditions (in factor-level order of dv_query).
# Successes and group sizes are derived from plotdata per condition instead of
# relying on row positions and hard-coded totals, so the test stays correct if
# a cell is missing from the summary or the sample size changes.
hits <- tapply(
  plotdata$n * (plotdata$`explanation focusing on perspective of patient` == 1),
  plotdata$dv_query, sum
)
totals <- tapply(plotdata$n, plotdata$dv_query, sum)
prop.test(x = as.vector(hits), n = as.vector(totals),
          alternative = "less", correct = FALSE)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(100, 208)
## X-squared = 18.253, df = 1, p-value = 9.671e-06
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.1306743
## sample estimates:
##    prop 1    prop 2 
## 0.0200000 0.2019231

Proportions of subjects who inferred the absence of the unobserved feature due to its low overall probability (inferred evidence account)

# Summary dataset: number of subjects per dv_query x
# `Inferred absence because of low feature base rate` cell, plus the share
# (pct) and a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(dv_query, `Inferred absence because of low feature base rate`) %>%
  # Keep the dv_query grouping explicitly: the mutate() below must compute
  # sum(n) within each dv_query condition, and stating .groups avoids the
  # "summarise() has grouped output by ..." message.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n),
         lbl = scales::percent(pct))


plotdata_between

## # A tibble: 3 × 5
## # Groups:   dv_query [2]
##   dv_query     Inferred absence because of low feature base…¹     n    pct lbl  
##   <fct>        <fct>                                          <int>  <dbl> <chr>
## 1 probability  0                                                100 1      100% 
## 2 satisfaction 0                                                202 0.971  97%  
## 3 satisfaction 1                                                  6 0.0288 3%   
## # … with abbreviated variable name
## #   ¹`Inferred absence because of low feature base rate`

# Rows where the coding criterion is met (value 1). Not referenced again in
# the visible part of this script.
plotdata_sub <- subset(plotdata_between, `Inferred absence because of low feature base rate` == 1)

# Alias used by the plot below and by the proportion test that follows.
plotdata <- plotdata_between

# Stacked bar chart of the share of each
# `Inferred absence because of low feature base rate` value per dv_query
# condition, with the percentage label centred in each segment.
g <- ggplot(plotdata,
            aes(x = dv_query,
                y = pct,
                fill = `Inferred absence because of low feature base rate`)) +
  geom_bar(stat = "identity",
           position = "fill") +
  # `limits` has to be a length-2 range; seq(0, 2) produced c(0, 1, 2).
  # `labels` is spelled out (no partial matching) and namespaced so that the
  # scales package does not need to be attached.
  scale_y_continuous(limits = c(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0, 0),
                     labels = scales::percent) +
  # Visible window: both bars, with some head room above 100%.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(aes(label = lbl),
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  # Legend and axis titles now name the variables actually plotted (the
  # previous titles were carried over from the "Correct explanation" plot).
  labs(y = "Percentage",
       fill = "Inferred absence (low base rate)",
       x = "Query type (dv_query)") +
  theme(legend.position = "top",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),
        legend.title = element_text(size = 13)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

g

# Two-sided two-sample test for equality of proportions (no continuity
# correction): share of subjects coded 1 on
# `Inferred absence because of low feature base rate`, compared between the
# dv_query conditions (in factor-level order of dv_query).
# Successes are summed per condition, so a condition without any row coded 1
# in the summary contributes 0 automatically; no hand-entered 0 or row
# position is needed. Group sizes are likewise taken from the data.
hits <- tapply(
  plotdata$n * (plotdata$`Inferred absence because of low feature base rate` == 1),
  plotdata$dv_query, sum
)
totals <- tapply(plotdata$n, plotdata$dv_query, sum)
prop.test(x = as.vector(hits), n = as.vector(totals),
          alternative = "two.sided", correct = FALSE)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(0, plotdata$n[3]) out of c(100, 208)
## X-squared = 2.9419, df = 1, p-value = 0.08631
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.051592121 -0.006100187
## sample estimates:
##     prop 1     prop 2 
## 0.00000000 0.02884615