# Load the coded subject explanations from a semicolon-delimited file.
explan_data <- read_delim(
  file = "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

1 Data preparation

Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.

# Convert the explanation-coding columns into factors.
# NOTE(review): this assignment replaces the data frame read from
# "subject_explanations.csv" with `tdata_long`, which is defined outside this
# section -- confirm the overwrite is intended.
explan_data <- tdata_long

for (coding_col in c(
  "Correct explanation",
  "Unclear explanation",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature"
)) {
  explan_data[[coding_col]] <- as.factor(explan_data[[coding_col]])
}

2 Explanation Analysis

Proportions of subjects who gave (correct) explanations for why the answer is 50:50

# Count subjects per condition (`prevalence`) and coding outcome, then express
# each count as a share of its condition, both numerically and as a label.
plotdata_between <- explan_data %>%
  group_by(prevalence, `Correct explanation`) %>%
  # "drop_last" keeps the result grouped by `prevalence`, so sum(n) below is
  # the per-condition total. Stating it explicitly documents that dependency
  # and avoids the grouping message summarize() otherwise prints.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )


plotdata_between

## # A tibble: 4 × 5
## # Groups:   prevalence [2]
##   prevalence `Correct explanation`     n   pct lbl  
##   <chr>      <fct>                 <int> <dbl> <chr>
## 1 omitted    0                       109 0.389 39%  
## 2 omitted    1                       171 0.611 61%  
## 3 presented  0                       103 0.368 37%  
## 4 presented  1                       177 0.632 63%

# Rows coded as a correct explanation (not used by the plot in this section).
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)

plotdata <- plotdata_between

# Stacked percentage bar per condition (one facet per `prevalence` level),
# filled by whether the explanation was coded as correct and labelled with the
# share of each segment.
g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Correct explanation`
  )
) +
  facet_grid(~prevalence) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # Limits must be a length-2 vector; the previous seq(0, 2) expanded to
    # c(0, 1, 2). The visible range is set by coord_cartesian() below.
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter: no partial matching and no
    # dependence on `scales` being attached.
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Condition"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

# Two-sided two-sample test of equal proportions of correct explanations in
# the two conditions, without continuity correction.
# Rows 2 and 4 of `plotdata_between` are the `Correct explanation` == 1 rows
# of the two conditions in the summary table above; this relies on that row
# order. The group sizes (280 each) are hard-coded.
normative <- prop.test(
  x = c(plotdata_between$n[2], plotdata_between$n[4]),
  n = c(280, 280),
  alternative = "two.sided",
  correct = FALSE
)
normative

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata_between$n[2], plotdata_between$n[4]) out of c(280, 280)
## X-squared = 0.27326, df = 1, p-value = 0.6012
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.10175299  0.05889585
## sample estimates:
##    prop 1    prop 2 
## 0.6107143 0.6321429

Proportions of subjects who inferred the absence of the unobserved feature due to its low overall probability (inferred evidence account)

# Count subjects per condition (`prevalence`) and low-base-rate coding outcome,
# then express each count as a share of its condition, both numerically and as
# a label.
plotdata_between <- explan_data %>%
  group_by(prevalence, `Inferred absence because of low feature base rate`) %>%
  # "drop_last" keeps the result grouped by `prevalence`, so sum(n) below is
  # the per-condition total. Stating it explicitly documents that dependency
  # and avoids the grouping message summarize() otherwise prints.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )


plotdata_between

## # A tibble: 3 × 5
## # Groups:   prevalence [2]
##   prevalence Inferred absence because of low feature base r…¹     n    pct lbl  
##   <chr>      <fct>                                            <int>  <dbl> <chr>
## 1 omitted    0                                                  280 1      100% 
## 2 presented  0                                                  268 0.957  96%  
## 3 presented  1                                                   12 0.0429 4%   
## # … with abbreviated variable name
## #   ¹`Inferred absence because of low feature base rate`

# Rows coded as "inferred absence because of low feature base rate" (not used
# by the plot in this section).
plotdata_sub <- subset(
  plotdata_between,
  `Inferred absence because of low feature base rate` == 1
)

plotdata <- plotdata_between

# Stacked percentage bar per condition (one facet per `prevalence` level),
# filled by whether the explanation was coded as inferring absence from the
# low feature base rate and labelled with the share of each segment.
g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  facet_grid(~prevalence) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # Limits must be a length-2 vector; the previous seq(0, 2) expanded to
    # c(0, 1, 2). The visible range is set by coord_cartesian() below.
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter: no partial matching and no
    # dependence on `scales` being attached.
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    # The legend previously read "Correct Explanation", which does not match
    # the variable mapped to fill in this plot.
    fill = "Inferred absence because of low feature base rate",
    # NOTE(review): the parallel plot for correct explanations labels this
    # axis "Condition" -- confirm "Features" is intended here.
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

# One-sided two-sample test (alternative "less": proportion 1 < proportion 2)
# of the share of subjects coded as inferring absence from the low base rate,
# without continuity correction.
# The first count is hard-coded as 0 because the summary table above has no
# row with coding 1 for the first condition; row 3 of `plotdata_between` is
# the coding-1 row of the second condition. This relies on that table layout.
# The group sizes (280 each) are hard-coded.
rate <- prop.test(
  x = c(0, plotdata_between$n[3]),
  n = c(280, 280),
  alternative = "less",
  correct = FALSE
)
rate

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(0, plotdata_between$n[3]) out of c(280, 280)
## X-squared = 12.263, df = 1, p-value = 0.000231
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.0229482
## sample estimates:
##     prop 1     prop 2 
## 0.00000000 0.04285714