# Read the semicolon-separated file with the coded subject explanations.
explan_data <- read_delim(
  "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

# Two rows appear twice in the raw file. Keep a single row per subj_code
# (all other columns are retained) so that the duplicates are dropped.
explan_data <- distinct(explan_data, subj_code, .keep_all = TRUE)

1 Data preparation

Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.

# Convert the explanation coding columns (values 0/1) into factors.

# NOTE(review): explan_data is replaced by tdata_long here. tdata_long is not
# defined in the visible part of the script -- confirm that it is created
# beforehand and that overwriting the de-duplicated CSV data is intended.
explan_data <- tdata_long

# Names of the columns that hold the coded criteria.
coded_criteria <- c(
  "Correct explanation",
  "Unclear explanation",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature due to visibility"
)

# Apply as.factor() to each coded column in one step.
explan_data[coded_criteria] <- lapply(explan_data[coded_criteria], as.factor)

2 Explanation Analysis

Proportions of subjects who gave (correct) explanations for why the answer is 50:50

# Step 1: count the subjects in each level of the `Correct explanation` coding.
plotdata_between <- explan_data %>%
  group_by(`Correct explanation`) %>%
  summarize(n = n())

# Step 2: add each level's share of all subjects (pct) and a formatted
# percentage label (lbl) for plotting.
plotdata_between <- plotdata_between %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

# Show the summary table.
plotdata_between

## # A tibble: 2 × 4
##   `Correct explanation`     n   pct lbl  
##   <fct>                 <int> <dbl> <chr>
## 1 0                        62  0.31 31%  
## 2 1                       138  0.69 69%

# Summary row for the subjects whose explanation was coded as correct (1).
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)

plotdata <- plotdata_between

# Single stacked bar showing the proportion of subjects per level of
# `Correct explanation`, with the percentage label centred in each segment.
g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Correct explanation`
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # Limits must be a length-two c(min, max) vector; the previous
    # seq(0, 2) expanded to c(0, 1, 2).
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter, so the call does not rely
    # on partial matching or on the scales package being attached.
    labels = scales::percent
  ) +
  # The visible y range is set here (0 to 110%), leaving some head room
  # above the bar. The x window covers positions 1 to 2 although only one
  # bar is drawn.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

Proportions of subjects who inferred the absence of the unobserved feature due to its low overall probability (inferred evidence account).

# Step 1: count the subjects in each level of the
# `Inferred absence because of low feature base rate` coding.
plotdata_between <- explan_data %>%
  group_by(`Inferred absence because of low feature base rate`) %>%
  summarize(n = n())

# Step 2: add each level's share of all subjects (pct) and a formatted
# percentage label (lbl) for plotting.
plotdata_between <- plotdata_between %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

# Show the summary table.
plotdata_between

## # A tibble: 2 × 4
##   `Inferred absence because of low feature base rate`     n   pct lbl  
##   <fct>                                               <int> <dbl> <chr>
## 1 0                                                     191 0.955 96%  
## 2 1                                                       9 0.045 4%

# Summary row for the subjects coded as having inferred the feature's absence
# because of its low base rate (1).
plotdata_sub <- subset(
  plotdata_between,
  `Inferred absence because of low feature base rate` == 1
)

plotdata <- plotdata_between

# Single stacked bar showing the proportion of subjects per level of
# `Inferred absence because of low feature base rate`, with the percentage
# label centred in each segment.
g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # Limits must be a length-two c(min, max) vector; the previous
    # seq(0, 2) expanded to c(0, 1, 2).
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter, so the call does not rely
    # on partial matching or on the scales package being attached.
    labels = scales::percent
  ) +
  # The visible y range is set here (0 to 110%), leaving some head room
  # above the bar. The x window covers positions 1 to 2 although only one
  # bar is drawn.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    # The legend title now names the criterion that is actually mapped to
    # fill; it previously read "Correct Explanation" (copied from the first
    # plot).
    fill = "Inferred absence because of\nlow feature base rate",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g