# Read the semicolon-delimited file with the coded subject explanations.
explan_data <- read_delim(
  file = "subj_explanations_main.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

1 Data preparation

##               
##                notKnow
##   featherTooth      50
##   spearNet          50

Note: Subjects’ explanations were coded in the last columns (the column names describe the coded criterion). 1 means that the coding criterion is (clearly) met, 0 that it isn’t.

# Add the combined "inferred absence" code (sum of the two absence codes) to
# tdata_long, copy it to explan_data, and turn every explanation coding
# column into a factor.

tdata_long[["Inferred absence total"]] <-
  tdata_long[["Inferred absence of latent feature due to visibility"]] +
  tdata_long[["Inferred absence of latent feature for any reason"]]

explan_data <- tdata_long

# Columns holding the coded criteria.
coding_cols <- c(
  "Correct explanation",
  "Unclear explanation",
  "Inferred absence of latent feature due to visibility",
  "Inferred absence of latent feature for any reason",
  "Inferred absence total"
)

explan_data[coding_cols] <- lapply(explan_data[coding_cols], as.factor)

2 Explanation Analysis

Proportions of subjects in each feature condition who gave (correct) explanations for why the answer is 50:50

# Count subjects per feature condition and "Correct explanation" code, then
# express each count as a share of its feature condition (pct) together with
# a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(Features, `Correct explanation`) %>%
  # Explicitly keep the result grouped by Features: sum(n) below must be the
  # per-condition total, not the grand total.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

plotdata_between

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features           `Correct explanation`     n   pct lbl  
##   <fct>              <fct>                 <int> <dbl> <chr>
## 1 Spear and net      0                        15  0.3  30%  
## 2 Spear and net      1                        35  0.7  70%  
## 3 Feathers and tooth 0                         2  0.04 4%   
## 4 Feathers and tooth 1                        48  0.96 96%

# Summary rows for explanations coded 0 (criterion not met).
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 0)

plotdata <- plotdata_between

# Stacked bar chart (bars normalised to 100%) of the "Correct explanation"
# codes per feature condition, with the percentage label centred in each
# segment.
g <- ggplot(
  plotdata,
  aes(
    x = Features,
    y = pct,
    fill = `Correct explanation`
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # limits must be a length-two vector (seq(0, 2) has three elements);
    # c(0, 2) spans the same range. The visible range is set by
    # coord_cartesian() below.
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter, so the call neither relies
    # on partial argument matching nor on scales being attached.
    labels = scales::percent
  ) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

# One-sided two-sample test of equal proportions (no continuity correction):
# is the share of explanations coded 1 ("Correct explanation") smaller in the
# first Features level than in the second?
# Counts and group sizes are taken from a cross-tabulation instead of row
# positions in the summary table and hard-coded sample sizes. Rows follow the
# level order of Features; factor() drops unused levels.
correct_tab <- table(
  factor(explan_data$Features),
  explan_data$`Correct explanation`
)
n_correct <- correct_tab[, "1"]
n_subjects <- rowSums(correct_tab)

prop.test(
  x = n_correct,
  n = n_subjects,
  alternative = "less",
  correct = FALSE
)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(50, 50)
## X-squared = 11.977, df = 1, p-value = 0.0002693
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.1440641
## sample estimates:
## prop 1 prop 2 
##   0.70   0.96

Proportions of subjects in the different feature conditions whose explanation indicated that the unobserved feature is more likely to be absent because of “feature visibility” (i.e., because it should have been observed if it was actually present)

# Count subjects per feature condition and "Inferred absence of latent
# feature due to visibility" code, then express each count as a share of its
# feature condition (pct) together with a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(Features, `Inferred absence of latent feature due to visibility`) %>%
  # Explicitly keep the result grouped by Features: sum(n) below must be the
  # per-condition total, not the grand total.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

plotdata_between

## # A tibble: 3 × 5
## # Groups:   Features [2]
##   Features           Inferred absence of latent feature due …¹     n   pct lbl  
##   <fct>              <fct>                                     <int> <dbl> <chr>
## 1 Spear and net      0                                            43  0.86 86%  
## 2 Spear and net      1                                             7  0.14 14%  
## 3 Feathers and tooth 0                                            50  1    100% 
## # … with abbreviated variable name
## #   ¹`Inferred absence of latent feature due to visibility`

# Summary rows for explanations coded 1 (criterion met).
plotdata_sub <- subset(plotdata_between, `Inferred absence of latent feature due to visibility` == 1)

plotdata <- plotdata_between

# Stacked bar chart (bars normalised to 100%) of the "inferred absence due to
# visibility" codes per feature condition, with the percentage label centred
# in each segment.
g <- ggplot(
  plotdata,
  aes(
    x = Features,
    y = pct,
    fill = `Inferred absence of latent feature due to visibility`
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # limits must be a length-two vector (seq(0, 2) has three elements);
    # c(0, 2) spans the same range. The visible range is set by
    # coord_cartesian() below.
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter, so the call neither relies
    # on partial argument matching nor on scales being attached.
    labels = scales::percent
  ) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Inferred absence of \nunobserved feature \ndue to visibility",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

# One-sided two-sample test of equal proportions (no continuity correction):
# is the share of explanations coded 1 ("inferred absence due to visibility")
# greater in the first Features level than in the second?
# Counts and group sizes are taken from a cross-tabulation, so a condition
# without any 1-coded subject contributes a count of 0 instead of a
# hard-coded zero, and sample sizes are not hard-coded. Rows follow the level
# order of Features; factor() drops unused levels.
visibility_tab <- table(
  factor(explan_data$Features),
  explan_data$`Inferred absence of latent feature due to visibility`
)
n_visibility <- visibility_tab[, "1"]
n_subjects <- rowSums(visibility_tab)

prop.test(
  x = n_visibility,
  n = n_subjects,
  alternative = "greater",
  correct = FALSE
)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], 0) out of c(50, 50)
## X-squared = 7.5269, df = 1, p-value = 0.003039
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.05928477 1.00000000
## sample estimates:
## prop 1 prop 2 
##   0.14   0.00

Proportions of subjects in the different feature conditions who inferred the absence of the unobserved feature for whatever reason

# Count subjects per feature condition and "Inferred absence total" code,
# then express each count as a share of its feature condition (pct) together
# with a formatted percentage label (lbl).
plotdata_between <- explan_data %>%
  group_by(Features, `Inferred absence total`) %>%
  # Explicitly keep the result grouped by Features: sum(n) below must be the
  # per-condition total, not the grand total.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct)
  )

plotdata_between

## # A tibble: 3 × 5
## # Groups:   Features [2]
##   Features           `Inferred absence total`     n   pct lbl  
##   <fct>              <fct>                    <int> <dbl> <chr>
## 1 Spear and net      0                           40   0.8 80%  
## 2 Spear and net      1                           10   0.2 20%  
## 3 Feathers and tooth 0                           50   1   100%

# Summary rows for explanations coded 1 on the combined absence code.
plotdata_sub <- subset(plotdata_between, `Inferred absence total` == 1)

plotdata <- plotdata_between

# Stacked bar chart (bars normalised to 100%) of the "Inferred absence total"
# codes per feature condition, with the percentage label centred in each
# segment.
g <- ggplot(
  plotdata,
  aes(
    x = Features,
    y = pct,
    fill = `Inferred absence total`
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill"
  ) +
  scale_y_continuous(
    # limits must be a length-two vector (seq(0, 2) has three elements);
    # c(0, 2) spans the same range. The visible range is set by
    # coord_cartesian() below.
    limits = c(0, 2),
    breaks = seq(0, 1, .25),
    expand = c(0, 0),
    # Full argument name and namespaced formatter, so the call neither relies
    # on partial argument matching nor on scales being attached.
    labels = scales::percent
  ) +
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Inferred absence of unobserved feature (total)",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g

# One-sided two-sample test of equal proportions (no continuity correction):
# is the share of subjects coded 1 on "Inferred absence total" greater in the
# first Features level than in the second?
# Counts and group sizes are taken from a cross-tabulation, so a condition
# without any 1-coded subject contributes a count of 0 instead of a
# hard-coded zero, and sample sizes are not hard-coded. Rows follow the level
# order of Features; factor() drops unused levels.
# NOTE(review): only code 1 is counted, as before; if both absence codes can
# be 1 for the same subject the total would be 2 -- confirm this cannot occur.
total_tab <- table(
  factor(explan_data$Features),
  explan_data$`Inferred absence total`
)
n_absence <- total_tab[, "1"]
n_subjects <- rowSums(total_tab)

prop.test(
  x = n_absence,
  n = n_subjects,
  alternative = "greater",
  correct = FALSE
)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], 0) out of c(50, 50)
## X-squared = 11.111, df = 1, p-value = 0.0004291
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.106953 1.000000
## sample estimates:
## prop 1 prop 2 
##    0.2    0.0