1 Analysis of subjects’ explanations

1.1 What subjects would have done if they hadn’t been forced to make a choice

library(readr)
explan_data <- read_delim("subjects_explanations_analysis_main.csv", 
    delim = ";", escape_double = FALSE, trim_ws = TRUE)

## Rows: 80 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (5): subj_code, explanation, rating_rec, Features, Knowledge
## dbl (7): different explanation, no explanation or unclear, misunderstood sce...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

colnames(explan_data)[3]  <- "different_explanation"        # subjects who gave an explanation that seemed to have nothing to do with visibility 
colnames(explan_data)[4]  <- "no_explanation"               # subjects who either gave no explanation for what they would do, or their explanation was unclear
colnames(explan_data)[5]  <- "misunderstood_scenario"       # subjects who wrote something that indicates that they didn't understand the scenario correctly
colnames(explan_data)[6]  <- "equally_likely"               # subjects who wrote explanations saying that 50:50 would have been their preferred answer
colnames(explan_data)[7]  <- "absence_because_visible"      # subjects who inferred the absence of the latent effect because it should have been visible
colnames(explan_data)[8]  <- "absence_because_description"  # subjects who inferred the absence of the latent effect for any other reason 
colnames(explan_data)[9]  <- "inferred_absence"             # overall number of subjects who inferred the absence of the latent feature 


# "nm" below means "not meet"; expressing the the criterion is not fulfilled

explan_data$different_explanation[explan_data$different_explanation == 1] <- "different_explanation"
explan_data$different_explanation[is.na(explan_data$different_explanation)] <- "nm"

explan_data$different_explanation <- factor(explan_data$different_explanation, levels = c("nm", "different_explanation"), 
                                     labels = c("nm", "different explanation"))

explan_data$no_explanation[explan_data$no_explanation == 1] <- "no_explan"
explan_data$no_explanation[is.na(explan_data$no_explanation)] <- "nm"

explan_data$no_explanation <- factor(explan_data$no_explanation, levels = c("nm", "no_explan"), 
                                     labels = c("nm", "no explanation or unclear"))

explan_data$misunderstood_scenario[explan_data$misunderstood_scenario == 1] <- "misunderstood"
explan_data$misunderstood_scenario[is.na(explan_data$misunderstood_scenario)] <- "nm"

explan_data$misunderstood_scenario <- factor(explan_data$misunderstood_scenario, levels = c("nm", "misunderstood"), 
                                     labels = c("nm", "misunderstood scenario"))


explan_data$equally_likely[explan_data$equally_likely == 1] <- "equally_likely"
explan_data$equally_likely[is.na(explan_data$equally_likely)] <- "nm"


explan_data$equally_likely <- factor(explan_data$equally_likely, levels = c("nm", "equally_likely"), 
                                     labels = c("nm", "equally likely"))



explan_data$absence_because_visible[explan_data$absence_because_visible == 1] <- "yes"
explan_data$absence_because_visible[is.na(explan_data$absence_because_visible)] <- "nm"


explan_data$absence_because_visible <- factor(explan_data$absence_because_visible, levels = c("nm", "yes"), 
                                     labels = c("nm", "absence because it should be visible"))



explan_data$absence_because_description[explan_data$absence_because_description == 1] <- "yes"
explan_data$absence_because_description[is.na(explan_data$absence_because_description)] <- "nm"


explan_data$absence_because_description <- factor(explan_data$absence_because_description, levels = c("nm", "yes"), 
                                     labels = c("nm", "absence given the description"))



explan_data$inferred_absence[explan_data$inferred_absence == 1] <- "yes"
explan_data$inferred_absence[is.na(explan_data$inferred_absence)] <- "nm"

explan_data$inferred_absence <- factor(explan_data$inferred_absence, levels = c("nm", "yes"),
                                       labels = c("nm", "inferred absence"))

1.2 subjects giving no explanation or unclear explanation

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, no_explanation) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

plotdata_between

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features     no_explanation                n   pct lbl  
##   <chr>        <fct>                     <int> <dbl> <chr>
## 1 featherTooth nm                           23 0.575 57%  
## 2 featherTooth no explanation or unclear    17 0.425 42%  
## 3 spearNet     nm                           29 0.725 72%  
## 4 spearNet     no explanation or unclear    11 0.275 28%

plotdata_sub <- subset(plotdata_between, no_explanation == "no explanation or unclear")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = no_explanation)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Inferred state",
       x = "Features",
       title = "Subjects' inference about latent feature (overall)")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 1.978, df = 1, p-value = 0.1596
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.05643688  0.35643688
## sample estimates:
## prop 1 prop 2 
##  0.425  0.275

1.3 Subjects clearly stating they’d prefer to answer 50:50

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, equally_likely) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

plotdata_between

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features     equally_likely     n   pct lbl  
##   <chr>        <fct>          <int> <dbl> <chr>
## 1 featherTooth nm                24 0.6   60%  
## 2 featherTooth equally likely    16 0.4   40%  
## 3 spearNet     nm                27 0.675 68%  
## 4 spearNet     equally likely    13 0.325 32%

plotdata_sub <- subset(plotdata_between, equally_likely == "equally likely")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = equally_likely)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1", labels = c("nm", "'equally likely option'")) +
  labs(y = "Percentage", 
       fill = "Categorization of \nsubjects' explanations",
       x = "Features")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("preferred_answer.svg",width=6,height=5)

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "greater", correct = F)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 0.48682, df = 1, p-value = 0.2427
## alternative hypothesis: greater
## 95 percent confidence interval:
##  -0.101271  1.000000
## sample estimates:
## prop 1 prop 2 
##  0.400  0.325

1.4 Subjects who inferred the absence of the latent feature

Because of its visibility:

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, Knowledge, absence_because_visible) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features', 'Knowledge'. You can override
## using the `.groups` argument.

plotdata_between

## # A tibble: 4 × 6
## # Groups:   Features, Knowledge [2]
##   Features     Knowledge absence_because_visible                  n   pct lbl  
##   <chr>        <chr>     <fct>                                <int> <dbl> <chr>
## 1 featherTooth notKnow   nm                                      38 0.95  95%  
## 2 featherTooth notKnow   absence because it should be visible     2 0.05  5%   
## 3 spearNet     notKnow   nm                                      27 0.675 68%  
## 4 spearNet     notKnow   absence because it should be visible    13 0.325 32%

plotdata_sub <- subset(plotdata_between, absence_because_visible == "absence because it should be visible")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = absence_because_visible)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Inferred state",
       x = "Features",
       title = "Subjects' inference about latent feature")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_because_visible.svg",width=6,height=5)

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "less", correct = F)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 9.9282, df = 1, p-value = 0.0008138
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.1406458
## sample estimates:
## prop 1 prop 2 
##  0.050  0.325

Inferred absence for any other reason:

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, absence_because_description) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

plotdata_between

## # A tibble: 3 × 5
## # Groups:   Features [2]
##   Features     absence_because_description       n   pct lbl  
##   <chr>        <fct>                         <int> <dbl> <chr>
## 1 featherTooth nm                               40   1   100% 
## 2 spearNet     nm                               36   0.9 90%  
## 3 spearNet     absence given the description     4   0.1 10%

plotdata_sub <- subset(plotdata_between, absence_because_description == "absence given the description")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = absence_because_description)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Inferred state",
       x = "Features",
       title = "Subjects' inference about latent feature 2")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_because_description.svg",width=6,height=5)

prop.test(x = c(0, plotdata$n[3]), n = c(40, 40), alternative = "less", correct = F)

## Warning in prop.test(x = c(0, plotdata$n[3]), n = c(40, 40), alternative =
## "less", : Chi-Quadrat-Approximation kann inkorrekt sein

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(0, plotdata$n[3]) out of c(40, 40)
## X-squared = 4.2105, df = 1, p-value = 0.02009
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.00000000 -0.02197774
## sample estimates:
## prop 1 prop 2 
##    0.0    0.1

Inferred Absence of latent feature overall:

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, inferred_absence) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

plotdata_between

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features     inferred_absence     n   pct lbl  
##   <chr>        <fct>            <int> <dbl> <chr>
## 1 featherTooth nm                  37 0.925 92%  
## 2 featherTooth inferred absence     3 0.075 8%   
## 3 spearNet     nm                  24 0.6   60%  
## 4 spearNet     inferred absence    16 0.4   40%

plotdata_sub <- subset(plotdata_between, inferred_absence == "inferred absence")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = inferred_absence)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Inferred state",
       x = "Features",
       title = "Subjects' inference about latent feature (overall)")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "less", correct = F)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 11.665, df = 1, p-value = 0.0003184
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.1803429
## sample estimates:
## prop 1 prop 2 
##  0.075  0.400

1.5 Subjects giving a different kind of explanation (i.e., explanations that have nothing to do with visibility)

# create a summary dataset that also contains the percentages

plotdata_between <- explan_data %>%
  group_by(Features, different_explanation) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

plotdata_between

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features     different_explanation     n   pct lbl  
##   <chr>        <fct>                 <int> <dbl> <chr>
## 1 featherTooth nm                       34 0.85  85%  
## 2 featherTooth different explanation     6 0.15  15%  
## 3 spearNet     nm                       39 0.975 98%  
## 4 spearNet     different explanation     1 0.025 2%

plotdata_sub <- subset(plotdata_between, different_explanation == "different_explanation")

plotdata <- plotdata_between



g<- ggplot(plotdata, 
       aes(x = Features,
           y = pct,
           fill = different_explanation)) +
  #facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
  coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4.5,
            position = position_stack(vjust = 0.5)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Inferred state",
       x = "Features",
       title = "Subjects' inference about latent feature (overall)")+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct, 
   #        ymin = 1 - plotdata_sub$pct - plotdata_sub$CI, 
    #       ymax = 1- plotdata_sub$pct + plotdata_sub$CI, 
     #      colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)

## Warning in prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), :
## Chi-Quadrat-Approximation kann inkorrekt sein

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 3.9139, df = 1, p-value = 0.04789
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.004229395 0.245770605
## sample estimates:
## prop 1 prop 2 
##  0.150  0.025

Explanation Analysis

1 Analysis of subjects’ explanations

1.1 What subjects would have done if they hadn’t been forced to make a choice

1.2 subjects giving no explanation or unclear explanation

1.3 Subjects clearly stating they’d prefer to answer 50:50

1.4 Subjects who inferred the absence of the latent feature

1.5 Subjects giving a different kind of explanation (i.e., explanations that have nothing to do with visibility)