tdata <- read_csv("exp_data.csv")

1 Subject demographics

# demographics 

min(tdata$age)
## [1] 19
max(tdata$age)
## [1] 76
mean(tdata$age)
## [1] 38.35
sd(tdata$age)
## [1] 12.82732
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
## 
##       1: male     2: female 3: non-binary 
##           122            76             2

1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say

2 Results

##               
##                noInfo notKnow
##   featherTooth     50      50
##   spearNet         50      50
tdata_main <- subset(tdata_long, Knowledge == "'you don't know'")
tdata_supp <- subset(tdata_long, Knowledge == "no information")

2.1 Graphs

myTheme <- theme(plot.title = element_text(face="bold", size = 22),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.x = element_text(size = 14, angle = 0), 
        axis.text.y = element_text(size = 16, angle = 0),
        legend.text = element_text(size = 18),
        legend.title = element_text(face = "bold", size = 18),
        strip.text.x = element_text(size = 18),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.background = element_blank(), 
        axis.line.x = element_line(colour = "black"), 
        axis.line.y = element_line(colour = "black"),
        axis.text = element_text(colour ="black"), 
        axis.ticks = element_line(colour ="black"))

tdata_sub <- tdata_main


library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)

pd <- position_dodge(width = 0.3)

tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)

theme_set(theme_light(base_size = 20, base_family = "Poppins"))

# new labes for the facets 

g <- ggplot(tdata_sub, aes(x = Features, y = valueJitter)) +
  guides(fill=FALSE)+
  #facet_grid( ~ Knowledge)+
  #ggtitle("Subjects' causal srength ratings") +
  scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
  scale_x_discrete(labels=c("both \nsimilar", "latent feature \nharder")) +
  #stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
  geom_violinhalf(aes(y = rating_rec, group = Features, fill = Features), color = NA, 
                  position=position_dodge(1), alpha = 0.4)+
  #geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
  geom_hline(yintercept=0, linetype="dashed", color = "black")+
  geom_jitter(aes(color = Features), alpha = 0.5, width = 0.15, height = 0.2) +
  stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1) +
  stat_summary(aes(y = rating_rec, group=1, color = outcome_valence), fun.y=mean, geom="line", 
               color = "black", shape = 22, size = 1, alpha = .7)+
  stat_summary(aes(y = rating_rec, group=1, fill = Features), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4, 
               group=1, alpha = 1, position = position_dodge(width = 0.5))+
  labs(x = "Feature diagnosability", y = "Explanation Rating") +
  scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
  annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
  theme(legend.position = "none")+
  myTheme+
  theme(panel.grid.major = element_line(color = "lightgrey",
                                        size = 0.5,
                                        linetype = 'dotted'))+
  stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
             vjust = -6)

g

ggsave("results_means_mainDV.svg",width=4,height=5)
ggsave("results_means_mainDV.pdf",width=4,height=5)
#ggsave("results_means_selection.pdf",width=11,height=5)
library(ggridges)
g2 <- ggplot(tdata_sub, aes(x = rating_rec, y = Features, fill = after_stat(x))) +
  geom_density_ridges_gradient() +
  #geom_density_ridges(fill = "lightblue", alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  scale_fill_viridis_c(name = "Explanation \nRating", option = "C")+
  labs(x = "Rating", y = "Feature diagnosability") +
  myTheme

g2

Use the ggplot_build package to see a table with the means and CI values plotted in the graph:

values <- ggplot_build(g)$data[[4]] # values are shown in the 4th panel 
values
##   x group    y    ymin  ymax PANEL flipped_aes xmin xmax colour linewidth
## 1 1     1 -0.5 -0.8405 -0.22     1       FALSE    1    1  black         1
## 2 2     1  0.0  0.0000  0.00     1       FALSE    2    2  black         1
##   linetype width alpha
## 1        1     0    NA
## 2        1     0    NA

get group medians:

library(rcompanion)
# groupwiseMean(rating_rec ~ Features + Knowledge,
#               data        = tdata_long,
#               traditional = FALSE,
#               percentile  = TRUE)

groupwiseMedian(rating_rec ~ Features,
                data        = tdata_main,
                bca         = FALSE,
                percentile  = TRUE,
                R           = 1000)
##                                          Features  n Median Conf.level
## 1          feature diagnosability: \nboth similar 50      0       0.95
## 2 feature diagnosability: \nlatent feature harder 50      0       0.95
##   Percentile.lower Percentile.upper
## 1                0                0
## 2                0                0
counts <- tdata_main %>%
  group_by(Features, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts
## # A tibble: 7 × 5
## # Groups:   Features [2]
##   Features                                          rating_rec     n   pct lbl  
##   <fct>                                                  <dbl> <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar"                  -5     1  0.02 2%   
## 2 "feature diagnosability: \nboth similar"                  -3     2  0.04 4%   
## 3 "feature diagnosability: \nboth similar"                  -2     6  0.12 12%  
## 4 "feature diagnosability: \nboth similar"                  -1     3  0.06 6%   
## 5 "feature diagnosability: \nboth similar"                   0    37  0.74 74%  
## 6 "feature diagnosability: \nboth similar"                   1     1  0.02 2%   
## 7 "feature diagnosability: \nlatent feature harder"          0    50  1    100%
tdata_main$category[tdata_main$rating_rec < 0] <- "narrow"
tdata_main$category[tdata_main$rating_rec == 0] <- "unbiased"
tdata_main$category[tdata_main$rating_rec > 0] <- "broad"

counts2 <- tdata_main %>%
  group_by(Features, category) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))


counts2
## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features                                          category     n   pct lbl  
##   <fct>                                             <chr>    <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar"          broad        1  0.02 2%   
## 2 "feature diagnosability: \nboth similar"          narrow      12  0.24 24%  
## 3 "feature diagnosability: \nboth similar"          unbiased    37  0.74 74%  
## 4 "feature diagnosability: \nlatent feature harder" unbiased    50  1    100%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))

Get proportion CIs for different categories in each diagnosability condition:

library(PropCIs)
library(DescTools)
library(purrr)

counts_spear <- subset(counts2, Features == "feature diagnosability: \nboth similar")

counts_feather <- subset(counts2, Features == "feature diagnosability: \nlatent feature harder")


(MultinomCI(counts_spear$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_1)
##       est lwr.ci    upr.ci
## [1,] 0.02   0.00 0.1491064
## [2,] 0.24   0.14 0.3691064
## [3,] 0.74   0.64 0.8691064
(MultinomCI(counts_feather$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_2)
##      est lwr.ci upr.ci
## [1,]   1      1      1
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2])

ci_up <- c(selection_ci_1[,3], selection_ci_2[,3])
plotdata <- counts2

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = category,
           y = pct,
           fill = Features)) +
  facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 3.5,
            position = position_dodge(width = 1),
            vjust = -6.5) +
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_main.svg",width=7,height=5)
ggsave("categories_main.pdf",width=7,height=5)

2.2 Analyses

2.2.1 Model comparisons

library(afex)
library(emmeans)

a1 <- aov_car(rating_rec ~ Features + Error(subj_code), tdata_main, anova_table = list(es = "pes"))
a1
## Anova Table (Type 3 tests)
## 
## Response: rating_rec
##     Effect    df  MSE        F  pes p.value
## 1 Features 1, 98 0.60 10.47 ** .097    .002
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

2.2.2 t-test

t.test(rating_rec ~ Features, data = tdata_main, var.equal = F, alternative = "two.sided", paired = F) # two sided is used here to get the full confidence interval of the difference; note, however, that the hypothesis was a directed one (thus, alternative = "less" could have been used to get the one-sided p-value). 
## 
##  Welch Two Sample t-test
## 
## data:  rating_rec by Features
## t = -3.2358, df = 49, p-value = 0.002177
## alternative hypothesis: true difference in means between group feature diagnosability: 
## both similar and group feature diagnosability: 
## latent feature harder is not equal to 0
## 95 percent confidence interval:
##  -0.8105269 -0.1894731
## sample estimates:
##          mean in group feature diagnosability: \nboth similar 
##                                                          -0.5 
## mean in group feature diagnosability: \nlatent feature harder 
##                                                           0.0

2.2.3 Proportion tests for behavior categories

Test if proportion of unbiased subjects is higher in “latent feature harder” than in “feature diagnosability: similar”

unbiased_props <- prop.test(x = c(counts2$n[3], counts2$n[4]), n = c(50, 50), alternative = "less", correct = F)
unbiased_props
## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(counts2$n[3], counts2$n[4]) out of c(50, 50)
## X-squared = 14.943, df = 1, p-value = 5.542e-05
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.000000 -0.157966
## sample estimates:
## prop 1 prop 2 
##   0.74   1.00

2.3 Analyses with additional condition

this part of the analysis script reports an analysis that includes the additional between-subjects conditions in which the latent feature was not mentioned in the test case description.

2.3.1 Graphs

myTheme <- theme(plot.title = element_text(face="bold", size = 22),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.x = element_text(size = 14, angle = 0), 
        axis.text.y = element_text(size = 16, angle = 0),
        legend.text = element_text(size = 18),
        legend.title = element_text(face = "bold", size = 18),
        strip.text.x = element_text(size = 18),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.background = element_blank(), 
        axis.line.x = element_line(colour = "black"), 
        axis.line.y = element_line(colour = "black"),
        axis.text = element_text(colour ="black"), 
        axis.ticks = element_line(colour ="black"))

tdata_sub <- tdata_long


library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)

pd <- position_dodge(width = 0.3)

tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)

theme_set(theme_light(base_size = 20, base_family = "Poppins"))

# new labes for the facets 

g <- ggplot(tdata_sub, aes(x = Features, y = valueJitter)) +
  guides(fill=FALSE)+
  facet_grid( ~ Knowledge)+
  #ggtitle("Subjects' causal srength ratings") +
  scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
  scale_x_discrete(labels=c("feature diagnosability: \nboth similar", "feature diagnosability: \nlatent feature harder")) +
  #stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
  geom_violinhalf(aes(y = rating_rec, group = Features, fill = Features), color = NA, 
                  position=position_dodge(1), alpha = 0.4)+
  #geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
  geom_hline(yintercept=0, linetype="dashed", color = "black")+
  geom_jitter(aes(color = Features), alpha = 0.5, width = 0.15, height = 0.2) +
  stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1) +
  stat_summary(aes(y = rating_rec, group=1, fill = Features), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4, 
               group=1, alpha = 1, position = position_dodge(width = 0.5))+
  labs(x = "Category Features", y = "Categorization Rating") +
  scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
  annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
  theme(legend.position = "none")+
  myTheme+
  theme(panel.grid.major = element_line(color = "lightgrey",
                                        size = 0.5,
                                        linetype = 'dotted'))+
  stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
             vjust = -6)

g

#ggsave("results_means_mainDV.svg",width=7,height=5)
#ggsave("results_means_mainDV.pdf",width=7,height=5)
#ggsave("results_means_selection.pdf",width=11,height=5)

Use the ggplot_build package to see a table with the means and CI values plotted in the graph:

values <- ggplot_build(g)$data[[4]] # values are shown in the 4th panel 
values
##   x group     y    ymin  ymax PANEL flipped_aes xmin xmax colour linewidth
## 1 1     1 -0.50 -0.8200 -0.22     1       FALSE    1    1  black         1
## 2 2     1  0.00  0.0000  0.00     1       FALSE    2    2  black         1
## 3 1     1 -1.38 -1.9005 -0.82     2       FALSE    1    1  black         1
## 4 2     1 -0.16 -0.4800  0.14     2       FALSE    2    2  black         1
##   linetype width alpha
## 1        1     0    NA
## 2        1     0    NA
## 3        1     0    NA
## 4        1     0    NA

get group medians:

library(rcompanion)
# groupwiseMean(rating_rec ~ Features + Knowledge,
#               data        = tdata_long,
#               traditional = FALSE,
#               percentile  = TRUE)

groupwiseMedian(rating_rec ~ Features + Knowledge,
                data        = tdata_long,
                bca         = FALSE,
                percentile  = TRUE,
                R           = 1000)
##                                          Features        Knowledge  n Median
## 1          feature diagnosability: \nboth similar 'you don't know' 50      0
## 2          feature diagnosability: \nboth similar   no information 50      0
## 3 feature diagnosability: \nlatent feature harder 'you don't know' 50      0
## 4 feature diagnosability: \nlatent feature harder   no information 50      0
##   Conf.level Percentile.lower Percentile.upper
## 1       0.95              0.0                0
## 2       0.95             -0.5                0
## 3       0.95              0.0                0
## 4       0.95              0.0                0
counts <- tdata_long %>%
  group_by(Features, Knowledge, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts
## # A tibble: 19 × 6
## # Groups:   Features, Knowledge [4]
##    Features                               Knowledge rating_rec     n   pct lbl  
##    <fct>                                  <fct>          <dbl> <int> <dbl> <chr>
##  1 "feature diagnosability: \nboth simil… 'you don…         -5     1  0.02 2%   
##  2 "feature diagnosability: \nboth simil… 'you don…         -3     2  0.04 4%   
##  3 "feature diagnosability: \nboth simil… 'you don…         -2     6  0.12 12%  
##  4 "feature diagnosability: \nboth simil… 'you don…         -1     3  0.06 6%   
##  5 "feature diagnosability: \nboth simil… 'you don…          0    37  0.74 74%  
##  6 "feature diagnosability: \nboth simil… 'you don…          1     1  0.02 2%   
##  7 "feature diagnosability: \nboth simil… no infor…         -5     9  0.18 18%  
##  8 "feature diagnosability: \nboth simil… no infor…         -4     2  0.04 4%   
##  9 "feature diagnosability: \nboth simil… no infor…         -3     3  0.06 6%   
## 10 "feature diagnosability: \nboth simil… no infor…         -2     3  0.06 6%   
## 11 "feature diagnosability: \nboth simil… no infor…         -1     1  0.02 2%   
## 12 "feature diagnosability: \nboth simil… no infor…          0    32  0.64 64%  
## 13 "feature diagnosability: \nlatent fea… 'you don…          0    50  1    100% 
## 14 "feature diagnosability: \nlatent fea… no infor…         -5     1  0.02 2%   
## 15 "feature diagnosability: \nlatent fea… no infor…         -3     1  0.02 2%   
## 16 "feature diagnosability: \nlatent fea… no infor…         -2     2  0.04 4%   
## 17 "feature diagnosability: \nlatent fea… no infor…         -1     1  0.02 2%   
## 18 "feature diagnosability: \nlatent fea… no infor…          0    44  0.88 88%  
## 19 "feature diagnosability: \nlatent fea… no infor…          5     1  0.02 2%
counts <- tdata_long %>%
  group_by(Features, Knowledge, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts
## # A tibble: 19 × 6
## # Groups:   Features, Knowledge [4]
##    Features                               Knowledge rating_rec     n   pct lbl  
##    <fct>                                  <fct>          <dbl> <int> <dbl> <chr>
##  1 "feature diagnosability: \nboth simil… 'you don…         -5     1  0.02 2%   
##  2 "feature diagnosability: \nboth simil… 'you don…         -3     2  0.04 4%   
##  3 "feature diagnosability: \nboth simil… 'you don…         -2     6  0.12 12%  
##  4 "feature diagnosability: \nboth simil… 'you don…         -1     3  0.06 6%   
##  5 "feature diagnosability: \nboth simil… 'you don…          0    37  0.74 74%  
##  6 "feature diagnosability: \nboth simil… 'you don…          1     1  0.02 2%   
##  7 "feature diagnosability: \nboth simil… no infor…         -5     9  0.18 18%  
##  8 "feature diagnosability: \nboth simil… no infor…         -4     2  0.04 4%   
##  9 "feature diagnosability: \nboth simil… no infor…         -3     3  0.06 6%   
## 10 "feature diagnosability: \nboth simil… no infor…         -2     3  0.06 6%   
## 11 "feature diagnosability: \nboth simil… no infor…         -1     1  0.02 2%   
## 12 "feature diagnosability: \nboth simil… no infor…          0    32  0.64 64%  
## 13 "feature diagnosability: \nlatent fea… 'you don…          0    50  1    100% 
## 14 "feature diagnosability: \nlatent fea… no infor…         -5     1  0.02 2%   
## 15 "feature diagnosability: \nlatent fea… no infor…         -3     1  0.02 2%   
## 16 "feature diagnosability: \nlatent fea… no infor…         -2     2  0.04 4%   
## 17 "feature diagnosability: \nlatent fea… no infor…         -1     1  0.02 2%   
## 18 "feature diagnosability: \nlatent fea… no infor…          0    44  0.88 88%  
## 19 "feature diagnosability: \nlatent fea… no infor…          5     1  0.02 2%
tdata_long$category[tdata_long$rating_rec < 0] <- "narrow"
tdata_long$category[tdata_long$rating_rec == 0] <- "unbiased"
tdata_long$category[tdata_long$rating_rec > 0] <- "broad"

counts2 <- tdata_long %>%
  group_by(Features, Knowledge, category) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts2
## # A tibble: 9 × 6
## # Groups:   Features, Knowledge [4]
##   Features                                  Knowledge category     n   pct lbl  
##   <fct>                                     <fct>     <chr>    <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar"  'you don… broad        1  0.02 2%   
## 2 "feature diagnosability: \nboth similar"  'you don… narrow      12  0.24 24%  
## 3 "feature diagnosability: \nboth similar"  'you don… unbiased    37  0.74 74%  
## 4 "feature diagnosability: \nboth similar"  no infor… narrow      18  0.36 36%  
## 5 "feature diagnosability: \nboth similar"  no infor… unbiased    32  0.64 64%  
## 6 "feature diagnosability: \nlatent featur… 'you don… unbiased    50  1    100% 
## 7 "feature diagnosability: \nlatent featur… no infor… broad        1  0.02 2%   
## 8 "feature diagnosability: \nlatent featur… no infor… narrow       5  0.1  10%  
## 9 "feature diagnosability: \nlatent featur… no infor… unbiased    44  0.88 88%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))

Get proportion CIs for different categories in each diagnosability condition:

library(PropCIs)
library(DescTools)
library(purrr)

counts_spear_notknow <- subset(counts2, Features == "feature diagnosability: \nboth similar" & Knowledge == "'you don't know'")
counts_spear_noinf <- subset(counts2, Features == "feature diagnosability: \nboth similar" & Knowledge == "no information")

counts_feather_notknow <- subset(counts2, Features == "feature diagnosability: \nlatent feature harder" & Knowledge == "'you don't know'")
counts_feather_noinf <- subset(counts2, Features == "feature diagnosability: \nlatent feature harder" & Knowledge == "no information")


(MultinomCI(counts_spear_notknow$n,
           conf.level=0.95,
           method="wald") -> selection_ci_1)
##       est    lwr.ci     upr.ci
## [1,] 0.02 0.0000000 0.05880531
## [2,] 0.24 0.1216208 0.35837923
## [3,] 0.74 0.6184190 0.86158098
(MultinomCI(counts_spear_noinf$n,
           conf.level=0.95,
           method="wald") -> selection_ci_2)
##       est    lwr.ci    upr.ci
## [1,] 0.36 0.2269532 0.4930468
## [2,] 0.64 0.5069532 0.7730468
(MultinomCI(counts_feather_notknow$n,
           conf.level=0.95,
           method="wald") -> selection_ci_3)
##      est lwr.ci upr.ci
## [1,]   1      1      1
(MultinomCI(counts_feather_noinf$n,
           conf.level=0.95,
           method="wald") -> selection_ci_4)
##       est     lwr.ci     upr.ci
## [1,] 0.02 0.00000000 0.05880531
## [2,] 0.10 0.01684577 0.18315423
## [3,] 0.88 0.78992691 0.97007309
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2], selection_ci_3[,2], selection_ci_4[,2])

ci_up <- c(selection_ci_1[,3], selection_ci_2[,3], selection_ci_3[,3], selection_ci_4[,3])
plotdata <- counts2

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = category,
           y = pct,
           fill = Features)) +
  facet_grid(Features ~ Knowledge)+
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 3.5,
            position = position_dodge(width = 1),
            vjust = -3.5) +
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections_between.pdf",width=6,height=5)
#ggsave("categories.svg",width=7,height=5)
#ggsave("categories.pdf",width=7,height=5)

2.3.2 Model comparisons

library(afex)
library(emmeans)

a1 <- aov_car(rating_rec ~ Features*Knowledge + Error(subj_code), tdata_long, anova_table = list(es = "pes"))
a1
## Anova Table (Type 3 tests)
## 
## Response: rating_rec
##               Effect     df  MSE         F  pes p.value
## 1           Features 1, 196 1.67 22.17 *** .102   <.001
## 2          Knowledge 1, 196 1.67   8.10 ** .040    .005
## 3 Features:Knowledge 1, 196 1.67    3.88 + .019    .050
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

more digits for the p.values can be obtained by running model comparisons manually.

2.3.3 Contrasts

############### 
# a follow-up analysis 
library(lsmeans)
# means

ls2 <- lsmeans(a1, c("Features", "Knowledge")) # group means by between-condition
ls2
##   Features                                       Knowledge        lsmean    SE
##  feature diagnosability: \nboth similar          'you don't know'  -0.50 0.183
##  feature diagnosability: \nlatent feature harder 'you don't know'   0.00 0.183
##  feature diagnosability: \nboth similar          no information    -1.38 0.183
##  feature diagnosability: \nlatent feature harder no information    -0.16 0.183
##   df lower.CL upper.CL
##  196    -0.86    -0.14
##  196    -0.36     0.36
##  196    -1.74    -1.02
##  196    -0.52     0.20
## 
## Confidence level used: 0.95
# contrast the strength levels (main effect; averaging over decision level, as there was no sig. interaction)
contrasts <- emmeans(a1, ~ Features*Knowledge)
s <- pairs(contrasts, adjust = "none")


s
##    contrast                                                                                                                       
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nlatent feature harder 'you don't know'       
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nboth similar no information                  
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nlatent feature harder no information         
##  feature diagnosability: \nlatent feature harder 'you don't know' - feature diagnosability: \nboth similar no information         
##  feature diagnosability: \nlatent feature harder 'you don't know' - feature diagnosability: \nlatent feature harder no information
##  feature diagnosability: \nboth similar no information - feature diagnosability: \nlatent feature harder no information           
##  estimate    SE  df t.ratio p.value
##     -0.50 0.258 196  -1.936  0.0544
##      0.88 0.258 196   3.406  0.0008
##     -0.34 0.258 196  -1.316  0.1897
##      1.38 0.258 196   5.342  <.0001
##      0.16 0.258 196   0.619  0.5364
##     -1.22 0.258 196  -4.723  <.0001
confint(s, level = 0.95)
##    contrast                                                                                                                       
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nlatent feature harder 'you don't know'       
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nboth similar no information                  
##  feature diagnosability: \nboth similar 'you don't know' - feature diagnosability: \nlatent feature harder no information         
##  feature diagnosability: \nlatent feature harder 'you don't know' - feature diagnosability: \nboth similar no information         
##  feature diagnosability: \nlatent feature harder 'you don't know' - feature diagnosability: \nlatent feature harder no information
##  feature diagnosability: \nboth similar no information - feature diagnosability: \nlatent feature harder no information           
##  estimate    SE  df lower.CL upper.CL
##     -0.50 0.258 196   -1.009  0.00946
##      0.88 0.258 196    0.371  1.38946
##     -0.34 0.258 196   -0.849  0.16946
##      1.38 0.258 196    0.871  1.88946
##      0.16 0.258 196   -0.349  0.66946
##     -1.22 0.258 196   -1.729 -0.71054
## 
## Confidence level used: 0.95

This additional condition provides additional evidence that subjects pragmatically rely on the presented information to try to infer the status of the latent feature. Here, subjects who read the spear-net'' scenario showed the most pronounced latent-scope bias ($M~= -1.38$, 95\% CI [$-1.96, -0.84$], Median~=$0$). This was predicted because it is reasonable to assume that a situation in which one feature (the net) that should be just as easily observable as another (the spear) isn't mentioned at all is more likely to happen in a world in which that feature is actually absent. A contrast analysis (comparison of green graphs in Fig.~\ref{fig:Exp_tokoloCont_res}) showed that the latent scope bias in this condition was larger than in the corresponding spear-net condition that used theyou don’t know’’ phrasing (\(\Delta M~= -0.88\), \(t\)(196)~\(= 3.41\), \(p~< .001\)). However, even here 32 subjects (64%) still weren’t biased. Furthermore, as predicted, in the no information condition'' in which subjects saw thefeather-tooth’’ scenario not mentioning the latent feature at all had less pragmatic influence. In this condition, 44 subjects (88%) gave unbiased ratings, and only five (10%) gave ratings indicating a latent-scope bias (\(M~= -0.16\), 95% CI [\(-0.48, 0.16\)], Median~=\(0\)). The observed pattern in this condition suggests that not mentioning the status of a feature that is anyway expected to be unobserved is regarded as less diagnostic for a situation in which that feature is actually absent.

3 Compare behavior proportions with those observed in first study

One should also compare the observed behavior proportions with those that were observed under the forced-choice response format test context that was used in the previous experiment. This will be done here.

First, create an object that contains the results of the previous (forced-choice) study.

Study <- c(rep("Forced",4))
Features <- c("feature diagnosability: \nboth similar", "feature diagnosability: \nboth similar", "feature diagnosability: \nlatent feature harder", "feature diagnosability: \nlatent feature harder")
category <- c("narrow l.s.", "broad l.s.", "narrow l.s.", "broad l.s.") # note that "unbiased" is actually not a reasonable category here as subjects had to choose one explanation (this was included here only for the sake of comparability with the continuous scale experiment)

n <- c(32,8,37,3)
pct <- c(0.80,0.20,0.925,0.075)
lbl <- c("80%", "20%", "92%", "8%") 
props_expForced <- data.frame(Features, category, n, pct, lbl, Study)

Now take the proportions of the current study (but include only the relevant “You don’t know” condition)

props_contScale <- subset(counts2, Knowledge == "'you don't know'")
props_contScale <- subset(props_contScale, select = c(1,3:6))
props_contScale$Study <- c("Scale")
inter_exp_probs <- rbind(props_expForced, props_contScale)
  1. Cross-exp. Comparison for the original “spear-net” condition. Test if proportion of narrow latent scope biases is reduced in the current experiment that uses a continuous scale test format.
biased_props_spear_net <- prop.test(x = c(inter_exp_probs$n[6], inter_exp_probs$n[1]), n = c(50, 40), alternative = "less", correct = F)
biased_props_spear_net
## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(inter_exp_probs$n[6], inter_exp_probs$n[1]) out of c(50, 40)
## X-squared = 27.889, df = 1, p-value = 6.423e-08
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.4161529
## sample estimates:
## prop 1 prop 2 
##   0.24   0.80
  1. Cross-exp. Comparison for the novel “feather-tooth” condition. Test if proportion of narrow latent scope biases is reduced in the current experiment that uses a continuous scale test format.
biased_props_feather_tooth <- prop.test(x = c(0, inter_exp_probs$n[3]), n = c(50, 40), alternative = "less", correct = F)
biased_props_feather_tooth
## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(0, inter_exp_probs$n[3]) out of c(50, 40)
## X-squared = 78.538, df = 1, p-value < 2.2e-16
## alternative hypothesis: less
## 95 percent confidence interval:
##  -1.0000000 -0.8564987
## sample estimates:
## prop 1 prop 2 
##  0.000  0.925