tdata <- read_csv("exp_data.csv")

## Rows: 160 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): subj_code, desktop_conf, attent_conf, scale_orient, Scenario, expl...
## dbl  (4): DV_rating, narrow_check, broad_check, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1 Subject demographics

# demographics 

min(tdata$age)

## [1] 20

max(tdata$age)

## [1] 72

mean(tdata$age)

## [1] 39.08125

sd(tdata$age)

## [1] 12.99006

# 1 = male, 2 = female, 3 = other
table(tdata$gender)

## 
##   1: male 2: female 
##        89        71

1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say

2 Data preperation

##               
##                noInfo notKnow
##   featherTooth     40      40
##   spearNet         40      40

3 Analyses

tdata_main <- subset(tdata_long, Knowledge == "'you don't know'")
tdata_supp <- subset(tdata_long, Knowledge == "no information")

counts <- tdata_main %>%
  group_by(Features, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.

counts

## # A tibble: 4 × 5
## # Groups:   Features [2]
##   Features                                          rating_rec     n   pct lbl  
##   <fct>                                             <fct>      <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar"          broad          8 0.2   20%  
## 2 "feature diagnosability: \nboth similar"          narrow        32 0.8   80%  
## 3 "feature diagnosability: \nlatent feature harder" broad          3 0.075 8%   
## 4 "feature diagnosability: \nlatent feature harder" narrow        37 0.925 92%

counts$category <- factor(counts$rating_rec, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))

Get proportion CIs for different categories in each diagnosability condition:

library(PropCIs)
library(DescTools)

## 
## Attache Paket: 'DescTools'

## Das folgende Objekt ist maskiert 'package:data.table':
## 
##     %like%

library(purrr)

counts_spear <- subset(counts, Features == "feature diagnosability: \nboth similar")

counts_feather <- subset(counts, Features == "feature diagnosability: \nlatent feature harder")


(MultinomCI(counts_spear$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_1)

##      est lwr.ci    upr.ci
## [1,] 0.2    0.1 0.3272464
## [2,] 0.8    0.7 0.9272464

(MultinomCI(counts_feather$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_2)

##        est lwr.ci   upr.ci
## [1,] 0.075  0.025 0.159792
## [2,] 0.925  0.875 1.000000

ci_low <- c(selection_ci_1[,2], selection_ci_2[,2])

ci_up <- c(selection_ci_1[,3], selection_ci_2[,3])

plotdata <- counts

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)

## 
## Attache Paket: 'scales'

## Das folgende Objekt ist maskiert 'package:purrr':
## 
##     discard

## Das folgende Objekt ist maskiert 'package:readr':
## 
##     col_factor

theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = category,
           y = pct,
           fill = Features)) +
  facet_grid( ~ Features)+
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 3.5,
            position = position_dodge(width = 1),
            vjust = -5.5) +
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_main.svg",width=7,height=5)
ggsave("categories_main.pdf",width=7,height=5)

Tests against chance:

(t2 <- binom.test(plotdata$n[2], 40))

## 
##  Exact binomial test
## 
## data:  plotdata$n[2] and 40
## number of successes = 32, number of trials = 40, p-value = 0.0001822
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.6435220 0.9094776
## sample estimates:
## probability of success 
##                    0.8

(t4 <- binom.test(plotdata$n[4], 40))

## 
##  Exact binomial test
## 
## data:  plotdata$n[4] and 40
## number of successes = 37, number of trials = 40, p-value = 1.947e-08
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.7961353 0.9842578
## sample estimates:
## probability of success 
##                  0.925

(ci_low <- c(t2$conf.int[1], t4$conf.int[1]))

## [1] 0.6435220 0.7961353

(ci_up <- c(t2$conf.int[2], t4$conf.int[2]))

## [1] 0.9094776 0.9842578

(p_values <- c(t2$p.value, t4$p.value))

## [1] 1.821658e-04 1.946501e-08

Test if proportion of narrow latent scope biases is different between conditions:

prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)

## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 2.635, df = 1, p-value = 0.1045
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.27341956  0.02341956
## sample estimates:
## prop 1 prop 2 
##  0.800  0.925

3.1 Analysis with additional test query condition

The additional between-subjects condition was one in which the unobserved feature wasn’t mentioned at all in the test query. In what follows, the graphs from above will be recreated and the additional condition will be included.

counts <- tdata_long %>%
  group_by(Knowledge, Features, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

## `summarise()` has grouped output by 'Knowledge', 'Features'. You can override
## using the `.groups` argument.

counts

## # A tibble: 8 × 6
## # Groups:   Knowledge, Features [4]
##   Knowledge        Features                         rating_rec     n   pct lbl  
##   <fct>            <fct>                            <fct>      <int> <dbl> <chr>
## 1 'you don't know' "feature diagnosability: \nboth… broad          8 0.2   20%  
## 2 'you don't know' "feature diagnosability: \nboth… narrow        32 0.8   80%  
## 3 'you don't know' "feature diagnosability: \nlate… broad          3 0.075 8%   
## 4 'you don't know' "feature diagnosability: \nlate… narrow        37 0.925 92%  
## 5 no information   "feature diagnosability: \nboth… broad          5 0.125 12%  
## 6 no information   "feature diagnosability: \nboth… narrow        35 0.875 88%  
## 7 no information   "feature diagnosability: \nlate… broad          7 0.175 18%  
## 8 no information   "feature diagnosability: \nlate… narrow        33 0.825 82%

counts$category <- factor(counts$rating_rec, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
counts

## # A tibble: 8 × 7
## # Groups:   Knowledge, Features [4]
##   Knowledge        Features                rating_rec     n   pct lbl   category
##   <fct>            <fct>                   <fct>      <int> <dbl> <chr> <fct>   
## 1 'you don't know' "feature diagnosabilit… broad          8 0.2   20%   broad l…
## 2 'you don't know' "feature diagnosabilit… narrow        32 0.8   80%   narrow …
## 3 'you don't know' "feature diagnosabilit… broad          3 0.075 8%    broad l…
## 4 'you don't know' "feature diagnosabilit… narrow        37 0.925 92%   narrow …
## 5 no information   "feature diagnosabilit… broad          5 0.125 12%   broad l…
## 6 no information   "feature diagnosabilit… narrow        35 0.875 88%   narrow …
## 7 no information   "feature diagnosabilit… broad          7 0.175 18%   broad l…
## 8 no information   "feature diagnosabilit… narrow        33 0.825 82%   narrow …

Get proportion CIs for different categories in each diagnosability condition:

library(PropCIs)
library(DescTools)
library(purrr)

counts_dontknow_spear <- subset(counts, Knowledge == "'you don't know'" & Features == "feature diagnosability: \nboth similar")
counts_dontknow_feather <- subset(counts, Knowledge == "'you don't know'" & Features == "feature diagnosability: \nlatent feature harder")

counts_noinf_spear <- subset(counts, Knowledge == "no information" & Features == "feature diagnosability: \nboth similar")
counts_noinf_feather <- subset(counts, Knowledge == "no information" & Features == "feature diagnosability: \nlatent feature harder")


(MultinomCI(counts_dontknow_spear$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_1)

##      est lwr.ci    upr.ci
## [1,] 0.2    0.1 0.3272464
## [2,] 0.8    0.7 0.9272464

(MultinomCI(counts_dontknow_feather$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_2)

##        est lwr.ci   upr.ci
## [1,] 0.075  0.025 0.159792
## [2,] 0.925  0.875 1.000000

(MultinomCI(counts_noinf_spear$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_3)

##        est lwr.ci    upr.ci
## [1,] 0.125   0.05 0.2293201
## [2,] 0.875   0.80 0.9793201

(MultinomCI(counts_noinf_feather$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_4)

##        est lwr.ci    upr.ci
## [1,] 0.175  0.075 0.2868133
## [2,] 0.825  0.725 0.9368133

ci_low <- c(selection_ci_1[,2], selection_ci_2[,2], selection_ci_3[,2], selection_ci_4[,2])

ci_up <- c(selection_ci_1[,3], selection_ci_2[,3], selection_ci_3[,3], selection_ci_4[,3])

plotdata <- counts

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = category,
           y = pct,
           fill = Features)) +
  facet_grid(Knowledge ~ Features)+
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 3.5,
            position = position_dodge(width = 1),
            vjust = -5.5) +
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g