tdata <- read_csv("exp_data.csv")
## Rows: 560 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): subj_code, desktop_conf, attent_conf, prevalence, latent_feature, ...
## dbl  (5): condition, instr_tests, LS_bias_probe_rating, age, rating_rec
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1 Subject demographics

# demographics 

min(tdata$age)
## [1] 18
max(tdata$age)
## [1] 85
mean(tdata$age)
## [1] 41.09107
sd(tdata$age)
## [1] 13.92982
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
## 
##              1: male            2: female        3: non-binary 
##                  276                  280                    3 
## 4: prefer not to say 
##                    1

1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say

Check n in each condition:

table(tdata$condition)
## 
##  1  2  3  4  5  6  7  8 
## 70 70 70 70 70 70 70 70

There are 8 conditions because what was manipulated whether rate information was given or not, the latent feature (beak vs. feet), varied and counterbalanced the orientation of the rating scale (narrow left vs. narrow right).

# make rate information factor a factor (its called "prevalence" in the data frame)

tdata$prevalence <- factor(tdata$prevalence, levels = c("omitted", "presented"), labels = c("omitted", "presented"))

2 Graphs and summaries

2.1 Rating distribution

myTheme <- theme(plot.title = element_text(face="bold", size = 22),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.x = element_text(size = 14, angle = 0), 
        axis.text.y = element_text(size = 16, angle = 0),
        legend.text = element_text(size = 18),
        legend.title = element_text(face = "bold", size = 18),
        strip.text.x = element_text(size = 18),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.background = element_blank(), 
        axis.line.x = element_line(colour = "black"), 
        axis.line.y = element_line(colour = "black"),
        axis.text = element_text(colour ="black"), 
        axis.ticks = element_line(colour ="black"))

tdata_long <- tdata
tdata_sub <- tdata_long


library(see)
## first, turn sID into a factor
tdata_sub$subj_code <- factor(tdata_sub$subj_code)

pd <- position_dodge(width = 0.3)

tdata_sub$valueJitter <- jitter(tdata_sub$rating_rec, factor = 0.01, amount = 0.004)

theme_set(theme_light(base_size = 20, base_family = "Poppins"))

# new labes for the facets 

g <- ggplot(tdata_sub, aes(x = prevalence, y = valueJitter)) +
  guides(fill=FALSE)+
  #facet_grid( ~ Knowledge)+
  #ggtitle("Subjects' causal srength ratings") +
  scale_y_continuous(limits = c(-5.3, 5.3), breaks=seq(-5, 5, 1), expand = c(0,0)) +
  #scale_x_discrete(labels=c("Spear and \nnet", "Feathers and \ntooth")) +
  #stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", alpha =0.5) +
  geom_violinhalf(aes(y = rating_rec, group = prevalence, fill = prevalence), color = NA, 
                  position=position_dodge(1), alpha = 0.4)+
  #geom_line(position = pd, color = "black", size = 1, alpha=0.04) +
  geom_hline(yintercept=0, linetype="dashed", color = "black")+
  geom_jitter(aes(color = prevalence), alpha = 0.5, width = 0.15, height = 0.2) +
  stat_summary(aes(y = rating_rec, group=1), fun.data = mean_cl_boot, 
               geom = "errorbar", width = 0, size = 1) +
  stat_summary(aes(y = rating_rec, group=1), fun.y=mean, geom="line", 
               color = "black", shape = 22, size = 1, alpha = .7)+
  stat_summary(aes(y = rating_rec, group=1, fill = prevalence), fun.y=mean, geom="point", 
               color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  stat_summary(aes(y = rating_rec,group=1), fun.y=median, geom="point", color = "black", shape = 3, size = 4, 
               group=1, alpha = 1, position = position_dodge(width = 0.5))+
  labs(x = "Feature rate information", y = "Explanation rating") +
  scale_color_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  annotate("text", x = 0.5, y = 3.5, label = c("broad-scope"), angle = 90)+
  annotate("text", x = 0.5, y = -3.5, label = c("narrow-scope"), angle = 90)+
  theme(legend.position = "none")+
  myTheme+
  theme(panel.grid.major = element_line(color = "lightgrey",
                                        size = 0.5,
                                        linetype = 'dotted'))+
  stat_summary(aes(label=round(after_stat(y),2)), fun.y=mean, geom="text", size=5,
             vjust = -6)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in stat_summary(aes(y = rating_rec, group = 1), fun.y = mean, geom =
## "line", : Ignoring unknown parameters: `shape`
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
g

ggsave("results_means.svg",width=4.5,height=5)
ggsave("results_means.pdf",width=4.5,height=5)
#ggsave("results_means_selection.pdf",width=11,height=5)
library(ggridges)
## Warning: Paket 'ggridges' wurde unter R Version 4.2.3 erstellt
g2 <- ggplot(tdata_long, aes(x = rating_rec, y = prevalence, fill = prevalence)) +
  scale_x_continuous(breaks = seq(-5, 5, 1))+
  geom_density_ridges(alpha = 0.5)+
   #stat_summary(aes(x = rating_rec), fun.x=mean, geom="point", 
  #             color = "black", shape = 22, size = 2, group=1, alpha = 1)+
  scale_fill_manual(values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_viridis_c(name = "Explanation \nRating", option = "C", breaks=c(-5,0,5), labels=c("narrow scope", "no preference", "broad scope"))+
  labs(x = "Explanation rating", y = "Test question") +
  scale_y_discrete(limits=rev)+
  myTheme+
  theme_light(base_family = "Poppins", base_size = 20)+
  theme(panel.grid = element_blank(), axis.text = element_text(colour ="black"))+
  theme(legend.position="none",
        legend.title=element_blank(),legend.key.width = unit(1.95, 'cm'))+
  annotate("text", y = 0.7, x = 4, label = c("broad-scope"), angle = 0)+
  annotate("text", y = 0.7, x = -4, label = c("narrow-scope"), angle = 0)+
  theme(axis.text.y = element_text(size = 14, angle = 0))

g2
## Picking joint bandwidth of 0.388

ggsave("results_dist.svg",width=7,height=5)
## Picking joint bandwidth of 0.388
ggsave("results_dist.pdf",width=6,height=5)
## Picking joint bandwidth of 0.388

3 Test difference of group means

Note that with \(N~= 560\), the sample size is at \(0.7N\) of the planned final sample size (\(N=800\)). At this point, we’re thus at the planned interim look.

The critical p-value (one-sided) at this point:

library(rpact)
## Warning: Paket 'rpact' wurde unter R Version 4.2.3 erstellt
design <- getDesignGroupSequential(
  typeOfDesign = "asP", 
  informationRates = c(0.7, 1), 
  alpha = 0.05, 
  sided = 1)
design$stageLevels * 1
## [1] 0.03948640 0.02670694

is \(p~= .0395\).

Conduct t-Test:

first, check if variances are equal or not:

(tdata_long %>%
  group_by(prevalence) %>%
  summarise_at(vars(rating_rec), list(name=var)) -> var)
## # A tibble: 2 × 2
##   prevalence  name
##   <fct>      <dbl>
## 1 omitted     1.50
## 2 presented   2.07

Variances unequal, so apply correction in T-Test function below:

t.test(rating_rec ~ prevalence, data = tdata_long, var.equal = F, alternative = "greater", paired = F) # two sided is used here to get the full confidence interval of the difference; note, however, that the hypothesis was a directed one (thus, alternative = "less" could have been used to get the one-sided p-value). 
## 
##  Welch Two Sample t-test
## 
## data:  rating_rec by prevalence
## t = 2.0871, df = 544.1, p-value = 0.01867
## alternative hypothesis: true difference in means between group omitted and group presented is greater than 0
## 95 percent confidence interval:
##  0.04963351        Inf
## sample estimates:
##   mean in group omitted mean in group presented 
##             -0.03928571             -0.27500000

empirical \(p\) is smaller than the critical \(p\) determined for the interim look. This also means that data collection can be terminated at this point.

Cohen’s d:

# Cohen's d
lsr::cohensD(tdata_long$rating_rec ~ tdata_long$prevalence)
## [1] 0.1763956

Get the values of the CIs of the means:

# check width of the means 95% cis (is not supposed to be grater than 0.5)
library(rcompanion)
ci_table <- groupwiseMean(rating_rec ~ prevalence,
              data        = tdata,
              traditional = FALSE,
              percentile  = TRUE)


ci_table
##   prevalence   n    Mean Conf.level Percentile.lower Percentile.upper
## 1    omitted 280 -0.0393       0.95           -0.186            0.100
## 2  presented 280 -0.2750       0.95           -0.450           -0.114
(ci_width <- ci_table$Percentile.upper - ci_table$Percentile.lower)
## [1] 0.286 0.336

get group medians:

# groupwiseMean(rating_rec ~ Features + Knowledge,
#               data        = tdata_long,
#               traditional = FALSE,
#               percentile  = TRUE)

groupwiseMedian(rating_rec ~ prevalence,
                data        = tdata_long,
                bca         = FALSE,
                percentile  = TRUE,
                R           = 1000)
##   prevalence   n Median Conf.level Percentile.lower Percentile.upper
## 1    omitted 280      0       0.95                0                0
## 2  presented 280      0       0.95                0                0

4 Subject categories

counts <- tdata_long %>%
  group_by(prevalence, rating_rec) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))
## `summarise()` has grouped output by 'prevalence'. You can override using the
## `.groups` argument.
counts
## # A tibble: 20 × 5
## # Groups:   prevalence [2]
##    prevalence rating_rec     n     pct lbl   
##    <fct>           <dbl> <int>   <dbl> <chr> 
##  1 omitted            -5     8 0.0286  2.86% 
##  2 omitted            -3     2 0.00714 0.71% 
##  3 omitted            -2     5 0.0179  1.79% 
##  4 omitted            -1     1 0.00357 0.36% 
##  5 omitted             0   249 0.889   88.93%
##  6 omitted             1     3 0.0107  1.07% 
##  7 omitted             2     5 0.0179  1.79% 
##  8 omitted             4     2 0.00714 0.71% 
##  9 omitted             5     5 0.0179  1.79% 
## 10 presented          -5    11 0.0393  3.93% 
## 11 presented          -4     6 0.0214  2.14% 
## 12 presented          -3     7 0.025   2.50% 
## 13 presented          -2     4 0.0143  1.43% 
## 14 presented          -1     6 0.0214  2.14% 
## 15 presented           0   235 0.839   83.93%
## 16 presented           1     1 0.00357 0.36% 
## 17 presented           2     2 0.00714 0.71% 
## 18 presented           3     3 0.0107  1.07% 
## 19 presented           4     2 0.00714 0.71% 
## 20 presented           5     3 0.0107  1.07%
# shows that 0 is the mode in all conditions
tdata_long$category[tdata_long$rating_rec < 0] <- "narrow"
## Warning: Unknown or uninitialised column: `category`.
tdata_long$category[tdata_long$rating_rec == 0] <- "unbiased"
tdata_long$category[tdata_long$rating_rec > 0] <- "broad"

counts2 <- tdata_long %>%
  group_by(prevalence, category) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))
## `summarise()` has grouped output by 'prevalence'. You can override using the
## `.groups` argument.
counts2
## # A tibble: 6 × 5
## # Groups:   prevalence [2]
##   prevalence category     n    pct lbl   
##   <fct>      <chr>    <int>  <dbl> <chr> 
## 1 omitted    broad       15 0.0536 5.36% 
## 2 omitted    narrow      16 0.0571 5.71% 
## 3 omitted    unbiased   249 0.889  88.93%
## 4 presented  broad       11 0.0393 3.9%  
## 5 presented  narrow      34 0.121  12.1% 
## 6 presented  unbiased   235 0.839  83.9%
counts2$category <- factor(counts2$category, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
counts2
## # A tibble: 6 × 5
## # Groups:   prevalence [2]
##   prevalence category        n    pct lbl   
##   <fct>      <fct>       <int>  <dbl> <chr> 
## 1 omitted    broad l.s.     15 0.0536 5.36% 
## 2 omitted    narrow l.s.    16 0.0571 5.71% 
## 3 omitted    unbiased      249 0.889  88.93%
## 4 presented  broad l.s.     11 0.0393 3.9%  
## 5 presented  narrow l.s.    34 0.121  12.1% 
## 6 presented  unbiased      235 0.839  83.9%

Get proportion CIs for different conditions:

library(PropCIs)
library(DescTools)
## 
## Attache Paket: 'DescTools'
## Das folgende Objekt ist maskiert 'package:data.table':
## 
##     %like%
library(purrr)

counts_presented <- subset(counts2, prevalence == "presented")

counts_omitted <- subset(counts2, prevalence == "omitted")


(MultinomCI(counts_omitted$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_1)
##             est     lwr.ci     upr.ci
## [1,] 0.05357143 0.02142857 0.08832704
## [2,] 0.05714286 0.02500000 0.09189847
## [3,] 0.88928571 0.85714286 0.92404133
(MultinomCI(counts_presented$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci_2)
##             est     lwr.ci     upr.ci
## [1,] 0.03928571 0.00000000 0.08102427
## [2,] 0.12142857 0.08214286 0.16316713
## [3,] 0.83928571 0.80000000 0.88102427
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2])

ci_up <- c(selection_ci_1[,3], selection_ci_2[,3])
plotdata <- counts2

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
## 
## Attache Paket: 'scales'
## Das folgende Objekt ist maskiert 'package:purrr':
## 
##     discard
## Das folgende Objekt ist maskiert 'package:readr':
## 
##     col_factor
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

# new labels for the facets 
rates.labs <- c("feature rate information: \nomitted", "feature rate information: \npresented")
names(rates.labs) <- c("omitted", "presented")


g<- ggplot(plotdata, 
       aes(x = category,
           y = pct,
           fill = prevalence)) +
  facet_grid( ~ prevalence, labeller = labeller(prevalence = rates.labs))+
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4,
            position = position_dodge(width = 1),
            vjust = -2.6) +
  scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
        legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories.svg",width=7,height=5)
ggsave("categories.pdf",width=7,height=5)

Proportion tests:

  1. Test if proportion of normative judgments is higher in “omitted”
normative <- prop.test(x = c(plotdata$n[3], plotdata$n[6]), n = c(280, 280), alternative = "greater", correct = F)
normative
## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[3], plotdata$n[6]) out of c(280, 280)
## X-squared = 2.9839, df = 1, p-value = 0.04205
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.002516301 1.000000000
## sample estimates:
##    prop 1    prop 2 
## 0.8892857 0.8392857
  1. Test if proportion of narrow latent scope biases is higher in “presented”
narrowlsb <- prop.test(x = c(plotdata$n[5], plotdata$n[2]), n = c(280, 280), alternative = "greater", correct = F)
narrowlsb
## 
##  2-sample test for equality of proportions without continuity correction
## 
## data:  c(plotdata$n[5], plotdata$n[2]) out of c(280, 280)
## X-squared = 7.1153, df = 1, p-value = 0.003821
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.02489729 1.00000000
## sample estimates:
##     prop 1     prop 2 
## 0.12142857 0.05714286

5 Remaining results (additional test cases)

Blue feet probe:

counts_bluefeet <- tdata_long %>%
  group_by(bluefeet_choice) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts_bluefeet
## # A tibble: 7 × 4
##   bluefeet_choice                                                n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has PIX67 mutation (blue beak and blue feet) an…     3 0.00536 0.54%
## 2 This duck has a NAX20 mutation (blue feet).                  547 0.977   97.6…
## 3 This duck has a PIX67 mutation (blue beak and blue feet) …     1 0.00179 0.18%
## 4 This duck has a PIX67 mutation (blue beak and blue feet).      3 0.00536 0.54%
## 5 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…     1 0.00179 0.18%
## 6 This duck has a TOX20 mutation (blue beak).                    2 0.00357 0.36%
## 7 This duck has no mutation affecting beak and feet color.       3 0.00536 0.54%
counts_bluefeet$bluefeet_choice <- factor(counts_bluefeet$bluefeet_choice, 
                                          levels = c("This duck has no mutation affecting beak and feet color.",
                                                     "This duck has a TOX20 mutation (blue beak).",
                                                     "This duck has a NAX20 mutation (blue feet).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet).",
                                                     "This duck has a TOX20 mutation (blue beak) and a NAX20 mutation (blue feet).",
                                                     "This duck has PIX67 mutation (blue beak and blue feet) and a TOX20 mutation (blue beak).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet)."
                                                     ),
                                          labels = c("no mutation", "mutation affecting \nonly beak", "mutation affecting \nonly feet",
                                                     "mutation affecting \nbeak and feet", 
                                                     "mutation affecting \nonly beak + \nmutation affecting \nonly feet", 
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly beak",
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly feet"))
library(PropCIs)
library(DescTools)
library(purrr)

(MultinomCI(counts_bluefeet$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci)
##              est    lwr.ci     upr.ci
## [1,] 0.005357143 0.0000000 0.01737179
## [2,] 0.976785714 0.9678571 0.98880036
## [3,] 0.001785714 0.0000000 0.01380036
## [4,] 0.005357143 0.0000000 0.01737179
## [5,] 0.001785714 0.0000000 0.01380036
## [6,] 0.003571429 0.0000000 0.01558607
## [7,] 0.005357143 0.0000000 0.01737179
ci_low <- c(selection_ci[,2])

ci_up <- c(selection_ci[,3])
plotdata <- counts_bluefeet

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = bluefeet_choice,
           y = pct,
           fill = bluefeet_choice)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels=c("BkFt2", "Ft1")) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4,
            position = position_dodge(width = 1),
            vjust = -1.5) +
  scale_fill_manual(name = "Selected explanation",values=c("#66c2a5", "#e78ac3", "#e6ab02", "#8da0cb", "#a6d854", "#a65628", "#e41a1c"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "right", axis.title = element_blank(), axis.text.y = element_text(size = 14, color = "black"),
        axis.text.x = element_blank(),
        legend.text = element_text(size = 12),legend.title = element_text(size = 13), legend.spacing.y = unit(0.3, 'cm'))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.x = element_blank())+
    guides(fill = guide_legend(byrow = TRUE))

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_bluefeet.svg",width=8,height=5)
ggsave("categories_bluefeet.pdf",width=8,height=5)

Blue beak probe:

counts_bluebeak <- tdata_long %>%
  group_by(bluebeak_choice) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct, accuracy = 0.1))

counts_bluebeak
## # A tibble: 5 × 4
##   bluebeak_choice                                                n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has a NAX20 mutation (blue feet).                   12 0.0214  2.1% 
## 2 This duck has a PIX67 mutation (blue beak and blue feet).      1 0.00179 0.2% 
## 3 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…     4 0.00714 0.7% 
## 4 This duck has a TOX20 mutation (blue beak).                  540 0.964   96.4%
## 5 This duck has no mutation affecting beak and feet color.       3 0.00536 0.5%
# add the missing categories to the data frame (to have them in the plot later)

counts_bluebeak[nrow(counts_bluebeak) + 1,] <- list("This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet).", 0, 0.00, "0%")


counts_bluebeak[nrow(counts_bluebeak) + 1,] <- list("This duck has PIX67 mutation (blue beak and blue feet) and a TOX20 mutation (blue beak).", 0, 0.00, "0%")


counts_bluebeak
## # A tibble: 7 × 4
##   bluebeak_choice                                                n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has a NAX20 mutation (blue feet).                   12 0.0214  2.1% 
## 2 This duck has a PIX67 mutation (blue beak and blue feet).      1 0.00179 0.2% 
## 3 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…     4 0.00714 0.7% 
## 4 This duck has a TOX20 mutation (blue beak).                  540 0.964   96.4%
## 5 This duck has no mutation affecting beak and feet color.       3 0.00536 0.5% 
## 6 This duck has a PIX67 mutation (blue beak and blue feet) …     0 0       0%   
## 7 This duck has PIX67 mutation (blue beak and blue feet) an…     0 0       0%
counts_bluebeak$bluebeak_choice <- factor(counts_bluebeak$bluebeak_choice, 
                                          levels = c("This duck has no mutation affecting beak and feet color.",
                                                     "This duck has a TOX20 mutation (blue beak).",
                                                     "This duck has a NAX20 mutation (blue feet).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet).",
                                                     "This duck has a TOX20 mutation (blue beak) and a NAX20 mutation (blue feet).",
                                                     "This duck has PIX67 mutation (blue beak and blue feet) and a TOX20 mutation (blue beak).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet)."
                                                     ),
                                          labels = c("no mutation", "mutation affecting \nonly beak", "mutation affecting \nonly feet",
                                                     "mutation affecting \nbeak and feet", 
                                                     "mutation affecting \nonly beak + \nmutation affecting \nonly feet", 
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly beak",
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly feet"))
library(PropCIs)
library(DescTools)
library(purrr)

(MultinomCI(counts_bluebeak$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci)
##              est      lwr.ci     upr.ci
## [1,] 0.021428571 0.008928571 0.03588146
## [2,] 0.001785714 0.000000000 0.01623860
## [3,] 0.007142857 0.000000000 0.02159574
## [4,] 0.964285714 0.951785714 0.97873860
## [5,] 0.005357143 0.000000000 0.01981003
## [6,] 0.000000000 0.000000000 0.01445289
## [7,] 0.000000000 0.000000000 0.01445289
ci_low <- c(selection_ci[,2])

ci_up <- c(selection_ci[,3])
plotdata <- counts_bluebeak

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = bluebeak_choice,
           y = pct,
           fill = bluebeak_choice)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels=c("Bk1", "Ft1")) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4,
            position = position_dodge(width = 1),
            vjust = -1.5) +
  scale_fill_manual(name = "Selected explanation",values=c("#66c2a5", "#e78ac3", "#e6ab02", "#8da0cb", "#a6d854", "#a65628", "#e41a1c"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "right", axis.title = element_blank(), axis.text.y = element_text(size = 14, color = "black"),
        axis.text.x = element_blank(),
        legend.text = element_text(size = 12),legend.title = element_text(size = 13), legend.spacing.y = unit(0.3, 'cm'))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.x = element_blank())+
    guides(fill = guide_legend(byrow = TRUE))

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_bluebeak.svg",width=8,height=5)
ggsave("categories_bluebeak.pdf",width=8,height=5)

typical duck probe:

counts_typical <- tdata_long %>%
  group_by(typical_choice) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts_typical
## # A tibble: 5 × 4
##   typical_choice                                                 n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has PIX67 mutation (blue beak and blue feet) an…     2 0.00357 0.36%
## 2 This duck has a NAX20 mutation (blue feet).                    7 0.0125  1.25%
## 3 This duck has a PIX67 mutation (blue beak and blue feet).      1 0.00179 0.18%
## 4 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…     2 0.00357 0.36%
## 5 This duck has no mutation affecting beak and feet color.     548 0.979   97.8…
# add the missing categories to the data frame (to have them in the plot later)


counts_typical[nrow(counts_typical) + 1,] <- list("This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet).", 0, 0.00, "0%")

counts_typical[nrow(counts_typical) + 1,] <- list("This duck has a TOX20 mutation (blue beak).", 0, 0.00, "0%")


counts_typical
## # A tibble: 7 × 4
##   typical_choice                                                 n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has PIX67 mutation (blue beak and blue feet) an…     2 0.00357 0.36%
## 2 This duck has a NAX20 mutation (blue feet).                    7 0.0125  1.25%
## 3 This duck has a PIX67 mutation (blue beak and blue feet).      1 0.00179 0.18%
## 4 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…     2 0.00357 0.36%
## 5 This duck has no mutation affecting beak and feet color.     548 0.979   97.8…
## 6 This duck has a PIX67 mutation (blue beak and blue feet) …     0 0       0%   
## 7 This duck has a TOX20 mutation (blue beak).                    0 0       0%
counts_typical$typical_choice <- factor(counts_typical$typical_choice, 
                                          levels = c("This duck has no mutation affecting beak and feet color.",
                                                     "This duck has a TOX20 mutation (blue beak).",
                                                     "This duck has a NAX20 mutation (blue feet).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet).",
                                                     "This duck has a TOX20 mutation (blue beak) and a NAX20 mutation (blue feet).",
                                                     "This duck has PIX67 mutation (blue beak and blue feet) and a TOX20 mutation (blue beak).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet)."
                                                     ),
                                          labels = c("no mutation", "mutation affecting \nonly beak", "mutation affecting \nonly feet",
                                                     "mutation affecting \nbeak and feet", 
                                                     "mutation affecting \nonly beak + \nmutation affecting \nonly feet", 
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly beak",
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly feet"))


counts_typical
## # A tibble: 7 × 4
##   typical_choice                                                 n     pct lbl  
##   <fct>                                                      <int>   <dbl> <chr>
## 1 "mutation affecting \nbeak and feet + \nmutation affectin…     2 0.00357 0.36%
## 2 "mutation affecting \nonly feet"                               7 0.0125  1.25%
## 3 "mutation affecting \nbeak and feet"                           1 0.00179 0.18%
## 4 "mutation affecting \nonly beak + \nmutation affecting \n…     2 0.00357 0.36%
## 5 "no mutation"                                                548 0.979   97.8…
## 6 "mutation affecting \nbeak and feet + \nmutation affectin…     0 0       0%   
## 7 "mutation affecting \nonly beak"                               0 0       0%
library(PropCIs)
library(DescTools)
library(purrr)

(MultinomCI(counts_typical$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci)
##              est      lwr.ci     upr.ci
## [1,] 0.003571429 0.000000000 0.01513640
## [2,] 0.012500000 0.003571429 0.02406498
## [3,] 0.001785714 0.000000000 0.01335069
## [4,] 0.003571429 0.000000000 0.01513640
## [5,] 0.978571429 0.969642857 0.99013640
## [6,] 0.000000000 0.000000000 0.01156498
## [7,] 0.000000000 0.000000000 0.01156498
ci_low <- c(selection_ci[,2])

ci_up <- c(selection_ci[,3])
plotdata <- counts_typical

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = typical_choice,
           y = pct,
           fill = typical_choice)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels=c("No mutation")) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4,
            position = position_dodge(width = 1),
            vjust = -1.5) +
  scale_fill_manual(name = "Selected explanation",values=c("#66c2a5", "#e78ac3", "#e6ab02", "#8da0cb", "#a6d854", "#a65628", "#e41a1c"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "right", axis.title = element_blank(), axis.text.y = element_text(size = 14, color = "black"),
        axis.text.x = element_blank(),
        legend.text = element_text(size = 12),legend.title = element_text(size = 13), legend.spacing.y = unit(0.3, 'cm'))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.x = element_blank())+
    guides(fill = guide_legend(byrow = TRUE))

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_typical.svg",width=8,height=5)
ggsave("categories_typical.pdf",width=8,height=5)
counts_both <- tdata_long %>%
  group_by(bluefeet_bluebeak_choice) %>%
  summarize(n = n()) %>% 
  mutate(pct = n/sum(n),
         lbl = scales::percent(pct))

counts_both 
## # A tibble: 7 × 4
##   bluefeet_bluebeak_choice                                       n     pct lbl  
##   <chr>                                                      <int>   <dbl> <chr>
## 1 This duck has PIX67 mutation (blue beak and blue feet) an…    22 0.0393  3.93%
## 2 This duck has a NAX20 mutation (blue feet).                    2 0.00357 0.36%
## 3 This duck has a PIX67 mutation (blue beak and blue feet) …    29 0.0518  5.18%
## 4 This duck has a PIX67 mutation (blue beak and blue feet).    485 0.866   86.6…
## 5 This duck has a TOX20 mutation (blue beak) and a NAX20 mu…    20 0.0357  3.57%
## 6 This duck has a TOX20 mutation (blue beak).                    1 0.00179 0.18%
## 7 This duck has no mutation affecting beak and feet color.       1 0.00179 0.18%
counts_both$bluefeet_bluebeak_choice <- factor(counts_both$bluefeet_bluebeak_choice, 
                                          levels = c("This duck has no mutation affecting beak and feet color.",
                                                     "This duck has a TOX20 mutation (blue beak).",
                                                     "This duck has a NAX20 mutation (blue feet).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet).",
                                                     "This duck has a TOX20 mutation (blue beak) and a NAX20 mutation (blue feet).",
                                                     "This duck has PIX67 mutation (blue beak and blue feet) and a TOX20 mutation (blue beak).",
                                                     "This duck has a PIX67 mutation (blue beak and blue feet) and NAX20 mutation (blue feet)."
                                                     ),
                                          labels = c("no mutation", "mutation affecting \nonly beak", "mutation affecting \nonly feet",
                                                     "mutation affecting \nbeak and feet", 
                                                     "mutation affecting \nonly beak + \nmutation affecting \nonly feet", 
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly beak",
                                                     "mutation affecting \nbeak and feet + \nmutation affecting \nonly feet"))


counts_both
## # A tibble: 7 × 4
##   bluefeet_bluebeak_choice                                       n     pct lbl  
##   <fct>                                                      <int>   <dbl> <chr>
## 1 "mutation affecting \nbeak and feet + \nmutation affectin…    22 0.0393  3.93%
## 2 "mutation affecting \nonly feet"                               2 0.00357 0.36%
## 3 "mutation affecting \nbeak and feet + \nmutation affectin…    29 0.0518  5.18%
## 4 "mutation affecting \nbeak and feet"                         485 0.866   86.6…
## 5 "mutation affecting \nonly beak + \nmutation affecting \n…    20 0.0357  3.57%
## 6 "mutation affecting \nonly beak"                               1 0.00179 0.18%
## 7 "no mutation"                                                  1 0.00179 0.18%
library(PropCIs)
library(DescTools)
library(purrr)

(MultinomCI(counts_both$n,
           conf.level=0.95,
           method="sisonglaz") -> selection_ci)
##              est     lwr.ci     upr.ci
## [1,] 0.039285714 0.01428571 0.06574373
## [2,] 0.003571429 0.00000000 0.03002944
## [3,] 0.051785714 0.02678571 0.07824373
## [4,] 0.866071429 0.84107143 0.89252944
## [5,] 0.035714286 0.01071429 0.06217230
## [6,] 0.001785714 0.00000000 0.02824373
## [7,] 0.001785714 0.00000000 0.02824373
ci_low <- c(selection_ci[,2])

ci_up <- c(selection_ci[,3])
plotdata <- counts_both

plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up

Plot:

library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))

g<- ggplot(plotdata, 
       aes(x = bluefeet_bluebeak_choice,
           y = pct,
           fill = bluefeet_bluebeak_choice)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_y_continuous(limits = seq(0, 2),
                     breaks = seq(0, 1, .25),
                     expand = c(0,0),
                     label = percent) +
  #scale_x_discrete(labels=c("BkFt2", "Bk1 + Ft1", "Bk1", "BkFt2 + Ft1", "BkFt2")) +
  #coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
  #coord_cartesian(clip = "off")+
  geom_text(aes(label = lbl), 
            size = 4,
            position = position_dodge(width = 1),
            vjust = -1.5) +
  scale_fill_manual(name = "Selected explanation",values=c("#66c2a5", "#e78ac3", "#e6ab02", "#8da0cb", "#a6d854", "#a65628", "#e41a1c"))+
  #scale_fill_brewer(palette = "Pastel1") +
  labs(y = "Percentage", 
       fill = "Explanatory preference",
       x = "Explanatory preference")+
  geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
  #annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
  #annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct, 
   #        ymin = plotdata$ci_low, 
    #       ymax = plotdata$ci_up, 
     #      colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
  #annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
  theme(legend.position = "right", axis.title = element_blank(), axis.text.y = element_text(size = 14, color = "black"),
        axis.text.x = element_blank(),
        legend.text = element_text(size = 12),legend.title = element_text(size = 13), legend.spacing.y = unit(0.3, 'cm'))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.x = element_blank())+
    guides(fill = guide_legend(byrow = TRUE))

g

#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_both.svg",width=8,height=5)
ggsave("categories_both.pdf",width=8,height=5)