tdata <- read_csv("exp_data.csv")
## Rows: 160 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): subj_code, desktop_conf, attent_conf, scale_orient, Scenario, expl...
## dbl (4): DV_rating, narrow_check, broad_check, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# demographics
min(tdata$age)
## [1] 20
max(tdata$age)
## [1] 72
mean(tdata$age)
## [1] 39.08125
sd(tdata$age)
## [1] 12.99006
# 1 = male, 2 = female, 3 = other
table(tdata$gender)
##
## 1: male 2: female
## 89 71
1 = male, 2 = female, 3 = non-binary, 4 = prefer not to say
##
## noInfo notKnow
## featherTooth 40 40
## spearNet 40 40
tdata_main <- subset(tdata_long, Knowledge == "'you don't know'")
tdata_supp <- subset(tdata_long, Knowledge == "no information")
counts <- tdata_main %>%
group_by(Features, rating_rec) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
counts
## # A tibble: 4 × 5
## # Groups: Features [2]
## Features rating_rec n pct lbl
## <fct> <fct> <int> <dbl> <chr>
## 1 "feature diagnosability: \nboth similar" broad 8 0.2 20%
## 2 "feature diagnosability: \nboth similar" narrow 32 0.8 80%
## 3 "feature diagnosability: \nlatent feature harder" broad 3 0.075 8%
## 4 "feature diagnosability: \nlatent feature harder" narrow 37 0.925 92%
counts$category <- factor(counts$rating_rec, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
Get proportion CIs for different categories in each diagnosability condition:
library(PropCIs)
library(DescTools)
##
## Attache Paket: 'DescTools'
## Das folgende Objekt ist maskiert 'package:data.table':
##
## %like%
library(purrr)
counts_spear <- subset(counts, Features == "feature diagnosability: \nboth similar")
counts_feather <- subset(counts, Features == "feature diagnosability: \nlatent feature harder")
(MultinomCI(counts_spear$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_1)
## est lwr.ci upr.ci
## [1,] 0.2 0.1 0.3272464
## [2,] 0.8 0.7 0.9272464
(MultinomCI(counts_feather$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_2)
## est lwr.ci upr.ci
## [1,] 0.075 0.025 0.159792
## [2,] 0.925 0.875 1.000000
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2])
ci_up <- c(selection_ci_1[,3], selection_ci_2[,3])
plotdata <- counts
plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up
Plot:
library(scales)
##
## Attache Paket: 'scales'
## Das folgende Objekt ist maskiert 'package:purrr':
##
## discard
## Das folgende Objekt ist maskiert 'package:readr':
##
## col_factor
theme_set(theme_light(base_size = 12, base_family = "Poppins"))
g<- ggplot(plotdata,
aes(x = category,
y = pct,
fill = Features)) +
facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "dodge") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 3.5,
position = position_dodge(width = 1),
vjust = -5.5) +
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
#scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Explanatory preference",
x = "Explanatory preference")+
geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct,
# ymin = plotdata$ci_low,
# ymax = plotdata$ci_up,
# colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections_between.pdf",width=6,height=5)
ggsave("categories_main.svg",width=7,height=5)
ggsave("categories_main.pdf",width=7,height=5)
Tests against chance:
(t2 <- binom.test(plotdata$n[2], 40))
##
## Exact binomial test
##
## data: plotdata$n[2] and 40
## number of successes = 32, number of trials = 40, p-value = 0.0001822
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
## 0.6435220 0.9094776
## sample estimates:
## probability of success
## 0.8
(t4 <- binom.test(plotdata$n[4], 40))
##
## Exact binomial test
##
## data: plotdata$n[4] and 40
## number of successes = 37, number of trials = 40, p-value = 1.947e-08
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
## 0.7961353 0.9842578
## sample estimates:
## probability of success
## 0.925
(ci_low <- c(t2$conf.int[1], t4$conf.int[1]))
## [1] 0.6435220 0.7961353
(ci_up <- c(t2$conf.int[2], t4$conf.int[2]))
## [1] 0.9094776 0.9842578
(p_values <- c(t2$p.value, t4$p.value))
## [1] 1.821658e-04 1.946501e-08
Test if proportion of narrow latent scope biases is different between conditions:
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 2.635, df = 1, p-value = 0.1045
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.27341956 0.02341956
## sample estimates:
## prop 1 prop 2
## 0.800 0.925
The additional between-subjects condition was one in which the unobserved feature wasn’t mentioned at all in the test query. In what follows, the graphs from above will be recreated and the additional condition will be included.
counts <- tdata_long %>%
group_by(Knowledge, Features, rating_rec) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Knowledge', 'Features'. You can override
## using the `.groups` argument.
counts
## # A tibble: 8 × 6
## # Groups: Knowledge, Features [4]
## Knowledge Features rating_rec n pct lbl
## <fct> <fct> <fct> <int> <dbl> <chr>
## 1 'you don't know' "feature diagnosability: \nboth… broad 8 0.2 20%
## 2 'you don't know' "feature diagnosability: \nboth… narrow 32 0.8 80%
## 3 'you don't know' "feature diagnosability: \nlate… broad 3 0.075 8%
## 4 'you don't know' "feature diagnosability: \nlate… narrow 37 0.925 92%
## 5 no information "feature diagnosability: \nboth… broad 5 0.125 12%
## 6 no information "feature diagnosability: \nboth… narrow 35 0.875 88%
## 7 no information "feature diagnosability: \nlate… broad 7 0.175 18%
## 8 no information "feature diagnosability: \nlate… narrow 33 0.825 82%
counts$category <- factor(counts$rating_rec, levels = c("unbiased", "narrow", "broad"), labels = c("unbiased", "narrow l.s.", "broad l.s."))
counts
## # A tibble: 8 × 7
## # Groups: Knowledge, Features [4]
## Knowledge Features rating_rec n pct lbl category
## <fct> <fct> <fct> <int> <dbl> <chr> <fct>
## 1 'you don't know' "feature diagnosabilit… broad 8 0.2 20% broad l…
## 2 'you don't know' "feature diagnosabilit… narrow 32 0.8 80% narrow …
## 3 'you don't know' "feature diagnosabilit… broad 3 0.075 8% broad l…
## 4 'you don't know' "feature diagnosabilit… narrow 37 0.925 92% narrow …
## 5 no information "feature diagnosabilit… broad 5 0.125 12% broad l…
## 6 no information "feature diagnosabilit… narrow 35 0.875 88% narrow …
## 7 no information "feature diagnosabilit… broad 7 0.175 18% broad l…
## 8 no information "feature diagnosabilit… narrow 33 0.825 82% narrow …
Get proportion CIs for different categories in each diagnosability condition:
library(PropCIs)
library(DescTools)
library(purrr)
counts_dontknow_spear <- subset(counts, Knowledge == "'you don't know'" & Features == "feature diagnosability: \nboth similar")
counts_dontknow_feather <- subset(counts, Knowledge == "'you don't know'" & Features == "feature diagnosability: \nlatent feature harder")
counts_noinf_spear <- subset(counts, Knowledge == "no information" & Features == "feature diagnosability: \nboth similar")
counts_noinf_feather <- subset(counts, Knowledge == "no information" & Features == "feature diagnosability: \nlatent feature harder")
(MultinomCI(counts_dontknow_spear$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_1)
## est lwr.ci upr.ci
## [1,] 0.2 0.1 0.3272464
## [2,] 0.8 0.7 0.9272464
(MultinomCI(counts_dontknow_feather$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_2)
## est lwr.ci upr.ci
## [1,] 0.075 0.025 0.159792
## [2,] 0.925 0.875 1.000000
(MultinomCI(counts_noinf_spear$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_3)
## est lwr.ci upr.ci
## [1,] 0.125 0.05 0.2293201
## [2,] 0.875 0.80 0.9793201
(MultinomCI(counts_noinf_feather$n,
conf.level=0.95,
method="sisonglaz") -> selection_ci_4)
## est lwr.ci upr.ci
## [1,] 0.175 0.075 0.2868133
## [2,] 0.825 0.725 0.9368133
ci_low <- c(selection_ci_1[,2], selection_ci_2[,2], selection_ci_3[,2], selection_ci_4[,2])
ci_up <- c(selection_ci_1[,3], selection_ci_2[,3], selection_ci_3[,3], selection_ci_4[,3])
plotdata <- counts
plotdata$ci_low <- ci_low
plotdata$ci_up <- ci_up
Plot:
library(scales)
theme_set(theme_light(base_size = 12, base_family = "Poppins"))
g<- ggplot(plotdata,
aes(x = category,
y = pct,
fill = Features)) +
facet_grid(Knowledge ~ Features)+
geom_bar(stat = "identity",
position = "dodge") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#coord_cartesian(xlim =c(1, 7), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 3.5,
position = position_dodge(width = 1),
vjust = -5.5) +
scale_fill_manual(name = "Strength",values=c("#66c2a5", "#e78ac3", "#8da0cb", "#a6d854"))+
#scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Explanatory preference",
x = "Explanatory preference")+
geom_pointrange(ymin = ci_low, ymax = ci_up, position = position_dodge(width = 0.89), shape = 22, size = 0.3)+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata$Transformation, y = plotdata$pct,
# ymin = plotdata$ci_low,
# ymax = plotdata$ci_up,
# colour = "black", size = 0.8, shape = 22, fill = Transformation, fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "none", axis.title = element_text(size = 20), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13),strip.text.x = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g