library(readr)
explan_data <- read_delim("subjects_explanations_analysis_main.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE)
## Rows: 80 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (5): subj_code, explanation, rating_rec, Features, Knowledge
## dbl (7): different explanation, no explanation or unclear, misunderstood sce...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(explan_data)[3] <- "different_explanation" # subjects who gave an explanation that seemed to have nothing to do with visibility
colnames(explan_data)[4] <- "no_explanation" # subjects who either gave no explanation for what they would do, or their explanation was unclear
colnames(explan_data)[5] <- "misunderstood_scenario" # subjects who wrote something that indicates that they didn't understand the scenario correctly
colnames(explan_data)[6] <- "equally_likely" # subjects who wrote explanations saying that 50:50 would have been their preferred answer
colnames(explan_data)[7] <- "absence_because_visible" # subjects who inferred the absence of the latent effect because it should have been visible
colnames(explan_data)[8] <- "absence_because_description" # subjects who inferred the absence of the latent effect for any other reason
colnames(explan_data)[9] <- "inferred_absence" # overall number of subjects who inferred the absence of the latent feature
# "nm" below means "not meet"; expressing the the criterion is not fulfilled
explan_data$different_explanation[explan_data$different_explanation == 1] <- "different_explanation"
explan_data$different_explanation[is.na(explan_data$different_explanation)] <- "nm"
explan_data$different_explanation <- factor(explan_data$different_explanation, levels = c("nm", "different_explanation"),
labels = c("nm", "different explanation"))
explan_data$no_explanation[explan_data$no_explanation == 1] <- "no_explan"
explan_data$no_explanation[is.na(explan_data$no_explanation)] <- "nm"
explan_data$no_explanation <- factor(explan_data$no_explanation, levels = c("nm", "no_explan"),
labels = c("nm", "no explanation or unclear"))
explan_data$misunderstood_scenario[explan_data$misunderstood_scenario == 1] <- "misunderstood"
explan_data$misunderstood_scenario[is.na(explan_data$misunderstood_scenario)] <- "nm"
explan_data$misunderstood_scenario <- factor(explan_data$misunderstood_scenario, levels = c("nm", "misunderstood"),
labels = c("nm", "misunderstood scenario"))
explan_data$equally_likely[explan_data$equally_likely == 1] <- "equally_likely"
explan_data$equally_likely[is.na(explan_data$equally_likely)] <- "nm"
explan_data$equally_likely <- factor(explan_data$equally_likely, levels = c("nm", "equally_likely"),
labels = c("nm", "equally likely"))
explan_data$absence_because_visible[explan_data$absence_because_visible == 1] <- "yes"
explan_data$absence_because_visible[is.na(explan_data$absence_because_visible)] <- "nm"
explan_data$absence_because_visible <- factor(explan_data$absence_because_visible, levels = c("nm", "yes"),
labels = c("nm", "absence because it should be visible"))
explan_data$absence_because_description[explan_data$absence_because_description == 1] <- "yes"
explan_data$absence_because_description[is.na(explan_data$absence_because_description)] <- "nm"
explan_data$absence_because_description <- factor(explan_data$absence_because_description, levels = c("nm", "yes"),
labels = c("nm", "absence given the description"))
explan_data$inferred_absence[explan_data$inferred_absence == 1] <- "yes"
explan_data$inferred_absence[is.na(explan_data$inferred_absence)] <- "nm"
explan_data$inferred_absence <- factor(explan_data$inferred_absence, levels = c("nm", "yes"),
labels = c("nm", "inferred absence"))
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, no_explanation) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
plotdata_between
## # A tibble: 4 × 5
## # Groups: Features [2]
## Features no_explanation n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth nm 23 0.575 57%
## 2 featherTooth no explanation or unclear 17 0.425 42%
## 3 spearNet nm 29 0.725 72%
## 4 spearNet no explanation or unclear 11 0.275 28%
plotdata_sub <- subset(plotdata_between, no_explanation == "no explanation or unclear")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = no_explanation)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Inferred state",
x = "Features",
title = "Subjects' inference about latent feature (overall)")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 1.978, df = 1, p-value = 0.1596
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.05643688 0.35643688
## sample estimates:
## prop 1 prop 2
## 0.425 0.275
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, equally_likely) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
plotdata_between
## # A tibble: 4 × 5
## # Groups: Features [2]
## Features equally_likely n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth nm 24 0.6 60%
## 2 featherTooth equally likely 16 0.4 40%
## 3 spearNet nm 27 0.675 68%
## 4 spearNet equally likely 13 0.325 32%
plotdata_sub <- subset(plotdata_between, equally_likely == "equally likely")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = equally_likely)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1", labels = c("nm", "'equally likely option'")) +
labs(y = "Percentage",
fill = "Categorization of \nsubjects' explanations",
x = "Features")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("preferred_answer.svg",width=6,height=5)
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "greater", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 0.48682, df = 1, p-value = 0.2427
## alternative hypothesis: greater
## 95 percent confidence interval:
## -0.101271 1.000000
## sample estimates:
## prop 1 prop 2
## 0.400 0.325
Because of its visibility:
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, Knowledge, absence_because_visible) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features', 'Knowledge'. You can override
## using the `.groups` argument.
plotdata_between
## # A tibble: 4 × 6
## # Groups: Features, Knowledge [2]
## Features Knowledge absence_because_visible n pct lbl
## <chr> <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth notKnow nm 38 0.95 95%
## 2 featherTooth notKnow absence because it should be visible 2 0.05 5%
## 3 spearNet notKnow nm 27 0.675 68%
## 4 spearNet notKnow absence because it should be visible 13 0.325 32%
plotdata_sub <- subset(plotdata_between, absence_because_visible == "absence because it should be visible")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = absence_because_visible)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Inferred state",
x = "Features",
title = "Subjects' inference about latent feature")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_because_visible.svg",width=6,height=5)
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "less", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 9.9282, df = 1, p-value = 0.0008138
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.0000000 -0.1406458
## sample estimates:
## prop 1 prop 2
## 0.050 0.325
Inferred absence for any other reason:
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, absence_because_description) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
plotdata_between
## # A tibble: 3 × 5
## # Groups: Features [2]
## Features absence_because_description n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth nm 40 1 100%
## 2 spearNet nm 36 0.9 90%
## 3 spearNet absence given the description 4 0.1 10%
plotdata_sub <- subset(plotdata_between, absence_because_description == "absence given the description")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = absence_because_description)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Inferred state",
x = "Features",
title = "Subjects' inference about latent feature 2")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_because_description.svg",width=6,height=5)
prop.test(x = c(0, plotdata$n[3]), n = c(40, 40), alternative = "less", correct = F)
## Warning in prop.test(x = c(0, plotdata$n[3]), n = c(40, 40), alternative =
## "less", : Chi-Quadrat-Approximation kann inkorrekt sein
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(0, plotdata$n[3]) out of c(40, 40)
## X-squared = 4.2105, df = 1, p-value = 0.02009
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.00000000 -0.02197774
## sample estimates:
## prop 1 prop 2
## 0.0 0.1
Inferred Absence of latent feature overall:
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, inferred_absence) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
plotdata_between
## # A tibble: 4 × 5
## # Groups: Features [2]
## Features inferred_absence n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth nm 37 0.925 92%
## 2 featherTooth inferred absence 3 0.075 8%
## 3 spearNet nm 24 0.6 60%
## 4 spearNet inferred absence 16 0.4 40%
plotdata_sub <- subset(plotdata_between, inferred_absence == "inferred absence")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = inferred_absence)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Inferred state",
x = "Features",
title = "Subjects' inference about latent feature (overall)")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "less", correct = F)
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 11.665, df = 1, p-value = 0.0003184
## alternative hypothesis: less
## 95 percent confidence interval:
## -1.0000000 -0.1803429
## sample estimates:
## prop 1 prop 2
## 0.075 0.400
# create a summary dataset that also contains the percentages
plotdata_between <- explan_data %>%
group_by(Features, different_explanation) %>%
summarize(n = n()) %>%
mutate(pct = n/sum(n),
lbl = scales::percent(pct))
## `summarise()` has grouped output by 'Features'. You can override using the
## `.groups` argument.
plotdata_between
## # A tibble: 4 × 5
## # Groups: Features [2]
## Features different_explanation n pct lbl
## <chr> <fct> <int> <dbl> <chr>
## 1 featherTooth nm 34 0.85 85%
## 2 featherTooth different explanation 6 0.15 15%
## 3 spearNet nm 39 0.975 98%
## 4 spearNet different explanation 1 0.025 2%
plotdata_sub <- subset(plotdata_between, different_explanation == "different_explanation")
plotdata <- plotdata_between
g<- ggplot(plotdata,
aes(x = Features,
y = pct,
fill = different_explanation)) +
#facet_grid( ~ Features)+
geom_bar(stat = "identity",
position = "fill") +
scale_y_continuous(limits = seq(0, 2),
breaks = seq(0, 1, .25),
expand = c(0,0),
label = percent) +
#scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'"))+
coord_cartesian(xlim =c(1, 2), ylim = c(0, 1.1))+
#coord_cartesian(clip = "off")+
geom_text(aes(label = lbl),
size = 4.5,
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette = "Pastel1") +
labs(y = "Percentage",
fill = "Inferred state",
x = "Features",
title = "Subjects' inference about latent feature (overall)")+
#annotate(geom = "hline",yintercept = 0.5, y = 0.5, color = "black", size = 1, linetype='dotted')+
#annotate("pointrange", x = plotdata_sub$Transformation, y = 1 - plotdata_sub$pct,
# ymin = 1 - plotdata_sub$pct - plotdata_sub$CI,
# ymax = 1- plotdata_sub$pct + plotdata_sub$CI,
# colour = "black", size = 0.8, shape = 22, fill = "lightblue", fatten = 1)+
#annotate("text", x = pvalues_x, y = Inf, label = pvalues, size = 4, vjust = 1.8)+
theme(legend.position = "top", axis.title = element_text(size = 15), axis.text = element_text(size = 13, color = "black"),
legend.text = element_text(size = 13),legend.title = element_text(size = 13))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g
#ggsave("selections.pdf",width=6,height=5)
#ggsave("absence_overall.svg",width=6,height=5)
prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), alternative = "two.sided", correct = F)
## Warning in prop.test(x = c(plotdata$n[2], plotdata$n[4]), n = c(40, 40), :
## Chi-Quadrat-Approximation kann inkorrekt sein
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(plotdata$n[2], plotdata$n[4]) out of c(40, 40)
## X-squared = 3.9139, df = 1, p-value = 0.04789
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.004229395 0.245770605
## sample estimates:
## prop 1 prop 2
## 0.150 0.025