# Load the coded subject explanations from the semicolon-delimited file.
# Note that two rows appear twice in the raw file, so the duplicates are
# dropped: only the first row per subject code (`subj_code`) is kept, with all
# of its columns.
explan_data <- read_delim(
  "subject_explanations.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
) %>%
  distinct(subj_code, .keep_all = TRUE)
# Note: Subjects’ explanations were coded in the last columns (each column
# name describes the coded criterion). A value of 1 means that the coding
# criterion is (clearly) met; a value of 0 means that it is not.
# Convert the explanation coding columns into factors so that they are
# treated as categorical variables in the summaries and plots.
# NOTE(review): `explan_data` is replaced by `tdata_long` here, which discards
# the data frame previously stored under that name. `tdata_long` is defined
# outside this section -- confirm that it is the intended source.
explan_data <- tdata_long

coding_cols <- c(
  "Correct explanation",
  "Unclear explanation",
  "Inferred absence because of low feature base rate",
  "Inferred absence of latent feature due to visibility"
)
explan_data[coding_cols] <- lapply(explan_data[coding_cols], as.factor)
# Summarise how many rows fall into each level of `Correct explanation`, then
# add each level's share of the total (`pct`) and a formatted percentage
# label (`lbl`) for plotting.
plotdata_between <- explan_data %>%
  group_by(`Correct explanation`) %>%
  summarise(n = n()) %>%
  mutate(pct = n / sum(n)) %>%
  mutate(lbl = scales::percent(pct))

# Show the summary table.
plotdata_between
## # A tibble: 2 × 4
## `Correct explanation` n pct lbl
## <fct> <int> <dbl> <chr>
## 1 0 62 0.31 31%
## 2 1 138 0.69 69%
# Stacked percentage bar showing the share of rows whose explanation was
# coded as correct (1) versus not correct (0).

# Rows where the criterion is met; not used by the plot below, kept in case
# later code relies on it.
plotdata_sub <- subset(plotdata_between, `Correct explanation` == 1)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Correct explanation`
  )
) +
  # facet_grid(~ Features) +
  # geom_col() is the idiomatic equivalent of geom_bar(stat = "identity").
  geom_col(position = "fill") +
  scale_y_continuous(
    # `limits` must be a length-2 vector (min, max); the former seq(0, 2)
    # supplied three values.
    limits = c(0, 2),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    # Spell out `labels` (no partial argument matching) and namespace the
    # formatter so it does not depend on `scales` being attached.
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  # Zoom the view without dropping data; the y range leaves some headroom
  # above 100%.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  # Place each percentage label in the middle of its bar segment.
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    fill = "Correct Explanation",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g
# Summarise how many rows fall into each level of
# `Inferred absence because of low feature base rate`, then add each level's
# share of the total (`pct`) and a formatted percentage label (`lbl`) for
# plotting.
plotdata_between <- explan_data %>%
  group_by(`Inferred absence because of low feature base rate`) %>%
  summarise(n = n()) %>%
  mutate(pct = n / sum(n)) %>%
  mutate(lbl = scales::percent(pct))

# Show the summary table.
plotdata_between
## # A tibble: 2 × 4
## `Inferred absence because of low feature base rate` n pct lbl
## <fct> <int> <dbl> <chr>
## 1 0 191 0.955 96%
## 2 1 9 0.045 4%
# Stacked percentage bar showing the share of rows whose explanation was
# coded as "inferred absence because of low feature base rate" (1) versus
# not (0).

# Rows where the criterion is met; not used by the plot below, kept in case
# later code relies on it.
plotdata_sub <- subset(
  plotdata_between,
  `Inferred absence because of low feature base rate` == 1
)
plotdata <- plotdata_between

g <- ggplot(
  plotdata,
  aes(
    x = "",
    y = pct,
    fill = `Inferred absence because of low feature base rate`
  )
) +
  # facet_grid(~ Features) +
  # geom_col() is the idiomatic equivalent of geom_bar(stat = "identity").
  geom_col(position = "fill") +
  scale_y_continuous(
    # `limits` must be a length-2 vector (min, max); the former seq(0, 2)
    # supplied three values.
    limits = c(0, 2),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0),
    # Spell out `labels` (no partial argument matching) and namespace the
    # formatter so it does not depend on `scales` being attached.
    labels = scales::percent
  ) +
  # scale_x_discrete(labels = c("not \nmentioned", "'you don't \nknow'")) +
  # Zoom the view without dropping data; the y range leaves some headroom
  # above 100%.
  coord_cartesian(xlim = c(1, 2), ylim = c(0, 1.1)) +
  # coord_cartesian(clip = "off") +
  # Place each percentage label in the middle of its bar segment.
  geom_text(
    aes(label = lbl),
    size = 4.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    y = "Percentage",
    # The legend title previously read "Correct Explanation" (copied from the
    # preceding plot); it now names the criterion actually mapped to `fill`.
    fill = "Inferred absence because of low feature base rate",
    x = "Features"
  ) +
  theme(
    legend.position = "top",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13, color = "black"),
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

g