Code
library(tidyverse)
library(plotly)
library(crosstalk)
library(here)
Jon Minton
November 30, 2023
Below is an example of creating a plotly chart with an interactive slider using crosstalk.
By default, the plot shows the proportion of datazones in a local authority that are in the 15% most deprived datazones in Scotland. (Using the 2020 SIMD).
The slider allows different thresholds than the 15% default to be selected.
To see the code itself, just click on the word ‘code’ to open up the block’.
if(!file.exists(here("simd_data.xlsx"))){
download.file(
url = "https://www.gov.scot/binaries/content/documents/govscot/publications/statistics/2020/01/scottish-index-of-multiple-deprivation-2020-data-zone-look-up-file/documents/scottish-index-of-multiple-deprivation-data-zone-look-up/scottish-index-of-multiple-deprivation-data-zone-look-up/govscot%3Adocument/SIMD%2B2020v2%2B-%2Bdatazone%2Blookup.xlsx",
destfile = here("simd_data.xlsx"),
mode = "wb"
)
}
dta <- openxlsx::readWorkbook(here("simd_data.xlsx"), sheet = "SIMD 2020v2 DZ lookup data")
The code for the figure itself is below. It’s quite a convoluted process. There’s almost certaintly neater ways of doing this. The main thing to keep in mind is all the figures exist; just only one is visible at a time.
# So let's construct a new aval containing the different x-y tuples given the threshold selected
calc_prop_deprived <- function(q, dta){
dta %>%
group_by(HBname) %>%
summarise(prop_deprived = mean(pct_rank < q)) %>%
ungroup()
}
df_rank <-
dta %>%
select(HBname, SIMD2020v2_Rank) %>%
mutate(pct_rank = SIMD2020v2_Rank / max(SIMD2020v2_Rank))
shared_df <- tibble(
dep_quants = seq(0.05, 0.95, by = 0.05)
) %>%
mutate(derived_props = map(dep_quants, calc_prop_deprived, dta = df_rank)) %>%
unnest(derived_props) %>%
mutate(undep_quants = 1 - dep_quants)
# Now to put it in the structure, and set active for `dep_quants = 0.15`
unique_dep_quants <- unique(shared_df$dep_quants)
n_steps <- length(unique_dep_quants)
dep_vals <- list()
for (step in 1:n_steps){
tmp <-
shared_df %>%
filter(dep_quants == unique_dep_quants[step]) %>%
select(HBname, prop_deprived) %>%
mutate(HBname = reorder(HBname, prop_deprived))
dep_vals[[step]] <- list(
visible = FALSE,
name = paste0('Quantile: ', unique_dep_quants[step]),
x=tmp$prop_deprived,
y=tmp$HBname
)
}
# 15% is the third list object
dep_vals[3][[1]]$visible = TRUE
# Now visualise
# create steps and plot all traces
dep_steps <- list()
fig <- plot_ly()
for (i in c(3, 1, 2, 4:n_steps)) { # Start with 3 as this is 15% and this should determine the default HB order
fig <- add_bars(fig,x=dep_vals[i][[1]]$x, y=dep_vals[i][[1]]$y, visible = dep_vals[i][[1]]$visible,
name = dep_vals[i][[1]]$name, orientation = 'h', hoverinfo = 'x+y', color = I("gray"),
showlegend = FALSE) %>%
layout(
title = list(
text = glue::glue("Proportion of datazones in Health Boards at least this deprived")
),
xaxis = list(
title = "Proportion this deprived in Health Board",
range = list(0, 1)
),
yaxis = list(
title = "Health Board"
)
)
step <- list(args = list('visible', rep(FALSE, length(dep_vals))),
method = 'restyle')
step$args[[2]][i] = TRUE
step$label = unique_dep_quants[i]
dep_steps[[i]] = step
}
#names(dep_steps) <- unique_dep_quants
fig <- fig %>%
layout(sliders = list(list(active = 2,
currentvalue = list(prefix = "Deprivation: "),
steps = dep_steps)))
fig
As you can see, there’s still some work to do regarding formatting. But it works!
For comparison, here’s the same data used to produce a static plot
# Now to put it in the structure, and set active for `dep_quants = 0.15`
df_15pc <- shared_df |>
filter(between(dep_quants, 0.149, 0.151)) |>
select(-dep_quants, -undep_quants)
df_15pc |>
mutate(pct_deprived = 100 * prop_deprived) |>
ggplot(aes(y= pct_deprived, x = fct_reorder(HBname, pct_deprived))) +
geom_bar(stat = "identity") +
geom_text(
aes(
label = ifelse(df_15pc$prop_deprived > 0, sprintf("%.1f", pct_deprived), "")
),
color = "white",
hjust = 1,
nudge_y = -0.5
) +
coord_flip() +
labs(
x = "Health Board",
y = "Percent of datazones in 15% most deprived proportion of Scotland",
title = "Percent of datazones in Health Board in 15% most deprived areas of Scotland",
subtitle = "SIMD 2020"
) +
geom_hline(yintercept = 0)