This allowed me to generate this graph, which shows immunization coverage across the world for nine different vaccines. The different dates at which monitoring starts show that new immunization programs are being rolled out on a regular basis - good to see.
Below is the code for downloading the data and making the graph...
If you would rather just make the graph with some cleaner data, the data from Aug 20, 2018 is available on GitHub and can be downloaded using the read_csv() code shown about halfway down the script.
## START
## download the data
library(tidyverse)
# install.packages("WHO")
library(WHO)
# Fetch the table of indicator codes via get_codes()
codes <- get_codes()
# Flag the rows whose display text mentions immunization
# (the pattern accepts both "Immuniz..." and "immuniz...")
is_immun <- grepl("[Ii]mmuniz", codes$display)
# Keep just the flagged rows
immun_codes <- codes[is_immun, ]
# Show the labels of the matching indicators
immun_codes$label
# Work through the immunization indicators (18 when this was written)
# looking for global data.
# The first indicator has global data, so download it to seed the table.
# Requires internet access.
# as_tibble() is used because as.tibble() is deprecated in the tibble package.
immun_data <- as_tibble(get_data(immun_codes$label[1]))
# Keep only the rows for the global aggregate
immun_data <- filter(immun_data, region == "(WHO) Global")
# Download the remaining indicators (the first one was fetched above) and
# append their global rows to immun_data.
# Written as a loop because the author could not get lapply() to work here.
# Requires internet access and patience...
# seq_along(...)[-1] is empty when there is only one label, whereas
# 2:length(...) would count backwards (2, 1) in that case.
for (i in seq_along(immun_codes$label)[-1]) {
  # download the data for this indicator
  # (as_tibble() replaces the deprecated as.tibble())
  dataset <- as_tibble(get_data(immun_codes$label[i]))
  # report progress
  print(paste("Dataset", immun_codes$label[i], "downloaded."))
  # keep only the global values
  dataset <- filter(dataset, region == "(WHO) Global")
  if (nrow(dataset) > 0) {
    # at least one global row: append it with bind_rows() from dplyr
    # (the previous test, > 1, discarded indicators with a single row)
    immun_data <- bind_rows(immun_data, dataset)
  } else {
    # nothing global in this indicator, so just say so
    print("No global data in this set")
  }
}
# Keep only the four columns needed for plotting
immun_data <- immun_data %>%
  select(gho, region, year, value)
# Cache a local copy, named with today's date, so the download
# does not have to be repeated every time
file_name <- paste0("global_immun_data", format(Sys.Date()))
write_csv(immun_data, file_name)
## ----read_back if you have saved to continue from here
# immun_data <- read_csv(file_name)
# read in data from github using read_csv() function
# immun_data <- read_csv("https://raw.githubusercontent.com/brennanpincardiff/RforBiochemists/master/data/global_immun_data2018-08-20")
# First plot: one coloured line per indicator (gho), legend hidden
plot <- immun_data %>%
  ggplot(mapping = aes(x = year, y = value, colour = gho)) +
  geom_line(size = 1) +
  theme(legend.position = "none")
plot
# Split the lines into one panel per indicator with facet_wrap()
plotf <- plot + facet_wrap(facets = ~gho)
plotf
## The individual graph titles are difficult to read.
# Shorten them by deleting the boilerplate phrases with gsub()
# (gsub = global substitution). fixed = TRUE matches the phrases literally.
immun_data$gho_s <- gsub("immunization coverage among 1-year-olds",
                         "", immun_data$gho, fixed = TRUE)
immun_data$gho_s <- gsub("immunization coverage by the nationally recommended age",
                         "", immun_data$gho_s, fixed = TRUE)
# Deleting a phrase leaves behind the spaces that surrounded it, so collapse
# runs of whitespace and trim both ends to keep the panel titles tidy.
immun_data$gho_s <- trimws(gsub("\\s+", " ", immun_data$gho_s))
# make the plot again, this time coloured by the shortened name
plot <- ggplot(immun_data, aes(x = year, y = value, colour = gho_s)) +
  geom_line(size = 1) +
  theme(legend.position = "none")
# separate plots with facet_wrap
plotf <- plot + facet_wrap(~gho_s)
# theme_bw() is a complete theme and replaces earlier theme settings,
# so the legend has to be hidden again after applying it
plotf <- plotf + theme_bw() + theme(legend.position = "none")
plotf
# Finish the plot: fix the y axis to 0-100, add a title, and credit the
# data source (with today's access date) in the subtitle
source <- paste("Source: World Health Organisation, accessed:", Sys.Date())
plotf <- plotf +
  ylim(0, 100) +
  labs(
    title = "Global immunization rates",
    subtitle = source,
    x = NULL,
    y = "Immunization Rate"
  )
plotf
## END
No comments:
Post a Comment
Comments and suggestions are welcome.