Tuesday 10 March 2020

Exploring some coronavirus time courses...

Updated 12 Mar 2020 - updating names including "United Kingdom" and adding system date to plot.

It's hard to avoid and the instinct is to get stressed and apathetic in alternating cycles.
I found an interesting graph on Twitter and decided to explore some of the data.

There is an online interactive dashboard from Johns Hopkins University, and they have shared the data on GitHub.

So here is a script to pull this data into R and make a few graphs.

There also seem to be two R packages (one by Rami Krispin on CRAN and one by Guangchuang Yu on GitHub), but I haven't explored them yet...

I explored the data to compare the number of COVID-19 cases in the UK with France and Italy. Here is the graph:



Note that the y-axis uses a log scale, not a linear one.


START
# exploring corona virus data...
library(tidyverse)
library(lubridate)
library(ggthemes)
# import the data
# JHU CSSE renamed its time-series files in late March 2020; the original
# "time_series_19-covid-Confirmed.csv" is no longer served from this
# directory. Its replacement keeps the wide layout the rest of this script
# expects: Province/State, Country/Region, Lat, Long, then one column per date.
url <- paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_time_series/",
    "time_series_covid19_confirmed_global.csv"
)

data <- read_csv(url)

# Worldwide picture: add up every row of the table for each day and plot the
# daily total on a log10 axis.
# The grouping column is deliberately called `mdy(date)`: the plots built
# further down with `p1 %+% <data>` reuse this aesthetic mapping, so their
# data must expose a column with exactly that name.
p1 <- data %>%
    select(-c(`Province/State`, Lat, Long)) %>%
    pivot_longer(
        cols = -`Country/Region`,
        names_to = "date",
        values_to = "cases"
    ) %>%
    group_by(`mdy(date)` = mdy(date)) %>%
    summarise(total_cases = sum(cases)) %>%
    ggplot(mapping = aes(x = `mdy(date)`, y = total_cases)) +
    geom_point() +
    scale_y_continuous(trans = "log10") +
    labs(
        x = "Date",
        y = "Number of COVID-19 cases",
        title = "COVID-19 Cases worldwide",
        subtitle = "source: https://github.com/CSSEGISandData/COVID-19"
    ) +
    theme_economist_white()

p1




# China only: daily totals over every row whose Country/Region is "China"
data2 <- data %>%
    filter(`Country/Region` == "China") %>%
    select(-c(`Province/State`, Lat, Long)) %>%
    pivot_longer(
        cols = -`Country/Region`,
        names_to = "date",
        values_to = "cases"
    ) %>%
    # same column name as in p1's data, so the template's x mapping resolves
    group_by(`mdy(date)` = mdy(date)) %>%
    summarise(total_cases = sum(cases))

# reuse the worldwide plot as a template: swap in the China data, then retitle
p2 <- p1 %+% data2
p2 + ggtitle("COVID-19 cases in China")





# Hubei province (capital: Wuhan) - select on Province/State this time
data3 <- data %>%
    filter(`Province/State` == "Hubei") %>%
    select(-c(`Country/Region`, Lat, Long)) %>%
    pivot_longer(
        cols = -`Province/State`,
        names_to = "date",
        values_to = "cases"
    ) %>%
    # same column name as in p1's data, so the template's x mapping resolves
    group_by(`mdy(date)` = mdy(date)) %>%
    summarise(total_cases = sum(cases))

# swap the Hubei data into the worldwide template and retitle
p2 <- p1 %+% data3
p2 + ggtitle("COVID-19 cases Hubei Province")





# Rest of the world: daily totals over every row that is not China
world_less_china <- data %>%
    filter(`Country/Region` != "China") %>%
    select(-c(`Province/State`, Lat, Long)) %>%
    pivot_longer(
        cols = -`Country/Region`,
        names_to = "date",
        values_to = "cases"
    ) %>%
    # same column name as in p1's data, so the template's x mapping resolves
    group_by(`mdy(date)` = mdy(date)) %>%
    summarise(total_cases = sum(cases))

p_wlc <- p1 %+% world_less_china
p_wlc + ggtitle("COVID-19 in world excluding China")
# fewer cases but increasing...


# let's plot some other countries
# make a function
#' Plot the confirmed-case time course of one country on a log10 y-axis.
#'
#' @param country A single string, matched exactly against the
#'   `Country/Region` column (e.g. "Italy", "Korea, South").
#' @param cases_data Wide case table laid out like `data`: `Province/State`,
#'   `Country/Region`, `Lat`, `Long`, then one count column per date whose
#'   name is a month/day/year string. Defaults to the global `data`.
#' @return A ggplot object with one point per date that has a non-zero total.
plot_country <- function(country, cases_data = data){
    stopifnot(is.character(country), length(country) == 1L, !is.na(country))

    # Country names in the upstream file change from time to time; fail loudly
    # rather than silently drawing an empty plot when the name does not match.
    if (!country %in% cases_data[["Country/Region"]]) {
        stop("No rows found for country '", country,
             "'; check the spelling against the `Country/Region` column.",
             call. = FALSE)
    }

    cases_data %>%
        filter(`Country/Region` == country) %>%
        select(-`Province/State`, -Lat, -Long) %>%
        pivot_longer(-`Country/Region`, names_to = "date", values_to = "cases") %>%
        # parse the date into a real column instead of relying on a grouping
        # column that is literally named `mdy(date)`
        mutate(date = mdy(date)) %>%
        group_by(date) %>%
        # a country may span several rows (one per Province/State): add them up
        summarise(total_cases = sum(cases)) %>%
        # zero has no logarithm; drop those days, as the multi-country
        # comparison in this script already does with filter(cases>0)
        filter(total_cases > 0) %>%
        ggplot(aes(x = date, y = total_cases)) +
        geom_point() +
        scale_y_log10() +
        labs(x = "Date",
            y = "Number of COVID-19 cases",
            title = paste("Cases in", country),
            subtitle = "source: https://github.com/CSSEGISandData/COVID-19") +
        theme_economist_white()
}


# One plot per country. Each call is a bare top-level expression, so the
# returned plot is displayed by R's auto-printing. The name must equal the
# `Country/Region` value exactly, because plot_country() filters with `==`.
plot_country("Korea, South")
# something interesting happened here...


# Italy
plot_country("Italy")





# Iran
plot_country("Iran")


# United Kingdom
plot_country("United Kingdom")




# compare multiple countries
# Countries can occupy several rows of the table (one per Province/State), so
# the counts are summed per country and date before plotting; otherwise a
# country would show several separate points for the same day.
three_countries <- c("United Kingdom", "France", "Italy")
data %>%
    filter(`Country/Region` %in% three_countries) %>%
    select(-`Province/State`, -Lat, -Long) %>%
    pivot_longer(-`Country/Region`, names_to = "date", values_to = "cases") %>%
    mutate(date = mdy(date)) %>%
    group_by(`Country/Region`, date) %>%
    summarise(cases = sum(cases)) %>%
    ungroup() %>%
    # zero has no logarithm: keep only days on which a country has cases
    filter(cases > 0) %>%
    ggplot(aes(x = date, y = cases, color = `Country/Region`)) +
    geom_point() +
    scale_y_log10() +
    labs(x = "Date",
        y = "Number of COVID-19 cases",
        title = "Cases in UK, France & Italy",
        # stamp the plot with the day the script was run
        subtitle = paste("source: https://github.com/CSSEGISandData/COVID-19. Updated",
            Sys.Date())) +
    theme_economist_white()





END