As part of the Hacking Health Covid-19, the SKEMA Global Lab in AI provided SKEMA’s students with a fully developed data science environment to carry out their projects. See [here].
For this specific module, the team used the following courses:
In the context of the global health crisis related to Covid-19, the SKEMA Global Lab in AI gave us the opportunity to study the impacts of this crisis on our world. First of all, we would like to thank the entire SKEMA Global Lab in AI team, who provided us with all the knowledge to master the different tools (and in particular Rstudio) essential to study the data on a large scale. We would also like to thank Mr Rami KRISPIN, who made his API available, without which we would not have been able to complete this project.
Within the framework of our study, our team decided to first study the spread of the virus on a global scale as well as its financial consequences on the markets. In a second step, we focused on the spread of the virus within Europe, the policies implemented and their results to stop the spread of the virus. Finally, we decided to deepen the analysis on the impact of the virus in France, especially on the different industries but also on the feelings of the French people through a sentiment analysis using the Twitter API.
library(dplyr)
library(tidyr)
library(leaflet)
library(leaflet.extras)
library(leaftime)
library(purrr)
library(leafpop)
library(ggplot2)
library(plotly)
library(kableExtra)
library(readr)
library(readxl)
library(dplyr)
library(rAmCharts)
library(lubridate)
library(gganimate)
library(rtweet)
library(tidytext)
library(lubridate)
library(rdbnomics)
# ---- World data: download, label each place, accumulate counts ----
# Daily case counts are downloaded as CSV; empty strings are turned into NA so
# that a missing province can be detected with is.na().
covid_csv_url <- "https://raw.githubusercontent.com/RamiKrispin/coronavirus-csv/master/coronavirus_dataset.csv"
CoronavirusWorld <- utils::read.csv(covid_csv_url)
CoronavirusWorld[CoronavirusWorld == ""] <- NA

# Display name: the province when there is one, otherwise the country.
CoronavirusWorld$name <- with(
  CoronavirusWorld,
  ifelse(is.na(Province.State), paste0(Country.Region), paste0(Province.State))
)
CoronavirusWorld$date <- as.Date(CoronavirusWorld$date)

# One column per value of `type` (the code below uses confirmed, death and
# recovered), then running totals within each place.
# NOTE(review): cumsum() follows row order, so this assumes rows are in date
# order within each place - confirm.
CoronavirusWorld2 <- spread(CoronavirusWorld, key = "type", value = "cases")
WorldSpreadCovid <- CoronavirusWorld2 %>%
  group_by(name) %>%
  mutate(
    CumulConfirmes = cumsum(confirmed),
    CumulMorts = cumsum(death),
    CumulSoignes = cumsum(recovered)
  )
# ---- Marker-size categories for the world map ----

#' Map cumulative counts to the size codes used for the map markers
#'
#' Replaces three copies of the same eleven-level nested ifelse() ladder.
#'
#' Bands (inclusive lower bound -> code):
#'   below 1 -> 0, 1 -> 1, 101 -> 2, 1001 -> 4, 3001 -> 5, 5001 -> 8,
#'   10001 -> 10, 20001 -> 15, 50001 -> 20, 100001 -> 25, 200001 -> 30
#'
#' For whole-number counts of -1 or more the result is identical to the
#' previous ladder. Two cases differ on purpose: totals below -1 (negative
#' corrections in the source data) and fractional values lying between two
#' integer bounds used to fall through every test and receive the fallback
#' code 35, i.e. the largest marker. They are now assigned to the band they
#' fall in, so a negative total gets code 0.
#'
#' @param x Numeric vector of cumulative counts.
#' @return Numeric vector of size codes, the same length as `x`; NA stays NA.
covid_size_category <- function(x) {
  lower_bounds <- c(1, 101, 1001, 3001, 5001, 10001, 20001, 50001, 100001, 200001)
  size_codes <- c(0, 1, 2, 4, 5, 8, 10, 15, 20, 25, 30)
  # findInterval() returns 0 below the first bound, hence the + 1L offset.
  size_codes[findInterval(x, lower_bounds) + 1L]
}

WorldSpreadCovid$category1 <- covid_size_category(WorldSpreadCovid$CumulConfirmes)
WorldSpreadCovid$category2 <- covid_size_category(WorldSpreadCovid$CumulMorts)
WorldSpreadCovid$category3 <- covid_size_category(WorldSpreadCovid$CumulSoignes)
# ---- Interactive world map (latest available day) ----
# Keep only the rows dated with the most recent date.
WorldMapCovid <- WorldSpreadCovid %>%
  filter(date == max(date))

# Popup fragments. paste() joins with single spaces, exactly as the popups were
# assembled before; each popup emphasises the figure of its own layer.
bold_html <- function(txt) paste("<strong>", txt, "</strong>")
popup_place <- bold_html(WorldMapCovid$name)
popup_confirmed <- paste(WorldMapCovid$CumulConfirmes, "Confirmed")
popup_death <- paste(WorldMapCovid$CumulMorts, "Death")
popup_recovered <- paste(WorldMapCovid$CumulSoignes, "Recovered")

content <- paste(
  popup_place, "<br/>", bold_html(popup_confirmed), "<br/>",
  popup_death, "<br/>", popup_recovered
)
content2 <- paste(
  popup_place, "<br/>", popup_confirmed, "<br/>",
  bold_html(popup_death), "<br/>", popup_recovered
)
content3 <- paste(
  popup_place, "<br/>", popup_confirmed, "<br/>",
  popup_death, "<br/>", bold_html(popup_recovered)
)

# Three toggleable marker layers on a dark basemap. Marker radius grows with the
# square root of the size category. No lng/lat arguments are passed, so leaflet
# is left to pick the coordinate columns of WorldMapCovid itself.
leaflet(WorldMapCovid) %>%
  addFullscreenControl() %>%
  addProviderTiles(providers$CartoDB.DarkMatter) %>%
  addCircleMarkers(
    group = "Confirmed",
    radius = ~ sqrt(category1) * 5,
    popup = content,
    color = "orange",
    stroke = FALSE,
    fillOpacity = 0.6
  ) %>%
  addCircleMarkers(
    group = "Recovered",
    radius = ~ sqrt(category3) * 5,
    popup = content3,
    color = "green",
    stroke = FALSE,
    fillOpacity = 1
  ) %>%
  addCircleMarkers(
    group = "Death",
    radius = ~ sqrt(category2) * 5,
    popup = content2,
    color = "red",
    stroke = FALSE,
    fillOpacity = 1
  ) %>%
  addLayersControl(
    overlayGroups = c("Confirmed", "Recovered", "Death"),
    position = "topright",
    options = layersControlOptions(collapsed = FALSE)
  )
This is an interactive map generated with the leaflet library. It shows, for each country, the number of Covid-19 cases, the number of deaths related to the epidemic and the number of people who have recovered. This map is dated 8 May 2020 (it has evolved since then). We can easily see that the two “cradles” of the epidemic are currently in Europe and the United States. These are the two regions of the world where the virus has been circulating most actively and where there have been the most coronavirus-related deaths. However, one must be cautious with this information, especially on the number of confirmed cases, because some countries (including France) do not have the material means to test their entire population, so the number of confirmed cases is underestimated.
You can click on the different options at the top right of the map to choose the information you wish to display. You can also click on a country to see the detailed numbers for that country.
# ---- Data for the animated "top 10" bar chart ----
# NOTE(review): in this document summary_df2 is first assigned further down
# (spread(coronavirus, "type", "cases")); confirm that chunk runs before this one.
summary_df2

# Display name: the province when there is one, otherwise the country.
summary_df2$Province.State[summary_df2$Province.State == ""] <- NA
summary_df2$name <- ifelse(
  is.na(summary_df2$Province.State),
  paste0(summary_df2$Country.Region),
  paste0(summary_df2$Province.State)
)

# Running totals per place, stored as doubles.
PlotCumsumConfirmed <- summary_df2 %>%
  group_by(name) %>%
  mutate(CumulConfirmes = as.numeric(cumsum(confirmed)))
PlotCumsumDeath <- summary_df2 %>%
  group_by(name) %>%
  mutate(CumulMorts = as.numeric(cumsum(death)))

# Rank places within each date and keep the ten largest.
# ties.method = "first" gives every place a distinct whole-number rank. With the
# default averaged ties, tied places shared one position on the chart, and
# CumulConfirmes[rank == 1] was empty whenever the top value was tied.
Plot_formatted <- PlotCumsumConfirmed %>%
  mutate(date = ymd(date)) %>%
  group_by(date) %>%
  mutate(
    rank = rank(-CumulConfirmes, ties.method = "first"),
    Value_rel = CumulConfirmes / CumulConfirmes[rank == 1],
    Value_lbl = paste0(" ", round(CumulConfirmes))
  ) %>%
  group_by(name) %>%
  filter(rank <= 10) %>%
  ungroup()
# ---- Animated bar chart of confirmed cases ----
# Static layer: one horizontal bar per place, positioned by rank. geom_tile() is
# centred on half the value with the full value as height, so each bar starts
# at zero.
StaticPlotCovid <- ggplot(
  Plot_formatted,
  aes(rank, group = name, fill = as.factor(name), color = as.factor(name))
) +
  geom_tile(
    aes(y = CumulConfirmes / 2, height = CumulConfirmes, width = 0.9),
    alpha = 0.8, color = NA
  ) +
  # Place name to the left of the bar, value label at its end.
  geom_text(aes(y = 0, label = paste(name, " ")), vjust = 0.2, hjust = 1) +
  geom_text(aes(y = CumulConfirmes, label = Value_lbl, hjust = 0)) +
  # clip = "off" lets the labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  # "none" replaces the deprecated `FALSE` shorthand for hiding a legend.
  guides(color = "none", fill = "none") +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(size = .1, color = "grey"),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    plot.title = element_text(size = 25, hjust = 0.5, face = "bold", colour = "grey", vjust = -1),
    plot.subtitle = element_text(size = 18, hjust = 0.5, face = "italic", color = "grey"),
    plot.caption = element_text(size = 8, hjust = 0.5, face = "italic", color = "grey"),
    plot.background = element_blank(),
    plot.margin = margin(2, 2, 2, 4, "cm")
  )

library(gganimate)

# One animation state per date; {closest_state} in the title is filled in by
# gganimate.
anim <- StaticPlotCovid +
  transition_states(date, transition_length = 4, state_length = 1) +
  view_follow(fixed_x = TRUE) +
  labs(
    title = "Évolution du nombre de cas de Covid 19 par pays : {closest_state}",
    subtitle = "10 premiers pays / régions",
    caption = "Nombre de cas confirmés | Data Source: Rami Krispin/coronavirus"
  )

# Render 1000 frames at 15 fps into a GIF file.
animate(
  anim,
  nframes = 1000, fps = 15, width = 1200, height = 1000,
  renderer = gifski_renderer("gganimcovid.gif")
)
We chose two financial markets for this study and decided to study the period preceding the coronavirus crisis in the countries concerned, the aim being to demonstrate the anticipation of the markets but also the problems linked to the interdependence of all developed countries when a major crisis occurs.
# ---- CAC 40 closing values ----
# Read the workbook and store the date column as Date.
CAC40 <- read_xlsx("./data/PX1-3.xlsx") %>%
  mutate(date = as.Date(date))

# Line chart of the `cloture` column over time.
ggplot(CAC40, aes(x = date, y = cloture)) +
  geom_line(size = 0.78) +
  scale_color_hue() +
  labs(
    title = "Evolution of the stock market index CAC 40",
    subtitle = "Since January 2020",
    x = "Date",
    y = "Closing value"
  ) +
  hrbrthemes::theme_modern_rc()
The beginning of the containment took place on March 17 in France, we can see however that the CAC 40 market had fallen by more than 500 points before the end of February. This is explained by the fall of financial markets worldwide, indeed, China, which is the main exporting country of the planet was totally paralyzed by the Covid-19 pandemic, so, by domino effect, the fall of Chinese exports made all the markets collapse. This crisis has highlighted the “limits” of globalization and the interconnection between the different countries of the world. Indeed, when a country “falls”, it risks dragging all of its partners with it. This interdependence therefore needs to be reviewed in order to avoid having to face this type of situation again in the future. A parallel can be drawn with the subprime crisis that originated in the United States and led to a global economic crisis.
# ---- S&P 500 closing values ----
# Read the workbook, store the date column as Date, and rename `fermeture` to
# `cloture` so the column carries the same name as in the CAC 40 data.
SP500 <- read_xlsx("./data/SPX.xlsx") %>%
  mutate(date = as.Date(date)) %>%
  dplyr::rename(cloture = fermeture)

# Line chart of the closing value over time.
ggplot(SP500, aes(x = date, y = cloture)) +
  geom_line(colour = "#0c4c8a", size = 0.78) +
  labs(
    title = "Evolution of the stock market index S&P 500",
    subtitle = "Since February 2020",
    x = "Date",
    y = "Closing value"
  ) +
  hrbrthemes::theme_modern_rc()
Concerning the S&P 500, the same conclusion can be drawn from this crisis, indeed, the coronavirus crisis arrived in the United States in mid-March but the financial markets started to collapse long before. This collapse is due in part to the anticipation of the markets, but more importantly to the interconnection of the various global financial markets.
We decided to study the spread of the virus in Europe, focusing in particular on France, Italy, Spain and Germany. We made this choice because the first three countries were severely affected by Covid-19 (also including the United Kingdom), then Germany was chosen for this study because this country was able to limit the impact of the epidemic on its population. We will therefore try to understand how the epidemic was contained by these different countries, the means put in place and their effectiveness.
# ---- Totals per country and case type ----
# Download the dataset used by the European analysis below.
coronavirus <- utils::read.csv(
  "https://raw.githubusercontent.com/RamiKrispin/coronavirus-csv/master/coronavirus_dataset.csv"
)

# Sum the daily counts per country and type, largest totals first. The result
# stays grouped by Country.Region, as the printed header below shows.
summary_df <- coronavirus %>%
  group_by(Country.Region, type) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(desc(total_cases))

# Show the 20 largest totals.
head(summary_df, 20)
# A tibble: 20 x 3
# Groups: Country.Region [13]
Country.Region type total_cases
<chr> <chr> <int>
1 US confirmed 1417774
2 Russia confirmed 252245
3 US recovered 246414
4 United Kingdom confirmed 234440
5 Spain confirmed 229540
6 Italy confirmed 223096
7 Brazil confirmed 203165
8 France confirmed 178994
9 Germany confirmed 174478
10 Germany recovered 150300
11 Turkey confirmed 144749
12 Spain recovered 143374
13 Italy recovered 115288
14 Iran confirmed 114533
15 Turkey recovered 104030
16 Iran recovered 90539
17 US death 85898
18 China confirmed 84029
19 India confirmed 81997
20 Peru confirmed 80604
# ---- New cases on the most recent day, by country ----
# Parse the date column as Date (ISO year-month-day).
coronavirus$date <- as.Date(coronavirus$date, format = "%Y-%m-%d")

# For the latest date only: sum the counts per country and type, put one column
# per type, and list the countries with the most confirmed cases first.
coronavirus %>%
  filter(date == max(date)) %>%
  group_by(country = Country.Region, type) %>%
  summarise(total_cases = sum(cases)) %>%
  pivot_wider(names_from = type, values_from = total_cases) %>%
  arrange(desc(confirmed))
# A tibble: 188 x 4
# Groups: country [188]
country confirmed death recovered
<chr> <int> <int> <int>
1 US 27368 1779 2984
2 Brazil 13028 759 1055
3 Russia 9974 93 5527
4 Peru 4298 98 827
5 India 3942 98 1569
6 United Kingdom 3455 429 11
7 Chile 2659 22 790
8 Mexico 2409 257 1485
9 Saudi Arabia 2039 10 1429
10 Iran 1808 71 1111
# … with 178 more rows
# ---- Daily cases in France, Spain, Germany and Italy ----
# Restrict the dataset to the four countries under study.
CombineCountries <- coronavirus %>%
  filter(Country.Region %in% c("France", "Spain", "Germany", "Italy"))

# Base plot: daily counts over time.
CombineGraph <- ggplot(CombineCountries, aes(date, cases))

# One panel per country; the bar outline colour encodes the case type.
CombineGraph2 <- CombineGraph +
  geom_bar(aes(colour = type), stat = "identity") +
  facet_wrap(~ Country.Region) +
  labs(x = "Date", title = "Coronavirus cases") +
  theme_linedraw()

# Drop the y-axis title, then convert to an interactive plotly widget.
CombineGraph3 <- ggplotly(
  CombineGraph2 + theme(axis.title.y = element_blank())
)
CombineGraph3
This graph is interactive, so you can move your cursor over it to see the evolution of the epidemic for a country.
To analyse the epidemic peak in each country, we use the number of new deaths each day. Because countries do not currently have the capacity to test all of their citizens, the numbers of confirmed cases can be misleading as to when the epidemic peak occurs. For example, if tomorrow a country has the capacity to test all of its citizens in one week, the number of new cases of coronavirus detected will increase dramatically, but this does not mean that there is a new epidemic wave. Many people are infected with the virus without having symptoms, and some people with symptoms may not be tested because of a lack of testing capacity in the countries concerned.
This analysis represents the number of new confirmed cases / deaths / recovered per day.
So we can see that the epidemic started to slow down in France from mid-April, the lockdown in France was implemented on March 17th, so it took a month for the trend to be down in France. Containment seems therefore to have been effective in slowing down the epidemic, however, France is lagging behind in terms of the medical tests carried out to detect Covid-19 contamination. Thus, the number of confirmed cases in France is automatically underestimated.
As far as Italy is concerned, the lockdown was put in place on 9 March, one week before France. The epidemic peak was passed at the beginning of April, two weeks before France. This discrepancy can be partly explained by the much more active circulation of the virus in Italy at the end of February / beginning of March, so a large part of the population had been contaminated before the start of containment.
Spain began its lockdown on 15 March, two days before France and one week after Italy. Spain experienced its first epidemic peak at the beginning of April, then the trend was downward and a new peak occurred around mid-April.
Germany went into containment on 22 March, five days after France, one week after Spain and two weeks after Italy. However, it is the country that has been the most successful in containing this epidemic, because, according to the number of deaths, Germany has been much more spared by this crisis. What is the reason for this? Germany had the opportunity to test several hundred thousand citizens per week, so it was able to quickly identify those who were carrying the virus and invite them to stay at home or to be treated according to the symptoms they developed. This massive screening campaign seems to have been the key to Germany’s “success” compared to its European neighbours who were hit much harder by the epidemic. Added to this is Germany’s medical capacity and the number of intensive care beds available. Germany did not experience hospital overcrowding as France, Italy or Spain did. Germany therefore seemed much more prepared to deal with a health crisis of this magnitude.
Finally, it is important to take into account that European countries are gradually starting to come out of containment, so it is likely that the epidemic will increase again in the coming weeks/months. This will depend in part on the behaviour of the population and the respect of the rules of distancing.
# ---- Comparison table for the four countries ----
# Total count per country and case type, ordered by country.
DataCountries <- CombineCountries %>%
  group_by(Country.Region, type) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(Country.Region)

# One column per case type (pivot_wider() replaces the superseded spread()),
# totals stored as doubles, plus deaths as a percentage of confirmed cases.
DataCountries2 <- DataCountries %>%
  pivot_wider(names_from = type, values_from = total_cases) %>%
  mutate(
    confirmed = as.numeric(confirmed),
    death = as.numeric(death),
    recovered = as.numeric(recovered),
    `Ratio death/confirmed` = death / confirmed * 100
  )

library(kableExtra)

# Styled HTML table: third column in bold, one background colour per row.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
# NOTE(review): the row colours are tied to row positions 1-4, i.e. to the
# alphabetical country order produced above.
kable(DataCountries2) %>%
  kable_styling("striped", full_width = FALSE) %>%
  column_spec(3, bold = TRUE) %>%
  row_spec(1, bold = TRUE, color = "white", background = "blue") %>%
  row_spec(2, bold = TRUE, color = "white", background = "red") %>%
  row_spec(3, bold = TRUE, color = "white", background = "green") %>%
  row_spec(4, bold = TRUE, color = "white", background = "orange")
Country.Region | confirmed | death | recovered | Ratio death/confirmed |
---|---|---|---|---|
France | 178994 | 27428 | 59719 | 15.323419 |
Germany | 174478 | 7884 | 150300 | 4.518621 |
Italy | 223096 | 31368 | 115288 | 14.060315 |
Spain | 229540 | 27321 | 143374 | 11.902501 |
This comparative table shows the number of Covid-19 cases as of 8 May 2020, we can see that France, Italy and Spain have very high mortality ratios (more than 10%). The average lethality of Covid-19 has been defined at around 2% by many health agencies. This very high rate can be explained by the lack of testing on the part of the 3 countries concerned, in fact, not having the possibility of testing all citizens, there are many more contaminated than those currently counted, so this automatically raises the mortality ratio for these countries. We also note that Germany is the country that is doing best in terms of the number of deaths (less than 10,000 while its European neighbours are at more than 25,000 deaths!). The indicator of people cared for is not really relevant because countries do not systematically count cured people. This can be explained by the fact that some people have weak symptoms and stay at home as long as these symptoms pass, so they are not counted in the treated persons.
# ---- Cumulative confirmed cases, four countries ----
# Wide layout: one column per case type.
summary_df2 <- spread(coronavirus, key = "type", value = "cases")

# For France keep only the rows with an empty Province.State; Spain, Germany
# and Italy are kept whole.
SpreadCountries <- summary_df2 %>%
  filter(
    (Country.Region == "France" & Province.State == "") |
      Country.Region %in% c("Spain", "Germany", "Italy")
  )

# Drop the first column by position.
# NOTE(review): presumably Province.State - confirm the column order.
SpreadCountries1 <- SpreadCountries[, -1]

# Running total of confirmed cases within each country.
CountriesConfirmed <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeConfirmed = cumsum(confirmed))

# Cumulative chart of confirmed Covid-19 cases, one line per country.
ggplot(
  CountriesConfirmed,
  aes(x = date, y = CumulativeConfirmed, colour = Country.Region)
) +
  geom_line(size = 1L) +
  scale_color_hue() +
  labs(
    title = "Number of people infected with Covid-19",
    y = "Number of confirmed cases (cumulative)",
    caption = "Coronavirus API / Rami Krispin"
  ) +
  ggthemes::theme_stata()
On this graph, we can clearly see that Italy was the first country to see the epidemic spread, followed by Spain and then Germany and France. Regarding France, it is important to note that its low screening capacity may have contributed to “distorting” the results. It is therefore likely that the virus circulated at an earlier date than that shown on the graph.
# ---- Cumulative deaths, four countries ----
# Running total of deaths within each country.
CountriesDeath <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeDeath = cumsum(death))

# Cumulative chart of deaths due to Covid-19, one line per country.
ggplot(
  CountriesDeath,
  aes(x = date, y = CumulativeDeath, colour = Country.Region)
) +
  geom_line(size = 1L) +
  scale_color_hue() +
  labs(
    title = "Deaths related to Covid-19",
    y = "Number of death cases (cumulative)",
    caption = "Coronavirus API / Rami Krispin"
  ) +
  ggthemes::theme_stata()
This graph is the most “interesting” because the figures given are true to reality. I.e. the number of deaths is relatively well documented by these countries. Indeed, although the figures related to the number of contaminated people are underestimated due to the lack of available screening tests, the figures of deaths recorded in relation to Covid-19 are reliable.
Here we can clearly see that Germany has succeeded in better protecting its population, in particular by screening on a massive scale. As regards the other three countries, we can see that the death curve “flattened out” during the month of April. It remains to be seen whether the end of containment will not lead to a return to an upward trend in these curves. If this were to be the case, we would then normally see a resumption of the sharp rise in these curves from mid-June onwards (i.e. approximately one month after the end of the lockdown of the countries concerned).
# ---- All cumulative series on one interactive chart ----
# The three cumulative tables share every original column, so all of them are
# used as join keys.
cumul_join_keys <- c("Country.Region", "Lat", "Long", "date", "confirmed", "death", "recovered")

LeftJoin1 <- CountriesConfirmed %>%
  left_join(CountriesDeath, by = cumul_join_keys)

# Running total of recovered cases within each country.
CountriesRecovered <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeRecovered = cumsum(recovered))

# Keep country, date and the three cumulative columns, stored as doubles.
LeftJoin2 <- LeftJoin1 %>%
  left_join(CountriesRecovered, by = cumul_join_keys) %>%
  select(-confirmed, -death, -recovered, -Lat, -Long) %>%
  mutate(
    CumulativeConfirmed = as.numeric(CumulativeConfirmed),
    CumulativeDeath = as.numeric(CumulativeDeath),
    CumulativeRecovered = as.numeric(CumulativeRecovered)
  )

# Long layout: `Total` names the series, `Value` holds its cumulative count.
FinalCumul <- gather(LeftJoin2, key = Total, value = Value, -Country.Region, -date)

# One panel per country; the bar outline colour encodes the series.
CombineCumul <- ggplot(FinalCumul, aes(date, Value))
CombineCumul2 <- CombineCumul +
  geom_bar(aes(colour = Total), stat = "identity") +
  facet_wrap(~ Country.Region) +
  labs(x = "Date", title = "Cumulative headcount by category (Covid-19)") +
  theme_linedraw()

# Drop the y-axis title, then convert to an interactive plotly widget showing
# the cumulative headcounts.
CombineCumul3 <- ggplotly(
  CombineCumul2 + theme(axis.title.y = element_blank())
)
CombineCumul3
This interactive graph shows all the previous data on a single graph. Thus, you can move the cursor on the histogram to follow the evolution of cumulative cases according to type (confirmed, death, recovered).
# ---- Hospital staff per 100,000 inhabitants (2017) ----
# Series: Annual - Hospital employment - Head count per hundred thousand
# inhabitants, for Italy, France, Spain and Germany. Rows without a value are
# dropped.
hospital_staff_ids <- c(
  'Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.IT',
  "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.FR",
  "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.ES",
  "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.DE"
)
HospitalEmployment <- rdb(ids = hospital_staff_ids) %>%
  filter(!is.na(value))

# Keep the 2017 observations only.
HospitalEmployment2017 <- HospitalEmployment %>%
  filter(original_period == 2017)

# Shorter column name, and a shorter label for Germany.
base::colnames(HospitalEmployment2017)[
  colnames(HospitalEmployment2017) == "Geopolitical entity (reporting)"
] <- "Country"
HospitalEmployment2017$Country <- recode(
  HospitalEmployment2017$Country,
  "Germany (until 1990 former territory of the FRG)" = "Germany"
)

# Bar chart with the value printed above each bar.
ggplot(
  HospitalEmployment2017,
  aes(x = Country, y = value, fill = Country, label = value)
) +
  geom_col() +
  geom_text(
    aes(label = value, colour = Country),
    position = position_dodge(width = 1),
    vjust = -0.10
  ) +
  scale_fill_hue() +
  labs(
    title = "Number of employees in hospitals per 100,000 inhabitants",
    x = "Country",
    y = "Ratio for 100.000 inhabitants",
    caption = "Source : Eurostat (2017)"
  ) +
  ggthemes::theme_stata() +
  theme(plot.title = element_text(hjust = 0.5))
This graph represents the number of employees in hospitals per 100,000 inhabitants, we can see that in this field, France seems to be the most advanced country. However, we have to take into account the working week in each country to get a true picture. For example, in France the working week is set at 35 hours, whereas in Germany, Italy and Spain it is 40 hours. Consequently, France needs more employees in hospitals to do the same work as in other countries. Nevertheless, in this period of crisis, the number of employees in hospitals is important because it is a human resource that can be mobilized beyond the planned working week. Thus, France is doing better on this criterion and did not experience a shortage of labour in hospitals during the Covid-19 crisis. By way of comparison, Italy, for example, received the assistance of Cuban doctors at the beginning of the epidemic because they lacked doctors to care for all patients.
# ---- Curative care beds per 100,000 inhabitants (2017) ----
# Series: Annual - Per hundred thousand inhabitants - Curative care beds in
# hospitals, for France, Italy, Spain and Germany. Rows without a value are
# dropped.
HospitalBeds <- rdb(ids = c(
  'Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.FR',
  "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.IT",
  "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.ES",
  "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.DE"
)) %>%
  filter(!is.na(value))

# Keep the 2017 observations only.
HospitalBeds2017 <- filter(HospitalBeds, original_period == 2017)

# Shorter column name, and a shorter label for Germany.
base::colnames(HospitalBeds2017)[
  colnames(HospitalBeds2017) == "Geopolitical entity (reporting)"
] <- "Country"
HospitalBeds2017$Country <- recode(
  HospitalBeds2017$Country,
  "Germany (until 1990 former territory of the FRG)" = "Germany"
)

# Bar chart with the value printed above each bar.
# The title now names the indicator that is actually queried (curative care
# beds); it previously said "intensive care", which this series does not measure.
ggplot(HospitalBeds2017) +
  aes(x = Country, y = value, fill = Country, label = value) +
  geom_col() +
  geom_text(
    aes(label = value, colour = Country),
    position = position_dodge(width = 1),
    vjust = -0.10
  ) +
  labs(
    y = "Ratio for 100.000 inhabitants",
    x = "Country",
    title = "Curative care beds in hospitals per 100.000 inhabitants",
    caption = "Source : Eurostat (2017)"
  ) +
  ggthemes::theme_stata() +
  theme(plot.title = element_text(hjust = 0.5))
This graph shows Germany’s capacity to receive patients in hospital: the indicator plotted is the number of curative care beds per 100,000 inhabitants, which we use here as a proxy for intensive care capacity. If we compare with its European neighbors, we can see that Germany was much better equipped to deal with a violent outbreak involving intensive care. As a result, Germany did not experience hospital overcrowding as France, Italy or Spain did. As an example, Germany offered to take care of French patients when there was congestion in hospitals in eastern France. Again, this was a key factor in helping to combat the epidemic effectively.
# ---- Share of the population aged 65 or more (2019) ----
# Total population for France, Italy, Spain and Germany; rows without a value
# are dropped and only 2019 is kept.
TotalPop <- rdb(ids = c(
  'Eurostat/tps00001/A.JAN.FR',
  "Eurostat/tps00001/A.JAN.IT",
  "Eurostat/tps00001/A.JAN.ES",
  "Eurostat/tps00001/A.JAN.DE"
)) %>%
  filter(!is.na(value))
TotalPop2019 <- TotalPop %>% filter(original_period == 2019)

# Population aged 65 or more, same countries, same treatment.
Pop65andMore <- rdb(ids = c(
  'Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.FR',
  "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.IT",
  "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.ES",
  "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.DE"
)) %>%
  filter(!is.na(value))
Pop65andMore2019 <- Pop65andMore %>% filter(original_period == 2019)

# Percentage of people aged 65 or more.
# NOTE(review): the labels and the two value vectors are paired purely by
# position; this assumes both queries return their rows in the order FR, IT,
# ES, DE - confirm.
Country <- c("France", "Italy", "Spain", "Germany")
Ratio <- Pop65andMore2019$value / TotalPop2019$value * 100
TableauRatioPop <- data.frame(Country, Ratio)
names(TableauRatioPop)[2] <- "Population over 65 years old ratio"

# Render as an HTML table.
kable_styling(kable(TableauRatioPop))
Country | Population over 65 years old ratio |
---|---|
France | 20.10072 |
Italy | 22.83579 |
Spain | 19.39954 |
Germany | 21.54144 |
The Covid-19 virus is particularly dangerous for people at risk, i.e. people with serious health problems or the elderly. Through this analysis we see that Italy is the country with the highest proportion of people over 65 years of age in relation to the total population. Spain is the country with the lowest proportion. This necessarily influences the number of deaths recorded, as the majority of people who die from coronavirus are elderly.
# ---- Impact of the crisis by industry (France) ----
Impactbyindustry <- read_csv("./data/Impactbyindustry.csv")

# One bar per industry; the bar height is the `Traffic evolution` column,
# supplied through the weight aesthetic.
Impactbyindustry1 <- ggplot(
  Impactbyindustry,
  aes(x = Industries, fill = Industries, weight = `Traffic evolution`)
) +
  geom_bar() +
  scale_fill_viridis_d(option = "plasma") +
  labs(
    title = "Impact of the Covid-19 crisis by industry",
    subtitle = "Week 3/30 to 4/5 compared to reference Jan 6th - Feb 16th, France",
    y = "Impact in %"
  ) +
  ggthemes::theme_fivethirtyeight()
Impactbyindustry1

# Second rendering with adjusted text sizes; the x-axis labels and the legend
# title are given size 0.
# NOTE(review): this subtitle gives a different period ("Mar 3rd - Apr 5th")
# from the first rendering ("Week 3/30 to 4/5") - confirm which one is correct.
Impactbyindustry1 +
  theme(
    legend.title = element_text(size = 0, face = "bold"),
    legend.text = element_text(size = 10),
    plot.title = element_text(size = 15),
    plot.subtitle = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(size = 0)
  ) +
  labs(
    title = "Impact of the Covid-19 crisis by industry",
    subtitle = "Mar 3rd - Apr 5th compared to reference Jan 6th - Feb 16th, France",
    y = "Impact in %"
  )
We chose to study the impact on French industries at the start of containment in order to understand the real impact it had on the different sectors. As we can see, it is the events and tourism sectors that have suffered the most from this containment. This makes perfect sense because, as far as events are concerned, government directives prohibit all gatherings of any kind, and then, as far as tourism is concerned, travel bans and border closures have contributed to the collapse of this industry.
One sector in particular has ‘benefited’ from this situation, namely supermarkets. Indeed, the French have rushed to the supermarkets to stock up on provisions. In addition, government directives prohibiting Amazon from running its warehouses in France (which was circumvented by Amazon by delivering from its warehouses abroad) also contributed to the rise of supermarkets.
The issue now is how the negatively impacted sectors will be able to emerge from this crisis. Although the end of the containment is scheduled for 11 May, activities related to events or tourism remain on hold. Thus, the survival of many companies in this sector will depend on how long it takes before activities can be resumed normally.
# ---- Tweet collection and text cleaning ----
# Fetch the stored Twitter API token, then search for up to 1000 tweets with
# the hashtag, retweets excluded (search run on 7 May 2020).
get_token()
covidTwitter <- search_tweets(
  "#deconfinement",
  n = 1000, include_rts = FALSE, retryonratelimit = TRUE
)
covidTwitter
users_data(covidTwitter)

# Archive the result as CSV.
# NOTE(review): the file is written under the name "TweetCovid" but read back
# from "./data/TweetCovid.csv" - confirm the file is moved there between runs.
save_as_csv(covidTwitter,
  file_name = "TweetCovid", prepend_ids = TRUE, na = "",
  fileEncoding = "UTF-8"
)
covidTwitter2 <- read_csv("./data/TweetCovid.csv")

# Keep only the user name and the tweet text.
tweets.deconfinement <- select(covidTwitter2, screen_name, text)

# Strip retweet / "via" attributions.
tweets.deconfinement$stripped_text1 <- gsub(
  "(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets.deconfinement$text
)
# Strip complete URLs. The previous pattern "http\\w+" removed only the scheme
# and left "://t.co/..." behind.
tweets.deconfinement$stripped_text1 <- gsub(
  "https?://\\S+", "", tweets.deconfinement$stripped_text1
)
# Strip picture links.
tweets.deconfinement$stripped_text1 <- gsub(
  "pictwitter\\w+ *", "", tweets.deconfinement$stripped_text1
)
# Strip any remaining bare t.co short link. The dot is escaped: the previous
# pattern "t.co" matched "t", any character, "co", and therefore also removed
# letters from ordinary words such as "protocole".
tweets.deconfinement$stripped_text1 <- gsub(
  "\\bt\\.co(/\\S*)?", "", tweets.deconfinement$stripped_text1
)

# One row per word, without the user name.
tweets.deconfinement_stem <- unnest_tokens(
  select(tweets.deconfinement, stripped_text1), word, stripped_text1
)

# Remove stop words; the join key is stated explicitly.
# NOTE(review): stopwordslangs is used unfiltered, so a word listed for any
# language is removed - confirm whether only French stop words are wanted.
cleaned_tweets.deconfinement <- anti_join(
  tweets.deconfinement_stem, stopwordslangs,
  by = "word"
)
head(tweets.deconfinement$text)
[1] "Une petite douceur signée T. Waits pour cette mi-journée.\nEnjoy !\n#Deconfinement #lindon #PlusJamaisCa\nhttps://t.co/U2rbRgwiHG"
[2] "@PENELOPPE192 @mapitoma J'ai vu/lu un tweet du Dr @Drmartyufml qui lui s'y opposait.\nJe pense qu'il doit y en avoir d'autres mais à cet instant, c'est seul que j'ai pu lire sur le sujet.\n@ordre_medecins une réaction!!??🤔🙄\n#masques #deconfinement #StopCovidFlicage #11mai"
[3] "@mapitoma Faudrait sûrement qu'il se les applique ses \"sermons\" : inventer, se réinventer et surtout qu'il commence à agir plutôt que de rester indéfiniment dans la communication.😲\nSans doute ignore-t-il qu'il est président de la France !?\n🙄\n#culture #Macron #Deconfinement #PlusJamaisCa"
[4] "@BFMTV Dans le village gaulois qui résiste, Obélix n'a n'en plus besoin car tombé dedans jeune.\nPour JM Blanquer, à défaut d'être tomber dansla potion qui décuple en permanence les forces, l'a été dans une autre, celle de la mauvaise foi.\n#PLUSJAMAISLREM #11mai2020 #11mai #Deconfinement"
[5] "@Poulin2012 Je commence à rêver d'un mois de mai 2020 qui ressemblerait à s'y méprendre à un certain mois de mai 1968. Il suffit tout simplement d'une volonté populaire.\nDes #masques plutôt que du #flicage!\n#ecoles #Deconfinement #PlusJamaisCa #ousontlesmasques #11mai\nhttps://t.co/dojE0uenEW"
[6] "@Brevesdepresse Les entreprises pourront toujours les proposer, sous la forme de volontariat, aux salariés. Et zou, le tour est joué. Ça, le @gouvernementFR le sait très bien!\n#coronavirus #Deconfinement #Deconfinement11Mai #depistage #masques #PlusJamaisCa #PLUSJAMAISLREM"
# Bar chart of the 20 most frequent words. After coord_flip() the `x`
# aesthetic (the word) is drawn on the vertical axis, so `x` carries the word
# label and `y` the count label (they were swapped before).
cleaned_tweets.deconfinement %>%
  count(word, sort = TRUE) %>%
  top_n(20, n) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_classic() +
  labs(
    x = "Unique words",
    y = "Count",
    title = "Unique words counts found in #deconfinement tweets"
  )

# Score each word with the Bing sentiment lexicon and count occurrences.
# NOTE(review): the Bing lexicon is English; on French tweets only words that
# are spelled like English entries get scored - consider a French lexicon.
bing_deconfinement <- cleaned_tweets.deconfinement %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# Top 10 contributing words per sentiment, one facet per sentiment.
bing_deconfinement %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    title = "Tweets containing #deconfinement (07/05/2020)",
    y = "Contribution to sentiment",
    x = NULL
  ) +
  coord_flip() +
  theme_bw()
Our last analysis was done with the rtweet package. We decided to conduct a sentiment study based on 1,000 tweets from the French population containing the hashtag #deconfinement, which means “end of lockdown”. This study was conducted on May 7, 4 days before the end of the lockdown.
Here is the conclusion of this study:
For negative feelings, the word contamination comes first, followed by strict, catastrophe and impatience. Thus, we can see that the French people fear contamination again (and therefore a potential relaunch of the epidemic) but are also critical of the strict laws put in place by the government to contain the epidemic.
For positive feelings, the words caution / cautious come first, followed by favorable and sustainable. We therefore note that the French are cautious about the end of the containment to come. If we make the link with the negative feelings, we can deduce a correlation between the word contamination and the need for caution highlighted in the French tweets.
At a time when countries are beginning to enter a phase of deconfinement, we must remain cautious with regard to this epidemic, which is still raging throughout the world. Indeed, until a vaccine or treatment is found and proven effective, all citizens of the world will have to adapt their lifestyles to avoid contributing to the spread of the virus.
It seems legitimate for States to lift their lockdowns in order to avoid aggravating their economic situation, which has already been severely damaged by this unprecedented crisis. Nevertheless, a second wave of epidemics must be avoided at all costs, which would have catastrophic consequences in both human and financial terms. If a second wave were to occur, it seems unlikely that countries would again decide to confine their populations. Nevertheless, it is clear that certain basic freedoms will be restricted in order to contain the epidemic as effectively as possible.
We have a lot to learn from this crisis. First of all, it seems important to note that the relocations implemented by developed countries can lead them to a certain dependency in the event of a major crisis and put them in delicate situations.
Secondly, since the global medical resources did not seem to be optimized to deal with such a health crisis, it is clear that medical systems around the world will organize themselves in the future to avoid a repeat of such a situation. Routine hygiene habits are also likely to change.
Thirdly, our ways of working are likely to change, as workers have learned to telework in order to be able to continue their activity, so it is very likely that this way of working will continue for years to come.
Finally, our relationship to ecology will most likely emerge changed as a result of this crisis. We have been able to observe that the partial and widespread cessation of human activities has contributed to a marked improvement in the quality of the air, but also of rivers, seas and oceans.
This crisis makes us aware that despite the technological/medical/social advances to which we have access, we are and remain vulnerable to a situation like this. It is a real lesson of humility for Humanity.
Take care of yourself and your loved ones in this difficult time.
library(dplyr)
library(tidyr)
library(leaflet)
library(leaflet.extras)
library(leaftime)
library(purrr)
library(leafpop)
library(ggplot2)
library(plotly)
library(kableExtra)
library(readr)
library(readxl)
library(dplyr)
library(rAmCharts)
library(lubridate)
library(gganimate)
library(rtweet)
library(tidytext)
library(lubridate)
library(rdbnomics)
# Load the daily Covid-19 records and derive running totals per location.
CoronavirusWorld <- utils::read.csv(
  "https://raw.githubusercontent.com/RamiKrispin/coronavirus-csv/master/coronavirus_dataset.csv"
)

# Empty strings anywhere in the table are treated as missing values.
CoronavirusWorld[CoronavirusWorld == ""] <- NA

# Display name: the province when one is given, otherwise the country.
# paste0() turns the columns into plain character vectors before ifelse().
CoronavirusWorld$name <- with(
  CoronavirusWorld,
  ifelse(is.na(Province.State), paste0(Country.Region), paste0(Province.State))
)
CoronavirusWorld$date <- as.Date(CoronavirusWorld$date)

# One column per value of `type`, holding that day's `cases`.
CoronavirusWorld2 <- CoronavirusWorld %>%
  spread("type", "cases")

# Running totals within each `name`; the result stays grouped by `name`.
WorldSpreadCovid <- CoronavirusWorld2 %>%
  group_by(name) %>%
  mutate(
    CumulConfirmes = cumsum(confirmed),
    CumulMorts = cumsum(death),
    CumulSoignes = cumsum(recovered)
  )
# Map a cumulative count to the marker-size category used on the world map.
#
# cumul: numeric vector of cumulative counts (NA allowed).
# Returns a numeric vector of the same length:
#   <= 0 -> 0, 1-100 -> 1, 101-1000 -> 2, 1001-3000 -> 4, 3001-5000 -> 5,
#   5001-10000 -> 8, 10001-20000 -> 10, 20001-50000 -> 15,
#   50001-100000 -> 20, 100001-200000 -> 25, > 200000 -> 30; NA stays NA.
#
# This replaces three copy-pasted 11-level ifelse() ladders. In those ladders
# a total below -1 (possible after downward data corrections) or a non-integer
# value between two bands fell through to the fallback value 35 and was drawn
# as the largest bubble; such values now land in the band they belong to.
covid_size_category <- function(cumul) {
  upper_bounds <- c(0, 100, 1000, 3000, 5000, 10000, 20000, 50000, 100000, 200000)
  sizes <- c(0, 1, 2, 4, 5, 8, 10, 15, 20, 25, 30)
  # left.open = TRUE makes every band (lower, upper], matching the "<=" upper
  # limits of the original conditions.
  sizes[findInterval(cumul, upper_bounds, left.open = TRUE) + 1L]
}

# category1: confirmed cases, category2: deaths, category3: recoveries.
WorldSpreadCovid$category1 <- covid_size_category(WorldSpreadCovid$CumulConfirmes)
WorldSpreadCovid$category2 <- covid_size_category(WorldSpreadCovid$CumulMorts)
WorldSpreadCovid$category3 <- covid_size_category(WorldSpreadCovid$CumulSoignes)
# Interactive world map: one bubble per location and per measure, sized by the
# category columns, with an HTML popup that bolds the measure of that layer.

# Latest snapshot. WorldSpreadCovid is still grouped by `name`, so max(date)
# is evaluated within each location.
WorldMapCovid <- WorldSpreadCovid %>%
  filter(date == max(date))

# Popup for the "Confirmed" layer (confirmed count in bold).
content <- with(WorldMapCovid, paste(
  "<strong>", name, "</strong>", "<br/>",
  "<strong>", CumulConfirmes, "Confirmed", "</strong>", "<br/>",
  CumulMorts, "Death", "<br/>",
  CumulSoignes, "Recovered"
))
# Popup for the "Death" layer (death count in bold).
content2 <- with(WorldMapCovid, paste(
  "<strong>", name, "</strong>", "<br/>",
  CumulConfirmes, "Confirmed", "<br/>",
  "<strong>", CumulMorts, "Death", "</strong>", "<br/>",
  CumulSoignes, "Recovered"
))
# Popup for the "Recovered" layer (recovered count in bold).
content3 <- with(WorldMapCovid, paste(
  "<strong>", name, "</strong>", "<br/>",
  CumulConfirmes, "Confirmed", "<br/>",
  CumulMorts, "Death", "<br/>",
  "<strong>", CumulSoignes, "Recovered", "</strong>"
))

# Layers are added in the order Confirmed, Recovered, Death.
leaflet(WorldMapCovid) %>%
  addFullscreenControl() %>%
  addProviderTiles(providers$CartoDB.DarkMatter) %>%
  addCircleMarkers(
    group = "Confirmed",
    radius = ~ sqrt(category1) * 5,
    popup = content,
    color = "orange",
    stroke = FALSE,
    fillOpacity = 0.6
  ) %>%
  addCircleMarkers(
    group = "Recovered",
    radius = ~ sqrt(category3) * 5,
    popup = content3,
    color = "green",
    stroke = FALSE,
    fillOpacity = 1
  ) %>%
  addCircleMarkers(
    group = "Death",
    radius = ~ sqrt(category2) * 5,
    popup = content2,
    color = "red",
    stroke = FALSE,
    fillOpacity = 1
  ) %>%
  addLayersControl(
    overlayGroups = c("Confirmed", "Recovered", "Death"),
    options = layersControlOptions(collapsed = FALSE),
    position = "topright"
  )
# Data for the animated "top 10 locations" bar chart.
#
# The script only creates `summary_df2` further down, so a top-to-bottom run
# used to stop here with "object 'summary_df2' not found". Build it from the
# already-loaded CoronavirusWorld table instead (same CSV, one column per
# case type); the later definition still overwrites it for the later charts.
summary_df2 <- CoronavirusWorld %>%
  select(-name) %>%
  spread("type", "cases")
summary_df2

# Display name: province when given, otherwise country.
summary_df2$Province.State[summary_df2$Province.State %in% ""] <- NA
summary_df2$name <- ifelse(
  is.na(summary_df2$Province.State),
  paste0(summary_df2$Country.Region),
  paste0(summary_df2$Province.State)
)

# Running totals per location, stored as doubles.
PlotCumsumConfirmed <- summary_df2 %>%
  group_by(name) %>%
  mutate(CumulConfirmes = as.numeric(cumsum(confirmed)))
PlotCumsumDeath <- summary_df2 %>%
  group_by(name) %>%
  mutate(CumulMorts = as.numeric(cumsum(death)))

# Rank locations within each date and keep the ten highest.
Plot_formatted <- PlotCumsumConfirmed %>%
  mutate(date = ymd(date)) %>%
  group_by(date) %>%
  mutate(
    rank = rank(-CumulConfirmes),
    # Relative to the day's leader. max() replaces CumulConfirmes[rank == 1],
    # which has length 0 (or > 1) when the top rank is tied and then breaks
    # mutate().
    Value_rel = CumulConfirmes / max(CumulConfirmes, na.rm = TRUE),
    Value_lbl = paste0(" ", round(CumulConfirmes))
  ) %>%
  filter(rank <= 10) %>%
  ungroup()
# Static frame of the bar race: one horizontal bar per ranked location, drawn
# with geom_tile so bar positions can be animated, with the location name to
# the left of the bar and the value label at its end.
StaticPlotCovid <- ggplot(
  Plot_formatted,
  aes(
    x = rank,
    group = name,
    fill = as.factor(name),
    colour = as.factor(name)
  )
) +
  # Tile centred at half the value and as tall as the value = bar from 0.
  geom_tile(
    aes(y = CumulConfirmes / 2, height = CumulConfirmes, width = 0.9),
    alpha = 0.8,
    colour = NA
  ) +
  geom_text(aes(y = 0, label = paste(name, " ")), vjust = 0.2, hjust = 1) +
  geom_text(aes(y = CumulConfirmes, label = Value_lbl, hjust = 0)) +
  # clip = "off" lets the text labels extend outside the panel.
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  guides(colour = FALSE, fill = FALSE) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(size = 0.1, colour = "grey"),
    panel.grid.minor.x = element_line(size = 0.1, colour = "grey"),
    plot.title = element_text(
      size = 25, hjust = 0.5, face = "bold", colour = "grey", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 18, hjust = 0.5, face = "italic", colour = "grey"
    ),
    plot.caption = element_text(
      size = 8, hjust = 0.5, face = "italic", colour = "grey"
    ),
    plot.background = element_blank(),
    plot.margin = margin(2, 2, 2, 4, "cm")
  )
# Animate the static bar chart over `date` and render it to a GIF file.
library(gganimate)
anim <- StaticPlotCovid +
  transition_states(date, transition_length = 4, state_length = 1) +
  view_follow(fixed_x = TRUE) + # let the value axis follow the data
  labs(
    title = 'Évolution du nombre de cas de Covid 19 par pays : {closest_state}',
    subtitle = "10 premiers pays / régions",
    caption = "Nombre de cas confirmés | Data Source: Rami Krispin/coronavirus"
  )
# 1000 frames at 15 fps, written to "gganimcovid.gif".
animate(
  anim,
  nframes = 1000,
  fps = 15,
  width = 1200,
  height = 1000,
  renderer = gifski_renderer("gganimcovid.gif")
)
# CAC 40 closing values read from a local workbook, plotted as a line.
CAC40 <- read_xlsx("./data/PX1-3.xlsx") %>%
  mutate(date = as.Date(date)) # convert the date column to Date

ggplot(CAC40, aes(x = date, y = cloture)) +
  geom_line(size = 0.78) +
  scale_color_hue() +
  labs(
    x = "Date",
    y = "Closing value",
    title = "Evolution of the stock market index CAC 40",
    subtitle = "Since January 2020"
  ) +
  hrbrthemes::theme_modern_rc()
# S&P 500 closing values read from a local workbook, plotted as a line.
SP500 <- read_xlsx("./data/SPX.xlsx") %>%
  mutate(date = as.Date(date)) %>% # convert the date column to Date
  dplyr::rename(cloture = fermeture) # same column name as the CAC 40 table

ggplot(SP500, aes(x = date, y = cloture)) +
  geom_line(size = 0.78, colour = "#0c4c8a") +
  labs(
    x = "Date",
    y = "Closing value",
    title = "Evolution of the stock market index S&P 500",
    subtitle = "Since February 2020"
  ) +
  hrbrthemes::theme_modern_rc()
# Reload the raw case records and print two per-country summaries.
coronavirus <- utils::read.csv(
  "https://raw.githubusercontent.com/RamiKrispin/coronavirus-csv/master/coronavirus_dataset.csv"
)

# Total cases per country and case type, largest totals first.
summary_df <- coronavirus %>%
  group_by(Country.Region, type) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(desc(total_cases))
head(summary_df, 20)

coronavirus$date <- as.Date(coronavirus$date, format = "%Y-%m-%d")

# Cases recorded on the most recent date: one row per country, one column per
# case type, ordered by confirmed cases (descending).
coronavirus %>%
  filter(date == max(date)) %>%
  select(country = Country.Region, type, cases) %>%
  group_by(country, type) %>%
  summarise(total_cases = sum(cases)) %>%
  pivot_wider(
    names_from = type,
    values_from = total_cases
  ) %>%
  arrange(desc(confirmed))
# Daily cases by type for France, Spain, Germany and Italy, one facet per
# country, rendered as an interactive plotly chart.
CombineCountries <- coronavirus %>%
  filter(Country.Region %in% c("France", "Spain", "Germany", "Italy"))

CombineGraph <- ggplot(CombineCountries, aes(x = date, y = cases))
CombineGraph2 <- CombineGraph +
  geom_bar(aes(colour = type), stat = "identity") +
  facet_wrap(~Country.Region) +
  xlab("Date") +
  ggtitle("Coronavirus cases") +
  theme_linedraw()

# Drop the y-axis title, then convert to an interactive widget and show it.
CombineGraph3 <- ggplotly(CombineGraph2 + theme(axis.title.y = element_blank()))
CombineGraph3
# Totals per country and case type for the four countries, plus the
# death/confirmed ratio (in percent), shown as a styled HTML table.
DataCountries <- CombineCountries %>%
  group_by(Country.Region, type) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(Country.Region)

# One column per case type, stored as doubles.
DataCountries2 <- DataCountries %>%
  spread("type", "total_cases") %>%
  mutate(
    confirmed = as.numeric(confirmed),
    death = as.numeric(death),
    recovered = as.numeric(recovered)
  )
DataCountries2[["Ratio death/confirmed"]] <-
  DataCountries2$death / DataCountries2$confirmed * 100

library(kableExtra)
# Third column in bold; each of the four rows gets its own background colour.
kable(DataCountries2) %>%
  kable_styling("striped", full_width = FALSE) %>%
  column_spec(3, bold = TRUE) %>%
  row_spec(1, bold = TRUE, color = "white", background = "blue") %>%
  row_spec(2, bold = TRUE, color = "white", background = "red") %>%
  row_spec(3, bold = TRUE, color = "white", background = "green") %>%
  row_spec(4, bold = TRUE, color = "white", background = "orange")
# Cumulative confirmed cases and deaths for France, Spain, Germany and Italy.

# One column per case type.
summary_df2 <- coronavirus %>%
  spread("type", "cases")

# France is restricted to rows with an empty Province.State; the other three
# countries are taken as they are.
SpreadCountries <- summary_df2 %>%
  filter(
    (Country.Region == "France" & Province.State == "") |
      Country.Region %in% c("Spain", "Germany", "Italy")
  )
# Drop the first column by position.
SpreadCountries1 <- SpreadCountries[, -1]

# Cumulative confirmed cases, one line per country.
CountriesConfirmed <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeConfirmed = cumsum(confirmed))
ggplot(
  CountriesConfirmed,
  aes(x = date, y = CumulativeConfirmed, colour = Country.Region)
) +
  geom_line(size = 1L) +
  scale_color_hue() +
  labs(
    y = "Number of confirmed cases (cumulative)",
    title = "Number of people infected with Covid-19",
    caption = "Coronavirus API / Rami Krispin"
  ) +
  ggthemes::theme_stata()

# Cumulative deaths, one line per country.
CountriesDeath <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeDeath = cumsum(death))
ggplot(
  CountriesDeath,
  aes(x = date, y = CumulativeDeath, colour = Country.Region)
) +
  geom_line(size = 1L) +
  scale_color_hue() +
  labs(
    y = "Number of death cases (cumulative)",
    title = "Deaths related to Covid-19",
    caption = "Coronavirus API / Rami Krispin"
  ) +
  ggthemes::theme_stata()
# Combine the three cumulative series (confirmed, death, recovered) into one
# long table and draw them as an interactive faceted bar chart.
CountriesRecovered <- SpreadCountries1 %>%
  group_by(Country.Region) %>%
  mutate(CumulativeRecovered = cumsum(recovered))

# The three tables share every column except their own cumulative one, so
# joining on all shared columns simply appends the cumulative columns.
cumul_join_keys <- c(
  "Country.Region", "Lat", "Long", "date", "confirmed", "death", "recovered"
)
LeftJoin1 <- left_join(CountriesConfirmed, CountriesDeath, by = cumul_join_keys)
LeftJoin2 <- LeftJoin1 %>%
  left_join(CountriesRecovered, by = cumul_join_keys) %>%
  # Keep only country, date and the three cumulative columns.
  select(-confirmed, -death, -recovered, -Lat, -Long) %>%
  mutate(
    CumulativeConfirmed = as.numeric(CumulativeConfirmed),
    CumulativeDeath = as.numeric(CumulativeDeath),
    CumulativeRecovered = as.numeric(CumulativeRecovered)
  )

# Long format: `Total` names the series, `Value` holds the count.
FinalCumul <- LeftJoin2 %>%
  gather(Total, Value, -Country.Region, -date)

CombineCumul <- ggplot(FinalCumul, aes(x = date, y = Value))
CombineCumul2 <- CombineCumul +
  geom_bar(aes(colour = Total), stat = "identity") +
  facet_wrap(~Country.Region) +
  xlab("Date") +
  ggtitle("Cumulative headcount by category (Covid-19)") +
  theme_linedraw()

# Interactive version of the cumulative chart, without a y-axis title.
CombineCumul3 <- ggplotly(CombineCumul2 + theme(axis.title.y = element_blank()))
CombineCumul3
# Eurostat series fetched through DBnomics:
# annual hospital employment, head count per hundred thousand inhabitants,
# for Italy, France, Spain and Germany. Rows without a value are dropped.
HospitalEmployment <- rdb(
  ids = c(
    "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.IT",
    "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.FR",
    "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.ES",
    "Eurostat/hlth_rs_prshp1/A.HOSP.HC_HTHAB.DE"
  )
) %>%
  filter(!is.na(value))

# Keep the 2017 observations and give the country column a short name.
HospitalEmployment2017 <- HospitalEmployment %>%
  filter(original_period == 2017)
colnames(HospitalEmployment2017)[
  colnames(HospitalEmployment2017) == "Geopolitical entity (reporting)"
] <- "Country"
HospitalEmployment2017$Country <- recode(
  HospitalEmployment2017$Country,
  "Germany (until 1990 former territory of the FRG)" = "Germany"
)

# One bar per country with the value printed just above it.
ggplot(
  HospitalEmployment2017,
  aes(x = Country, y = value, fill = Country, label = value)
) +
  geom_col() +
  geom_text(
    aes(label = value, colour = Country),
    position = position_dodge(width = 1),
    vjust = -0.10
  ) +
  scale_fill_hue() +
  labs(
    x = "Country",
    y = "Ratio for 100.000 inhabitants",
    title = "Number of employees in hospitals per 100,000 inhabitants",
    caption = "Source : Eurostat (2017)"
  ) +
  ggthemes::theme_stata() +
  theme(plot.title = element_text(hjust = 0.5))
# Eurostat series fetched through DBnomics:
# annual curative care beds in hospitals, per hundred thousand inhabitants,
# for France, Italy, Spain and Germany. Rows without a value are dropped.
HospitalBeds <- rdb(
  ids = c(
    "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.FR",
    "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.IT",
    "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.ES",
    "Eurostat/hlth_rs_bds/A.P_HTHAB.HBEDT_CUR.DE"
  )
) %>%
  filter(!is.na(value))

# Keep the 2017 observations and give the country column a short name.
HospitalBeds2017 <- filter(HospitalBeds, original_period == 2017)
base::colnames(HospitalBeds2017)[colnames(HospitalBeds2017) == "Geopolitical entity (reporting)"] <- "Country"
HospitalBeds2017$Country <- recode(
  HospitalBeds2017$Country,
  "Germany (until 1990 former territory of the FRG)" = "Germany"
)

# One bar per country with the value printed just above it.
# The title used to say "intensive care", but the series queried above is
# curative care beds (HBEDT_CUR), which is a different, broader measure.
ggplot(HospitalBeds2017) +
  aes(x = Country, y = value, fill = Country, label = value) +
  geom_col() +
  geom_text(
    aes(label = value, colour = Country),
    position = position_dodge(width = 1),
    vjust = -0.10
  ) +
  labs(
    y = "Ratio for 100.000 inhabitants",
    x = "Country",
    title = "Curative care hospital beds per 100,000 inhabitants",
    caption = "Source : Eurostat (2017)"
  ) +
  ggthemes::theme_stata() +
  theme(plot.title = element_text(hjust = 0.5))
# Share of the population aged 65 or more in 2019 for France, Italy, Spain
# and Germany, from two Eurostat series fetched through DBnomics.

# Total population on 1 January.
TotalPop <- rdb(
  ids = c(
    "Eurostat/tps00001/A.JAN.FR",
    "Eurostat/tps00001/A.JAN.IT",
    "Eurostat/tps00001/A.JAN.ES",
    "Eurostat/tps00001/A.JAN.DE"
  )
) %>%
  filter(!is.na(value))
TotalPop2019 <- filter(TotalPop, original_period == 2019)

# Population aged 65 or more.
Pop65andMore <- rdb(
  ids = c(
    "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.FR",
    "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.IT",
    "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.ES",
    "Eurostat/demo_pjanbroad/A.NR.Y_GE65.T.DE"
  )
) %>%
  filter(!is.na(value))
Pop65andMore2019 <- filter(Pop65andMore, original_period == 2019)

# Pair numerator, denominator and label by country code rather than by row
# position: the two requests are independent, and nothing guarantees that
# their rows come back in the same order as the hard-coded country names.
# The country code is the last dot-separated part of each requested series id.
country_names <- c(FR = "France", IT = "Italy", ES = "Spain", DE = "Germany")
total_by_geo <- setNames(
  TotalPop2019$value,
  sub(".*\\.", "", TotalPop2019$series_code)
)
senior_by_geo <- setNames(
  Pop65andMore2019$value,
  sub(".*\\.", "", Pop65andMore2019$series_code)
)

Country <- unname(country_names)
Ratio <- unname(
  senior_by_geo[names(country_names)] / total_by_geo[names(country_names)] * 100
)

TableauRatioPop <- data.frame(Country, Ratio)
colnames(TableauRatioPop)[2] <- "Population over 65 years old ratio"
TableauRatioPop %>%
  kable() %>%
  kable_styling()
# Bar chart of the traffic evolution per industry, read from a local CSV.
Impactbyindustry <- read_csv("./data/Impactbyindustry.csv")

# geom_bar() with a `weight` aesthetic sums `Traffic evolution` per industry,
# so each bar's height is that industry's value.
Impactbyindustry1 <- ggplot(
  Impactbyindustry,
  aes(x = Industries, fill = Industries, weight = `Traffic evolution`)
) +
  geom_bar() +
  scale_fill_viridis_d(option = "plasma") +
  labs(
    y = "Impact in %",
    title = "Impact of the Covid-19 crisis by industry",
    subtitle = "Week 3/30 to 4/5 compared to reference Jan 6th - Feb 16th, France"
  ) +
  ggthemes::theme_fivethirtyeight()
Impactbyindustry1
# Re-draw the industry-impact chart with adjusted text sizes: legend title and
# x-axis tick labels are shrunk to size 0, title/subtitle/legend text resized.
# NOTE(review): the base plot's subtitle reads "Week 3/30 to 4/5"; "Mar 3rd"
# below may be a typo for "Mar 30th" - confirm before publishing.
Impactbyindustry1 +
  labs(
    y = "Impact in %",
    title = "Impact of the Covid-19 crisis by industry",
    subtitle = "Mar 3rd - Apr 5th compared to reference Jan 6th - Feb 16th, France"
  ) +
  theme(
    legend.title = element_text(size = 0, face = "bold"),
    legend.text = element_text(size = 10),
    plot.title = element_text(size = 15),
    plot.subtitle = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(size = 0)
  )
# Download tweets tagged #deconfinement, persist them, and split the cleaned
# text into one lower-cased word per row with stop words removed.
get_token() # retrieve the rtweet API token
covidTwitter <- search_tweets(
  "#deconfinement",
  n = 1000,
  include_rts = FALSE,
  retryonratelimit = TRUE
) # search run on 7 May 2020
covidTwitter
users_data(covidTwitter)
save_as_csv(covidTwitter,
  file_name = "TweetCovid", prepend_ids = TRUE, na = "",
  fileEncoding = "UTF-8"
)
# NOTE(review): the export above is named "TweetCovid" while the import below
# reads "./data/TweetCovid.csv" - confirm the file is moved/saved where expected.
covidTwitter2 <- read_csv("./data/TweetCovid.csv")

# Keep only the author and the tweet text.
tweets.deconfinement <- select(covidTwitter2, screen_name, text)

# Strip retweet / "via" attributions.
tweets.deconfinement$stripped_text1 <- gsub(
  "(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets.deconfinement$text
)
# Strip complete URLs. The previous pattern "http\\w+" stopped at the ":" and
# left "://t.co/<id>" behind, so link ids ended up counted as words.
tweets.deconfinement$stripped_text1 <- gsub(
  "https?://\\S+", "", tweets.deconfinement$stripped_text1
)
# Strip picture references.
tweets.deconfinement$stripped_text1 <- gsub(
  "pictwitter\\w+ *", "", tweets.deconfinement$stripped_text1
)
# Strip scheme-less t.co links. The dot is escaped: the previous "t.co" regex
# matched any character in its place and deleted text such as "t co" inside
# "et covid".
tweets.deconfinement$stripped_text1 <- gsub(
  "\\bt\\.co/\\S+", "", tweets.deconfinement$stripped_text1
)

# Drop the user id and tokenise: one row per word.
tweets.deconfinement_stem <- unnest_tokens(
  select(tweets.deconfinement, stripped_text1), word, stripped_text1
)
# Remove stop words (rtweet's `stopwordslangs` table), matching on `word`.
cleaned_tweets.deconfinement <- anti_join(
  tweets.deconfinement_stem, stopwordslangs,
  by = "word"
)
head(tweets.deconfinement$text)
# Bar chart of the 20 most frequent words. After coord_flip() the `x`
# aesthetic (the word) is drawn on the vertical axis, so `x` carries the word
# label and `y` the count label (they were swapped before).
cleaned_tweets.deconfinement %>%
  count(word, sort = TRUE) %>%
  top_n(20, n) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_classic() +
  labs(
    x = "Unique words",
    y = "Count",
    title = "Unique words counts found in #deconfinement tweets"
  )

# Score each word with the Bing sentiment lexicon and count occurrences.
# NOTE(review): the Bing lexicon is English; on French tweets only words that
# are spelled like English entries get scored - consider a French lexicon.
bing_deconfinement <- cleaned_tweets.deconfinement %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# Top 10 contributing words per sentiment, one facet per sentiment.
bing_deconfinement %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    title = "Tweets containing #deconfinement (07/05/2020)",
    y = "Contribution to sentiment",
    x = NULL
  ) +
  coord_flip() +
  theme_bw()
Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".