最近大家不到万不得已不要出门啦,可以好好在家敲敲代码学习学习数据分析!本文介绍了如何使用
腾讯新闻提供的数据接口
[1]
绘制疫情分布的动态地图。
关于疫情的最新新闻
library(sf)
library(ggplot2)
library(tidyverse)
library(DT)
library(hrbrthemes)
# 新闻
jsonlite::fromJSON('https://mat1.gtimg.com/news/images/inews/2020/feiyan/18/data/news.json') %>%
.$news %>%
as_tibble() %>%
DT::datatable()
腾讯新闻里面的中国地图
腾讯新闻里面的中国地图是 ECharts 绘制的,使用的 GEOJSON 数据为:https://mat1.gtimg.com/news/images/inews/2020/feiyan/18/data/china-all.geo.json
我们也可以使用这个数据画个中国地图:
cn
## Reading layer `china-all.geo' from data source `/Users/czx/Desktop/新型冠状病毒肺炎实时疫情发展动态地图绘制/china-all.geo.json' using driver `GeoJSON'
## Simple feature collection with 35 features and 6 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 73.60226 ymin: 3.414725 xmax: 134.7726 ymax: 53.56779
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
ggplot(cn) +
geom_sf(size = 0.1, color = "white", aes(fill = name)) +
scale_fill_viridis_d() +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
theme_modern_rc() +
worldtilegrid::theme_enhance_wtg() +
theme(legend.position = "none")
这个地图数据显然不好,所以腾讯新闻页面上的中国地图的九段线小地图是 P 上去的!实际上图例也是 P 上去的???
绘制最新(1月23日)的疫情分布地图
首先从腾讯新闻提供的接口提取数据:
dataurl
da
library(stringr)
(hintWords %
str_remove("'; // 建议25字以内") %>%
str_replace_all(",", ";\n"))
## [1] "澳门确诊第二例新型肺炎确诊病例;\n山东新增4例新型肺炎确诊病例;\n四川新增3例新型肺炎确诊病例"
recentTime
confirm_num
suspect_num
dead_num
print(paste0("截止", recentTime, "全国一共确诊病例:", confirm_num, " 例;疑似病例:", suspect_num, " 例;死亡:", dead_num, " 人。")) %>%
message()
## [1] "截止2020-01-23 11:20:00全国一共确诊病例:575 例;疑似病例:162 例;死亡:17 人。"
从 1 月 2 日以来的数据:
# 读入区划代码
library(readxl)
code %
mutate(编码 = as.character(编码)) %>%
dplyr::filter(str_detect(编码, "0000")) %>%
select(1:2) %>%
rename(QUHUADAIMA = 编码, area = 地址) %>%
mutate(area = str_remove_all(area, "[市省自治区回族维吾尔特别行政壮]")) %>%
add_row(QUHUADAIMA = "500000", area = "重庆")
real_data
col_names = c('day', 'time', 'country', 'area',
'confirm', 'suspect', 'dead')) %>%
mutate(area = str_remove(area, "武汉")) %>%
dplyr::filter(!is.na(area)) %>%
select(-time, -country) %>%
mutate(day = paste0("2020.", day),
day = lubridate::ymd(day)) %>%
left_join(code) %>%
nest(data = -day) %>%
arrange(day)
real_data
## # A tibble: 13 x 2
## day data
## >
## 1 2020-01-02 [9 × 5]
## 2 2020-01-11 [1 × 5]
## 3 2020-01-12 [1 × 5]
## 4 2020-01-13 [1 × 5]
## 5 2020-01-14 [1 × 5]
## 6 2020-01-15 [1 × 5]
## 7 2020-01-16 [1 × 5]
## 8 2020-01-17 [1 × 5]
## 9 2020-01-18 [1 × 5]
## 10 2020-01-19 [9 × 5]
## 11 2020-01-21 [9 × 5]
## 12 2020-01-22 [27 × 5]
## 13 2020-01-23 [29 × 5]
绘制疫情地图
首先我们绘制一幅最新的(1月23日的)疫情地图:
# 国界线数据
boundary
stringsAsFactors = FALSE)
## Reading layer `线' from data source `/Users/czx/Desktop/新型冠状病毒肺炎实时疫情发展动态地图绘制/quanguo_Line.geojson' using driver `GeoJSON'
## Simple feature collection with 96 features and 2 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: 73.67795 ymin: 3.553559 xmax: 135.2094 ymax: 53.64847
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
# 省界
prov_raw
stringsAsFactors = FALSE)
## Reading layer `quanguo' from data source `/Users/czx/Desktop/新型冠状病毒肺炎实时疫情发展动态地图绘制/quanguo.geojson' using driver `GeoJSON'
## Simple feature collection with 156 features and 4 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 73.67795 ymin: 3.984257 xmax: 135.2075 ymax: 53.64847
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
# 散点
point
stringsAsFactors = FALSE)
## Reading layer `点' from data source `/Users/czx/Desktop/新型冠状病毒肺炎实时疫情发展动态地图绘制/quanguo_Point.geojson' using driver `GeoJSON'
## Simple feature collection with 34 features and 4 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 87.74327 ymin: 20.17287 xmax: 126.6557 ymax: 45.93191
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
cnfont
library(ggnewscale)
# 合并数据
prov %
left_join(real_data$data[12][[1]]) %>%
mutate(confirm = case_when(is.na(confirm) ~ 0, T ~ confirm),
suspect = case_when(is.na(suspect) ~ 0, T ~ suspect),
dead = case_when(is.na(dead) ~ 0, T ~ dead))
ggplot() +
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = confirm), alpha = 0.33,
color = "gray", size = 0.2) +
scale_fill_gradient(name = "确诊",
high = '#de2d26', low = "#ffffff") +
new_scale_fill() +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = suspect), alpha = 0.33,
color = "gray", size = 0.2) +
scale_fill_gradient(name = "疑似",
high = "#e6550d", low = "#ffffff") +
new_scale_fill() +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = dead), alpha = 0.33,
color = "gray", size = 0.2) +
scale_fill_gradient(name = "死亡",
high = "#636363", low = "#ffffff") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = "新型冠状病毒肺炎实时疫情:2020年1月23日",
subtitle = hintWords,
caption = "数据来源:腾讯新闻\n")
由于湖北的数量远高于其它省份,所以还是分组,分成四组:
real_data_block
col_names = c('day', 'time', 'country', 'area',
'confirm', 'suspect', 'dead')) %>%
mutate(area = str_remove(area, "武汉")) %>%
dplyr::filter(!is.na(area)) %>%
select(-time, -country) %>%
mutate(day = paste0("2020.", day),
day = lubridate::ymd(day)) %>%
left_join(code) %>%
mutate(confirm = case_when(is.na(confirm) ~ 0, T ~ confirm),
suspect = case_when(is.na(suspect) ~ 0, T ~ suspect),
dead = case_when(is.na(dead) ~ 0, T ~ dead)) %>%
mutate(
confirm = cut(confirm,
breaks = c(0.0, 0.99, 10.0, 100.0, 10000),
labels = c("无", "1~10 人", "10~100人", "> 100人"),
include.lowest = TRUE),
suspect = cut(suspect,
breaks = c(0.0, 0.99, 10.0, 100.0, 10000),
labels = c("无", "1~10 人", "10~100人", "> 100人"),
include.lowest = TRUE),
dead = cut(dead,
breaks = c(0.0, 0.99, 10.0, 100.0, 10000),
labels = c("无", "1~10 人", "10~100人", "> 100人"),
include.lowest = TRUE)
) %>%
nest(data = -day)
# 合并数据
prov %
left_join(real_data_block$data[13][[1]]) %>%
mutate(confirm = as.character(confirm),
suspect = as.character(suspect),
dead = as.character(dead)) %>%
mutate(confirm = case_when(is.na(confirm) ~ "无", T ~ confirm),
suspect = case_when(is.na(suspect) ~ "无", T ~ suspect),
dead = case_when(is.na(dead) ~ "无", T ~ dead)) %>%
mutate(
confirm = factor(confirm,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人")),
suspect = factor(suspect,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人")),
dead = factor(dead,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人"))
)
# 分三幅地图绘制
# 确诊
p1
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = confirm),
color = "gray", size = 0.2) +
scale_fill_brewer(name = "确诊", palette = "Reds") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = paste0("新型冠状病毒肺炎实时疫情:", real_data_block$day[13]),
subtitle = hintWords,
caption = " ") +
theme(legend.position = c(0.2, 0.3))
# 疑似
p2
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = suspect),
color = "gray", size = 0.2) +
scale_fill_brewer(name = "疑似", palette = "Oranges") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = " ",
subtitle = " \n \n ",
caption = " ") +
theme(legend.position = c(0.2, 0.3))
# 死亡
p3
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = dead),
color = "gray", size = 0.2) +
scale_fill_brewer(name = "死亡", palette = "Greys") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = " ",
subtitle = " \n \n ",
caption = "数据来源:腾讯新闻\n | TidyFriday®") +
theme(legend.position = c(0.2, 0.3))
library(patchwork)
p
p
# library(cowplot)
# save_plot(filename = "2020-01-23.png",
# plot = p, base_width = 12, base_height = 6)
绘制疫情发展的动态地图
绘制动态图表最好的办法就是绘制一系列静态地图然后再合并成动图,所以我们先循环绘制 13 张地图然后再合并:
for(i in 1:13){
prov %
left_join(real_data_block$data[i][[1]]) %>%
mutate(confirm = as.character(confirm),
suspect = as.character(suspect),
dead = as.character(dead)) %>%
mutate(confirm = case_when(is.na(confirm) ~ "无", T ~ confirm),
suspect = case_when(is.na(suspect) ~ "无", T ~ suspect),
dead = case_when(is.na(dead) ~ "无", T ~ dead)) %>%
mutate(
confirm = factor(confirm,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人")),
suspect = factor(suspect,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人")),
dead = factor(dead,
levels = c("无", "1~10 人", "10~100人", "> 100人"),
labels = c("无", "1~10 人", "10~100人", "> 100人"))
)
# 分三幅地图绘制
# 确诊
p1
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = confirm),
color = "gray", size = 0.2) +
scale_fill_brewer(name = "确诊", palette = "Reds") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = paste0("新型冠状病毒肺炎实时疫情:", real_data_block$day[i]),
subtitle = hintWords,
caption = " ") +
theme(legend.position = c(0.2, 0.3))
# 疑似
p2
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = suspect),
color = "gray", size = 0.2) +
scale_fill_brewer(name = "疑似", palette = "Oranges") +
geom_sf(data = subset(prov, FillColor == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & !str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "black", size = 2, family = cnfont) +
geom_sf_text(data = subset(boundary, NAME != "" & str_length(QUHUADAIMA) != 7 & str_detect(QUHUADAIMA, "hai")),
aes(geometry = geometry, label = NAME),
color = "#6AA7DF", size = 2.5,
family = cnfont) +
theme_ipsum(base_family = cnfont) +
coord_sf(crs = "+proj=laea +lat_0=23 +lon_0=113 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") +
worldtilegrid::theme_enhance_wtg() +
labs(title = " ",
subtitle = " \n \n ",
caption = " ") +
theme(legend.position = c(0.2, 0.3))
# 死亡
p3
geom_sf(data = subset(boundary, NAME == ""),
aes(geometry = geometry),
color = "gray", size = 0.5) +
geom_sf(data = subset(prov, FillColor != ""),
aes(geometry = geometry,
fill = dead),
color = "gray", size = 0.2) +