这篇博文主要讲解如何挖掘R包之间的依赖关系网。大家在装包的时候会注意到,一些包需要好几个依赖包,而这些依赖包又有各自的依赖包,由此形成一个复杂的关系网。
1、数据准备
library(tidyverse)
# NOTE(review): the three statements below look truncated (the assignment
# part seems to have been lost when the post was extracted); presumably each
# one assigned a new value to `cran` -- TODO restore from the original post.
cran ## drop the column whose name is duplicated
cran ## convert to data-table form
cran
2、R包下载前10
## Get the 10 most-downloaded R packages of the past month
library(cranlogs)
# NOTE(review): the statement below looks truncated; presumably it assigned
# the top-10 download table to `rptop10` -- TODO restore from the original post.
rptop10 ## draw the bar chart
# Horizontal bar chart of the packages in `rptop10`, ordered by their
# download count; the subtitle shows the running R version string.
ggplot(data = rptop10,
       mapping = aes(x = reorder(package, count), y = count)) +
  geom_bar(fill = 'steelblue', stat = 'identity') +
  coord_flip() +
  labs(
    x = '',
    y = '下载量',
    title = '过去一个月R包下载top10',
    subtitle = R.version.string
  )
3.1、ggplot2关系网
#绘制关系网络图
library(networkD3)
library(stringr)
# Keep only the rows of `cran` whose Package appears in `rptop10$package`.
netop10 <- cran[is.element(cran$Package, rptop10$package), ]
#' Collect the packages that a package imports or suggests
#'
#' Looks `x` up in the global `cran` data frame (columns `Package`, `Imports`
#' and `Suggests`), splits both comma-separated fields and strips everything
#' after the package name (version constraints such as `(>= 1.0)`).
#'
#' @param x Package name; coerced to character.
#' @return Character vector with the names found in `Imports` followed by the
#'   names found in `Suggests`. A field that is `NA` contributes a single
#'   `NA`, which the calling code filters out later.
exfun <- function(x) {
  x <- as.character(x)
  rows <- cran$Package == x

  # Turn one comma-separated dependency field into bare package names.
  parse_field <- function(field) {
    entries <- trimws(unlist(strsplit(as.character(field), ",", fixed = TRUE)))
    if (length(entries) == 0L) {
      return(character(0))
    }
    # Package names may contain dots (data.table, R.utils), so the dot has to
    # be part of the name pattern; a plain \w would cut the name at the dot.
    hit_len <- attr(regexpr("^[[:alnum:]._]+", entries), "match.length")
    # Keep NA entries as NA and drop entries with no leading name
    # (e.g. the empty piece after a trailing comma).
    keep <- is.na(entries) | (!is.na(hit_len) & hit_len > 0L)
    substr(entries, 1L, hit_len)[keep]
  }

  c(parse_field(cran$Imports[rows]), parse_field(cran$Suggests[rows]))
}
# Dependency network of ggplot2
test = exfun('ggplot2')
# NOTE(review): the next statement looks truncated; presumably `NetworkData`
# was assigned an edge table built from `test` before being passed to
# simpleNetwork() -- TODO restore from the original post.
NetworkData simpleNetwork(NetworkData, fontSize = 10,linkDistance = 120,
nodeColour='red', opacity = 1,zoom = T)
3.2、ggplot2二级关系网
# Second-level network: for every package returned by exfun('ggplot2'),
# look up that package's own imports/suggests.
# simplify = FALSE keeps the result a named list even when every package
# happens to return the same number of entries (sapply would otherwise
# collapse it into a matrix and break the per-package bookkeeping below).
test2 <- sapply(exfun('ggplot2'), exfun, simplify = FALSE)

# ss: the parent package name, repeated once per non-missing dependency.
# Counting with is.na() keeps ss aligned with tt, which drops NA via na.omit().
ss <- rep(
  names(test2),
  times = vapply(test2, function(deps) sum(!is.na(deps)), integer(1))
)

# tt: all non-missing second-level dependencies, flattened in the same order.
tt <- as.character(na.omit(unlist(test2)))
# NOTE(review): this statement looks truncated; presumably `NetworkData1` was
# assigned an edge table whose Target column is c(test, tt) -- TODO restore
# from the original post.
NetworkData1 Target=c(test,tt))
# Draw the network stored in NetworkData1 with blue nodes and zooming
# disabled. FALSE is spelled out because `F` is an ordinary, reassignable
# variable.
simpleNetwork(NetworkData1, fontSize = 10, charge = -50,
              nodeColour = 'blue', opacity = 1, zoom = FALSE)
4、下载量前100关系网
# NOTE(review): this line looks truncated; presumably `rptop100` was assigned
# the top-100 download table before `netop100` was computed -- TODO restore
# from the original post.
rptop100 netop100=cran[cran$Package %in% rptop100$package,]
# Edge lists for the packages listed in rptop100.
topp <- rptop100$package

# One exfun() lookup per package. simplify = FALSE keeps a named list even
# when all results have equal length; as.character() guarantees the list is
# named by package.
test4 <- sapply(as.character(topp), exfun, simplify = FALSE)

# ss: the parent package name, repeated once per non-missing dependency.
# Counting with is.na() keeps ss aligned with tt, which drops NA via na.omit().
ss <- rep(
  names(test4),
  times = vapply(test4, function(deps) sum(!is.na(deps)), integer(1))
)

# tt: all non-missing dependencies, flattened in the same order as ss.
tt <- as.character(na.omit(unlist(test4)))
# NOTE(review): the first line below looks truncated; presumably it held
# separate assignments to NetworkData3, karate, wc, members and karate_d3
# before the forceNetwork() call -- TODO restore from the original post.
# The forceNetwork() call reads the `links` and `nodes` elements of karate_d3.
NetworkData3 karate wc members karate_d3 forceNetwork(Links = karate_d3$links, Nodes = karate_d3$nodes,
Source = 'source', Target = 'target', NodeID = 'name',
Group = 'group', opacity = 1,linkDistance = 30,
fontSize = 10)
欢迎关注魔方学院QQ群