正文
R基本语法
获取帮助文档,查看命令或函数的使用方法、事例或适用范围
>>> ?command
>>> ??command
>>> example(command)
R中的变量
>
> a <- 10
> a
[1] 10
>
>
> a <- "abc"
> a
[1] "abc"
>
>
> a <- TRUE
>
> a
[1] TRUE
>
> b <- T
>
> b
[1] TRUE
>
> d <- FALSE
>
> d
[1] FALSE
>
>
> a <- vector(mode="logical", length=5)
> a
[1] FALSE FALSE FALSE FALSE FALSE
>
> a <- c(1,2,3,4)
> is.vector(a)
[1] TRUE
>
>
>
> a <- matrix(1:20,nrow=5,ncol=4,byrow=T)
> a
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 5 6 7 8
[3,] 9 10 11 12
[4,] 13 14 15 16
[5,] 17 18 19 20
>
> is.matrix(a)
[1] TRUE
>
> dim(a)
[1] 5 4
>
>
> dim(a) <- c(4,4)
Error in dim(a) <- c(4, 4) : dims [product 16]与对象长度[20]不匹配
>
>
> a <- 1:20
> dim(a) <- c(5,4)
> a
[,1] [,2] [,3] [,4]
[1,] 1 6 11 16
[2,] 2 7 12 17
[3,] 3 8 13 18
[4,] 4 9 14 19
[5,] 5 10 15 20
>
> print(paste("矩阵a的行数", nrow(a)))
[1] "矩阵a的行数 5"
> print(paste("矩阵a的列数", ncol(a)))
[1] "矩阵a的列数 4"
>
>
> rownames(a)
NULL
> rownames(a) <- c('a','b','c','d','e')
> a
[,1] [,2] [,3] [,4]
a 1 6 11 16
b 2 7 12 17
c 3 8 13 18
d 4 9 14 19
e 5 10 15 20
> letters[1:4]
[1] "a" "b" "c" "d"
> colnames(a) <- letters[1:4]
> a
a b c d
a 1 6 11 16
b 2 7 12 17
c 3 8 13 18
d 4 9 14 19
e 5 10 15 20
>
> is.character(a)
[1] FALSE
> is.numeric(a)
[1] TRUE
> is.matrix(a)
[1] TRUE
> is.data.frame(a)
[1] FALSE
> is.data.frame(as.data.frame(a))
[1] TRUE
R中矩阵运算
> a <- c(rnorm(5), rnorm(5,1), runif(5), runif(5,-1,1), 1:5, rep(0,5), c(2,10,11,13,4), scale(1:5)[1:5])
> a
[1] -0.41253556 0.12192929 -0.47635888 -0.97171653 1.09162243 1.87789657
[7] -0.11717937 2.92953522 1.33836620 -0.03269026 0.87540920 0.13005744
[13] 0.11900686 0.76663940 0.28407356 -0.91251181 0.17997973 0.50452258
[19] 0.25961316 -0.58052230 1.00000000 2.00000000 3.00000000 4.00000000
[25] 5.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
[31] 2.00000000 10.00000000 11.00000000 13.00000000 4.00000000 -1.26491106
[37] -0.63245553 0.00000000 0.63245553 1.26491106
> a <- matrix(a, ncol=5, byrow=T)
> a
[,1] [,2] [,3] [,4] [,5]
[1,] -0.4125356 0.1219293 -0.4763589 -0.9717165 1.09162243
[2,] 1.8778966 -0.1171794 2.9295352 1.3383662 -0.03269026
[3,] 0.8754092 0.1300574 0.1190069 0.7666394 0.28407356
[4,] -0.9125118 0.1799797 0.5045226 0.2596132 -0.58052230
[5,] 1.0000000 2.0000000 3.0000000 4.0000000 5.00000000
[6,] 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000
[7,] 2.0000000 10.0000000 11.0000000 13.0000000 4.00000000
[8,] -1.2649111 -0.6324555 0.0000000 0.6324555 1.26491106
> rowSums(a)
[1] -0.6470593 5.9959284 2.1751865 -0.5489186 15.0000000 0.0000000 40.0000000
[8] 0.0000000
> a <- a[rowSums(abs(a)!=0,]
错误: 意外的']' in "a <- a[rowSums(abs(a)!=0,]"
> a <- a[rowSums(abs(a))!=0,]
>
R中矩阵筛选合并
> sampleInfo = "Samp;Group;Genotype
+ A;Control;WT
+ B;Control;WT
+ D;Treatment;Mutant
+ C;Treatment;Mutant
+ E;Treatment;WT
+ F;Treatment;WT"
> phenoData = read.table(text=sampleInfo,sep=";", header=T, row.names=1, quote="")
> phenoData
Group Genotype
A Control WT
B Control WT
D Treatment Mutant
C Treatment Mutant
E Treatment WT
F Treatment WT
its first argument in its second.
> phenoData[match(rownames(expr), rownames(phenoData)),]
Group Genotype
A Control WT
B Control WT
C Treatment Mutant
D Treatment Mutant
E Treatment WT
returns a logical vector indicating if there is a match or not for
its left operand.
> phenoData = phenoData[rownames(phenoData) %in% rownames(expr),]
> phenoData
Group Genotype
A Control WT
B Control WT
C Treatment Mutant
D Treatment Mutant
E Treatment WT
> merge_data = merge(expr, phenoData, by=0, all.x=T)
> merge_data
Row.names Gene_a Gene_b Gene_c Gene_d Gene_e Group Genotype
1 A 2 1.87789657 1 -1.2649111 -0.4125356 Control WT
2 B 10 -0.11717937 2 -0.6324555 0.1219293 Control WT
3 C 11 2.92953522 3 0.0000000 -0.4763589 Treatment Mutant
4 D 13 1.33836620 4 0.6324555 -0.9717165 Treatment Mutant
5 E 4 -0.03269026 5 1.2649111 1.0916224 Treatment WT
> rownames(merge_data) <- merge_data$Row.names
> merge_data
Row.names Gene_a Gene_b Gene_c Gene_d Gene_e Group Genotype
A A 2 1.87789657 1 -1.2649111 -0.4125356 Control WT
B B 10 -0.11717937 2 -0.6324555 0.1219293 Control WT
C C 11 2.92953522 3 0.0000000 -0.4763589 Treatment Mutant
D D 13 1.33836620 4 0.6324555 -0.9717165 Treatment Mutant
E E 4 -0.03269026 5 1.2649111 1.0916224 Treatment WT
> merge_data = merge_data[,-1]
> merge_data
Gene_a Gene_b Gene_c Gene_d Gene_e Group Genotype
A 2 1.87789657 1 -1.2649111 -0.4125356 Control WT
B 10 -0.11717937 2 -0.6324555 0.1219293 Control WT
C 11 2.92953522 3 0.0000000 -0.4763589 Treatment Mutant
D 13 1.33836620 4 0.6324555 -0.9717165 Treatment Mutant
E 4 -0.03269026 5 1.2649111 1.0916224 Treatment WT
> merge_data[sapply(merge_data, is.numeric)]
Gene_a Gene_b Gene_c Gene_d Gene_e
A 2 1.87789657 1 -1.2649111 -0.4125356
B 10 -0.11717937 2 -0.6324555 0.1219293
C 11 2.92953522 3 0.0000000 -0.4763589
D 13 1.33836620 4 0.6324555 -0.9717165
E 4 -0.03269026 5 1.2649111 1.0916224
str
的应用
str
: Compactly display the internal structure of an R object, a
diagnostic function and an alternative to ‘summary (and to some
extent, ‘dput’). Ideally, only one line for each ‘basic’
structure is displayed. It is especially well suited to compactly
display the (abbreviated) contents of (possibly nested) lists.
The idea is to give reasonable output for any R object. It
calls ‘args’ for (non-primitive) function objects.
str
用来告诉结果的构成方式,对于不少Bioconductor的包,或者复杂的R函数的输出,都是一堆列表的嵌套,str(complex_result)
会输出每个列表的名字,方便提取对应的信息。
> str(list(a = "A", L = as.list(1:100)), list.len = 9)
List of 2
$ a: chr "A"
$ L:List of 100
..$ : int 1
..$ : int 2
..$ : int 3
..$ : int 4
..$ : int 5
..$ : int 6
..$ : int 7
..$ : int 8
..$ : int 9
.. [list output truncated]
> pca_result <- prcomp(expr)
> pca_result
Standard deviations:
[1] 4.769900e+00 1.790861e+00 1.072560e+00 1.578391e-01 2.752128e-16
Rotation:
PC1 PC2 PC3 PC4 PC5
Gene_a 0.99422750 -0.02965529 0.078809521 0.01444655 0.06490461
Gene_b 0.04824368 -0.44384942 -0.885305329 0.03127940 0.12619948
Gene_c 0.08258192 0.81118590 -0.451360828 0.05440417 -0.35842886
Gene_d -0.01936958 0.30237826 -0.079325524 -0.66399283 0.67897952
Gene_e -0.04460135 0.22948437 -0.002097256 0.74496081 0.62480128
> str(pca_result)
List of 5
$ sdev : num [1:5] 4.77 1.79 1.07 1.58e-01 2.75e-16
$ rotation: num [1:5, 1:5] 0.9942 0.0482 0.0826 -0.0194 -0.0446 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:5] "Gene_a" "Gene_b" "Gene_c" "Gene_d" ...
.. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
$ center : Named num [1:5] 8 1.229 3 0.379 0.243
..- attr(*, "names")= chr [1:5] "Gene_a" "Gene_b" "Gene_c" "Gene_d" ...
$ scale : logi FALSE
$ x : num [1:5, 1:5] -6.08 1.86 3.08 5.06 -3.93 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:5] "A" "B" "C" "D" ...
.. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
- attr(*, "class")= chr "prcomp"
> pca_result$sdev
[1] 4.769900e+00 1.790861e+00 1.072560e+00 1.578391e-01 2.752128e-16
R的包管理
> library('unExistedPackage')
Error in library("unExistedPackage") :
不存在叫‘unExistedPackage’这个名字的程辑包
> install.packages("package_name")
> install.packages("package_name", repo="http://cran.us.r-project.org")
> source('https://bioconductor.org/biocLite.R')
> biocLite('BiocInstaller')
> biocLite(c("RUVSeq","pcaMethods"))
> install.packages("devtools")
> devtools::install_github("JustinaZ/pcaReduce")
ct@ehbio:~$ R CMD INSTALL package.tar.gz
>remove.packages("package_name")
>library()
> installed.packages()[c("DESeq2"), c("Package", "Version")]
Package Version
"DESeq2" "1.14.1"
>
>.libPaths()
>library(package_name)