标签:ams 实战 矩阵 day 数据处理 ntile row 其他 nic
2.4 字符处理函数
> x <- c("ab", "cde", "fghij")
> length(x)
[1] 3
> nchar(x[3])
[1] 5
>
>
> x <- "abcdef"
> substr(x, 2, 4)
[1] "bcd"
> substr(x, 2, 4) <- "22222"
> x
[1] "a222ef"
>
>
> grep("A", c("b", "A", "c"), fixed=TRUE)
[1] 2
>
>
> sub("\\s", ".", "Hello There")
[1] "Hello.There"
>
>
> y <- strsplit("abc", "")
> y
[[1]]
[1] "a" "b" "c"
> unlist(y)[2]
[1] "b"
> sapply(y, "[", 2)
[1] "b"
>
>
> paste("x", 1:3, sep="")
[1] "x1" "x2" "x3"
> paste("x", 1:3, sep="M")
[1] "xM1" "xM2" "xM3"
> paste("Today is", date())
[1] "Today is Sun Sep 10 20:39:26 2017"
>
>
> toupper("abc")
[1] "ABC"
> tolower("ABC")
[1] "abc"
>
2.5 其他实用函数
> x <- c(2, 5, 6, 9)
> length(x)
[1] 4
>
>
> indices <- seq(1, 10, 2)
> indices
[1] 1 3 5 7 9
>
>
> y <- rep(1:3, 2)
> y
[1] 1 2 3 1 2 3
>
>
> z <- cut(y, 3)
> z
[1] (0.998,1.67] (1.67,2.33] (2.33,3] (0.998,1.67] (1.67,2.33]
[6] (2.33,3]
Levels: (0.998,1.67] (1.67,2.33] (2.33,3]
>
>
> u <- pretty(y, 3)
> u
[1] 1.0 1.5 2.0 2.5 3.0
>
>
> firstname <- c("Jane")
> cat("Hello", firstname, "\n")
Hello Jane
>
>
> name <- "Bob"
> cat("Hello", name, "\b.\n", "Isn\'t R", "\t", "GREAT?\n")
Hello Bob.
Isn't R GREAT?
>
2.6 将函数应用于矩阵和数据框
> a <- 5
> sqrt(a)
[1] 2.24
> b <- c(1.243, 5.654, 2.99)
> round(b)
[1] 1 6 3
> c <- matrix(runif(12), nrow=3)
> c
[,1] [,2] [,3] [,4]
[1,] 0.9636 0.216 0.289 0.913
[2,] 0.2068 0.240 0.804 0.353
[3,] 0.0862 0.197 0.378 0.931
> log(c)
[,1] [,2] [,3] [,4]
[1,] -0.0371 -1.53 -1.241 -0.0912
[2,] -1.5762 -1.43 -0.218 -1.0402
[3,] -2.4511 -1.62 -0.972 -0.0710
> mean(c)
[1] 0.465
>
> mydata <- matrix(rnorm(30), nrow=6)
> mydata
[,1] [,2] [,3] [,4] [,5]
[1,] 0.459 1.203 1.234 0.591 -0.281
[2,] -1.261 0.769 -1.891 -0.435 0.812
[3,] -0.527 0.238 -0.223 -0.251 -0.208
[4,] -0.557 -1.415 0.768 -0.926 1.451
[5,] -0.374 2.934 0.388 1.087 0.841
[6,] -0.604 0.935 0.609 -1.944 -0.866
> apply(mydata, 1, mean)
[1] 0.641 -0.401 -0.194 -0.136 0.975 -0.374
> apply(mydata, 2, mean)
[1] -0.478 0.777 0.148 -0.313 0.292
> apply(mydata, 2, mean, trim=0.2)
[1] -0.516 0.786 0.386 -0.255 0.291
>
3. 数据处理难题的一套解决方案
> options(digits=2)
>
> Student <- c("John Davis", "Angela Williams", "Bullwinkle Moose",
+ "David Jones", "Janice Markhammer", "Cheryl Cushing",
+ "Reuven Ytzrhak", "Greg Knox", "Joel England",
+ "Mary Rayburn")
> Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
> Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
> English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
> roster <- data.frame(Student, Math, Science, English,
+ stringsAsFactors=FALSE)
> roster
Student Math Science English
1 John Davis 502 95 25
2 Angela Williams 600 99 22
3 Bullwinkle Moose 412 80 18
4 David Jones 358 82 15
5 Janice Markhammer 495 75 20
6 Cheryl Cushing 512 85 28
7 Reuven Ytzrhak 410 80 15
8 Greg Knox 625 95 30
9 Joel England 573 89 27
10 Mary Rayburn 522 86 18
> z <- scale(roster[, 2:4])
> z
Math Science English
[1,] 0.013 1.078 0.587
[2,] 1.143 1.591 0.037
[3,] -1.026 -0.847 -0.697
[4,] -1.649 -0.590 -1.247
[5,] -0.068 -1.489 -0.330
[6,] 0.128 -0.205 1.137
[7,] -1.049 -0.847 -1.247
[8,] 1.432 1.078 1.504
[9,] 0.832 0.308 0.954
[10,] 0.243 -0.077 -0.697
attr(,"scaled:center")
Math Science English
501 87 22
attr(,"scaled:scale")
Math Science English
86.7 7.8 5.5
> score <- apply(z, 1, mean)
> score
[1] 0.56 0.92 -0.86 -1.16 -0.63 0.35 -1.05 1.34 0.70 -0.18
> roster <- cbind(roster, score)
> roster
Student Math Science English score
1 John Davis 502 95 25 0.56
2 Angela Williams 600 99 22 0.92
3 Bullwinkle Moose 412 80 18 -0.86
4 David Jones 358 82 15 -1.16
5 Janice Markhammer 495 75 20 -0.63
6 Cheryl Cushing 512 85 28 0.35
7 Reuven Ytzrhak 410 80 15 -1.05
8 Greg Knox 625 95 30 1.34
9 Joel England 573 89 27 0.70
10 Mary Rayburn 522 86 18 -0.18
> y <- quantile(roster$score, c(.8, .6, .4, .2))
> y
80% 60% 40% 20%
0.74 0.44 -0.36 -0.89
> roster$grade[score >= y[1]] <- "A"
> roster$grade[score < y[1] & score >= y[2]] <- "B"
> roster$grade[score < y[2] & score >= y[3]] <- "C"
> roster$grade[score < y[3] & score >= y[4]] <- "D"
> roster$grade[score < y[4]] <- "F"
> roster
Student Math Science English score grade
1 John Davis 502 95 25 0.56 B
2 Angela Williams 600 99 22 0.92 A
3 Bullwinkle Moose 412 80 18 -0.86 D
4 David Jones 358 82 15 -1.16 F
5 Janice Markhammer 495 75 20 -0.63 D
6 Cheryl Cushing 512 85 28 0.35 C
7 Reuven Ytzrhak 410 80 15 -1.05 F
8 Greg Knox 625 95 30 1.34 A
9 Joel England 573 89 27 0.70 B
10 Mary Rayburn 522 86 18 -0.18 C
> name <- strsplit((roster$Student), " ")
> name
[[1]]
[1] "John" "Davis"
[[2]]
[1] "Angela" "Williams"
[[3]]
[1] "Bullwinkle" "Moose"
[[4]]
[1] "David" "Jones"
[[5]]
[1] "Janice" "Markhammer"
[[6]]
[1] "Cheryl" "Cushing"
[[7]]
[1] "Reuven" "Ytzrhak"
[[8]]
[1] "Greg" "Knox"
[[9]]
[1] "Joel" "England"
[[10]]
[1] "Mary" "Rayburn"
> Firstname <- sapply(name, "[", 1)
> Firstname
[1] "John" "Angela" "Bullwinkle" "David" "Janice"
[6] "Cheryl" "Reuven" "Greg" "Joel" "Mary"
> Lastname <- sapply(name, "[", 2)
> Lastname
[1] "Davis" "Williams" "Moose" "Jones" "Markhammer"
[6] "Cushing" "Ytzrhak" "Knox" "England" "Rayburn"
> roster <- cbind(Firstname, Lastname, roster[ , -1])
> roster
Firstname Lastname Math Science English score grade
1 John Davis 502 95 25 0.56 B
2 Angela Williams 600 99 22 0.92 A
3 Bullwinkle Moose 412 80 18 -0.86 D
4 David Jones 358 82 15 -1.16 F
5 Janice Markhammer 495 75 20 -0.63 D
6 Cheryl Cushing 512 85 28 0.35 C
7 Reuven Ytzrhak 410 80 15 -1.05 F
8 Greg Knox 625 95 30 1.34 A
9 Joel England 573 89 27 0.70 B
10 Mary Rayburn 522 86 18 -0.18 C
> roster[order(Lastname, Firstname), ]
Firstname Lastname Math Science English score grade
6 Cheryl Cushing 512 85 28 0.35 C
1 John Davis 502 95 25 0.56 B
9 Joel England 573 89 27 0.70 B
4 David Jones 358 82 15 -1.16 F
8 Greg Knox 625 95 30 1.34 A
5 Janice Markhammer 495 75 20 -0.63 D
3 Bullwinkle Moose 412 80 18 -0.86 D
10 Mary Rayburn 522 86 18 -0.18 C
2 Angela Williams 600 99 22 0.92 A
7 Reuven Ytzrhak 410 80 15 -1.05 F
>
quantile() http://blog.csdn.net/u012543538/article/details/17025789
scale() http://blog.sina.com.cn/s/blog_b623d3f40102v2zg.html
标签:ams 实战 矩阵 day 数据处理 ntile row 其他 nic
原文地址:http://www.cnblogs.com/wnzhong/p/7502571.html