1 Built-in Constants
> LETTERS
[1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V"
[23] "W" "X" "Y" "Z"
> letters
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v"
[23] "w" "x" "y" "z"
> month.abb
[1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
> month.name
[1] "January" "February" "March" "April" "May" "June" "July"
[8] "August" "September" "October" "November" "December"
> pi
[1] 3.141593
2 Object creation
> sz <- 26
> x <- 4
> # length 1 vector
> t <- 'c'
> a <- letters[ceiling(runif(sz, 0.00001, 26))]; a
[1] "m" "d" "v" "i" "a" "a" "t" "a" "i" "n" "y" "y" "x" "j" "h" "m" "j" "i" "i" "h" "a" "w"
[23] "t" "s" "g" "o"
> i <- 1:sz
> j <- i + rnorm(sz, 0, 2);j
[1] 0.725392 3.795510 4.998845 3.118527 5.323526 7.283787 7.834817 9.087679
[9] 8.830070 9.439148 12.960340 10.442182 12.580495 18.454917 17.299115 18.321839
[17] 15.656129 19.342379 20.866082 16.008389 19.242640 22.343947 25.057231 26.150223
[25] 27.198469 28.704727
> names(a) <- LETTERS[1:sz];a
A B C D E F G H I J K L M N O P Q R S T U V W
"m" "d" "v" "i" "a" "a" "t" "a" "i" "n" "y" "y" "x" "j" "h" "m" "j" "i" "i" "h" "a" "w" "t"
X Y Z
"s" "g" "o"
> #complex number
> l <- exp((0+1i)*pi) + (1+0i);l
[1] 0+1.224647e-16i
> d <- as.Date('2010-01-01') + seq(1,sz);d
[1] "2010-01-02" "2010-01-03" "2010-01-04" "2010-01-05" "2010-01-06" "2010-01-07"
[7] "2010-01-08" "2010-01-09" "2010-01-10" "2010-01-11" "2010-01-12" "2010-01-13"
[13] "2010-01-14" "2010-01-15" "2010-01-16" "2010-01-17" "2010-01-18" "2010-01-19"
[19] "2010-01-20" "2010-01-21" "2010-01-22" "2010-01-23" "2010-01-24" "2010-01-25"
[25] "2010-01-26" "2010-01-27"
> f <- factor(rep(1:x, sz/x), levels=x:1); f
[1] 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
Levels: 4 3 2 1
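> df <- data.frame(a=a, f=f, i=i, j=j); df
Error in data.frame(a = a, f = f, i = i, j = j) :
arguments imply differing number of rows: 26, 24
> # rep() truncates times = sz/x (6.5) down to 6, so f has only 24 elements while
> # a, i and j have 26; recycle to the full length with length.out instead
> f <- factor(rep(1:x, length.out=sz), levels=x:1)
> df <- data.frame(a=a, f=f, i=i, j=j)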
> m <- matrix(rnorm(x^2), nrow=x, ncol=x); m
[,1] [,2] [,3] [,4]
[1,] 1.1105554 0.6154485 0.6311158 -2.13814813
[2,] 0.9578517 0.8107677 -1.2327885 -0.65443181
[3,] -0.6565348 1.8245487 -1.4426480 -0.45362514
[4,] 0.2563039 1.2790322 1.2530851 0.04705056
> l <- list(1:10,LETTERS); l
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V"
[23] "W" "X" "Y" "Z"
3 Object Inspection
> names(x)
NULL
> dimnames(x)
NULL
> colnames(x)
NULL
> rownames(x)
NULL
> dim(x)
NULL
> nrow(x)
NULL
> ncol(x)
NULL
> is.list(x)
[1] FALSE
> is.factor(x)
[1] FALSE
> is.complex(x)
[1] FALSE
> is.character(x)
[1] FALSE
> is.matrix(x)
[1] FALSE
> is.numeric(x)
[1] TRUE
> is.integer(x)
[1] FALSE
> is.vector(x)
[1] TRUE
> is.data.frame(x)
[1] FALSE
> is.ordered(x)
[1] FALSE
4 Utility Function
> assign('variablename', 5)
> c(1,2)
[1] 1 2
> rep(NA,10)
[1] NA NA NA NA NA NA NA NA NA NA
> append(l, list(c(1,2,3)))
[[1]]
[1] 0+1.224647e-16i
[[2]]
[1] 1 2 3
> seq(15,100,5)
[1] 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100
>
> a=rnorm(10)
> sort(a)
[1] -1.49263272 -1.34121858 -1.05170988 -0.92195631 -0.61930516 -0.29196316 0.01090951
[8] 0.68823271 1.09200068 1.13104951
> order(a)
[1] 8 3 6 5 9 1 7 4 10 2
> rank(a)
[1] 6 10 2 8 4 3 7 1 5 9
> rev(a)
[1] 1.09200068 -0.61930516 -1.49263272 0.01090951 -1.05170988 -0.92195631 0.68823271
[8] -1.34121858 1.13104951 -0.29196316
>
> i=3
> any(i %in% c(1,3,5));
[1] TRUE
> all(i %in% c(1,3,5));
[1] TRUE
> which(i %in% c(1,3,5));
[1] 1
> match('c', a)
[1] NA
> df <- transform(df, k=j+1)[1:10,]
> df
a f i j k
1 r 1 1 2.2447932 3.2447932
2 v 2 2 -1.5417236 -0.5417236
3 s 3 3 -0.3591784 0.6408216
4 c 4 4 5.4814561 6.4814561
5 f 1 5 7.1022692 8.1022692
6 i 2 6 5.3284810 6.3284810
7 p 3 7 5.0873649 6.0873649
8 n 4 8 8.5368351 9.5368351
9 v 1 9 11.9734547 12.9734547
10 r 2 10 10.0198377 11.0198377
> df <- within(df, s<- i/j)
> df
a f i j k s
1 r 1 1 2.2447932 3.2447932 0.4454753
2 v 2 2 -1.5417236 -0.5417236 -1.2972494
3 s 3 3 -0.3591784 0.6408216 -8.3523948
4 c 4 4 5.4814561 6.4814561 0.7297331
5 f 1 5 7.1022692 8.1022692 0.7040003
6 i 2 6 5.3284810 6.3284810 1.1260245
7 p 3 7 5.0873649 6.0873649 1.3759579
8 n 4 8 8.5368351 9.5368351 0.9371154
9 v 1 9 11.9734547 12.9734547 0.7516628
10 r 2 10 10.0198377 11.0198377 0.9980202
> x <- with(df, j+5)
> x
[1] 7.244793 3.458276 4.640822 10.481456 12.102269 10.328481 10.087365 13.536835
[9] 16.973455 15.019838
> z <- rep(1,length(x))
> df <- cbind(df, z)
> df
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
> row.df <- head(df, 1)
> rbind(df, row.df)
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
11 r 1 1 2.2447932 3.2447932 0.4454753 1
> df$f <- reorder(df$f, df$j, mean)
> df
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
5 Math Function
> is.na(i)
[1] FALSE
> is.nan(j)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> is.null(d)
[1] FALSE
> is.finite(j)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[18] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
>
> abs(-1)
[1] 1
> sqrt(10)
[1] 3.162278
> log(1)
[1] 0
> log10(10)
[1] 1
> exp(1)
[1] 2.718282
> ceiling(5.3)
[1] 6
> floor(2.3)
[1] 2
> round(3.22,digits=1)
[1] 3.2
> trunc(4.5)
[1] 4
> sin(pi/2)
[1] 1
> cos(pi)
[1] -1
> tan(pi/2)
[1] 1.633124e+16
> asin(0.5)
[1] 0.5235988
> acos(0.5)
[1] 1.047198
> atan(1)
[1] 0.7853982
> sum(1:10)
[1] 55
> prod(1:10)
[1] 3628800
> cumsum(1:10)
[1] 1 3 6 10 15 21 28 36 45 55
> cumprod(1:10)
[1] 1 2 6 24 120 720 5040 40320 362880 3628800
6 Stat Function
> length(0:10)
[1] 11
> sum(0:10)
[1] 55
> min(0:10)
[1] 0
> max(0:10)
[1] 10
> range(0:10)
[1] 0 10
> cut(0:10,5)
[1] (-0.01,2] (-0.01,2] (-0.01,2] (2,4] (2,4] (4,6] (4,6] (6,8]
[9] (6,8] (8,10] (8,10]
Levels: (-0.01,2] (2,4] (4,6] (6,8] (8,10]
>
> mean(0:10)
[1] 5
> median(0:10)
[1] 5
> sd(0:10)
[1] 3.316625
> var(0:10)
[1] 11
> cov(0:10,1:11)
[1] 11
> cor(0:10,1:11)
[1] 1
> diff(1:10, lag=1, diff=1)
[1] 1 1 1 1 1 1 1 1 1
> rnorm(n=10, mean=0, sd=1)
[1] 0.50660408 0.34007608 -0.07856213 0.87086844 0.68152240 0.80075731 -0.57385601
[8] 0.99361746 1.18958557 -1.87070403
> runif(n=10, min=1, max=100)
[1] 46.977626 55.666696 69.418533 8.874474 15.755576 52.141463 57.587816 85.232902
[9] 69.724438 3.472987
>
> r <- lm(j ~ i, data=as.data.frame(df));
> summary(r)
Call:
lm(formula = j ~ i, data = as.data.frame(df))
Residuals:
Min 1Q Median 3Q Max
-3.3564 -1.7633 -0.2411 2.3067 3.2870
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.74123 0.44095 -3.949 0.000145 ***
i 1.08017 0.02855 37.831 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.184 on 102 degrees of freedom
Multiple R-squared: 0.9335, Adjusted R-squared: 0.9328
F-statistic: 1431 on 1 and 102 DF, p-value: < 2.2e-16
> anova(r)
Analysis of Variance Table
Response: j
Df Sum Sq Mean Sq F value Pr(>F)
i 1 6825.6 6825.6 1431.2 < 2.2e-16 ***
Residuals 102 486.5 4.8
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> residuals(r)
1 2 3 4 5 6 7 8
3.2870286 0.2328002 -3.2515109 -0.7149405 -2.0039734 1.8895235 1.0512190 -3.3564285
9 10 11 12 13 14 15 16
3.1063883 -1.2399755 -1.6117162 -1.0684410 2.3715878 2.3066557 1.3768941 -3.2480162
17 18 19 20 21 22 23 24
-2.9309619 2.7913712 0.7080538 2.8996443 -0.9851891 2.3818433 -1.7632818 -1.9259796
25 26 27 28 29 30 31 32
-1.2913915 0.9887964 3.2870286 0.2328002 -3.2515109 -0.7149405 -2.0039734 1.8895235
33 34 35 36 37 38 39 40
1.0512190 -3.3564285 3.1063883 -1.2399755 -1.6117162 -1.0684410 2.3715878 2.3066557
41 42 43 44 45 46 47 48
1.3768941 -3.2480162 -2.9309619 2.7913712 0.7080538 2.8996443 -0.9851891 2.3818433
49 50 51 52 53 54 55 56
-1.7632818 -1.9259796 -1.2913915 0.9887964 3.2870286 0.2328002 -3.2515109 -0.7149405
57 58 59 60 61 62 63 64
-2.0039734 1.8895235 1.0512190 -3.3564285 3.1063883 -1.2399755 -1.6117162 -1.0684410
65 66 67 68 69 70 71 72
2.3715878 2.3066557 1.3768941 -3.2480162 -2.9309619 2.7913712 0.7080538 2.8996443
73 74 75 76 77 78 79 80
-0.9851891 2.3818433 -1.7632818 -1.9259796 -1.2913915 0.9887964 3.2870286 0.2328002
81 82 83 84 85 86 87 88
-3.2515109 -0.7149405 -2.0039734 1.8895235 1.0512190 -3.3564285 3.1063883 -1.2399755
89 90 91 92 93 94 95 96
-1.6117162 -1.0684410 2.3715878 2.3066557 1.3768941 -3.2480162 -2.9309619 2.7913712
97 98 99 100 101 102 103 104
0.7080538 2.8996443 -0.9851891 2.3818433 -1.7632818 -1.9259796 -1.2913915 0.9887964
> coef(r)
(Intercept) i
-1.741227 1.080172
> plot(r)
Hit <Return> to see next plot: plot(r$fitted.values)
Hit <Return> to see next plot: plot(r$residuals)
Hit <Return> to see next plot:
# Also, glm() gam() lme() lmer() nls()
7 Character Function
> as.character(1)
[1] "1"
> toString(10)
[1] "10"
> nchar('qwe')
[1] 3
> B<-toupper('bbb')
> b<-tolower('BBB')
> s <- 'the cow jumped over the moon'
> sub('the', 'a', s)
[1] "a cow jumped over the moon"
> gsub('the', 'a', s)
[1] "a cow jumped over a moon"
> substr(s,5,7)
[1] "cow"
> substr(s,5,7) <- "dog"
> s
[1] "the dog jumped over the moon"
> substr(s,5,7) <- "monkey"
> s
[1] "the mon jumped over the moon"
> paste('a', 'b', 'c', sep=";")
[1] "a;b;c"
> strsplit(s, ' ')
[[1]]
[1] "the" "mon" "jumped" "over" "the" "moon"
> grep('the', s)
[1] 1
> make.unique(a)
[1] "q" "a" "e" "b" "s" "x" "k" "u" "n" "w" "b.1" "k.1" "e.1" "b.2"
[15] "a.1" "u.1" "r" "y" "e.2" "v" "h" "m" "n.1" "y.1" "x.1" "g"
> format(j, digits=2)
[1] " 2.63" " 0.65" "-1.75" " 1.86" " 1.66" " 6.63" " 6.87" " 3.54" "11.09" " 7.82" " 8.53"
[12] "10.15" "14.67" "15.69" "15.84" "12.29" "13.69" "20.49" "19.49" "22.76" "19.96" "24.40"
[23] "21.34" "22.26" "23.97" "27.33"
> sprintf("%d: %s", i, a)
[1] "1: q" "2: a" "3: e" "4: b" "5: s" "6: x" "7: k" "8: u" "9: n" "10: w" "11: b"
[12] "12: k" "13: e" "14: b" "15: a" "16: u" "17: r" "18: y" "19: e" "20: v" "21: h" "22: m"
[23] "23: n" "24: y" "25: x" "26: g"
> format(d, format="%A %Y-%b-%d")
[1] "Saturday 2010-Jan-02" "Sunday 2010-Jan-03" "Monday 2010-Jan-04"
[4] "Tuesday 2010-Jan-05" "Wednesday 2010-Jan-06" "Thursday 2010-Jan-07"
[7] "Friday 2010-Jan-08" "Saturday 2010-Jan-09" "Sunday 2010-Jan-10"
[10] "Monday 2010-Jan-11" "Tuesday 2010-Jan-12" "Wednesday 2010-Jan-13"
[13] "Thursday 2010-Jan-14" "Friday 2010-Jan-15" "Saturday 2010-Jan-16"
[16] "Sunday 2010-Jan-17" "Monday 2010-Jan-18" "Tuesday 2010-Jan-19"
[19] "Wednesday 2010-Jan-20" "Thursday 2010-Jan-21" "Friday 2010-Jan-22"
[22] "Saturday 2010-Jan-23" "Sunday 2010-Jan-24" "Monday 2010-Jan-25"
[25] "Tuesday 2010-Jan-26" "Wednesday 2010-Jan-27"
8 Dates
> x <- as.Date('03-06-1920', format='%d-%m-%Y')
> Sys.Date()
[1] "2015-10-29"
> days.apart <- d-x
> weekdays(d)
[1] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[8] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[15] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[22] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday"
> months(d)
[1] "January" "January" "January" "January" "January" "January" "January" "January"
[9] "January" "January" "January" "January" "January" "January" "January" "January"
[17] "January" "January" "January" "January" "January" "January" "January" "January"
[25] "January" "January"
> dp <- as.POSIXlt(d);
> dp
[1] "2008-01-02 UTC" "2008-01-03 UTC" "2008-01-04 UTC" "2008-01-05 UTC" "2008-01-06 UTC"
[6] "2008-01-07 UTC" "2008-01-08 UTC" "2008-01-09 UTC" "2008-01-10 UTC" "2008-01-11 UTC"
[11] "2008-01-12 UTC" "2008-01-13 UTC" "2008-01-14 UTC" "2008-01-15 UTC" "2008-01-16 UTC"
[16] "2008-01-17 UTC" "2008-01-18 UTC" "2008-01-19 UTC" "2008-01-20 UTC" "2008-01-21 UTC"
[21] "2008-01-22 UTC" "2008-01-23 UTC" "2008-01-24 UTC" "2008-01-25 UTC" "2008-01-26 UTC"
[26] "2008-01-27 UTC"
> dp$year <- dp$year -1
> d <- as.Date(dp)
> names(unclass(dp))
[1] "sec" "min" "hour" "mday" "mon" "year" "wday" "yday" "isdst"
> Sys.time()
[1] "2015-10-29 01:24:33 EDT"
> date()
[1] "Thu Oct 29 01:24:33 2015"
# Really useful: zoo and lubridate packages
9 I/O and the file system
> cat(i)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> i
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> print(j)
[1] 2.6259729 0.6519162 -1.7522234 1.8645187 1.6556576 6.6293262 6.8711933 3.5437175
[9] 11.0867059 7.8205138 8.5289449 10.1523917 14.6725922 15.6878317 15.8382418 12.2935032
[17] 13.6907292 20.4932340 19.4900883 22.7618505 19.9571887 24.4043928 21.3394394 22.2569133
[25] 23.9716730 27.3320326
> getwd()
[1] "/Users/lingduoduo"
> list.files()
[1] "Applications" "Desktop" "Documents" "Downloads"
[5] "Downloads.rar.dmg" "Eclipse" "Library" "Movies"
[9] "Music" "Pictures" "Public"
> list.dirs()
> dir()
[1] "Applications" "Desktop" "Documents" "Downloads"
[5] "Downloads.rar.dmg" "Eclipse" "Library" "Movies"
[9] "Music" "Pictures" "Public"
> Sys.glob('*.dmg')
[1] "Downloads.rar.dmg"
4 Utility Function
> assign('variablename', 5)
> c(1,2)
[1] 1 2
> rep(NA,10)
[1] NA NA NA NA NA NA NA NA NA NA
> append(l, list(c(1,2,3)))
[[1]]
[1] 0+1.224647e-16i
[[2]]
[1] 1 2 3
> seq(15,100,5)
[1] 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100
>
> a=rnorm(10)
> sort(a)
[1] -1.49263272 -1.34121858 -1.05170988 -0.92195631 -0.61930516 -0.29196316 0.01090951
[8] 0.68823271 1.09200068 1.13104951
> order(a)
[1] 8 3 6 5 9 1 7 4 10 2
> rank(a)
[1] 6 10 2 8 4 3 7 1 5 9
> rev(a)
[1] 1.09200068 -0.61930516 -1.49263272 0.01090951 -1.05170988 -0.92195631 0.68823271
[8] -1.34121858 1.13104951 -0.29196316
>
> i=3
> any(i %in% c(1,3,5));
[1] TRUE
> all(i %in% c(1,3,5));
[1] TRUE
> which(i %in% c(1,3,5));
[1] 1
> match('c', a)
[1] NA
> df <- transform(df, k=j+1)[1:10,]
> df
a f i j k
1 r 1 1 2.2447932 3.2447932
2 v 2 2 -1.5417236 -0.5417236
3 s 3 3 -0.3591784 0.6408216
4 c 4 4 5.4814561 6.4814561
5 f 1 5 7.1022692 8.1022692
6 i 2 6 5.3284810 6.3284810
7 p 3 7 5.0873649 6.0873649
8 n 4 8 8.5368351 9.5368351
9 v 1 9 11.9734547 12.9734547
10 r 2 10 10.0198377 11.0198377
> df <- within(df, s<- i/j)
> df
a f i j k s
1 r 1 1 2.2447932 3.2447932 0.4454753
2 v 2 2 -1.5417236 -0.5417236 -1.2972494
3 s 3 3 -0.3591784 0.6408216 -8.3523948
4 c 4 4 5.4814561 6.4814561 0.7297331
5 f 1 5 7.1022692 8.1022692 0.7040003
6 i 2 6 5.3284810 6.3284810 1.1260245
7 p 3 7 5.0873649 6.0873649 1.3759579
8 n 4 8 8.5368351 9.5368351 0.9371154
9 v 1 9 11.9734547 12.9734547 0.7516628
10 r 2 10 10.0198377 11.0198377 0.9980202
> x <- with(df, j+5)
> x
[1] 7.244793 3.458276 4.640822 10.481456 12.102269 10.328481 10.087365 13.536835
[9] 16.973455 15.019838
> z <- rep(1,length(x))
> df <- cbind(df, z)
> df
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
> row.df <- head(df, 1)
> rbind(df, row.df)
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
11 r 1 1 2.2447932 3.2447932 0.4454753 1
> df$f <- reorder(df$f, df$j, mean)
> df
a f i j k s z
1 r 1 1 2.2447932 3.2447932 0.4454753 1
2 v 2 2 -1.5417236 -0.5417236 -1.2972494 1
3 s 3 3 -0.3591784 0.6408216 -8.3523948 1
4 c 4 4 5.4814561 6.4814561 0.7297331 1
5 f 1 5 7.1022692 8.1022692 0.7040003 1
6 i 2 6 5.3284810 6.3284810 1.1260245 1
7 p 3 7 5.0873649 6.0873649 1.3759579 1
8 n 4 8 8.5368351 9.5368351 0.9371154 1
9 v 1 9 11.9734547 12.9734547 0.7516628 1
10 r 2 10 10.0198377 11.0198377 0.9980202 1
5 Math Function
> is.na(i)
[1] FALSE
> is.nan(j)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> is.null(d)
[1] FALSE
> is.finite(j)
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[18] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
>
> abs(-1)
[1] 1
> sqrt(10)
[1] 3.162278
> log(1)
[1] 0
> log10(10)
[1] 1
> exp(1)
[1] 2.718282
> ceiling(5.3)
[1] 6
> floor(2.3)
[1] 2
> round(3.22,digits=1)
[1] 3.2
> trunc(4.5)
[1] 4
> sin(pi/2)
[1] 1
> cos(pi)
[1] -1
> tan(pi/2)
[1] 1.633124e+16
> asin(0.5)
[1] 0.5235988
> acos(0.5)
[1] 1.047198
> atan(1)
[1] 0.7853982
> sum(1:10)
[1] 55
> prod(1:10)
[1] 3628800
> cumsum(1:10)
[1] 1 3 6 10 15 21 28 36 45 55
> cumprod(1:10)
[1] 1 2 6 24 120 720 5040 40320 362880 3628800
6 Stat Function
> length(0:10)
[1] 11
> sum(0:10)
[1] 55
> min(0:10)
[1] 0
> max(0:10)
[1] 10
> range(0:10)
[1] 0 10
> cut(0:10,5)
[1] (-0.01,2] (-0.01,2] (-0.01,2] (2,4] (2,4] (4,6] (4,6] (6,8]
[9] (6,8] (8,10] (8,10]
Levels: (-0.01,2] (2,4] (4,6] (6,8] (8,10]
>
> mean(0:10)
[1] 5
> median(0:10)
[1] 5
> sd(0:10)
[1] 3.316625
> var(0:10)
[1] 11
> cov(0:10,1:11)
[1] 11
> cor(0:10,1:11)
[1] 1
> diff(1:10, lag=1, diff=1)
[1] 1 1 1 1 1 1 1 1 1
> rnorm(n=10, mean=0, sd=1)
[1] 0.50660408 0.34007608 -0.07856213 0.87086844 0.68152240 0.80075731 -0.57385601
[8] 0.99361746 1.18958557 -1.87070403
> runif(n=10, min=1, max=100)
[1] 46.977626 55.666696 69.418533 8.874474 15.755576 52.141463 57.587816 85.232902
[9] 69.724438 3.472987
>
> r <- lm(j ~ i, data=as.data.frame(df));
> summary(r)
Call:
lm(formula = j ~ i, data = as.data.frame(df))
Residuals:
Min 1Q Median 3Q Max
-3.3564 -1.7633 -0.2411 2.3067 3.2870
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.74123 0.44095 -3.949 0.000145 ***
i 1.08017 0.02855 37.831 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.184 on 102 degrees of freedom
Multiple R-squared: 0.9335, Adjusted R-squared: 0.9328
F-statistic: 1431 on 1 and 102 DF, p-value: < 2.2e-16
> anova(r)
Analysis of Variance Table
Response: j
Df Sum Sq Mean Sq F value Pr(>F)
i 1 6825.6 6825.6 1431.2 < 2.2e-16 ***
Residuals 102 486.5 4.8
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> residuals(r)
1 2 3 4 5 6 7 8
3.2870286 0.2328002 -3.2515109 -0.7149405 -2.0039734 1.8895235 1.0512190 -3.3564285
9 10 11 12 13 14 15 16
3.1063883 -1.2399755 -1.6117162 -1.0684410 2.3715878 2.3066557 1.3768941 -3.2480162
17 18 19 20 21 22 23 24
-2.9309619 2.7913712 0.7080538 2.8996443 -0.9851891 2.3818433 -1.7632818 -1.9259796
25 26 27 28 29 30 31 32
-1.2913915 0.9887964 3.2870286 0.2328002 -3.2515109 -0.7149405 -2.0039734 1.8895235
33 34 35 36 37 38 39 40
1.0512190 -3.3564285 3.1063883 -1.2399755 -1.6117162 -1.0684410 2.3715878 2.3066557
41 42 43 44 45 46 47 48
1.3768941 -3.2480162 -2.9309619 2.7913712 0.7080538 2.8996443 -0.9851891 2.3818433
49 50 51 52 53 54 55 56
-1.7632818 -1.9259796 -1.2913915 0.9887964 3.2870286 0.2328002 -3.2515109 -0.7149405
57 58 59 60 61 62 63 64
-2.0039734 1.8895235 1.0512190 -3.3564285 3.1063883 -1.2399755 -1.6117162 -1.0684410
65 66 67 68 69 70 71 72
2.3715878 2.3066557 1.3768941 -3.2480162 -2.9309619 2.7913712 0.7080538 2.8996443
73 74 75 76 77 78 79 80
-0.9851891 2.3818433 -1.7632818 -1.9259796 -1.2913915 0.9887964 3.2870286 0.2328002
81 82 83 84 85 86 87 88
-3.2515109 -0.7149405 -2.0039734 1.8895235 1.0512190 -3.3564285 3.1063883 -1.2399755
89 90 91 92 93 94 95 96
-1.6117162 -1.0684410 2.3715878 2.3066557 1.3768941 -3.2480162 -2.9309619 2.7913712
97 98 99 100 101 102 103 104
0.7080538 2.8996443 -0.9851891 2.3818433 -1.7632818 -1.9259796 -1.2913915 0.9887964
> coef(r)
(Intercept) i
-1.741227 1.080172
> plot(r)
Hit <Return> to see next plot: plot(r$fitted.values)
Hit <Return> to see next plot: plot(r$residuals)
Hit <Return> to see next plot:
# Also, glm() gam() lme() lmer() nls()
7 Character Function
> as.character(1)
[1] "1"
> toString(10)
[1] "10"
> nchar('qwe')
[1] 3
> B<-toupper('bbb')
> b<-tolower('BBB')
> s <- 'the cow jumped over the moon'
> sub('the', 'a', s)
[1] "a cow jumped over the moon"
> gsub('the', 'a', s)
[1] "a cow jumped over a moon"
> substr(s,5,7)
[1] "cow"
> substr(s,5,7) <- "dog"
> s
[1] "the dog jumped over the moon"
> substr(s,5,7) <- "monkey"
> s
[1] "the mon jumped over the moon"
> paste('a', 'b', 'c', sep=";")
[1] "a;b;c"
> strsplit(s, ' ')
[[1]]
[1] "the" "mon" "jumped" "over" "the" "moon"
> grep('the', s)
[1] 1
> make.unique(a)
[1] "q" "a" "e" "b" "s" "x" "k" "u" "n" "w" "b.1" "k.1" "e.1" "b.2"
[15] "a.1" "u.1" "r" "y" "e.2" "v" "h" "m" "n.1" "y.1" "x.1" "g"
> format(j, digits=2)
[1] " 2.63" " 0.65" "-1.75" " 1.86" " 1.66" " 6.63" " 6.87" " 3.54" "11.09" " 7.82" " 8.53"
[12] "10.15" "14.67" "15.69" "15.84" "12.29" "13.69" "20.49" "19.49" "22.76" "19.96" "24.40"
[23] "21.34" "22.26" "23.97" "27.33"
> sprintf("%d: %s", i, a)
[1] "1: q" "2: a" "3: e" "4: b" "5: s" "6: x" "7: k" "8: u" "9: n" "10: w" "11: b"
[12] "12: k" "13: e" "14: b" "15: a" "16: u" "17: r" "18: y" "19: e" "20: v" "21: h" "22: m"
[23] "23: n" "24: y" "25: x" "26: g"
> format(d, format="%A %Y-%b-%d")
[1] "Saturday 2010-Jan-02" "Sunday 2010-Jan-03" "Monday 2010-Jan-04"
[4] "Tuesday 2010-Jan-05" "Wednesday 2010-Jan-06" "Thursday 2010-Jan-07"
[7] "Friday 2010-Jan-08" "Saturday 2010-Jan-09" "Sunday 2010-Jan-10"
[10] "Monday 2010-Jan-11" "Tuesday 2010-Jan-12" "Wednesday 2010-Jan-13"
[13] "Thursday 2010-Jan-14" "Friday 2010-Jan-15" "Saturday 2010-Jan-16"
[16] "Sunday 2010-Jan-17" "Monday 2010-Jan-18" "Tuesday 2010-Jan-19"
[19] "Wednesday 2010-Jan-20" "Thursday 2010-Jan-21" "Friday 2010-Jan-22"
[22] "Saturday 2010-Jan-23" "Sunday 2010-Jan-24" "Monday 2010-Jan-25"
[25] "Tuesday 2010-Jan-26" "Wednesday 2010-Jan-27"
8 Dates
> x <- as.Date('03-06-1920', format='%d-%m-%Y')
> Sys.Date()
[1] "2015-10-29"
> days.apart <- d-x
> weekdays(d)
[1] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[8] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[15] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday" "Monday" "Tuesday"
[22] "Wednesday" "Thursday" "Friday" "Saturday" "Sunday"
> months(d)
[1] "January" "January" "January" "January" "January" "January" "January" "January"
[9] "January" "January" "January" "January" "January" "January" "January" "January"
[17] "January" "January" "January" "January" "January" "January" "January" "January"
[25] "January" "January"
> dp <- as.POSIXlt(d);
> dp
[1] "2008-01-02 UTC" "2008-01-03 UTC" "2008-01-04 UTC" "2008-01-05 UTC" "2008-01-06 UTC"
[6] "2008-01-07 UTC" "2008-01-08 UTC" "2008-01-09 UTC" "2008-01-10 UTC" "2008-01-11 UTC"
[11] "2008-01-12 UTC" "2008-01-13 UTC" "2008-01-14 UTC" "2008-01-15 UTC" "2008-01-16 UTC"
[16] "2008-01-17 UTC" "2008-01-18 UTC" "2008-01-19 UTC" "2008-01-20 UTC" "2008-01-21 UTC"
[21] "2008-01-22 UTC" "2008-01-23 UTC" "2008-01-24 UTC" "2008-01-25 UTC" "2008-01-26 UTC"
[26] "2008-01-27 UTC"
> dp$year <- dp$year -1
> d <- as.Date(dp)
> names(unclass(dp))
[1] "sec" "min" "hour" "mday" "mon" "year" "wday" "yday" "isdst"
> Sys.time()
[1] "2015-10-29 01:24:33 EDT"
> date()
[1] "Thu Oct 29 01:24:33 2015"
# Really useful: zoo and lubridate packages
9 I/O and the file system
> cat(i)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> i
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> print(j)
[1] 2.6259729 0.6519162 -1.7522234 1.8645187 1.6556576 6.6293262 6.8711933 3.5437175
[9] 11.0867059 7.8205138 8.5289449 10.1523917 14.6725922 15.6878317 15.8382418 12.2935032
[17] 13.6907292 20.4932340 19.4900883 22.7618505 19.9571887 24.4043928 21.3394394 22.2569133
[25] 23.9716730 27.3320326
> getwd()
[1] "/Users/lingduoduo"
> list.files()
[1] "Applications" "Desktop" "Documents" "Downloads"
[5] "Downloads.rar.dmg" "Eclipse" "Library" "Movies"
[9] "Music" "Pictures" "Public"
> list.dirs()
> dir()
[1] "Applications" "Desktop" "Documents" "Downloads"
[5] "Downloads.rar.dmg" "Eclipse" "Library" "Movies"
[9] "Music" "Pictures" "Public"
> Sys.glob('*.dmg')
[1] "Downloads.rar.dmg"
save(z, file='z.bin')
load('z.bin')
unlink('z.bin')
con <- file('f.txt', 'rt')
y <- readLines(con, 1)
close(con)
con <- file('g.txt', 'wt')
writeLines(y, con=con, sep="\n")
close(con)
write.csv(df, file="df.csv")
10 Script and package management
source('program.R')
install.packages('ggplot2')
library('ggplot2')
require('ggplot2')
11 In the Workspace
ls();
rm(z);
help('help');
help.search('help');
q();
12 Debug Functions
browser();
debug();
trace();
stopifnot(i[1]==1);
warning('message');
stop('message');
No comments:
Post a Comment