Monday, November 2, 2015

R Basics 2 - Basic List of Useful Functions in R

1 Built-in Constants
> LETTERS
 [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V"
[23] "W" "X" "Y" "Z"
> letters
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v"
[23] "w" "x" "y" "z"
> month.abb
 [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
> month.name
 [1] "January"   "February"  "March"     "April"     "May"       "June"      "July"  
 [8] "August"    "September" "October"   "November"  "December"
> pi
[1] 3.141593

2 Object creation
> sz <- 26
> x <- 4
> # length 1 vector
> t <- 'c'
> a <- letters[ceiling(runif(sz, 0.00001, 26))]; a
 [1] "m" "d" "v" "i" "a" "a" "t" "a" "i" "n" "y" "y" "x" "j" "h" "m" "j" "i" "i" "h" "a" "w"
[23] "t" "s" "g" "o"
> i <- 1:sz
> j <- i + rnorm(sz, 0, 2);j
 [1]  0.725392  3.795510  4.998845  3.118527  5.323526  7.283787  7.834817  9.087679
 [9]  8.830070  9.439148 12.960340 10.442182 12.580495 18.454917 17.299115 18.321839
[17] 15.656129 19.342379 20.866082 16.008389 19.242640 22.343947 25.057231 26.150223
[25] 27.198469 28.704727
> names(a) <- LETTERS[1:sz];a
  A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   P   Q   R   S   T   U   V   W
"m" "d" "v" "i" "a" "a" "t" "a" "i" "n" "y" "y" "x" "j" "h" "m" "j" "i" "i" "h" "a" "w" "t"
  X   Y   Z
"s" "g" "o"
> #complex number
> l <- exp((0+1i)*pi) + (1+0i);l
[1] 0+1.224647e-16i
> d <- as.Date('2010-01-01') + seq(1,sz);d
 [1] "2010-01-02" "2010-01-03" "2010-01-04" "2010-01-05" "2010-01-06" "2010-01-07"
 [7] "2010-01-08" "2010-01-09" "2010-01-10" "2010-01-11" "2010-01-12" "2010-01-13"
[13] "2010-01-14" "2010-01-15" "2010-01-16" "2010-01-17" "2010-01-18" "2010-01-19"
[19] "2010-01-20" "2010-01-21" "2010-01-22" "2010-01-23" "2010-01-24" "2010-01-25"
[25] "2010-01-26" "2010-01-27"
> f <- factor(rep(1:x, sz/x), levels=x:1); f
 [1] 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4
Levels: 4 3 2 1
> df <- data.frame(a=a, f=f, i=i, j=j); df
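Error in data.frame(a = a, f = f, i = i, j = j) :
  arguments imply differing number of rows: 26, 24
> # Note: rep(1:x, sz/x) truncates 26/4 = 6.5 to 6 repetitions, so f has only 24
> # elements. Use rep(1:x, length.out=sz) to build a factor of length 26.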
> m <- matrix(rnorm(x^2), nrow=x, ncol=x); m
           [,1]      [,2]       [,3]        [,4]
[1,]  1.1105554 0.6154485  0.6311158 -2.13814813
[2,]  0.9578517 0.8107677 -1.2327885 -0.65443181
[3,] -0.6565348 1.8245487 -1.4426480 -0.45362514
[4,]  0.2563039 1.2790322  1.2530851  0.04705056
> l <- list(1:10,LETTERS); l
[[1]]
 [1]  1  2  3  4  5  6  7  8  9 10

[[2]]
 [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V"
[23] "W" "X" "Y" "Z"

3 Object Inspection
> names(x)
NULL
> dimnames(x)
NULL
> colnames(x)
NULL
> rownames(x)
NULL
> dim(x)
NULL
> nrow(x)
NULL
> ncol(x)
NULL
> is.list(x)
[1] FALSE
> is.factor(x)
[1] FALSE
> is.complex(x)
[1] FALSE
> is.character(x)
[1] FALSE
> is.matrix(x)
[1] FALSE
> is.numeric(x)
[1] TRUE
> is.integer(x)
[1] FALSE
> is.vector(x)
[1] TRUE
> is.data.frame(x)
[1] FALSE
> is.ordered(x)
[1] FALSE

4 Utility Functions
> assign('variablename', 5)
> c(1,2)
[1] 1 2
> rep(NA,10)
 [1] NA NA NA NA NA NA NA NA NA NA
> append(l, list(c(1,2,3)))
[[1]]
[1] 0+1.224647e-16i

[[2]]
[1] 1 2 3

> seq(15,100,5)
 [1]  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85  90  95 100
>
> a=rnorm(10)
> sort(a)
 [1] -1.49263272 -1.34121858 -1.05170988 -0.92195631 -0.61930516 -0.29196316  0.01090951
 [8]  0.68823271  1.09200068  1.13104951
> order(a)
 [1]  8  3  6  5  9  1  7  4 10  2
> rank(a)
 [1]  6 10  2  8  4  3  7  1  5  9
> rev(a)
 [1]  1.09200068 -0.61930516 -1.49263272  0.01090951 -1.05170988 -0.92195631  0.68823271
 [8] -1.34121858  1.13104951 -0.29196316
>
> i=3
> any(i %in% c(1,3,5));
[1] TRUE
> all(i %in% c(1,3,5));
[1] TRUE
> which(i %in% c(1,3,5));
[1] 1
> match('c', a)
[1] NA

> df <- transform(df, k=j+1)[1:10,]
> df
   a f  i          j          k
1  r 1  1  2.2447932  3.2447932
2  v 2  2 -1.5417236 -0.5417236
3  s 3  3 -0.3591784  0.6408216
4  c 4  4  5.4814561  6.4814561
5  f 1  5  7.1022692  8.1022692
6  i 2  6  5.3284810  6.3284810
7  p 3  7  5.0873649  6.0873649
8  n 4  8  8.5368351  9.5368351
9  v 1  9 11.9734547 12.9734547
10 r 2 10 10.0198377 11.0198377
> df <- within(df, s<- i/j)
> df
   a f  i          j          k          s
1  r 1  1  2.2447932  3.2447932  0.4454753
2  v 2  2 -1.5417236 -0.5417236 -1.2972494
3  s 3  3 -0.3591784  0.6408216 -8.3523948
4  c 4  4  5.4814561  6.4814561  0.7297331
5  f 1  5  7.1022692  8.1022692  0.7040003
6  i 2  6  5.3284810  6.3284810  1.1260245
7  p 3  7  5.0873649  6.0873649  1.3759579
8  n 4  8  8.5368351  9.5368351  0.9371154
9  v 1  9 11.9734547 12.9734547  0.7516628
10 r 2 10 10.0198377 11.0198377  0.9980202
> x <- with(df, j+5)
> x
 [1]  7.244793  3.458276  4.640822 10.481456 12.102269 10.328481 10.087365 13.536835
 [9] 16.973455 15.019838
> z <- rep(1,length(x))
> df <- cbind(df, z)
> df
   a f  i          j          k          s z
1  r 1  1  2.2447932  3.2447932  0.4454753 1
2  v 2  2 -1.5417236 -0.5417236 -1.2972494 1
3  s 3  3 -0.3591784  0.6408216 -8.3523948 1
4  c 4  4  5.4814561  6.4814561  0.7297331 1
5  f 1  5  7.1022692  8.1022692  0.7040003 1
6  i 2  6  5.3284810  6.3284810  1.1260245 1
7  p 3  7  5.0873649  6.0873649  1.3759579 1
8  n 4  8  8.5368351  9.5368351  0.9371154 1
9  v 1  9 11.9734547 12.9734547  0.7516628 1
10 r 2 10 10.0198377 11.0198377  0.9980202 1
> row.df <- head(df, 1)
> rbind(df, row.df)
   a f  i          j          k          s z
1  r 1  1  2.2447932  3.2447932  0.4454753 1
2  v 2  2 -1.5417236 -0.5417236 -1.2972494 1
3  s 3  3 -0.3591784  0.6408216 -8.3523948 1
4  c 4  4  5.4814561  6.4814561  0.7297331 1
5  f 1  5  7.1022692  8.1022692  0.7040003 1
6  i 2  6  5.3284810  6.3284810  1.1260245 1
7  p 3  7  5.0873649  6.0873649  1.3759579 1
8  n 4  8  8.5368351  9.5368351  0.9371154 1
9  v 1  9 11.9734547 12.9734547  0.7516628 1
10 r 2 10 10.0198377 11.0198377  0.9980202 1
11 r 1  1  2.2447932  3.2447932  0.4454753 1
> df$f <- reorder(df$f, df$j, mean)
> df
   a f  i          j          k          s z
1  r 1  1  2.2447932  3.2447932  0.4454753 1
2  v 2  2 -1.5417236 -0.5417236 -1.2972494 1
3  s 3  3 -0.3591784  0.6408216 -8.3523948 1
4  c 4  4  5.4814561  6.4814561  0.7297331 1
5  f 1  5  7.1022692  8.1022692  0.7040003 1
6  i 2  6  5.3284810  6.3284810  1.1260245 1
7  p 3  7  5.0873649  6.0873649  1.3759579 1
8  n 4  8  8.5368351  9.5368351  0.9371154 1
9  v 1  9 11.9734547 12.9734547  0.7516628 1
10 r 2 10 10.0198377 11.0198377  0.9980202 1

5 Math Functions
> is.na(i)
[1] FALSE
> is.nan(j)
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> is.null(d)
[1] FALSE
> is.finite(j)
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[18] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
>
> abs(-1)
[1] 1
> sqrt(10)
[1] 3.162278
> log(1)
[1] 0
> log10(10)
[1] 1
> exp(1)
[1] 2.718282
> ceiling(5.3)
[1] 6
> floor(2.3)
[1] 2
> round(3.22,digits=1)
[1] 3.2
> trunc(4.5)
[1] 4
> sin(pi/2)
[1] 1
> cos(pi)
[1] -1
> tan(pi/2)
[1] 1.633124e+16
> asin(0.5)
[1] 0.5235988
> acos(0.5)
[1] 1.047198
> atan(1)
[1] 0.7853982
> sum(1:10)
[1] 55
> prod(1:10)
[1] 3628800
> cumsum(1:10)
 [1]  1  3  6 10 15 21 28 36 45 55
> cumprod(1:10)
 [1]       1       2       6      24     120     720    5040   40320  362880 3628800

6 Statistical Functions
> length(0:10)
[1] 11
> sum(0:10)
[1] 55
> min(0:10)
[1] 0
> max(0:10)
[1] 10
> range(0:10)
[1]  0 10
> cut(0:10,5)
 [1] (-0.01,2] (-0.01,2] (-0.01,2] (2,4]     (2,4]     (4,6]     (4,6]     (6,8]
 [9] (6,8]     (8,10]    (8,10]
Levels: (-0.01,2] (2,4] (4,6] (6,8] (8,10]
>
> mean(0:10)
[1] 5
> median(0:10)
[1] 5
> sd(0:10)
[1] 3.316625
> var(0:10)
[1] 11
> cov(0:10,1:11)
[1] 11
> cor(0:10,1:11)
[1] 1
> diff(1:10, lag=1, diff=1)
[1] 1 1 1 1 1 1 1 1 1
> rnorm(n=10, mean=0, sd=1)
 [1]  0.50660408  0.34007608 -0.07856213  0.87086844  0.68152240  0.80075731 -0.57385601
 [8]  0.99361746  1.18958557 -1.87070403
> runif(n=10, min=1, max=100)
 [1] 46.977626 55.666696 69.418533  8.874474 15.755576 52.141463 57.587816 85.232902
 [9] 69.724438  3.472987
>
> r <- lm(j ~ i, data=as.data.frame(df));
> summary(r)

Call:
lm(formula = j ~ i, data = as.data.frame(df))

Residuals:
    Min      1Q  Median      3Q     Max
-3.3564 -1.7633 -0.2411  2.3067  3.2870

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.74123    0.44095  -3.949 0.000145 ***
i            1.08017    0.02855  37.831  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.184 on 102 degrees of freedom
Multiple R-squared:  0.9335, Adjusted R-squared:  0.9328
F-statistic:  1431 on 1 and 102 DF,  p-value: < 2.2e-16

> anova(r)
Analysis of Variance Table

Response: j
           Df Sum Sq Mean Sq F value    Pr(>F)
i           1 6825.6  6825.6  1431.2 < 2.2e-16 ***
Residuals 102  486.5     4.8                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> residuals(r)
         1          2          3          4          5          6          7          8
 3.2870286  0.2328002 -3.2515109 -0.7149405 -2.0039734  1.8895235  1.0512190 -3.3564285
         9         10         11         12         13         14         15         16
 3.1063883 -1.2399755 -1.6117162 -1.0684410  2.3715878  2.3066557  1.3768941 -3.2480162
        17         18         19         20         21         22         23         24
-2.9309619  2.7913712  0.7080538  2.8996443 -0.9851891  2.3818433 -1.7632818 -1.9259796
        25         26         27         28         29         30         31         32
-1.2913915  0.9887964  3.2870286  0.2328002 -3.2515109 -0.7149405 -2.0039734  1.8895235
        33         34         35         36         37         38         39         40
 1.0512190 -3.3564285  3.1063883 -1.2399755 -1.6117162 -1.0684410  2.3715878  2.3066557
        41         42         43         44         45         46         47         48
 1.3768941 -3.2480162 -2.9309619  2.7913712  0.7080538  2.8996443 -0.9851891  2.3818433
        49         50         51         52         53         54         55         56
-1.7632818 -1.9259796 -1.2913915  0.9887964  3.2870286  0.2328002 -3.2515109 -0.7149405
        57         58         59         60         61         62         63         64
-2.0039734  1.8895235  1.0512190 -3.3564285  3.1063883 -1.2399755 -1.6117162 -1.0684410
        65         66         67         68         69         70         71         72
 2.3715878  2.3066557  1.3768941 -3.2480162 -2.9309619  2.7913712  0.7080538  2.8996443
        73         74         75         76         77         78         79         80
-0.9851891  2.3818433 -1.7632818 -1.9259796 -1.2913915  0.9887964  3.2870286  0.2328002
        81         82         83         84         85         86         87         88
-3.2515109 -0.7149405 -2.0039734  1.8895235  1.0512190 -3.3564285  3.1063883 -1.2399755
        89         90         91         92         93         94         95         96
-1.6117162 -1.0684410  2.3715878  2.3066557  1.3768941 -3.2480162 -2.9309619  2.7913712
        97         98         99        100        101        102        103        104
 0.7080538  2.8996443 -0.9851891  2.3818433 -1.7632818 -1.9259796 -1.2913915  0.9887964
> coef(r)
(Intercept)           i
  -1.741227    1.080172
> plot(r)
Hit <Return> to see next plot: plot(r$fitted.values)
Hit <Return> to see next plot: plot(r$residuals)
Hit <Return> to see next plot:
# Also, glm() gam() lme() lmer() nls()

7 Character Functions
> as.character(1)
[1] "1"
> toString(10)
[1] "10"
> nchar('qwe')
[1] 3
> B<-toupper('bbb')
> b<-tolower('BBB')
> s <- 'the cow jumped over the moon'
> sub('the', 'a', s)
[1] "a cow jumped over the moon"
> gsub('the', 'a', s)
[1] "a cow jumped over a moon"
> substr(s,5,7)
[1] "cow"
> substr(s,5,7) <- "dog"
> s
[1] "the dog jumped over the moon"
> substr(s,5,7) <- "monkey"
> s
[1] "the mon jumped over the moon"
> paste('a', 'b', 'c', sep=";")
[1] "a;b;c"
> strsplit(s, ' ')
[[1]]
[1] "the"    "mon"    "jumped" "over"   "the"    "moon"

> grep('the', s)
[1] 1
> make.unique(a)
 [1] "q"   "a"   "e"   "b"   "s"   "x"   "k"   "u"   "n"   "w"   "b.1" "k.1" "e.1" "b.2"
[15] "a.1" "u.1" "r"   "y"   "e.2" "v"   "h"   "m"   "n.1" "y.1" "x.1" "g"
> format(j, digits=2)
 [1] " 2.63" " 0.65" "-1.75" " 1.86" " 1.66" " 6.63" " 6.87" " 3.54" "11.09" " 7.82" " 8.53"
[12] "10.15" "14.67" "15.69" "15.84" "12.29" "13.69" "20.49" "19.49" "22.76" "19.96" "24.40"
[23] "21.34" "22.26" "23.97" "27.33"
> sprintf("%d: %s", i, a)
 [1] "1: q"  "2: a"  "3: e"  "4: b"  "5: s"  "6: x"  "7: k"  "8: u"  "9: n"  "10: w" "11: b"
[12] "12: k" "13: e" "14: b" "15: a" "16: u" "17: r" "18: y" "19: e" "20: v" "21: h" "22: m"
[23] "23: n" "24: y" "25: x" "26: g"
> format(d, format="%A %Y-%b-%d")
 [1] "Saturday 2010-Jan-02"  "Sunday 2010-Jan-03"    "Monday 2010-Jan-04"
 [4] "Tuesday 2010-Jan-05"   "Wednesday 2010-Jan-06" "Thursday 2010-Jan-07"
 [7] "Friday 2010-Jan-08"    "Saturday 2010-Jan-09"  "Sunday 2010-Jan-10"
[10] "Monday 2010-Jan-11"    "Tuesday 2010-Jan-12"   "Wednesday 2010-Jan-13"
[13] "Thursday 2010-Jan-14"  "Friday 2010-Jan-15"    "Saturday 2010-Jan-16"
[16] "Sunday 2010-Jan-17"    "Monday 2010-Jan-18"    "Tuesday 2010-Jan-19"
[19] "Wednesday 2010-Jan-20" "Thursday 2010-Jan-21"  "Friday 2010-Jan-22"
[22] "Saturday 2010-Jan-23"  "Sunday 2010-Jan-24"    "Monday 2010-Jan-25"
[25] "Tuesday 2010-Jan-26"   "Wednesday 2010-Jan-27"

8 Dates
> x <- as.Date('03-06-1920', format='%d-%m-%Y')
> Sys.Date()
[1] "2015-10-29"
> days.apart <- d-x
> weekdays(d)
 [1] "Wednesday" "Thursday"  "Friday"    "Saturday"  "Sunday"    "Monday"    "Tuesday"
 [8] "Wednesday" "Thursday"  "Friday"    "Saturday"  "Sunday"    "Monday"    "Tuesday"
[15] "Wednesday" "Thursday"  "Friday"    "Saturday"  "Sunday"    "Monday"    "Tuesday"
[22] "Wednesday" "Thursday"  "Friday"    "Saturday"  "Sunday"
> months(d)
 [1] "January" "January" "January" "January" "January" "January" "January" "January"
 [9] "January" "January" "January" "January" "January" "January" "January" "January"
[17] "January" "January" "January" "January" "January" "January" "January" "January"
[25] "January" "January"
> dp <- as.POSIXlt(d);
> dp
 [1] "2008-01-02 UTC" "2008-01-03 UTC" "2008-01-04 UTC" "2008-01-05 UTC" "2008-01-06 UTC"
 [6] "2008-01-07 UTC" "2008-01-08 UTC" "2008-01-09 UTC" "2008-01-10 UTC" "2008-01-11 UTC"
[11] "2008-01-12 UTC" "2008-01-13 UTC" "2008-01-14 UTC" "2008-01-15 UTC" "2008-01-16 UTC"
[16] "2008-01-17 UTC" "2008-01-18 UTC" "2008-01-19 UTC" "2008-01-20 UTC" "2008-01-21 UTC"
[21] "2008-01-22 UTC" "2008-01-23 UTC" "2008-01-24 UTC" "2008-01-25 UTC" "2008-01-26 UTC"
[26] "2008-01-27 UTC"
> dp$year <- dp$year -1
> d <- as.Date(dp)
> names(unclass(dp))
[1] "sec"   "min"   "hour"  "mday"  "mon"   "year"  "wday"  "yday"  "isdst"
> Sys.time()
[1] "2015-10-29 01:24:33 EDT"
> date()
[1] "Thu Oct 29 01:24:33 2015"
# Really useful: zoo and lubridate packages

9 I/O and the file system
> cat(i)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> i
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
> print(j)
 [1]  2.6259729  0.6519162 -1.7522234  1.8645187  1.6556576  6.6293262  6.8711933  3.5437175
 [9] 11.0867059  7.8205138  8.5289449 10.1523917 14.6725922 15.6878317 15.8382418 12.2935032
[17] 13.6907292 20.4932340 19.4900883 22.7618505 19.9571887 24.4043928 21.3394394 22.2569133
[25] 23.9716730 27.3320326
> getwd()
[1] "/Users/lingduoduo"
> list.files()
 [1] "Applications"      "Desktop"           "Documents"         "Downloads"    
 [5] "Downloads.rar.dmg" "Eclipse"           "Library"           "Movies"      
 [9] "Music"             "Pictures"          "Public"      
> list.dirs()
> dir()
 [1] "Applications"      "Desktop"           "Documents"         "Downloads"    
 [5] "Downloads.rar.dmg" "Eclipse"           "Library"           "Movies"      
 [9] "Music"             "Pictures"          "Public"      
> Sys.glob('*.txt')
save(z, file='z.bin')
load('z.bin')
unlink('z.bin')
con <- file('f.txt', 'rt')
y <- readLines(con, 1)
writeLines(text, con=c, sep="\n")
write.csv(df, file="df.csv")

10 Script and package management
source('program.R')
install.packages('ggplot2')
library('ggplot2')
require('ggplot2')

11 In the Workspace
ls();
rm(z);
help('help');
help.search('help');
q();

12 Debug Functions
browser();
debug();
trace();
stopifnot(i[1]==1);
warning('message');
stop('message');

No comments:

Post a Comment

Blog Archive