- Project 1 is due today
- Read The Art of R Programming (Chapters 6 and 7)
Patrick D. Schloss, PhD (microbialinformatics.github.io)
Department of Microbiology & Immunology
# Lists: a named list can hold elements of different types and lengths.
ps <- list(
  name = c("Pat", "Schloss"),
  position = "Associate Professor",
  time.at.um = 5
)

# `$` extracts a single element by its name.
ps$name
## [1] "Pat" "Schloss"
ps$position
## [1] "Associate Professor"
ps$time.at.um
## [1] 5

# Inspect the structure, the number of elements, and the element names.
str(ps)
## List of 3
## $ name : chr [1:2] "Pat" "Schloss"
## $ position : chr "Associate Professor"
## $ time.at.um: num 5
length(ps)
## [1] 3
names(ps)
## [1] "name" "position" "time.at.um"

# unlist() flattens the list into one atomic vector; because the elements are
# of mixed type, everything is coerced to character (note the "5").
unlist(ps)
## name1 name2 position
## "Pat" "Schloss" "Associate Professor"
## time.at.um
## "5"
is.vector(unlist(ps))
## [1] TRUE

# Three equivalent ways to pull out the element itself ...
ps$position
## [1] "Associate Professor"
ps[["position"]]
## [1] "Associate Professor"
ps[[2]]
## [1] "Associate Professor"
# ... whereas single brackets return a list of length one.
ps[2]
## $position
## [1] "Associate Professor"

# Assigning to a name that does not exist yet appends a new element.
ps$kids.names <- c("mary", "patrick", "joe", "john", "ruth", "jacob", "peter")
# Use the full element name: `$` on a list partially matches names, so the
# shorthand `ps$kids` only works until another element starting with "kids"
# is added.
ps$kids.names[3]
## [1] "joe"
ps[[4]][3]
## [1] "joe"
ps$married <- TRUE
ps
## $name
## [1] "Pat" "Schloss"
##
## $position
## [1] "Associate Professor"
##
## $time.at.um
## [1] 5
##
## $kids.names
## [1] "mary" "patrick" "joe" "john" "ruth" "jacob" "peter"
##
## $married
## [1] TRUE

# Assigning NULL removes an element; the remaining elements shift up.
ps$position <- NULL
ps
## $name
## [1] "Pat" "Schloss"
##
## $time.at.um
## [1] 5
##
## $kids.names
## [1] "mary" "patrick" "joe" "john" "ruth" "jacob" "peter"
##
## $married
## [1] TRUE
# Two integer sequences collected in a named list.
data <- list(x = 1:10, y = 100:110)

# lapply() calls median() on every element and always returns a list.
lapply(X = data, FUN = median)
## $x
## [1] 5.5
##
## $y
## [1] 105

# sapply() does the same work but simplifies the result to a named vector.
sapply(X = data, FUN = median)
## x y
## 5.5 105.0
# Many functions return their results as a list; t.test() is one example.
# Seed the generator so the simulated data (and therefore the test result)
# are the same on every run.
set.seed(1)
x <- runif(100)
y <- rep(c("red", "blue"), each = 50)
# NOTE: the variable `t` masks base::t() by name; calls such as t(m) still
# find the function, but a different name would be clearer in new code.
t <- t.test(x ~ y)

# NOTE: the output recorded below was captured from unseeded draws. The
# str() listing and the individual extractions come from different draws
# (compare the statistic and p-value), so the numbers are illustrative and
# will not match the seeded run above.
str(t)
## List of 9
## $ statistic : Named num 0.105
## ..- attr(*, "names")= chr "t"
## $ parameter : Named num 97.6
## ..- attr(*, "names")= chr "df"
## $ p.value : num 0.916
## $ conf.int : atomic [1:2] -0.107 0.119
## ..- attr(*, "conf.level")= num 0.95
## $ estimate : Named num [1:2] 0.471 0.465
## ..- attr(*, "names")= chr [1:2] "mean in group blue" "mean in group red"
## $ null.value : Named num 0
## ..- attr(*, "names")= chr "difference in means"
## $ alternative: chr "two.sided"
## $ method : chr "Welch Two Sample t-test"
## $ data.name : chr "x by y"
## - attr(*, "class")= chr "htest"

# Individual pieces of the result are extracted like any other list element.
t$statistic
## t
## 0.4470755
t$parameter
## df
## 97.01836
t$p.value
## [1] 0.655817
t$estimate
## mean in group blue mean in group red
## 0.4656561 0.4411594
# Data frames: a list of equal-length columns, built here from three vectors.
name <- c("Schloss", "Young", "Mobley", "SwansonJ")
rank <- c("Asst", "Assoc", "Chair", "Full")
cool <- c(10, 1, 9, 8)
# stringsAsFactors = TRUE is spelled out because its default changed to FALSE
# in R 4.0.0; the factor output shown below (with "Levels:") relies on it.
directory <- data.frame(name, rank, cool, stringsAsFactors = TRUE)
directory
## name rank cool
## 1 Schloss Asst 10
## 2 Young Assoc 1
## 3 Mobley Chair 9
## 4 SwansonJ Full 8

# Select a column by position or by name; a single column comes back as a
# vector (here a factor) rather than a data frame.
directory[, 1]
## [1] Schloss Young Mobley SwansonJ
## Levels: Mobley Schloss SwansonJ Young
directory[, "name"]
## [1] Schloss Young Mobley SwansonJ
## Levels: Mobley Schloss SwansonJ Young

# Select rows by position or with a logical condition on a column.
directory[1, ]
## name rank cool
## 1 Schloss Asst 10
directory[directory$name == "Schloss", ]
## name rank cool
## 1 Schloss Asst 10

# Once row names are set, rows can also be looked up by name.
rownames(directory) <- directory$name
directory["Schloss", ]
## name rank cool
## Schloss Schloss Asst 10
# Factors: categorical data stored as integer codes plus a set of levels.
x <- factor(x = c("a", "b", "b", "c", "d"))
x
## [1] a b b c d
## Levels: a b c d
str(x)
## Factor w/ 4 levels "a","b","c","d": 1 2 2 3 4
levels(x)
## [1] "a" "b" "c" "d"

# Assigning a value that is not among the levels yields NA plus a warning.
x[2] <- "e"
## Warning in `[<-.factor`(`*tmp*`, 2, value = "e"): invalid factor level, NA
## generated
x
## [1] a <NA> b c d
## Levels: a b c d

# Declare every permitted level up front, including ones not yet observed;
# table() then reports a zero count for the unused level.
x <- factor(
  x = c("a", "b", "b", "c", "d"),
  levels = c("a", "b", "c", "d", "e")
)
x
## [1] a b b c d
## Levels: a b c d e
levels(x)
## [1] "a" "b" "c" "d" "e"
table(x)
## x
## a b c d e
## 1 2 1 1 0

# The order given in `levels` is the order used for printing and tabulating.
x <- factor(
  x = c("a", "b", "b", "c", "d"),
  levels = c("a", "e", "b", "c", "d")
)
x
## [1] a b b c d
## Levels: a e b c d
levels(x)
## [1] "a" "e" "b" "c" "d"
table(x)
## x
## a e b c d
## 1 0 2 1 1
# Simulate a small survey: 20 ages between 20 and 40 plus two factors.
set.seed(2)
ages <- sample(20:40, 20, replace = TRUE)
gender <- factor(sample(c("Female", "Male"), 20, replace = TRUE))
pol <- factor(sample(c("D", "R", "I"), 20, replace = TRUE))

# split() breaks a vector into a list with one element per factor level.
# NOTE: all output below was recorded during the original lecture; exact
# values depend on the R version's sample() algorithm, so treat the numbers
# as illustrative.
split(ages, gender)
## $Female
## [1] 34 23 39 39 22 37 31 31 25 24 21
##
## $Male
## [1] 23 32 29 35 23 28 37 40 29

# cut() bins a numeric vector into a factor of intervals.
# NOTE: the variable `range` masks base::range() by name; the name is kept
# so that any later code referring to it keeps working.
range <- seq(20, 40, 5)
# Intervals are closed on the right: (25,30] means 25 < x <= 30.
# include.lowest = TRUE also closes the first interval on the left, so an age
# equal to the lowest break (20) is binned instead of silently becoming NA.
segments <- cut(ages, range, include.lowest = TRUE)
table(segments)
## segments
## [20,25] (25,30] (30,35] (35,40]
## 6 7 5 2

# Passing a single number asks for that many equal-width bins; cut() widens
# the range slightly, so the minimum and maximum are always included.
segments <- cut(ages, 5)
table(segments)
## segments
## (20,23.6] (23.6,27.2] (27.2,30.8] (30.8,34.4] (34.4,38]
## 1 10 2 4 3

# Quantile-based breaks start at min(ages), so include.lowest = TRUE is
# required here as well; without it the minimum age is dropped as NA.
range <- quantile(ages)
segments <- cut(ages, range, include.lowest = TRUE)
table(segments)
## segments
## [20,25] (25,26.5] (26.5,33] (33,38]
## 6 4 7 3