- Homework is due on November 22nd
- work with a partner
- no more than one explicit loop
- Will have lab period on Friday
- Read Chapter 11 in TAoRP for background material on what is discussed today
Patrick D. Schloss, PhD (microbialinformatics.github.io)
Department of Microbiology & Immunology
# Load the metadata table (the first line of the file holds the column names)
# and preview its first rows. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
metadata <- read.table(file="wild.metadata.txt", header=TRUE)
head(metadata)
## Group Date ET Station SP Sex Age Repro Weight Ear
## 1 5_25m3 5_25 3 BB18 PL M J ABD 7.5 13
## 2 5_25m4 5_25 4 K19 PL M A SCR 16.0 15
## 3 5_26m1 5_26 1 A12 PL F A NE 19.5 14
## 4 5_26m9 5_26 9 M9 PL F A NE 25.0 13
## 5 5_31m11 5_31 11 F2 PMG F J NT 16.0 18
## 6 5_31m2 5_31 2 CC4 PL M SA ABD 15.0 14
The Date column is the date that the mice were captured, in M_DD
format. Can you convert this column into "Month Day, Year" format? Assume the year was 2011.
metadata <- read.table(file="wild.metadata.txt", header=T)
# Convert one date from "M_DD" form (e.g. "5_25") into "Month Day, Year"
# form (e.g. "May 25, 2011").
#
# m_d:  a single character string with month and day separated by "_"
# year: the year to append to the result (defaults to 2011)
#
# Returns a single character string.
fixDate <- function(m_d, year=2011){
  # Split on the underscore: element 1 is the month, element 2 is the day
  m.d <- unlist(strsplit(x=m_d, split="_"))
  m.d <- as.numeric(m.d)
  month <- month.name[m.d[1]]
  # The day is the second field. Reading m.d[1] here repeated the month
  # number as the day ("5_25" came out as "May 5, 2011").
  day <- m.d[2]
  format.date <- paste0(month, " ", day, ", ", year)
  return(format.date)
}
# Reformat every capture date with fixDate. USE.NAMES = FALSE stops sapply
# from attaching the input strings as names on the result.
date <- as.character(metadata$Date)
nice.dates <- sapply(date, fixDate, USE.NAMES = FALSE)
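## NOTE(review): this recorded output repeats the month number as the day
## (e.g. 5_25 is shown as "May 5"); the expected values start "May 25, 2011".
## [1] "May 5, 2011" "May 5, 2011" "May 5, 2011" "May 5, 2011"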
## [5] "May 5, 2011" "May 5, 2011" "May 5, 2011" "May 5, 2011"
## [9] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [13] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [17] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [21] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [25] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [29] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [33] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [37] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [41] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [45] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [49] "June 6, 2011" "June 6, 2011" "June 6, 2011" "June 6, 2011"
## [53] "June 6, 2011" "June 6, 2011" "July 7, 2011" "July 7, 2011"
## [57] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [61] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [65] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [69] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [73] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [77] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [81] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [85] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [89] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [93] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [97] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [101] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [105] "July 7, 2011" "July 7, 2011" "July 7, 2011" "July 7, 2011"
## [109] "July 7, 2011" "July 7, 2011" "July 7, 2011"
How would you...
Regular expressions!
+
- Matches the preceding character 1 or more times
grep("a+", c("baa", "woof"))
## [1] 1
?
- Matches the preceding character 0 or 1 times
grep("colou?r", c("color", "colour"))
## [1] 1 2
*
- Matches the preceding character 0 or more times
grep("ab*c", c("ac", "abc", "abbc", "abbbc"))
## [1] 1 2 3 4
{}
- Matches the preceding character a user-defined number of times
grep("ab{2}c", c("ac", "abc", "abbc", "abbbc"))
## [1] 3
{,}
- Matches the preceding character a user-defined number of times (range)
grep("ab{1,2}c", c("ac", "abc", "abbc", "abbbc"))
## [1] 2 3
# Lower bound left out of the braces: only the upper bound (2) is given.
# NOTE(review): the recorded result includes "abbbc" (three b's) - confirm how
# the regex engine treats an omitted lower bound; "{0,2}" is the explicit form.
grep("ab{,2}c", c("ac", "abc", "abbc", "abbbc"))
## [1] 1 2 3 4
.
- Any character
\\d
- Any digit
\\w
- Any alphanumeric character
\\s
- Any whitespace character (<space>, \\t, \\n)
\\D
- Anything but a digit
\\W
- Any non-alphanumeric character
\\S
- Any non-whitespace character
.
- Any character
grep("A.G", c("ACG", "ATG", "ATTG"))
## [1] 1 2
# ".+" is one or more of any character, so "ATTG" (two characters between
# the A and the G) matches as well.
grep("A.+G", c("ACG", "ATG", "ATTG"))
## [1] 1 2 3
\\d
- Any digit
grep("\\d", c("ATG", "123"))
## [1] 2
\\w
- Any alphanumeric character
grep("\\w", c("ATG", "123"))
## [1] 1 2
\\s
- Whitespace characters
grep("\\s", c("A G", "ATG"))
## [1] 1
\\D
- Any non-numeric character
grep("\\D", c("ATG", "123"))
## [1] 1
\\W
- Any non-alphanumeric character
grep("\\W", c("ATG", "123"))
## integer(0)
\\S
- Any non-space character (a string matches if it contains at least one, so "A G" matches too)
grep("\\S", c("A G", "ATG")) #why does this come up as 1,2?
## [1] 1 2
\\
- When used to precede a quantifier or metacharacter, it makes that character match literally
grep("\\+", c("2+2", "2-2", "2.2"))
## [1] 1
# An escaped "." matches only a literal period.
grep("\\.", c("2+2", "2-2", "2.2"))
## [1] 3
# Escaped parentheses match a literal "(" and ")"; "\\d{3}" and "\\d{4}"
# match three and four digits.
grep("\\(\\d{3}\\)\\d{3}-\\d{4}", "(734)867-5301")
## [1] 1
[]
- Match any of the characters in the brackets
grep("[ATGCU]", c("ATG", "123"))
## [1] 1
# A bracket set can mix letters and digits: "ATG" contains A and G, and
# "123" contains 2.
grep("[AG2]", c("ATG", "123"))
## [1] 1 2
[-]
- Match any of the characters in the range, including the endpoints and everything between them...
grep("[a-z]", c("ATG", "123"))
## integer(0)
# Several ranges can sit side by side in one set: lower- and upper-case
# letters here...
grep("[a-zA-Z]", c("ATG", "123"))
## [1] 1
# ...and digits as well.
grep("[a-zA-Z0-9]", c("ATG", "123"))
## [1] 1 2
[^]
- Match any character that is not in the brackets...
grep("[^AGTC]", c("ATG", "123"))
## [1] 2
# Every string matches because each contains at least one character other
# than N or U (for example the "A" in "ANN").
grep("[^NU]", c("ATG", "AUG", "ANN"))
## [1] 1 2 3
sub / gsub
# Parentheses capture part of the match; "\\1" in the replacement inserts
# the first captured group.
gsub("ATG(CAG)", "AAA\\1", "ATGCAG")
## [1] "AAACAG"
# With two groups, "\\1" and "\\2" refer to them in order.
gsub("(ATG)(CAG)", "\\1AAA\\2", "ATGCAG")
## [1] "ATGAAACAG"
# The substitution is applied to each element of a character vector.
gsub("(A.G)(C.G)", "\\1AAA\\2", c("ATGCAG","AAGCTG"))
## [1] "ATGAAACAG" "AAGAAACTG"
# Reload the metadata table; the first line of the file holds the column
# names. TRUE is spelled out because T is an ordinary, reassignable variable.
metadata <- read.table(file="wild.metadata.txt", header=TRUE)
# Convert one date from "M_DD" form (e.g. "5_25") into "Month Day, Year"
# form (e.g. "May 25, 2011").
#
# m_d:  a single character string with month and day separated by "_"
# year: the year to append to the result (defaults to 2011)
#
# Returns a single character string.
fixDate <- function(m_d, year=2011){
  # Split on the underscore: element 1 is the month, element 2 is the day
  m.d <- unlist(strsplit(x=m_d, split="_"))
  m.d <- as.numeric(m.d)
  month <- month.name[m.d[1]]
  # The day is the second field. Reading m.d[1] here repeated the month
  # number as the day ("5_25" came out as "May 5, 2011").
  day <- m.d[2]
  format.date <- paste0(month, " ", day, ", ", year)
  return(format.date)
}
# Reformat every capture date with fixDate. USE.NAMES = FALSE stops sapply
# from attaching the input strings as names on the result.
date <- as.character(metadata$Date)
nice.dates <- sapply(date, fixDate, USE.NAMES = FALSE)
# Regex version of the date conversion, applied to the whole Date column at
# once: a capture group keeps one numeric field of each "M_DD" string and the
# rest of the match is dropped.
year <- "2011"
day <- gsub(pattern = "^\\d+_(\\d+)", replacement = "\\1", x = metadata$Date)
# The month is converted to a number so it can index month.name
month <- as.numeric(
  gsub(pattern = "^(\\d+)_\\d+", replacement = "\\1", x = metadata$Date)
)
paste0(month.name[month], " ", day, ", ", year)
## [1] "May 25, 2011" "May 25, 2011" "May 26, 2011" "May 26, 2011"
## [5] "May 31, 2011" "May 31, 2011" "May 31, 2011" "May 31, 2011"
## [9] "June 14, 2011" "June 14, 2011" "June 15, 2011" "June 15, 2011"
## [13] "June 15, 2011" "June 15, 2011" "June 15, 2011" "June 15, 2011"
## [17] "June 15, 2011" "June 15, 2011" "June 15, 2011" "June 15, 2011"
## [21] "June 16, 2011" "June 16, 2011" "June 16, 2011" "June 16, 2011"
## [25] "June 16, 2011" "June 16, 2011" "June 17, 2011" "June 17, 2011"
## [29] "June 17, 2011" "June 1, 2011" "June 1, 2011" "June 1, 2011"
## [33] "June 29, 2011" "June 29, 2011" "June 29, 2011" "June 29, 2011"
## [37] "June 29, 2011" "June 29, 2011" "June 29, 2011" "June 2, 2011"
## [41] "June 2, 2011" "June 2, 2011" "June 2, 2011" "June 30, 2011"
## [45] "June 30, 2011" "June 30, 2011" "June 30, 2011" "June 30, 2011"
## [49] "June 30, 2011" "June 5, 2011" "June 5, 2011" "June 5, 2011"
## [53] "June 5, 2011" "June 5, 2011" "July 13, 2011" "July 13, 2011"
## [57] "July 13, 2011" "July 13, 2011" "July 13, 2011" "July 13, 2011"
## [61] "July 13, 2011" "July 13, 2011" "July 13, 2011" "July 13, 2011"
## [65] "July 13, 2011" "July 13, 2011" "July 14, 2011" "July 14, 2011"
## [69] "July 14, 2011" "July 14, 2011" "July 14, 2011" "July 14, 2011"
## [73] "July 14, 2011" "July 14, 2011" "July 14, 2011" "July 14, 2011"
## [77] "July 14, 2011" "July 14, 2011" "July 14, 2011" "July 14, 2011"
## [81] "July 14, 2011" "July 14, 2011" "July 14, 2011" "July 14, 2011"
## [85] "July 14, 2011" "July 14, 2011" "July 14, 2011" "July 14, 2011"
## [89] "July 14, 2011" "July 2, 2011" "July 2, 2011" "July 2, 2011"
## [93] "July 2, 2011" "July 2, 2011" "July 2, 2011" "July 2, 2011"
## [97] "July 2, 2011" "July 2, 2011" "July 2, 2011" "July 2, 2011"
## [101] "July 2, 2011" "July 3, 2011" "July 3, 2011" "July 3, 2011"
## [105] "July 3, 2011" "July 3, 2011" "July 3, 2011" "July 3, 2011"
## [109] "July 3, 2011" "July 3, 2011" "July 3, 2011"