- A new homework has been posted and is due on November 22nd
- work with a partner
- no more than one explicit loop
- Will have lab period on Friday
- Read Chapter 11 in TAoRP for background material on what is discussed today and Tuesday
Patrick D. Schloss, PhD (microbialinformatics.github.io)
Department of Microbiology & Immunology
write.table
, read.table
write
, scan
, readLines
Strings are atomic variables made up of characters, numbers, punctuation, etc.
You can form a string by putting information between "
and "
.
name <- "pat"
name
## [1] "pat"
name[1]
name[1]
## [1] "pat"
name
is a vector and so name[1]
will return the first element of that vector, not the first character of the string.
To extract characters from within a string, use the substr
command:
substr(x, start, stop)
x
is the string of interest
start
is the position within the string where you want the substring to start
stop
is the position within the string where you want the substring to end
substr(name, 3, 3)
## [1] "t"
substr(name, 2, 3)
## [1] "at"
substr(name, 1, 1)
## [1] "p"
substr(name, 2, 4)
## [1] "at"
names <- c("pat", "sarah", "john", "emily", "mary", "susan")
substr(names, 1,2)
## [1] "pa" "sa" "jo" "em" "ma" "su"
nchar
command:
name.length <- nchar(names)
substr(names, name.length-2, name.length)
## [1] "pat" "rah" "ohn" "ily" "ary" "san"
nchar
command:
name.length <- nchar(names)
substr(names, name.length-1, name.length)
## [1] "at" "ah" "hn" "ly" "ry" "an"
names <- c("Pat Schloss", "Mary O'Riordan", "Vince Young", "Kathy Spindler", "Harry Mobley", "Oveta Fuller", "Adam Lauring")
strsplit
function
strsplit(x, split)
x
is the string
split
is the delimiter to split on
split.names <- strsplit(names, " ")
split.names
## [[1]]
## [1] "Pat" "Schloss"
##
## [[2]]
## [1] "Mary" "O'Riordan"
##
## [[3]]
## [1] "Vince" "Young"
##
## [[4]]
## [1] "Kathy" "Spindler"
##
## [[5]]
## [1] "Harry" "Mobley"
##
## [[6]]
## [1] "Oveta" "Fuller"
##
## [[7]]
## [1] "Adam" "Lauring"
strsplit
?
strsplit("11/8/2012", split="/")
## [[1]]
## [1] "11" "8" "2012"
strsplit("ATGCATCTGA", split="")
## [[1]]
## [1] "A" "T" "G" "C" "A" "T" "C" "T" "G" "A"
-
's paste
function to stitch the vector together
paste(x, y, sep=" ", collapse=NULL)
x
and y
are two vectors - need only supply one
sep
is the character to use to paste the two vectors to each other
collapse
is the character to use to merge the elements of the final vector
date <- unlist(strsplit("11/8/2012", split="/"))
date
## [1] "11" "8" "2012"
paste(date, collapse="-")
## [1] "11-8-2012"
paste("Today is", date, sep=":", collapse="-")
## [1] "Today is:11-Today is:8-Today is:2012"
paste("Today is", paste(date, collapse="-"), sep=": ")
## [1] "Today is: 11-8-2012"
paste("Today is", paste(date, collapse="-"), sep=": ")
## [1] "Today is: 11-8-2012"
paste("Today is: ", paste(date, collapse="-"), sep="")
## [1] "Today is: 11-8-2012"
paste0("Today is: ", paste(date, collapse="-"))
## [1] "Today is: 11-8-2012"
First Last
format, can you convert them to Last, First
format and then alphabetize them?
names
vector
## Oveta Fuller Adam Lauring Harry Mobley Mary O'Riordan
## "Fuller, Oveta" "Lauring, Adam" "Mobley, Harry" "O'Riordan, Mary"
## Pat Schloss Kathy Spindler Vince Young
## "Schloss, Pat" "Spindler, Kathy" "Young, Vince"
# Convert a single "First Last" name into "Last, First".
#
# name: a character string whose first two space-separated words are the
#       first and last name, in that order.
# Returns one string made of the second word, a comma and a space, and the
# first word. Any words after the second are ignored.
last.first <- function(name) {
  # strsplit returns a list; unlist flattens it to a plain character vector
  pieces <- unlist(strsplit(name, split = " "))
  first <- pieces[1]
  last <- pieces[2]
  paste(last, first, sep = ", ")
}
# Apply last.first to every element of names; sapply labels each result
# with the original input string it was computed from.
convert.names <- sapply(names, last.first)
# Print the converted names in sorted (alphabetical) order.
sort(convert.names)
## Oveta Fuller Adam Lauring Harry Mobley Mary O'Riordan
## "Fuller, Oveta" "Lauring, Adam" "Mobley, Harry" "O'Riordan, Mary"
## Pat Schloss Kathy Spindler Vince Young
## "Schloss, Pat" "Spindler, Kathy" "Young, Vince"
sprintf
i <- 8
sprintf("the square of %d is %d", i, i^2)
## [1] "the square of 8 is 64"
sprintf("the square root of %d is %6.2f", i, sqrt(i))
## [1] "the square root of 8 is 2.83"
sprintf("%d times 1e6 is %.3e", i, i * 1e6)
## [1] "8 times 1e6 is 8.000e+06"
%s
reserves the place for a string
%d
reserves the place for an integer
%f
reserves the place for a decimal number
%e
reserves the place for a number in scientific notation
For %f
and %e
the format is %m.n
. n
indicates the number of values to the right of the decimal place to include and m
indicates the total number of spaces to allot the string
format(x, trim = FALSE, digits = NULL, nsmall = 0L,
justify = c("left", "right", "centre", "none"),
width = NULL, na.encode = TRUE, scientific = NA,
big.mark = "", big.interval = 3L,
small.mark = "", small.interval = 5L,
decimal.mark = ".", zero.print = NULL,
drop0trailing = FALSE, ...)
x
is a number
trim
is whether to suppress the padding that right justifies numbers to a common width (the default, FALSE, keeps the padding)
digits
is the maximum number of significant digits
nsmall
is the minimum number of digits to the right of the decimal