In version 4.0.0 of R, raw strings were introduced. From the CRAN page: “There is a new syntax for specifying raw character constants similar to the one used in C++: r"(…)" with … any character sequence not containing the sequence )". This makes it easier to write strings that contain backslashes or both single and double quotes. For more details see ?Quotes.”
Everything between the delimiters r"( and )" is taken literally.
r"( Here's a string that contains "double quotes" and single quotes "Ain't that nifty" )"
## [1] " Here's a string that contains \"double quotes\" and single quotes \"Ain't that nifty\" "
Even the spaces I added after the opening delimiter r"( and before the closing delimiter )" are kept literally in the string.
# A Windows path as a raw string: each backslash is written once, unescaped.
folder <- r"(C:\Current-Work\code-snippets\raw-strings)"
# Printing shows R's escaped representation, with every backslash doubled.
print(folder)
## [1] "C:\\Current-Work\\code-snippets\\raw-strings"
# cat() emits the characters that are actually stored in the string.
cat(folder, "\n", sep = "")
## C:\Current-Work\code-snippets\raw-strings
From O’Reilly
# Simple regex: four dot-separated groups of one to three digits, no range check
simple_ipv4 <- r"(^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$)"
stringr::str_detect(string = "255.255.255.255", pattern = simple_ipv4)
## [1] TRUE
# Accurate regex: every octet is additionally limited to the range 0-255
acc_ipv4 <- r"(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$)"
stringr::str_detect(string = "255.255.255.255", pattern = acc_ipv4)
## [1] TRUE
stringr::str_detect(string = "192.0.2.146", pattern = acc_ipv4)
## [1] TRUE
# What about CIDR blocks? The "/8" suffix defeats the end-of-string anchor.
stringr::str_detect(string = "10.0.0.0/8", pattern = acc_ipv4) # nope
## [1] FALSE
# Does the network part of the CIDR block (the 10.0.0.0 part) pass on its own?
stringr::str_detect(
  # the network part, pulled out with acc_ipv4 minus its trailing `$` anchor,
  # again written as a raw string
  string = stringr::str_extract(
    string = "10.0.0.0/8",
    pattern = r"(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))"
  ),
  # can we detect an IP address in the extracted part?
  pattern = acc_ipv4
) # it is valid after extraction
## [1] TRUE
# Why is 224.260.9.32 an invalid IP address?
# https://en.wikipedia.org/wiki/IP_address
# An octet (8-bit group) cannot hold a value above 255, so 260 is out of range.
stringr::str_detect(string = "224.260.9.32", pattern = acc_ipv4)
## [1] FALSE
# Email pattern: a local part, exactly one "@", then a domain with a literal dot
email_regex <- r"((^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$))"
stringr::str_detect(
  string = c(
    "testing@gmail.com",
    "test@graw.co.za",
    "s@m@testdomain.ac.za",   # rejected: contains a second "@"
    "test#testing@gmail.com", # rejected: "#" is not allowed in the local part
    "legit@gmail,com"         # rejected: comma where the dot is required
  ),
  pattern = email_regex
)
## [1] TRUE TRUE FALSE FALSE FALSE
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
# First digit in each item.
# original way - the backslash of \d has to be escaped, giving \\d
stringr::str_extract(string = shopping_list, pattern = "\\d")
## [1] "4" NA NA "2"
# raw string way - \d is written as-is
stringr::str_extract(
  string = shopping_list,
  pattern = r"(\d)"
)
## [1] "4" NA NA "2"
# First whole word of one to four lowercase letters in each item.
# original way with escape
stringr::str_extract(string = shopping_list, pattern = "\\b[a-z]{1,4}\\b")
## [1] NA "bag" "bag" "milk"
# raw string way
stringr::str_extract(string = shopping_list, pattern = r"(\b[a-z]{1,4}\b)")
## [1] NA "bag" "bag" "milk"
# Every whole lowercase word in each item.
# original way with escape
stringr::str_extract_all(string = shopping_list, pattern = "\\b[a-z]+\\b")
## [[1]]
## [1] "apples"
##
## [[2]]
## [1] "bag" "of" "flour"
##
## [[3]]
## [1] "bag" "of" "sugar"
##
## [[4]]
## [1] "milk"
# raw string way
stringr::str_extract_all(string = shopping_list, pattern = r"(\b[a-z]+\b)")
## [[1]]
## [1] "apples"
##
## [[2]]
## [1] "bag" "of" "flour"
##
## [[3]]
## [1] "bag" "of" "sugar"
##
## [[4]]
## [1] "milk"