// $' in a replacement string expands to everything that follows the match.
input.replace(/two/, "($')"); // "One ( three) three"

// $$ in a replacement string produces a single literal dollar sign.
input.replace(/two/, "($$)"); // "One ($) three"
These replacement macros are often neglected, but I’ve seen them used in very clever
solutions, so don’t forget about them!
Function Replacements
This is my favorite feature of regexes, which often allows you to break down a very
complex regex into some simpler regexes.
Let’s consider again the practical example of modifying HTML elements. Imagine
you’re writing a program that converts all
<a>
links into a very specific format: you
want to preserve the
class
,
id
, and
href
attributes, but remove everything else. The
problem is, your input is possibly messy. The attributes aren’t always present, and
when they are, you can’t guarantee they’ll be in the same order. So you have to
consider the following input variations (among many):
// Sample markup: four <a> tag variations, one per line, concatenated into a
// single string (the first three end with a newline, the last does not).
const html = [
  // class, href, and id all present, double-quoted
  `<a class="foo" href="/foo" id="foo">Foo</a>\n`,
  // upper-case tag name, single-quoted href, mixed-case attribute name
  `<A href='/foo' Class="foo">Foo</a>\n`,
  // href only
  `<a href="/foo">Foo</a>\n`,
  // an onclick attribute placed before href
  `<a onclick="javascript:alert('foo!')" href="/foo">Foo</a>`,
].join('');
By now, you should be realizing that this is a daunting task to accomplish with a
regex: there are just too many possible variations! However, we can significantly
reduce the number of variations by breaking this up into two regexes: one to
recognize
<a>
tags, and another to replace the contents of an
<a>
tag with only what you
want.
Let’s consider the second problem first. If all you had was a single
<a>
tag, and you
wanted to discard all attributes other than
class
,
id
, and
href
, the problem is easier.
Even still, as we saw earlier, this can cause problems if we can’t guarantee the
attributes come in a particular order. There are multiple ways to solve this problem,
but we’ll use
String.prototype.split
so we can consider attributes one at a time:
function
sanitizeATag
(
aTag
) {
// get the parts of the tag...
const
parts
=
aTag
.
match
(
/<a\s+(.*?)>(.*?)<\/a>/i
);
// parts[1] are the attributes of the opening <a> tag
// parts[2] are what's between the <a> and </a> tags
const
attributes
=
parts
[
1
]
// then we split into individual attributes
.
split
(
/\s+/
);
return
'<a '
+
attributes
// we only want class, id, and href attributes
.
filter
(
attr
=>
/^(?:class|id|href)[\s=]/i
.
test
(
attr
))
// joined by spaces
.
join
(
' '
)
Function Replacements | 251