Overview
I'm building a website that has two textareas, `input` and `output`.
The concept is fairly straightforward. I want to:
- Parse the input using regex to match any IPv4/IPv6 addresses
- Redact them to [IPv4], [IPv6] using .replace or any other method
- Display the result in the `output` textarea
I have it working perfectly for IPv4: I can paste in a huge block of text and it redacts all IPv4 addresses as expected.
The problem
The regex, or possibly the JavaScript `replace` function, is not working correctly for IPv6.
HTML to recreate problem
<html>
<head>
</head>
<body>
<div>
<label for="input" class= "left">Input Text</label>
<label for="output" class = "right">Obfuscated Text - Click Text to Copy</label>
<span></span>
<textarea id="input"></textarea>
<textarea id="output"></textarea>
</div>
<br>
<button onclick="obfuscate()">Obfuscate</button>
<script>
/**
 * Click handler for the "Obfuscate" button.
 *
 * Reads the text of the "input" textarea, replaces IPv4 addresses with
 * "[IPv4]" and IPv6 addresses with "[IPv6]", and writes the result into the
 * "output" textarea.
 */
function obfuscate() {
  // Resolve both textareas up front.
  const sourceArea = document.getElementById("input");
  const targetArea = document.getElementById("output");

  // Global pattern: every dotted quad in the whole text is replaced.
  const ipv4Pattern = /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g;

  // This pattern has no "g" flag, so replace() rewrites only the first match
  // on each line. It also ends in \b, so an address ending in ":" is matched
  // only when a word character follows that colon.
  const ipv6Pattern = /\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b/;

  // First pass: IPv4 over the complete text.
  const withoutIpv4 = sourceArea.value.replace(ipv4Pattern, "[IPv4]");

  // Second pass: IPv6, applied to each line separately.
  const redactedRows = [];
  for (const row of withoutIpv4.split('\n')) {
    redactedRows.push(row.replace(ipv6Pattern, "[IPv6]"));
  }

  // Stitch the lines back together and show the result.
  targetArea.value = redactedRows.join('\n');
}
</script>
</body>
</html>
Notes
Originally, nothing was redacted for IPv6. I read online somewhere that `replace` can't handle very large strings, so I broke the text down into individual lines. This works better, but it still doesn't replace all the matches.
Current Behavior
Given even a small input such as:
subnet6 3ffe:501:ffff:100::/64 {
# Two addresses available to clients
# (the third client should get NoAddrsAvail)
range6 3ffe:501:ffff:100::10 3ffe:501:ffff:100::11;
# Use the whole /64 prefix for temporary addresses
# (i.e., direct application of RFC 4941)
range6 3ffe:501:ffff:100:: temporary;
# Some /64 prefixes available for Prefix Delegation (RFC 3633)
prefix6 3ffe:501:ffff:100:: 3ffe:501:ffff:111:: /64;
}
# A second subnet behind a relay agent
subnet6 3ffe:501:ffff:101::/64 {
range6 3ffe:501:ffff:101::10 3ffe:501:ffff:101::11;
# Override of the global definitions,
# works only when a resource (address or prefix) is assigned
option dhcp6.name-servers 3ffe:501:ffff:101:200:ff:fe00:3f3e;
}
It fails to redact all v6 addresses
Output:
subnet6 3ffe:501:ffff:100::/64 {
# Two addresses available to clients
# (the third client should get NoAddrsAvail)
range6 [IPv6]10 3ffe:501:ffff:100::11;
# Use the whole /64 prefix for temporary addresses
# (i.e., direct application of RFC 4941)
range6 3ffe:501:ffff:100:: temporary;
# Some /64 prefixes available for Prefix Delegation (RFC 3633)
prefix6 3ffe:501:ffff:100:: 3ffe:501:ffff:111:: /64;
}
# A second subnet behind a relay agent
subnet6 3ffe:501:ffff:101::/64 {
range6 [IPv6]10 3ffe:501:ffff:101::11;
# Override of the global definitions,
# works only when a resource (address or prefix) is assigned
option dhcp6.name-servers [IPv6];
}
I suck at regex
I borrowed the regex to match IPv6 from ihateregex.io site.
On their site when I test it with the same input sample from before, it matches all of the addresses perfectly so that's where I'm stuck.
There are two similar questions on SO; one uses PHP and the other uses Python. If possible, I would like to do this all in JavaScript so the text remains client-side.
CodePudding user response:
I borrowed the regex to match IPv6 from ihateregex.io site.
...but you added `\b` to that regex. For the `\b` at the start of the regex this is no problem, but for the one at the end it is. An IPv6 address may end with a colon (for example `3ffe:501:ffff:100::`), and in that case the trailing `\b` requires that this final colon is followed by an alphanumeric character! This is the reason why some IPv6 addresses in the text are not matched.
So... remove the `\b` at the end of the regex.
Secondly, you need to provide the `g` modifier if you expect more than one match.
And now it is no longer necessary to split the text into lines:
// Look up the two textareas (ids "input" and "output") once at script start;
// obfuscate() reads from `input` and writes to `output`.
var input = document.getElementById("input");
var output = document.getElementById("output");
// Dotted-quad IPv4 address whose four octets are each limited to 0-255.
const IPV4_PATTERN = /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g;

// Textual IPv6 forms, tried strictly in this order. A regex alternation stops
// at the first alternative that matches, so the forms that allow the most
// groups after "::" come before the shorter ones. Otherwise
// "3ffe:501:ffff:100::10" would be matched only up to "::" and the trailing
// "10" would be left in the output.
const IPV6_ALTERNATIVES = [
  // Link-local address with a zone id, e.g. fe80::1%eth0. Must come first,
  // or the generic forms below would match just the part before "%".
  /fe80:(:[\da-f]{0,4}){0,4}%[\da-z]+/,
  // Full form: eight groups, no "::".
  /([\da-f]{1,4}:){7}[\da-f]{1,4}/,
  // "::" in the middle: up to N groups before it and up to 7-N after it.
  /[\da-f]{1,4}:((:[\da-f]{1,4}){1,6})/,
  /([\da-f]{1,4}:){1,2}(:[\da-f]{1,4}){1,5}/,
  /([\da-f]{1,4}:){1,3}(:[\da-f]{1,4}){1,4}/,
  /([\da-f]{1,4}:){1,4}(:[\da-f]{1,4}){1,3}/,
  /([\da-f]{1,4}:){1,5}(:[\da-f]{1,4}){1,2}/,
  /([\da-f]{1,4}:){1,6}:[\da-f]{1,4}/,
  // Trailing "::", e.g. the prefix in 3ffe:501:ffff:100::/64.
  /([\da-f]{1,4}:){1,7}:/,
  // Leading "::", optionally followed by groups.
  /:((:[\da-f]{1,4}){1,7}|:)/,
  // Forms with an embedded dotted-quad IPv4 tail (kept last, as before).
  /(::(ffff(:0{1,4})?:)?|([\da-f]{1,4}:){1,4}:)((25[0-5]|(2[0-4]|1?\d)?\d)\.){3}(25[0-5]|(2[0-4]|1?\d)?\d)/
];

// "g": replace every occurrence. "i": the classes above list only lower-case
// a-f, so the flag is needed for addresses written as 2001:DB8::1.
// There is deliberately no trailing \b: an address may end in ":", and \b
// would then demand a word character right after it.
// NOTE: because of the leading \b, an address that starts with "::" (such as
// "::1") is not matched when it follows whitespace or begins the text.
const IPV6_PATTERN = new RegExp(
  `\\b(?:${IPV6_ALTERNATIVES.map((re) => re.source).join("|")})`,
  "gi"
);

/**
 * Replace every IPv4 address in `text` with "[IPv4]" and every IPv6 address
 * with "[IPv6]".
 *
 * @param {string} text - Text that may contain IP addresses.
 * @returns {string} The text with the addresses replaced by placeholders.
 */
function redactIpAddresses(text) {
  return text.replace(IPV4_PATTERN, "[IPv4]").replace(IPV6_PATTERN, "[IPv6]");
}

/**
 * Redact the contents of the `input` textarea and put the result in the
 * `output` textarea. Uses the `input` and `output` globals of this script.
 */
function obfuscate() {
  output.value = redactIpAddresses(input.value);
}
// Demo setup: pre-fill the input textarea with the sample configuration text
// from the question so the snippet shows a result without any typing.
input.value = `subnet6 3ffe:501:ffff:100::/64 {
# Two addresses available to clients
# (the third client should get NoAddrsAvail)
range6 3ffe:501:ffff:100::10 3ffe:501:ffff:100::11;
# Use the whole /64 prefix for temporary addresses
# (i.e., direct application of RFC 4941)
range6 3ffe:501:ffff:100:: temporary;
# Some /64 prefixes available for Prefix Delegation (RFC 3633)
prefix6 3ffe:501:ffff:100:: 3ffe:501:ffff:111:: /64;
}
# A second subnet behind a relay agent
subnet6 3ffe:501:ffff:101::/64 {
range6 3ffe:501:ffff:101::10 3ffe:501:ffff:101::11;
# Override of the global definitions,
# works only when a resource (address or prefix) is assigned
option dhcp6.name-servers 3ffe:501:ffff:101:200:ff:fe00:3f3e;
}`;
// Run the redaction once immediately so the output textarea is filled on load.
obfuscate();
table { border-collapse: collapse; margin: 10px 0 10px 0 }
textarea { width: 45vw; height: 75vh }
<table>
<tr><th>Input Text<button onclick="obfuscate()">Obfuscate</button></th><th>Obfuscated Text</th></tr>
<tr><td><textarea id="input"></textarea></td><td><textarea id="output"></textarea></td></tr>
</table>