Optimized Character Escaper for Java

I wanted to write my own character escaper for requests going to Apache Lucent.
After writing my own, I also received feedback from programmers-stackexchange which resulted in the fastest implementation, also posted here. (Thanks to MrSmit42)

Below is my application that tests 6 different slightly different implementations over 30,000,000 executions each and outputs the time in MS.

The results in MS (on my computer) were the following:

Method 1 (Boolean Array)->    7574
Method 0 (Switch Stmt)->    9778
Method 4 (Switch Stmt Ugly – BreakPoints after evert case)->    12451
Method 6 (HashSet)->    17273
Method 5 (EnumMap)->    39516
Method 3 (Pattern/Regex)->    98229

This proves that for production systems that have many requests, jumping directly on the RegEx bandwagon for even trivial problems is not always the solution.
Even a 0,5 MS reduction per request leads to 50MS saved over 100 requests (per second) and 500MS over 1000 requests.

[polldaddy poll=7418939]

import java.util.EnumMap;
import java.util.HashSet;
import java.util.regex.Pattern;

public class TestEscapeCharacters {

static int TEST_TIMES = 30000000;
static final String query = “http://This+*is||a&&test(whatever!!!!!!)”;
static boolean printQuery2Result = false;

public static void main(String[] args) {

long startTime, endTime;

startTime = System.currentTimeMillis();
method1_booleanArray();
endTime = System.currentTimeMillis();
System.out.println(“Method 1 (Boolean Array)->t” + (endTime-startTime));

startTime = System.currentTimeMillis();
method4_switchUgly();
endTime = System.currentTimeMillis();
System.out.println(“Method 4 (Switch Stmt Ugly – BreakPoints after evert case)->t” + (endTime-startTime));

startTime = System.currentTimeMillis();
method0_switch();
endTime = System.currentTimeMillis();
System.out.println(“Method 0 (Switch Stmt)->t” + ( endTime-startTime));

startTime = System.currentTimeMillis();
method6_HashSet();
endTime = System.currentTimeMillis();
System.out.println(“Method 6 (HashSet)->t” + (endTime-startTime));

startTime = System.currentTimeMillis();
method5_Enum();
endTime = System.currentTimeMillis();
System.out.println(“Method 5 (EnumMap)->t” + (endTime-startTime));

startTime = System.currentTimeMillis();
method3_pattern();
endTime = System.currentTimeMillis();
System.out.println(“Method 3 (Pattern)->t” + (endTime-startTime));

}

//———————————————————————————————–

static void method0_switch()
{

String query2=””;

for(int j=0; j<TEST_TIMES; j++)
{
query2 = query;

char[] queryCharArray = new char[query.length()*2];
char c;
int length = query.length();
int currentIndex = 0;
for (int i = 0; i < length; i++)
{
c = query.charAt(i);
switch (c) {
case ‘:’:
case ‘\’:
case ‘?’:
case ‘+’:
case ‘-‘:
case ‘!’:
case ‘(‘:
case ‘)’:
case ‘{‘:
case ‘}’:
case ‘[‘:
case ‘]’:
case ‘^’:
case ‘”‘:
case ‘~’:
case ‘*’:
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
break;

case ‘&’:
case ‘|’:
if(i+1 < length && query.charAt(i+1) == c)
{
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
queryCharArray[currentIndex++] = c;
i++;
}
break;

default:
queryCharArray[currentIndex++] = c;

}

}

query2 = new String(queryCharArray,0,currentIndex);
}

if(printQuery2Result)System.out.println(query2);
}

//———————————————————————————————–

private static final boolean[] mustBeEscaped = new boolean[65536];
static{
for(char c: “:\?+-!(){}[]^”~*&|”.toCharArray()){
mustBeEscaped[c]=true;
}
}

static void method1_booleanArray() {

String query2=””;

for (int j = 0; j < TEST_TIMES; j++) {
query2 = query;
char[] queryCharArray = new char[query.length() * 2];
char c;
int length = query.length();
int currentIndex = 0;
for (int i = 0; i < length; i++) {
c = query.charAt(i);
if (c< 65536 && mustBeEscaped[c]) {
if (‘&’ == c || ‘|’ == c) {
if (i + 1 < length && query.charAt(i + 1) == c) {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
queryCharArray[currentIndex++] = c;
i++;
}
} else {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
}
} else {
queryCharArray[currentIndex++] = c;
}
}

query2 = new String(queryCharArray, 0, currentIndex);
}

if(printQuery2Result)System.out.println(query2);

}

//———————————————————————————————–

private static final String LUCENE_ESCAPE_CHARS = “[\\+\-\!\(\)\:\^\]\{\}\~\*\?]”;
private static final Pattern LUCENE_PATTERN = Pattern.compile(LUCENE_ESCAPE_CHARS);
private static final String REPLACEMENT_STRING = “\\$0″;

static void method3_pattern() {

String query2=””;

for (int j = 0; j < TEST_TIMES; j++) {
query2 = query;
query2= LUCENE_PATTERN.matcher(query2).replaceAll(REPLACEMENT_STRING);
}

if(printQuery2Result)System.out.println(query2);

}

//———————————————————————————————–

static void method4_switchUgly()
{

String query2=””;

for(int j=0; j<TEST_TIMES; j++)
{
query2 = query;

char[] queryCharArray = new char[query.length()*2];
char c;
int length = query.length();
int currentIndex = 0;
for (int i = 0; i < length; i++)
{
c = query.charAt(i);
switch (c) {
case ‘:’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘\’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘?’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘+’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘-‘:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘!’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘(‘:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘)’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘{‘:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘}’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘[‘:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘]’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘^’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘”‘:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘~’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘*’:queryCharArray[currentIndex++] = ‘\’;queryCharArray[currentIndex++] = c;break;
case ‘&’:
case ‘|’:
if(i+1 < length && query.charAt(i+1) == c)
{
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
queryCharArray[currentIndex++] = c;
i++;
}
break;

default:
queryCharArray[currentIndex++] = c;

}

}

query2 = new String(queryCharArray,0,currentIndex);
}

if(printQuery2Result)System.out.println(query2);
}

//———————————————————————————————–

public static enum EscapeChars {
COLON(‘:’), SLASH(‘\’), QUESTION(‘?’), PLUS(‘+’), MINUS(‘-‘), EXCLAMATION(
‘!’), LEFT_PARENTHESIS(‘(‘), RIGHT_PARENTHESIS(‘)’), LEFT_CURLY(
‘{‘), RIGHT_CURLY(‘}’), LEFT_SQUARE(‘[‘), RIGHT_SQUARE(‘]’), UP(
‘^’), QUOTE(‘”‘), TILD(‘~’), ASTERISK(‘*’), PIPE(‘|’), AMPERSEND(
‘&’);

private final char character;

EscapeChars(char character) {
this.character = character;
}

public char getCharacter() {
return character;
}
}

static EnumMap<EscapeChars, Integer> EnumMap = new EnumMap<EscapeChars, Integer>(
EscapeChars.class);

static {
for (EscapeChars ec : EscapeChars.values()) {
EnumMap.put(ec, (int) ec.character);
}
}

static void method5_Enum() {

String query2 = “”;

for (int j = 0; j < TEST_TIMES; j++) {
query2 = query;
char[] queryCharArray = new char[query.length() * 2];
char c;
int length = query.length();
int currentIndex = 0;
for (int i = 0; i < length; i++) {
c = query.charAt(i);
if (EnumMap.containsValue((int) c)) {
if (‘&’ == c || ‘|’ == c) {
if (i + 1 < length && query.charAt(i + 1) == c) {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
queryCharArray[currentIndex++] = c;
i++;
}
} else {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
}
} else {
queryCharArray[currentIndex++] = c;
}
}

query2 = new String(queryCharArray, 0, currentIndex);
}

if(printQuery2Result)System.out.println(query2);

}

//———————————————————————————————–

static HashSet<Integer> EscapeCharSet = new HashSet<Integer>();

static {
for(char c: “:\?+-!(){}[]^”~*&|”.toCharArray())
EscapeCharSet.add((int)c);

}

static void method6_HashSet() {

String query2 = “”;

for (int j = 0; j < TEST_TIMES; j++) {
query2 = query;
char[] queryCharArray = new char[query.length() * 2];
char c;
int length = query.length();
int currentIndex = 0;
for (int i = 0; i < length; i++) {
c = query.charAt(i);
if (EscapeCharSet.contains((int) c)) {
if (‘&’ == c || ‘|’ == c) {
if (i + 1 < length && query.charAt(i + 1) == c) {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
queryCharArray[currentIndex++] = c;
i++;
}
} else {
queryCharArray[currentIndex++] = ‘\’;
queryCharArray[currentIndex++] = c;
}
} else {
queryCharArray[currentIndex++] = c;
}
}

query2 = new String(queryCharArray, 0, currentIndex);
}

if(printQuery2Result)System.out.println(query2);

}

}



Menelaos Bakopoulos

Mr. Menelaos Bakopoulos is currently pursuing his PhD both at Center for TeleInFrastruktur (CTiF) at Aalborg University (AAU) in Denmark and Athens Information Technology (AIT) in Athens, Greece. He received a Master in Information Technology and Telecommunications Systems from Athens Information Technology and a B.Sc. in Computer Science & Management Information Systems from the American College of Thessaloniki. Since April 2008 he has been a member of the Multimedia, Knowledge, and Web Technologies Group.

More Posts