<font face="georgia, serif">The goal of the clause is to have a mechanism for using hex values for character literals. </font><span class="Apple-style-span" style="font-family: georgia, serif; ">That is, you should be able to take a code point from 0 to 10FFFF, get a hex value for that, embed it in some syntax, and concatenate it into a pattern, and have it work as a literal.</span><div>
<div><font face="georgia, serif"><br></font></div><div><font face="georgia, serif">For example:</font></div><div><font face="georgia, serif"><br></font></div>
<blockquote class="webkit-indent-blockquote" style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font face="georgia, serif">String pattern = first_part + &quot;\\x{&quot; + hex(myCodePoint) + &quot;}&quot; + second_part; // for *some* hex notation</font></div>
<div><font face="georgia, serif">...</font></div><div><font face="georgia, serif">Matcher m = Pattern.compile(pattern, Pattern.COMMENTS).matcher(target);</font></div><div><font face="georgia, serif">...</font></div></blockquote>

<div>
<font face="georgia, serif"><br></font></div><div><font face="georgia, serif"><span class="Apple-style-span" style="font-family: arial; "><font class="Apple-style-span" face="georgia, serif">As far as I can tell, Java really doesn&#39;t supply that capability for non-BMP code points, because the \u notation doesn&#39;t work above FFFF, except insofar as the preprocessor maps a surrogate pair in hex to literals, which all happen to work because they aren&#39;t syntax characters.<br>
</font></span></font></div><div><font face="georgia, serif"><span class="Apple-style-span" style="font-family: arial; "><font class="Apple-style-span" face="georgia, serif"><br></font></span></font></div><div><font face="georgia, serif">What you can do with Java is:</font></div>
<div><ol><li><span style="font-family:georgia, serif">embed the character itself, not the hex representation, which works some of the time (fails for 18 characters; syntax characters, as expected).</span></li>
<li><span style="font-family:georgia, serif">in constant expressions only, use the Java preprocessor (with \u.... or \u....\u....).</span></li><li><span style="font-family:georgia, serif">for BMP characters, use &quot;\\u&quot; + hex(myCodePoint,4)</span></li>

</ol><div><font face="georgia, serif">Here is a quick and dirty test; let me know if I&#39;ve missed something.</font></div></div><div><font face="georgia, serif"><br></font></div>
<div><font face="georgia, serif"><b>Output:</b></font></div><div><font face="georgia, serif"><br></font></div><div><font face="georgia, serif">










<p><font size="1">










</font></p><p><font size="1">LITERALS Failures: 18</font></p><font size="1">
<p><span></span>        set: [\u0009-\u000D\ #\$(-+?\[\\\^\{|]</p>
<p><span></span><meta charset="utf-8">        example1: a<span>        </span>b</p>
<p><span></span><meta charset="utf-8">        exampleN: a|b</p>
<p>INLINE Failures: 1048576</p>
<p><span></span><meta charset="utf-8">        set: [\U00010000-\U0010FFFF]</p>
<p><span></span><meta charset="utf-8">        example1: a\uD800\uDC00b</p>
<p><span></span><meta charset="utf-8">        exampleN: a\uDBFF\uDFFFb</p>
<p>INRANGE Failures: 1048576</p>
<p><span></span><meta charset="utf-8">        set: [\U00010000-\U0010FFFF]</p>
<p><span></span><meta charset="utf-8">        example1: a[\uD800\uDC00]b</p>
<p><span></span><meta charset="utf-8">        exampleN: a[\uDBFF\uDFFF]b</p></font><p></p><p><br></p><p></p><div style="font-family:arial"><font face="georgia, serif"><b>Code:</b></font></div>
<p></p></font></div><div><font class="Apple-style-span" face="georgia, serif"><br></font></div><div><font face="georgia, serif" size="1">










<p>    <span>public</span> <span>void</span> TestRegex() {</p>
<p><span>        logln(</span>&quot;Check patterns for Unicodeset&quot;<span>);</span></p>
<p><br></p>
<p>        <span>for</span> (<span>int</span> i = 0; i &lt;= 0x10FFFF; ++i) {</p>
<p><br></p>
<p><span>            </span>// The goal is to make a <span>regex</span> with hex digits, and have it match the corresponding character</p>
<p><span>            </span>// We check two different environments: <span>inline</span> (&quot;aXb&quot;) and in a range (&quot;a[X]b&quot;)</p>
<p><br></p>
<p>            String s = <span>new</span> StringBuilder().appendCodePoint(i).toString();</p>
<p><br></p>
<p>            String hexPattern = i &lt;= 0xFFFF ? <span>&quot;\\u&quot;</span> + Utility.hex(i,4) </p>
<p>                    : <span>&quot;\\u&quot;</span> + Utility.hex(Character.toChars(i)[0],4) + <span>&quot;\\u&quot;</span> + Utility.hex(Character.toChars(i)[1],4);</p>
<p><br></p>
<p>            String target = <span>&quot;a&quot;</span> + s + <span>&quot;b&quot;</span>;</p>
<p><br></p>
<p>            Failures.<span>LITERALS</span>.checkMatch(i, <span>&quot;a&quot;</span> + s + <span>&quot;b&quot;</span>, target);</p>
<p>            Failures.<span>INLINE</span>.checkMatch(i, <span>&quot;a&quot;</span> + hexPattern + <span>&quot;b&quot;</span>, target);</p>
<p>            Failures.<span>INRANGE</span>.checkMatch(i, <span>&quot;a[&quot;</span> + hexPattern + <span>&quot;]b&quot;</span>, target);</p>
<p>        }</p>
<p>        Failures.<span>LITERALS</span>.showFailures();</p>
<p>        Failures.<span>INLINE</span>.showFailures();</p>
<p>        Failures.<span>INRANGE</span>.showFailures();</p>
<p>    }</p>
<p><br></p>
<p>    <span>enum</span> Failures {</p>
<p><span>        </span>LITERALS<span>, </span>INLINE<span>, </span>INRANGE<span>;</span></p>
<p>        UnicodeSet <span>failureSet</span> = <span>new</span> UnicodeSet();</p>
<p><span>        String </span>firstSampleFailure<span>;</span></p>
<p><span>        String </span>lastSampleFailure<span>;</span></p>
<p><br></p>
<p>        <span>void</span> checkMatch(<span>int</span> codePoint, String pattern, String target) {</p>
<p>            <span>if</span> (!matches(pattern, target)) {</p>
<p>                <span>failureSet</span>.add(codePoint);</p>
<p>                <span>if</span> (<span>firstSampleFailure</span> == <span>null</span>) {</p>
<p>                    <span>firstSampleFailure</span> = pattern;</p>
<p>                }</p>
<p>                <span>lastSampleFailure</span> = pattern;</p>
<p>            }</p>
<p>        }</p>
<p>        <span>boolean</span> matches(String hexPattern, String target) {</p>
<p>            <span>try</span> {</p>
<p><span>                </span>// use COMMENTS to get the &#39;worst case&#39;</p>
<p>                <span>return</span> Pattern.compile(hexPattern, Pattern.<span>COMMENTS</span>).matcher(target).matches();</p>
<p>            } <span>catch</span> (Exception e) {</p>
<p>                <span>return</span> <span>false</span>;</p>
<p>            }</p>
<p>        }</p>
<p>        <span>void</span> showFailures() {</p>
<p>










</p><p style="color:rgb(0, 0, 0)"><span>            System.</span><span>out</span><span>.format(</span><span>this</span><span> + </span>&quot; Failures: %s\n\tset: %s\n\texample1: %s\n\texampleN: %s\n&quot;<span>, </span></p>


<font color="#000000"><span style="color:rgb(19, 43, 195)"><span>                    </span>failureSet<span>.size(), </span>failureSet<span>, </span>firstSampleFailure<span>, </span>lastSampleFailure<span>);</span></span>        }</font><p>

</p>
<p>    }</p></font></div>
</div>