diff -r 563d6852da45 src/java.base/share/classes/java/util/jar/Attributes.java
--- a/src/java.base/share/classes/java/util/jar/Attributes.java Mon Mar 09 21:43:01 2020 +0100
+++ b/src/java.base/share/classes/java/util/jar/Attributes.java Tue Mar 10 08:08:19 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -301,7 +301,6 @@
/*
* Writes the current attributes to the specified data output stream.
- * XXX Need to handle UTF8 values and break up lines longer than 72 bytes
*/
void write(DataOutputStream out) throws IOException {
StringBuilder buffer = new StringBuilder(72);
@@ -310,7 +309,7 @@
buffer.append(e.getKey().toString());
buffer.append(": ");
buffer.append(e.getValue());
- Manifest.println72(out, buffer.toString());
+ Manifest.printLine72(out, buffer.toString());
}
Manifest.println(out); // empty line after individual section
}
@@ -319,8 +318,6 @@
* Writes the current attributes to the specified data output stream,
* make sure to write out the MANIFEST_VERSION or SIGNATURE_VERSION
* attributes first.
- *
- * XXX Need to handle UTF8 values and break up lines longer than 72 bytes
*/
void writeMain(DataOutputStream out) throws IOException {
StringBuilder buffer = new StringBuilder(72);
@@ -350,7 +347,7 @@
buffer.append(name);
buffer.append(": ");
buffer.append(e.getValue());
- Manifest.println72(out, buffer.toString());
+ Manifest.printLine72(out, buffer.toString());
}
}
diff -r 563d6852da45 src/java.base/share/classes/java/util/jar/Manifest.java
--- a/src/java.base/share/classes/java/util/jar/Manifest.java Mon Mar 09 21:43:01 2020 +0100
+++ b/src/java.base/share/classes/java/util/jar/Manifest.java Tue Mar 10 08:08:19 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,8 @@
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import sun.nio.cs.UTF_8;
import sun.security.util.SecurityProperties;
@@ -207,7 +209,7 @@
buffer.setLength(0);
buffer.append("Name: ");
buffer.append(e.getKey());
- println72(dos, buffer.toString());
+ printLine72(dos, buffer.toString());
e.getValue().write(dos);
}
dos.flush();
@@ -216,7 +218,7 @@
/**
* Adds line breaks to enforce a maximum of 72 bytes per line.
*
- * @deprecation Replaced with {@link #println72}.
+ * @deprecated Replaced with {@link #printLine72}.
*/
@Deprecated(since = "13")
static void make72Safe(StringBuffer line) {
@@ -229,31 +231,90 @@
}
}
+ private static final Pattern CHARACTER_REGEX = Pattern.compile("\\X");
+
/**
* Writes {@code line} to {@code out} with line breaks and continuation
- * spaces within the limits of 72 bytes of contents per line followed
- * by a line break.
+ * spaces within the limits of 72 bytes of contents per line, followed by
+ * a line break. The UTF-8 encoded bytes of one character are kept on the
+ * same line where possible, whether the character is encoded with several
+ * bytes or is a sequence containing combining diacritical marks.
+ *
+ * Combining diacritical marks may be separated from the associated base
+ * character or other combining diacritical marks of that base character
+ * by a continuation line break ("{@code \r\n }") if the whole sequence of
+ * base character and all the combining diacritical marks belonging to it
+ * exceeds 71 bytes in its binary form encoded with UTF-8. This limit is
+ * only 71 bytes rather than 72 because continuation lines start with a
+ * space that uses the first byte of the 72 bytes each line can hold up to
+ * and the first line provides even less space for the value because it
+ * starts with the name ({@link #printChar72}).
*/
- static void println72(OutputStream out, String line) throws IOException {
- if (!line.isEmpty()) {
- byte[] lineBytes = line.getBytes(UTF_8.INSTANCE);
- int length = lineBytes.length;
- // first line can hold one byte more than subsequent lines which
- // start with a continuation line break space
- out.write(lineBytes[0]);
- int pos = 1;
- while (length - pos > 71) {
- out.write(lineBytes, pos, 71);
- pos += 71;
- println(out);
- out.write(' ');
- }
- out.write(lineBytes, pos, length - pos);
+ static void printLine72(OutputStream out, String line) throws IOException {
+ int linePos = 0; // number of bytes already put out on current line
+ Matcher charMatcher = CHARACTER_REGEX.matcher(line);
+ while (charMatcher.find()) {
+ linePos = printChar72(out, linePos, charMatcher.group());
}
println(out);
}
/**
+ * Breaks a string at code point boundaries within the limit of 72 bytes
+ * per line.
+ */
+ private static int printChar72(OutputStream out, int linePos,
+ String characterString) throws IOException {
+ byte[] characterBytes = characterString.getBytes(UTF_8.INSTANCE);
+ int characterLength = characterBytes.length;
+ int characterPos = 0; // number of bytes of current character
+ // already put out
+
+ // Put out a break onto a new line if the character or rather combining
+ // character sequence does not fit on the current line anymore but fits
+ // on a new line. In other words, only if the current character does not
+ // fit on one whole line alone, fill the current line first before
+ // breaking inside of the combining character sequence onto a new line.
+ if (linePos + characterLength > 72 && characterLength < 72) {
+ println(out);
+ out.write(' ');
+ linePos = 1;
+ }
+
+ // Break exceptionally large combining character sequences that don't
+ // fit on one line at code point boundaries.
+ int nextBreakPos;
+ while (characterLength - characterPos > (nextBreakPos = 72 - linePos)) {
+ while (isContinuation(characterBytes[characterPos + nextBreakPos])) {
+ nextBreakPos--;
+ }
+ out.write(characterBytes, characterPos, nextBreakPos);
+ characterPos += nextBreakPos;
+ println(out);
+ out.write(' ');
+ linePos = 1;
+ }
+
+ int remainder = characterLength - characterPos;
+ out.write(characterBytes, characterPos, remainder);
+ return linePos + remainder;
+ }
+
+ /**
+ * Returns {@code true} if the passed byte as parameter {@code b}
+ * is not the first (or only) byte of a Unicode character encoded in UTF-8
+ * and {@code false} otherwise.
+ *
+ * @see <a href="https://tools.ietf.org/html/rfc3629">
+ * RFC 3629 - UTF-8, a transformation format of ISO 10646</a>
+ * @see StringCoding#isNotContinuation(int)
+ * @see sun.nio.cs.UTF_8.Decoder#isNotContinuation(int)
+ */
+ private static boolean isContinuation(byte b) {
+ return (b & 0xc0) == 0x80;
+ }
+
+ /**
* Writes a line break to {@code out}.
*/
static void println(OutputStream out) throws IOException {
diff -r 563d6852da45 src/java.base/share/classes/java/util/regex/Grapheme.java
--- a/src/java.base/share/classes/java/util/regex/Grapheme.java Mon Mar 09 21:43:01 2020 +0100
+++ b/src/java.base/share/classes/java/util/regex/Grapheme.java Tue Mar 10 08:08:19 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,19 +30,6 @@
final class Grapheme {
/**
- * Determines if there is an extended grapheme cluster boundary between two
- * continuing characters {@code cp1} and {@code cp2}.
- *
- * See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
- * for the extended grapheme cluster boundary rules
- *
- * Note: this method does not take care of stateful breaking.
- */
- static boolean isBoundary(int cp1, int cp2) {
- return rules[getType(cp1)][getType(cp2)];
- }
-
- /**
* Look for the next extended grapheme cluster boundary in a CharSequence. It assumes
* the start of the char sequence is a boundary.
*
@@ -59,15 +46,14 @@
static int nextBoundary(CharSequence src, int off, int limit) {
Objects.checkFromToIndex(off, limit, src.length());
- int ch0 = Character.codePointAt(src, 0);
- int ret = Character.charCount(ch0);
- int ch1;
+ int ch0 = Character.codePointAt(src, off);
+ int ret = off + Character.charCount(ch0);
// indicates whether gb11 or gb12 is underway
int t0 = getGraphemeType(ch0);
int riCount = t0 == RI ? 1 : 0;
boolean gb11 = t0 == EXTENDED_PICTOGRAPHIC;
while (ret < limit) {
- ch1 = Character.codePointAt(src, ret);
+ int ch1 = Character.codePointAt(src, ret);
int t1 = getGraphemeType(ch1);
if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
@@ -177,7 +163,8 @@
cp == 0xAA7B || cp == 0xAA7D;
}
- private static int getGraphemeType(int cp) {
+ @SuppressWarnings("fallthrough")
+ static int getGraphemeType(int cp) {
if (cp < 0x007F) { // ASCII
if (cp < 32) { // Control characters
if (cp == 0x000D)
@@ -188,11 +175,7 @@
}
return OTHER;
}
- return getType(cp);
- }
- @SuppressWarnings("fallthrough")
- private static int getType(int cp) {
if (EmojiData.isExtendedPictographic(cp)) {
return EXTENDED_PICTOGRAPHIC;
}
diff -r 563d6852da45 src/java.base/share/classes/java/util/regex/Pattern.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java Mon Mar 09 21:43:01 2020 +0100
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java Tue Mar 10 08:08:19 2020 +0100
@@ -4037,17 +4037,8 @@
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
int n = Character.charCount(ch0);
- int j = i + n;
- // Fast check if it's necessary to call Normalizer;
- // testing Grapheme.isBoundary is enough for this case
- while (j < matcher.to) {
- int ch1 = Character.codePointAt(seq, j);
- if (Grapheme.isBoundary(ch0, ch1))
- break;
- ch0 = ch1;
- j += Character.charCount(ch1);
- }
- if (i + n == j) { // single, assume nfc cp
+ int j = Grapheme.nextBoundary(seq, i, matcher.to);
+ if (i + n == j) { // single cp grapheme, assume nfc
if (predicate.is(ch0))
return next.match(matcher, j, seq);
} else {
@@ -4111,13 +4102,12 @@
endIndex = matcher.getTextLength();
}
if (i == startIndex) {
- return next.match(matcher, i, seq);
- }
- if (i < endIndex) {
- if (Character.isSurrogatePair(seq.charAt(i-1), seq.charAt(i)) ||
- Grapheme.nextBoundary(seq,
- i - Character.charCount(Character.codePointBefore(seq, i)),
- i + Character.charCount(Character.codePointAt(seq, i))) > i) {
+ // continue with return statement below
+ } else if (i < endIndex) {
+ if (Character.isSurrogatePair(seq.charAt(i - 1), seq.charAt(i))) {
+ return false;
+ }
+ if (Grapheme.nextBoundary(seq, matcher.last, endIndex) > i) {
return false;
}
} else {
diff -r 563d6852da45 test/jdk/java/util/jar/Manifest/PrintChar72.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/jar/Manifest/PrintChar72.java Tue Mar 10 08:08:19 2020 +0100
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.jar.Attributes;
+import java.util.jar.Manifest;
+import java.util.jar.Attributes.Name;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+
+/**
+ * @test
+ * @bug 6443578 6202130
+ * @compile ../../../../sun/security/tools/jarsigner/Utils.java
+ * @run testng PrintChar72
+ * @summary Tests {@link Manifest#printChar72} breaking manifest header values
+ * across lines in conjunction with Unicode characters encoded in UTF-8 with a
+ * variable number of bytes when reading and writing jar manifests results in
+ * valid UTF-8.
+ *
+ * The manifest line length limit (72 bytes) may be reached at a position
+ * between multiple bytes of a single UTF-8 encoded character. Although
+ * characters should not be broken across lines according to the specification
+ * the previous {@link Manifest} implementation did.
+ *
+ * This test makes sure that no Unicode code point character is broken apart
+ * across a line break when writing manifests and also that manifests are still
+ * read correctly whether or not characters encoded in UTF-8 with more than one
+ * byte are interrupted with and continued after a line break for compatibility
+ * when reading older manifests.
+ */
+public class PrintChar72 {
+
+ static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72;
+
+ /**
+ * Character string that has one byte size in its UTF-8 encoded form to
+ * yield one byte of position offset.
+ */
+ static final String FILL1BYTE = "x";
+ static final String MARK_BEFORE = "y";
+ static final String MARK_AFTER = "z";
+
+ /**
+ * Four byte name.
+ * By using header names of four characters length the same values can be
+ * used for testing line breaks in both headers (in main attributes as well
+ * as named sections) as well as section names because a named section name
+ * is represented basically like any other header but follows an empty line
+ * and the key is always "Name".
+ * Relative to the start of the value, this way the same offset to the
+ * character to test breaking can be used in all cases.
+ */
+ static final String FOUR_BYTE_NAME = "Name";
+
+ /**
+ * Distinguishes main attributes headers, section names, and headers in
+ * named sections because an implementation might make a difference.
+ */
+ enum PositionInManifest {
+ /**
+ * @see Attributes#writeMain
+ */
+ MAIN_ATTRIBUTES,
+ /**
+ * @see Manifest#write
+ */
+ SECTION_NAME,
+ /**
+ * @see Attributes#write
+ */
+ NAMED_SECTION;
+ }
+
+ static String numByteUnicodeCharacter(int numBytes) {
+ String string;
+ switch (numBytes) {
+ case 1: string = "i"; break;
+ case 2: string = "\u00EF"; break; // small letter i with diaeresis
+ case 3: string = "\uFB00"; break; // small double f ligature
+ case 4: string = Character.toString(0x2070E); break; // CJK ideograph U+2070E
+ default: throw new RuntimeException();
+ }
+ assertEquals(string.getBytes(UTF_8).length, numBytes,
+ "self-test failed: unexpected UTF-8 encoded character length");
+ return string;
+ }
+
+ /**
+ * Produces test cases with all combinations of circumstances covered in
+ * which a character could possibly be attempted to be broken across a line
+ * break onto a continuation line:
+ * - different sizes of a UTF-8 encoded characters: one, two, three, and
+ * four bytes,
+ * - all possible positions of the character to test breaking with
+ * relative respect to the 72-byte line length limit including immediately
+ * before that character and immediately after the character and every
+ * position in between for multi-byte UTF-8 encoded characters,
+ * - different number of preceding line breaks in the same value
+ * - at the end of the value or followed by another character
+ * - in a main attributes header value, section name, or named section
+ * header value (see also {@link PositionInManifest})
+ *
+ * The same set of test parameters is used to write and read manifests
+ * once without breaking characters apart
+ * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int,
+ * PositionInManifest, String, String)}) and once with doing so
+ * ({@link #readCharactersBrokenAcrossLines(int, int, int, int,
+ * PositionInManifest, String, String)}).
+ * The latter case covers backwards compatibility and involves writing
+ * manifests like they were written before resolution of bug 6443578.
+ */
+ @DataProvider(name = "lineBreakParameters")
+ public static Object[][] lineBreakParameters() {
+ return Stream.of(new Object[] { null }).flatMap(o ->
+ // b: number of line breaks before character under test
+ IntStream.rangeClosed(0, 3).mapToObj(
+ b -> new Object[] { b })
+ ).flatMap(o ->
+ // c: unicode character UTF-8 encoded length in bytes
+ IntStream.rangeClosed(1, 4).mapToObj(
+ c -> new Object[] { o[0], c })
+ ).flatMap(o ->
+ // p: potential break position offset in bytes
+ // p == 0 => before character,
+ // p == c => after character, and
+ // 0 < p < c => character potentially broken across line break
+ // within the character
+ IntStream.rangeClosed(0, (int) o[1]).mapToObj(
+ p -> new Object[] { o[0], o[1], p })
+ ).flatMap(o ->
+ // a: no or one character following the one under test
+ // (a == 0 meaning the character under test is the end of
+ // the value which is followed by a line break in the
+ // resulting manifest without continuation line space which
+ // concludes the value)
+ IntStream.rangeClosed(0, 1).mapToObj(
+ a -> new Object[] { o[0], o[1], o[2], a })
+ ).flatMap(o ->
+ Stream.of(PositionInManifest.values()).map(
+ i -> new Object[] { o[0], o[1], o[2], o[3], i })
+ ).map(o -> {
+ int b = (int) o[0];
+ int c = (int) o[1];
+ int p = (int) o[2];
+ int a = (int) o[3];
+ PositionInManifest i = (PositionInManifest) o[4];
+
+ // offset: so many characters (actually bytes here,
+ // filled with one byte characters) are needed to place
+ // the next character (the character under test) into a
+ // position relative to the maximum line width that it
+ // may or may not have to be broken onto the next line
+ int offset =
+ // number of lines; - 1 due to continuation " "
+ b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1)
+ // line length minus "Name: ".length()
+ + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6
+ // position of maximum line width relative to
+ // beginning of encoded character
+ - p;
+
+ String value = "";
+ value += FILL1BYTE.repeat(offset - 1);
+ // character before the one to test the break
+ value += MARK_BEFORE;
+ String character = numByteUnicodeCharacter(c);
+ value += character;
+ // character after the one to test the break
+ value += MARK_AFTER.repeat(a);
+
+ return new Object[] { b, c, p, a, i, character, value };
+ }).toArray(size -> new Object[size][]);
+ }
+
+ /**
+ * Checks that unicode characters work well with line breaks and
+ * continuation lines in jar manifests without breaking a character across
+ * a line break even when encoded in UTF-8 with more than one byte.
+ *
+ * For each of the cases provided by {@link #lineBreakParameters()} the
+ * break position is verified in the written manifest binary form as well
+ * as verified that it restores to the original values when read again.
+ *
+ * As an additional check, the binary manifests are decoded from UTF-8
+ * into Strings before re-joining continued lines.
+ */
+ @Test(dataProvider = "lineBreakParameters")
+ public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p,
+ int a, PositionInManifest i, String character, String value)
+ throws IOException {
+ byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value);
+
+ // in order to unambiguously establish the position of "character" in
+ // brokenPart, brokenPart is prepended and appended with what is
+ // expected before and after it...
+ String brokenPart = MARK_BEFORE;
+
+ // expect the whole character on the next line unless it fits
+ // completely on the current line
+ boolean breakExpected = p < c;
+ if (breakExpected) {
+ brokenPart += "\r\n ";
+ }
+ brokenPart += character;
+ // expect a line break before the next character if there is a next
+ // character and the previous not already broken on next line
+ if (a > 0) {
+ if (!breakExpected) {
+ brokenPart += "\r\n ";
+ }
+ brokenPart += MARK_AFTER;
+ }
+ brokenPart = brokenPart + "\r\n";
+ try {
+ assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8));
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
+ decodeManifestFromUtf8AndAssertValue(
+ mfBytes, FOUR_BYTE_NAME, value, true);
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ throw e;
+ }
+ }
+
+ static byte[] writeManifest(PositionInManifest i, String name,
+ String value) throws IOException {
+ Manifest mf = new Manifest();
+ mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
+ Attributes attributes = new Attributes();
+
+ switch (i) {
+ case MAIN_ATTRIBUTES:
+ mf.getMainAttributes().put(new Name(name), value);
+ break;
+ case SECTION_NAME:
+ mf.getEntries().put(value, attributes);
+ break;
+ case NAMED_SECTION:
+ mf.getEntries().put(FOUR_BYTE_NAME, attributes);
+ attributes.put(new Name(name), value);
+ break;
+ }
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ mf.write(out);
+ return out.toByteArray();
+ }
+
+ /**
+ * Asserts one and only one occurrence of a sequence of bytes {@code part}
+ * representing the character and how it is expected to be broken and its
+ * surrounding bytes in a larger sequence that corresponds to the manifest
+ * in binary form {@code mf}.
+ */
+ static void assertOccurrence(byte[] mf, byte[] part) {
+ List<Integer> matchPos = new LinkedList<>(); // start offsets of matches
+ for (int i = 0; i < mf.length; i++) {
+ for (int j = 0; j < part.length && i + j <= mf.length; j++) {
+ if (part[j] == 0) { // a zero byte in part stands for end of mf
+ if (i + j != mf.length) {
+ break; // expected eof not found
+ }
+ } else if (i + j == mf.length) {
+ break; // mf ended before part was matched completely
+ } else if (mf[i + j] != part[j]) {
+ break; // mismatch, try next start offset
+ }
+ if (j == part.length - 1) {
+ matchPos.add(i);
+ }
+ }
+ }
+ assertEquals(matchPos.size(), 1, "not "
+ + (matchPos.size() < 1 ? "found" : "unique") + ": '"
+ + new String(part, UTF_8) + "'");
+ }
+
+ static void readManifestAndAssertValue(
+ byte[] mfBytes, PositionInManifest i, String name, String value)
+ throws IOException {
+ Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes));
+
+ switch (i) {
+ case MAIN_ATTRIBUTES:
+ assertEquals(mf.getMainAttributes().getValue(name), value,
+ "main attributes header value");
+ break;
+ case SECTION_NAME:
+ Attributes attributes = mf.getAttributes(value);
+ assertNotNull(attributes, "named section not found");
+ break;
+ case NAMED_SECTION:
+ attributes = mf.getAttributes(FOUR_BYTE_NAME);
+ assertEquals(attributes.getValue(name), value,
+ "named section attributes header value");
+ break;
+ }
+ }
+
+ /**
+ * Decodes a binary manifest {@code mfBytes} from UTF-8 first, before
+ * joining the continuation lines unlike {@link Manifest} and
+ * {@link Attributes} which join the continuation lines first, before
+ * decoding the joined line from UTF-8 into a {@link String}, evaluating
+ * whether or not the binary manifest is valid UTF-8.
+ */
+ static void decodeManifestFromUtf8AndAssertValue(
+ byte[] mfBytes, String name, String value,
+ boolean validUtf8ManifestExpected) throws IOException {
+ String mf = new String(mfBytes, UTF_8)
+ .replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", "");
+ String header = "\r\n" + name + ": " + value + "\r\n";
+ int pos = mf.indexOf(header);
+ if (validUtf8ManifestExpected) {
+ assertTrue(pos > 0);
+ pos = mf.indexOf(header, pos + 1);
+ }
+ // assert no occurrence or no other occurrence after one match above
+ assertTrue(pos == -1);
+ }
+
+ @Test(dataProvider = "lineBreakParameters")
+ public void readCharactersBrokenAcrossLines(int b, int c, int p, int a,
+ PositionInManifest i, String character, String value)
+ throws IOException {
+ byte[] mfBytes = writeManifestWithBrokenCharacters(
+ i, FOUR_BYTE_NAME, value);
+
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
+ buf.write(MARK_BEFORE.getBytes(UTF_8));
+ byte[] characterBytes = character.getBytes(UTF_8);
+ // the portion of the character that fits on the current line before
+ // a break at 72 bytes, ranges from nothing (p == 0) to the whole
+ // character (p == c)
+ for (int j = 0; j < p; j++) {
+ buf.write(characterBytes, j, 1);
+ }
+ // expect a line break at exactly 72 bytes from the beginning of the
+ // line unless the whole character fits on that line
+ boolean breakExpected = p < c;
+ if (breakExpected) {
+ buf.write("\r\n ".getBytes(UTF_8));
+ }
+ // the remaining portion of the character, if any
+ for (int j = p; j < c; j++) {
+ buf.write(characterBytes, j, 1);
+ }
+ // expect another line break if the whole character fitted on the same
+ // line and there is another character
+ if (a == 1) {
+ if (c == p) {
+ buf.write("\r\n ".getBytes(UTF_8));
+ }
+ buf.write(MARK_AFTER.getBytes(UTF_8));
+ }
+ // if no other character followed expect a line break immediately
+ buf.write("\r\n".getBytes(UTF_8));
+ byte[] brokenPart = buf.toByteArray();
+ try {
+ assertOccurrence(mfBytes, brokenPart);
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
+ decodeManifestFromUtf8AndAssertValue(
+ mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c);
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ throw e;
+ }
+ }
+
+ /**
+ * From the previous {@link Manifest} implementation reduced to the minimum
+ * required to demonstrate compatibility.
+ */
+ @SuppressWarnings("deprecation")
+ static byte[] writeManifestWithBrokenCharacters(
+ PositionInManifest i, String name, String value)
+ throws IOException {
+ byte[] vb = value.getBytes(UTF_8);
+ value = new String(vb, 0, 0, vb.length);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(out);
+ dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n");
+
+ if (i == PositionInManifest.MAIN_ATTRIBUTES) {
+ StringBuffer buffer = new StringBuffer(name);
+ buffer.append(": ");
+ buffer.append(value);
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+ }
+ dos.writeBytes("\r\n");
+
+ if (i == PositionInManifest.SECTION_NAME ||
+ i == PositionInManifest.NAMED_SECTION) {
+ StringBuffer buffer = new StringBuffer("Name: ");
+ if (i == PositionInManifest.SECTION_NAME) {
+ buffer.append(value);
+ } else {
+ buffer.append(FOUR_BYTE_NAME);
+ }
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+
+ if (i == PositionInManifest.NAMED_SECTION) {
+ buffer = new StringBuffer(name);
+ buffer.append(": ");
+ buffer.append(value);
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+ }
+
+ dos.writeBytes("\r\n");
+ }
+
+ dos.flush();
+ return out.toByteArray();
+ }
+
+ /**
+ * Adds line breaks to enforce a maximum 72 bytes per line.
+ *
+ * From previous Manifest implementation without respect for UTF-8 encoded
+ * character boundaries breaking also within multi-byte UTF-8 encoded
+ * characters.
+ *
+ * @see Manifest#make72Safe(StringBuffer)
+ */
+ static void make72Safe(StringBuffer line) {
+ int length = line.length();
+ int index = 72;
+ while (index < length) {
+ line.insert(index, "\r\n ");
+ index += 74; // + line width + line break ("\r\n")
+ length += 3; // + line break ("\r\n") and space
+ }
+ }
+
+ @Test
+ public void testEmptyValue() throws Exception {
+ for (PositionInManifest i : PositionInManifest.values()) {
+ byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, "");
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, "");
+ }
+ }
+
+}
diff -r 563d6852da45 test/jdk/java/util/jar/Manifest/PrintLine72.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/jar/Manifest/PrintLine72.java Tue Mar 10 08:08:19 2020 +0100
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.jar.Manifest;
+import java.util.jar.Attributes.Name;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+
+/**
+ * @test
+ * @compile ../../../../sun/security/tools/jarsigner/Utils.java
+ * @bug 6443578 6202130
+ * @run testng PrintLine72
+ * @summary Tests {@link Manifest#printLine72} line breaking in particular
+ * border cases involving combining character sequences.
+ *
+ * For another test covering the complete Unicode character set see
+ * {@link ValueUtf8Coding}, for a test for not breaking Unicode code point
+ * UTF-8 encoded byte sequences see {@link PrintChar72}.
+ */
+public class PrintLine72 {
+
+ static final Name TEST_NAME = new Name("test");
+
+ static final int NAME_SEP_LENGTH = (TEST_NAME + ": ").length();
+
+ void test(String originalValue, int... breakPositionsBytes)
+ throws IOException {
+ String expectedValueWithBreaksInManifest = originalValue;
+ // iterating backwards because inserting breaks affects original
+ // positions
+ for (int i = breakPositionsBytes.length - 1; i >= 0; i--) {
+ int breakPositionBytes = breakPositionsBytes[i];
+
+ // Translate breakPositionBytes byte offset into
+ // breakPositionCharacters (primitive char type) character offset
+ // for cutting the string with String.substring later on.
+ // Higher code points may be represented with a pair of UTF-16
+ // surrogate chars, both of which count for String.substring.
+ int bytesSoFar = 0;
+ int charsSoFar = 0;
+ while (bytesSoFar < breakPositionBytes) {
+ String s = expectedValueWithBreaksInManifest
+ .substring(charsSoFar, ++charsSoFar);
+ bytesSoFar += s.getBytes(UTF_8).length;
+ assertTrue(bytesSoFar <= breakPositionBytes,
+ "break position not aligned with characters");
+ }
+ int breakPositionCharacters = charsSoFar;
+
+ expectedValueWithBreaksInManifest =
+ expectedValueWithBreaksInManifest
+ .substring(0, breakPositionCharacters)
+ + "\r\n " +
+ expectedValueWithBreaksInManifest
+ .substring(breakPositionCharacters);
+ }
+
+ Manifest mf = new Manifest();
+ mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
+ mf.getMainAttributes().put(TEST_NAME, originalValue);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ mf.write(out);
+ byte[] mfBytes = out.toByteArray();
+
+ byte[] actual = mfBytes;
+ String expected =
+ "Manifest-Version: 1.0\r\n" +
+ TEST_NAME + ": " + expectedValueWithBreaksInManifest +
+ "\r\n\r\n";
+ try {
+ assertEquals(new String(actual, UTF_8), expected);
+ assertEquals(actual, expected.getBytes(UTF_8));
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ System.out.println("actual = " + byteArrayToIntList(actual));
+ System.out.println("expected = " + byteArrayToIntList(
+ expected.getBytes(UTF_8)));
+ throw e;
+ }
+ }
+
+ static List<Integer> byteArrayToIntList(byte[] bytes) {
+ List<Integer> list = new ArrayList<>();
+ for (int i = 0; i < bytes.length; i++) {
+ list.add((int) bytes[i]); // widen to int so values print as signed numbers
+ }
+ return list;
+ }
+
+ @Test
+ public void testEmpty() throws Exception {
+ test(""); // expect neither a line break nor an exception
+ }
+
+ static final String COMBINING_DIACRITICAL_MARKS =
+ IntStream.range(0x300, 0x36F)
+ .mapToObj(i -> new String(Character.toChars(i)))
+ .collect(Collectors.joining());
+
+ static String getCharSeq(int numberOfBytes) {
+ String seq = (numberOfBytes % 2 == 1 ? "e" : "\u00E6")
+ + COMBINING_DIACRITICAL_MARKS.substring(0, (numberOfBytes - 1) / 2);
+ assertEquals(seq.getBytes(UTF_8).length, numberOfBytes);
+ return seq;
+ }
+
+ @Test
+ public void testBreakOnFirstLine() throws Exception {
+ // Combining sequence starts immediately after name and ": " and fits
+ // the remaining space in the first line. Expect no break.
+ test(getCharSeq(66));
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line by one byte. Expect to break on a
+ // new line because the combining sequence fits on a continuation line
+ // which does not start with name and ": " and provides enough space.
+ test(getCharSeq(67), 0);
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line but still just fits exactly on a
+ // continuation line. Expect the value to break onto a new line.
+ test(getCharSeq(71), 0);
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line and does not fit on a
+ // continuation line either. Expect the first line to be filled with as
+ // many code points as fit on it and expect a line break onto a
+ // continuation line after 66 bytes of the first line value.
+ test(getCharSeq(72), 72 - NAME_SEP_LENGTH);
+
+ // Combining sequence starts after name and ": x" and exceeds the
+ // remaining space in the first line and does not fit on a
+ // continuation line either. Expect the first line to be filled with as
+ // many code points as fit on it and expect a line break onto a
+ // continuation line already after 65 bytes of the first line because
+ // the following character is a code point represented with two bytes
+ // in UTF-8 which should not be interrupted by a line break.
+ test("x" + getCharSeq(72), 72 - NAME_SEP_LENGTH - 1);
+ }
+
+ @Test
+ public void testBreakOnContinuationLine() throws Exception {
+ // fits on next line, leaving one byte unused on the current line
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 1) + getCharSeq(71),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71 - 1);
+
+ // fits on current line exactly
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71) + getCharSeq(71),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71);
+
+ // fits on next line by inserting a line break after a line that
+ // contains only one character yet
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71 + 1) + getCharSeq(71),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71,
+ 72 - NAME_SEP_LENGTH + 71 + 1);
+
+ // does not fit on the next line and the one byte not yet used on the
+ // current line does not hold the first code point of the combined
+ // character sequence which is a code point encoded with two bytes in
+ // UTF-8.
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 1) + getCharSeq(72),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71 - 1,
+ 72 - NAME_SEP_LENGTH + 71 - 1 + 71 - 1);
+
+ // would not fit on the next line alone but fits on the remaining two
+ // bytes available on the current line and the whole subsequent line.
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 2) + getCharSeq(72),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71);
+
+ // previous character filled the whole previous line completely
+ // but the combined character sequence with 72 bytes still does not fit
+ // on a single line. the last code point is a two-byte one so that one
+ // byte is left unused on the second-to-last line.
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71) + getCharSeq(72),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71,
+ 72 - NAME_SEP_LENGTH + 71 + 71 - 1);
+
+ // previous character left one byte used on the current line and the
+ // remaining 70 bytes available. the combining sequence can use all of
+ // these 70 bytes because after 70 bytes a new code point starts
+ test("x".repeat(72 - NAME_SEP_LENGTH + 71 + 1) + getCharSeq(72),
+ 72 - NAME_SEP_LENGTH,
+ 72 - NAME_SEP_LENGTH + 71,
+ 72 - NAME_SEP_LENGTH + 71 + 71);
+ }
+
+}
diff -r 563d6852da45 test/jdk/java/util/regex/GraphemeTest.java
--- a/test/jdk/java/util/regex/GraphemeTest.java Mon Mar 09 21:43:01 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,384 +0,0 @@
-/*
- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 7071819 8221431
- * @summary tests Unicode Extended Grapheme support
- * @library /lib/testlibrary/java/lang
- * @run main GraphemeTest
- */
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.ArrayList;
-import java.util.Scanner;
-import java.util.regex.Pattern;
-import java.util.regex.Matcher;
-
-public class GraphemeTest {
-
- public static void main(String[] args) throws Throwable {
- testProps(UCDFiles.GRAPHEME_BREAK_PROPERTY);
- testProps(UCDFiles.EMOJI_DATA);
- }
-
- private static void testProps(Path path) throws IOException {
- Files.lines(path)
- .map( ln -> ln.replaceFirst("#.*", "") )
- .filter( ln -> ln.length() != 0 )
- .forEach(ln -> {
- String[] strs = ln.split("\\s+");
- int off = strs[0].indexOf("..");
- int cp0, cp1;
- String expected = strs[2];
- if (off != -1) {
- cp0 = Integer.parseInt(strs[0], 0, off, 16);
- cp1 = Integer.parseInt(strs[0], off + 2, strs[0].length(), 16);
- } else {
- cp0 = cp1 = Integer.parseInt(strs[0], 16);
- }
- for (int cp = cp0; cp <= cp1; cp++) {
- // Ignore Emoji* for now (only interested in Extended_Pictographic)
- if (expected.startsWith("Emoji")) {
- continue;
- }
-
- // NOTE:
- // #tr29 "plus a few General_Category = Spacing_Mark needed for
- // canonical equivalence."
- // For "extended grapheme clusters" support, there is no
- // need actually to diff "extend" and "spackmark" given GB9, GB9a.
- if (!expected.equals(types[getType(cp)])) {
- if ("Extend".equals(expected) &&
- "SpacingMark".equals(types[getType(cp)]))
- System.out.printf("[%x] [%s][%d] -> [%s]%n",
- cp, expected, Character.getType(cp), types[getType(cp)]);
- else
- throw new RuntimeException(String.format(
- "cp=[%x], expeced:[%s] result:[%s]%n",
- cp, expected, types[getType(cp)]));
- }
- }
- });
- }
-
- private static final String[] types = {
- "Other", "CR", "LF", "Control", "Extend", "ZWJ", "Regional_Indicator",
- "Prepend", "SpacingMark",
- "L", "V", "T", "LV", "LVT",
- "Extended_Pictographic" };
-
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
- // from java.util.regex.Grapheme.java
- // types
- private static final int OTHER = 0;
- private static final int CR = 1;
- private static final int LF = 2;
- private static final int CONTROL = 3;
- private static final int EXTEND = 4;
- private static final int ZWJ = 5;
- private static final int RI = 6;
- private static final int PREPEND = 7;
- private static final int SPACINGMARK = 8;
- private static final int L = 9;
- private static final int V = 10;
- private static final int T = 11;
- private static final int LV = 12;
- private static final int LVT = 13;
- private static final int EXTENDED_PICTOGRAPHIC = 14;
-
- private static final int FIRST_TYPE = 0;
- private static final int LAST_TYPE = 14;
-
- private static boolean[][] rules;
- static {
- rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
- // GB 999 Any + Any -> default
- for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
- for (int j = FIRST_TYPE; j <= LAST_TYPE; j++)
- rules[i][j] = true;
- // GB 6 L x (L | V | LV | VT)
- rules[L][L] = false;
- rules[L][V] = false;
- rules[L][LV] = false;
- rules[L][LVT] = false;
- // GB 7 (LV | V) x (V | T)
- rules[LV][V] = false;
- rules[LV][T] = false;
- rules[V][V] = false;
- rules[V][T] = false;
- // GB 8 (LVT | T) x T
- rules[LVT][T] = false;
- rules[T][T] = false;
- // GB 9 x (Extend|ZWJ)
- // GB 9a x Spacing Mark
- // GB 9b Prepend x
- for (int i = FIRST_TYPE; i <= LAST_TYPE; i++) {
- rules[i][EXTEND] = false;
- rules[i][ZWJ] = false;
- rules[i][SPACINGMARK] = false;
- rules[PREPEND][i] = false;
- }
- // GB 4 (Control | CR | LF) +
- // GB 5 + (Control | CR | LF)
- for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
- for (int j = CR; j <= CONTROL; j++) {
- rules[i][j] = true;
- rules[j][i] = true;
- }
- // GB 3 CR x LF
- rules[CR][LF] = false;
- // GB 11 Exended_Pictographic x (Extend|ZWJ)
- rules[EXTENDED_PICTOGRAPHIC][EXTEND] = false;
- rules[EXTENDED_PICTOGRAPHIC][ZWJ] = false;
- }
-
- // Hangul syllables
- private static final int SYLLABLE_BASE = 0xAC00;
- private static final int LCOUNT = 19;
- private static final int VCOUNT = 21;
- private static final int TCOUNT = 28;
- private static final int NCOUNT = VCOUNT * TCOUNT; // 588
- private static final int SCOUNT = LCOUNT * NCOUNT; // 11172
-
- // #tr29: SpacingMark exceptions: The following (which have
- // General_Category = Spacing_Mark and would otherwise be included)
- // are specifically excluded
- private static boolean isExcludedSpacingMark(int cp) {
- return cp == 0x102B || cp == 0x102C || cp == 0x1038 ||
- cp >= 0x1062 && cp <= 0x1064 ||
- cp >= 0x1062 && cp <= 0x106D ||
- cp == 0x1083 ||
- cp >= 0x1087 && cp <= 0x108C ||
- cp == 0x108F ||
- cp >= 0x109A && cp <= 0x109C ||
- cp == 0x1A61 || cp == 0x1A63 || cp == 0x1A64 ||
- cp == 0xAA7B || cp == 0xAA7D;
- }
-
- @SuppressWarnings("fallthrough")
- private static int getType(int cp) {
- if (isExtendedPictographic(cp)) {
- return EXTENDED_PICTOGRAPHIC;
- }
-
- int type = Character.getType(cp);
- switch(type) {
- case Character.CONTROL:
- if (cp == 0x000D)
- return CR;
- if (cp == 0x000A)
- return LF;
- return CONTROL;
- case Character.UNASSIGNED:
- // NOTE: #tr29 lists "Unassigned and Default_Ignorable_Code_Point" as Control
- // but GraphemeBreakTest.txt lists u+0378/reserved-0378 as "Other"
- // so type it as "Other" to make the test happy
- if (cp == 0x0378)
- return OTHER;
-
- case Character.LINE_SEPARATOR:
- case Character.PARAGRAPH_SEPARATOR:
- case Character.SURROGATE:
- return CONTROL;
- case Character.FORMAT:
- if (cp == 0x200C ||
- cp >= 0xE0020 && cp <= 0xE007F)
- return EXTEND;
- if (cp == 0x200D)
- return ZWJ;
- if (cp >= 0x0600 && cp <= 0x0605 ||
- cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
- cp == 0x110BD || cp == 0x110CD)
- return PREPEND;
- return CONTROL;
- case Character.NON_SPACING_MARK:
- case Character.ENCLOSING_MARK:
- // NOTE:
- // #tr29 "plus a few General_Category = Spacing_Mark needed for
- // canonical equivalence."
- // but for "extended grapheme clusters" support, there is no
- // need actually to diff "extend" and "spackmark" given GB9, GB9a
- return EXTEND;
- case Character.COMBINING_SPACING_MARK:
- if (isExcludedSpacingMark(cp))
- return OTHER;
- // NOTE:
- // 0x11720 and 0x11721 are mentioned in #tr29 as
- // OTHER_LETTER but it appears their category has been updated to
- // COMBING_SPACING_MARK already (verified in ver.8)
- return SPACINGMARK;
- case Character.OTHER_SYMBOL:
- if (cp >= 0x1F1E6 && cp <= 0x1F1FF)
- return RI;
- return OTHER;
- case Character.MODIFIER_LETTER:
- case Character.MODIFIER_SYMBOL:
- // WARNING:
- // not mentioned in #tr29 but listed in GraphemeBreakProperty.txt
- if (cp == 0xFF9E || cp == 0xFF9F ||
- cp >= 0x1F3FB && cp <= 0x1F3FF)
- return EXTEND;
- return OTHER;
- case Character.OTHER_LETTER:
- if (cp == 0x0E33 || cp == 0x0EB3)
- return SPACINGMARK;
- // hangul jamo
- if (cp >= 0x1100 && cp <= 0x11FF) {
- if (cp <= 0x115F)
- return L;
- if (cp <= 0x11A7)
- return V;
- return T;
- }
- // hangul syllables
- int sindex = cp - SYLLABLE_BASE;
- if (sindex >= 0 && sindex < SCOUNT) {
-
- if (sindex % TCOUNT == 0)
- return LV;
- return LVT;
- }
- // hangul jamo_extended A
- if (cp >= 0xA960 && cp <= 0xA97C)
- return L;
- // hangul jamo_extended B
- if (cp >= 0xD7B0 && cp <= 0xD7C6)
- return V;
- if (cp >= 0xD7CB && cp <= 0xD7FB)
- return T;
-
- // Prepend
- switch (cp) {
- case 0x0D4E:
- case 0x111C2:
- case 0x111C3:
- case 0x11A3A:
- case 0x11A84:
- case 0x11A85:
- case 0x11A86:
- case 0x11A87:
- case 0x11A88:
- case 0x11A89:
- case 0x11D46:
- return PREPEND;
- }
- }
- return OTHER;
- }
-
- // from generated java.util.regex.EmojiData.java
- static boolean isExtendedPictographic(int cp) {
- return
- cp == 0x00A9 ||
- cp == 0x00AE ||
- cp == 0x203C ||
- cp == 0x2049 ||
- cp == 0x2122 ||
- cp == 0x2139 ||
- (cp >= 0x2194 && cp <= 0x2199) ||
- cp == 0x21A9 ||
- cp == 0x21AA ||
- cp == 0x231A ||
- cp == 0x231B ||
- cp == 0x2328 ||
- cp == 0x2388 ||
- cp == 0x23CF ||
- (cp >= 0x23E9 && cp <= 0x23F3) ||
- (cp >= 0x23F8 && cp <= 0x23FA) ||
- cp == 0x24C2 ||
- cp == 0x25AA ||
- cp == 0x25AB ||
- cp == 0x25B6 ||
- cp == 0x25C0 ||
- (cp >= 0x25FB && cp <= 0x25FE) ||
- (cp >= 0x2600 && cp <= 0x2605) ||
- (cp >= 0x2607 && cp <= 0x2612) ||
- (cp >= 0x2614 && cp <= 0x2685) ||
- (cp >= 0x2690 && cp <= 0x2705) ||
- (cp >= 0x2708 && cp <= 0x2712) ||
- cp == 0x2714 ||
- cp == 0x2716 ||
- cp == 0x271D ||
- cp == 0x2721 ||
- cp == 0x2728 ||
- cp == 0x2733 ||
- cp == 0x2734 ||
- cp == 0x2744 ||
- cp == 0x2747 ||
- cp == 0x274C ||
- cp == 0x274E ||
- (cp >= 0x2753 && cp <= 0x2755) ||
- cp == 0x2757 ||
- (cp >= 0x2763 && cp <= 0x2767) ||
- (cp >= 0x2795 && cp <= 0x2797) ||
- cp == 0x27A1 ||
- cp == 0x27B0 ||
- cp == 0x27BF ||
- cp == 0x2934 ||
- cp == 0x2935 ||
- (cp >= 0x2B05 && cp <= 0x2B07) ||
- cp == 0x2B1B ||
- cp == 0x2B1C ||
- cp == 0x2B50 ||
- cp == 0x2B55 ||
- cp == 0x3030 ||
- cp == 0x303D ||
- cp == 0x3297 ||
- cp == 0x3299 ||
- (cp >= 0x1F000 && cp <= 0x1F0FF) ||
- (cp >= 0x1F10D && cp <= 0x1F10F) ||
- cp == 0x1F12F ||
- (cp >= 0x1F16C && cp <= 0x1F171) ||
- cp == 0x1F17E ||
- cp == 0x1F17F ||
- cp == 0x1F18E ||
- (cp >= 0x1F191 && cp <= 0x1F19A) ||
- (cp >= 0x1F1AD && cp <= 0x1F1E5) ||
- (cp >= 0x1F201 && cp <= 0x1F20F) ||
- cp == 0x1F21A ||
- cp == 0x1F22F ||
- (cp >= 0x1F232 && cp <= 0x1F23A) ||
- (cp >= 0x1F23C && cp <= 0x1F23F) ||
- (cp >= 0x1F249 && cp <= 0x1F3FA) ||
- (cp >= 0x1F400 && cp <= 0x1F53D) ||
- (cp >= 0x1F546 && cp <= 0x1F64F) ||
- (cp >= 0x1F680 && cp <= 0x1F6FF) ||
- (cp >= 0x1F774 && cp <= 0x1F77F) ||
- (cp >= 0x1F7D5 && cp <= 0x1F7FF) ||
- (cp >= 0x1F80C && cp <= 0x1F80F) ||
- (cp >= 0x1F848 && cp <= 0x1F84F) ||
- (cp >= 0x1F85A && cp <= 0x1F85F) ||
- (cp >= 0x1F888 && cp <= 0x1F88F) ||
- (cp >= 0x1F8AE && cp <= 0x1F8FF) ||
- (cp >= 0x1F90C && cp <= 0x1F93A) ||
- (cp >= 0x1F93C && cp <= 0x1F945) ||
- (cp >= 0x1F947 && cp <= 0x1FFFD);
-
- }
-}
diff -r 563d6852da45 test/jdk/java/util/regex/GraphemeTestRun.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/regex/GraphemeTestRun.java Tue Mar 10 08:08:19 2020 +0100
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 7071819 8221431
+ * @library /lib/testlibrary/java/lang
+ * @run main java.base/java.util.regex.GraphemeTest
+ * @summary tests Unicode Extended Grapheme support
+ */
diff -r 563d6852da45 test/jdk/java/util/regex/RegExTest.java
--- a/test/jdk/java/util/regex/RegExTest.java Mon Mar 09 21:43:01 2020 +0100
+++ b/test/jdk/java/util/regex/RegExTest.java Tue Mar 10 08:08:19 2020 +0100
@@ -4812,24 +4812,71 @@
buf = new StringBuilder();
}
}
+
+ // test \X directly
Pattern p = Pattern.compile("\\X");
Matcher m = p.matcher(src.toString());
- Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
for (String g : graphemes) {
// System.out.printf(" grapheme:=[%s]%n", g);
- // (1) test \\X directly
- if (!m.find() || !m.group().equals(g)) {
- System.out.println("Failed \\X [" + ln + "] : " + g);
+ String group = null;
+ if (!m.find() || !(group = m.group()).equals(g)) {
+ System.out.println("Failed pattern \\X [" + ln + "] : "
+ + "expected: " + g + " - actual: " + group);
+ failCount++;
+ }
+ }
+ if (m.find()) {
+ failCount++;
+ }
+
+ // test \b{g} (without \X) via Pattern
+ Pattern pbg = Pattern.compile("\\b{g}");
+ m = pbg.matcher(src.toString());
+ m.find();
+ int prev = m.end();
+ for (String g : graphemes) {
+ String group = null;
+ if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
+ System.out.println("Failed pattern \\b{g} [" + ln + "] : "
+ + "expected: " + g + " - actual: " + group);
+ failCount++;
+ }
+ if (!"".equals(m.group())) {
failCount++;
}
- // (2) test \\b{g} + \\X via Scanner
- boolean hasNext = s.hasNext(p);
- // if (!s.hasNext() || !s.next().equals(next)) {
- if (!s.hasNext(p) || !s.next(p).equals(g)) {
- System.out.println("Failed b{g} [" + ln + "] : " + g);
+ prev = m.end();
+ }
+ if (m.find()) {
+ failCount++;
+ }
+
+ // test \b{g} + \X via Scanner
+ Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
+ for (String g : graphemes) {
+ String next = null;
+ if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
+ System.out.println("Failed \\b{g} [" + ln + "] : "
+ + "expected: " + g + " - actual: " + next);
failCount++;
}
}
+ if (s.hasNext(p)) {
+ failCount++;
+ }
+
+ // test \b{g} without \X via Scanner
+ s = new Scanner(src.toString()).useDelimiter("\\b{g}");
+ for (String g : graphemes) {
+ String next = null;
+ if (!s.hasNext() || !(next = s.next()).equals(g)) {
+ System.out.println("Failed \\b{g} [" + ln + "] : "
+ + "expected: " + g + " - actual: " + next);
+ failCount++;
+ }
+ }
+ if (s.hasNext()) {
+ failCount++;
+ }
});
// some sanity checks
if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
diff -r 563d6852da45 test/jdk/java/util/regex/java.base/java/util/regex/GraphemeTest.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/regex/java.base/java/util/regex/GraphemeTest.java Tue Mar 10 08:08:19 2020 +0100
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+public class GraphemeTest {
+
+ /** Copied from UCDFiles in @library /lib/testlibrary/java/lang, which is
+ * in the default (unnamed) package and cannot be accessed from here. */
+ static Path UCD_DIR = Paths.get(
+ System.getProperty("test.root"),
+ "..", "..", "make", "data", "unicodedata");
+ static Path GRAPHEME_BREAK_PROPERTY =
+ UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
+ static Path EMOJI_DATA =
+ UCD_DIR.resolve("emoji-data.txt");
+
+ public static void main(String[] args) throws Exception {
+ testGraphemeType(GRAPHEME_BREAK_PROPERTY);
+ testGraphemeType(EMOJI_DATA);
+ }
+
+ private static void testGraphemeType(Path path) throws Exception {
+ Files.lines(path)
+ .map( ln -> ln.replaceFirst("#.*", "") )
+ .filter( ln -> ln.length() != 0 )
+ .forEach(ln -> {
+ String[] strs = ln.split("\\s+");
+ int off = strs[0].indexOf("..");
+ int cp0, cp1;
+ String expected = strs[2];
+ if (off != -1) {
+ cp0 = Integer.parseInt(strs[0], 0, off, 16);
+ cp1 = Integer.parseInt(strs[0], off + 2, strs[0].length(), 16);
+ } else {
+ cp0 = cp1 = Integer.parseInt(strs[0], 16);
+ }
+ for (int cp = cp0; cp <= cp1; cp++) {
+ // Ignore Emoji* for now (only interested in Extended_Pictographic)
+ if (expected.startsWith("Emoji")) {
+ continue;
+ }
+
+ // NOTE:
+ // #tr29 "plus a few General_Category = Spacing_Mark needed for
+ // canonical equivalence."
+ // For "extended grapheme clusters" support, there is actually no
+ // need to distinguish "extend" and "spacingmark" given GB9, GB9a.
+ String type = types[Grapheme.getGraphemeType(cp)];
+ if (!expected.equalsIgnoreCase(type)) {
+ if ("Extend".equals(expected) &&
+ "SpacingMark".equalsIgnoreCase(type))
+ System.out.printf("[%x] [%s][%d] -> [%s]%n",
+ cp, expected, Character.getType(cp), type);
+ else
+ throw new RuntimeException(String.format(
+ "cp=[%x], expeced:[%s] result:[%s]%n",
+ cp, expected, type));
+ }
+ }
+ });
+ }
+
+ private static final String[] types = {
+ "Other", "CR", "LF", "Control", "Extend", "ZWJ", "Regional_Indicator",
+ "Prepend", "SpacingMark",
+ "L", "V", "T", "LV", "LVT",
+ "Extended_Pictographic" };
+
+}
diff -r 563d6852da45 test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java
--- a/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Mon Mar 09 21:43:01 2020 +0100
+++ b/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Tue Mar 10 08:08:19 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,15 +29,25 @@
* @library /test/lib
*/
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+import java.util.jar.Attributes.Name;
+import java.util.jar.Manifest;
import java.util.jar.JarFile;
-import java.util.jar.Attributes.Name;
import java.util.jar.JarEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipEntry;
+import static java.util.jar.JarFile.MANIFEST_NAME;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES;
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import jdk.test.lib.SecurityTools;
import jdk.test.lib.util.JarUtils;
@@ -46,7 +56,7 @@
/**
* this name will break across lines in MANIFEST.MF at the
- * middle of a two-byte utf-8 encoded character due to its e acute letter
+ * middle of a two-byte UTF-8 encoded character due to its e acute letter
* at its exact position.
*
* because no file with such a name exists {@link JarUtils} will add the
@@ -63,53 +73,58 @@
static final String anotherName =
"LineBrokenMultiByteCharacterA1234567890B1234567890C123456789D1234567890.class";
- static final String alias = "a";
- static final String keystoreFileName = "test.jks";
- static final String manifestFileName = "MANIFEST.MF";
+ static final String ALIAS = "A";
+ static final String KEYSTORE_FILENAME = "test.jks";
+ static final String SOME_OTHER_SIG_FILE = "META-INF/FAKE_SIG.DSA";
public static void main(String[] args) throws Exception {
prepare();
testSignJar("test.jar");
- testSignJarNoManifest("test-no-manifest.jar");
testSignJarUpdate("test-update.jar", "test-updated.jar");
}
static void prepare() throws Exception {
- SecurityTools.keytool("-keystore", keystoreFileName, "-genkeypair",
+ SecurityTools.keytool("-keystore", KEYSTORE_FILENAME, "-genkeypair",
"-keyalg", "dsa",
"-storepass", "changeit", "-keypass", "changeit", "-storetype",
- "JKS", "-alias", alias, "-dname", "CN=X", "-validity", "366")
+ "JKS", "-alias", ALIAS, "-dname", "CN=X", "-validity", "366")
.shouldHaveExitValue(0);
- Files.write(Paths.get(manifestFileName), (Name.
+ new File(MANIFEST_NAME).getParentFile().mkdirs();
+ Files.write(Paths.get(MANIFEST_NAME), (Name.
MANIFEST_VERSION.toString() + ": 1.0\r\n").getBytes(UTF_8));
+
+ // prevent jarsigner from assuming it was safe to rewrite the manifest
+ // and its line breaks assuming there were no other signatures present
+ Files.write(Paths.get(SOME_OTHER_SIG_FILE), new byte[] {});
}
static void testSignJar(String jarFileName) throws Exception {
- JarUtils.createJar(jarFileName, manifestFileName, testClassName);
- verifyJarSignature(jarFileName);
- }
-
- static void testSignJarNoManifest(String jarFileName) throws Exception {
- JarUtils.createJar(jarFileName, testClassName);
+ JarUtils.createJar(jarFileName, testClassName, SOME_OTHER_SIG_FILE);
+ createManifestEntries(jarFileName);
+ rebreakManifest72bytes(jarFileName);
verifyJarSignature(jarFileName);
}
static void testSignJarUpdate(
String initialFileName, String updatedFileName) throws Exception {
- JarUtils.createJar(initialFileName, manifestFileName, anotherName);
- SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype",
+ JarUtils.createJar(initialFileName, testClassName, anotherName,
+ SOME_OTHER_SIG_FILE);
+ createManifestEntries(initialFileName);
+ rebreakManifest72bytes(initialFileName);
+ removeJarEntry(initialFileName, testClassName);
+ SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype",
"JKS", "-storepass", "changeit", "-debug", initialFileName,
- alias).shouldHaveExitValue(0);
+ ALIAS).shouldHaveExitValue(0);
JarUtils.updateJar(initialFileName, updatedFileName, testClassName);
verifyJarSignature(updatedFileName);
}
static void verifyJarSignature(String jarFileName) throws Exception {
// actually sign the jar
- SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype",
- "JKS", "-storepass", "changeit", "-debug", jarFileName, alias)
+ SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype",
+ "JKS", "-storepass", "changeit", "-debug", jarFileName, ALIAS)
.shouldHaveExitValue(0);
try (
@@ -130,7 +145,7 @@
* the signature file does not even contain the desired entry at all.
*
* this relies on {@link java.util.jar.Manifest} breaking lines unaware
- * of bytes that belong to the same multi-byte utf characters.
+ * of bytes that belong to the same multi-byte UTF-8 encoded characters.
*/
static void verifyClassNameLineBroken(JarFile jar, String className)
throws IOException {
@@ -142,7 +157,7 @@
throw new AssertionError(className + " not found in manifest");
}
- JarEntry manifestEntry = jar.getJarEntry(JarFile.MANIFEST_NAME);
+ JarEntry manifestEntry = jar.getJarEntry(MANIFEST_NAME);
try (
InputStream manifestIs = jar.getInputStream(manifestEntry);
) {
@@ -159,7 +174,7 @@
}
if (bytesMatched < eAcuteBroken.length) {
throw new AssertionError("self-test failed: multi-byte "
- + "utf-8 character not broken across lines");
+ + "UTF-8 encoded character not broken across lines");
}
}
}
@@ -183,4 +198,108 @@
}
}
+    /**
+     * Puts the prepared manifest file into the given jar and lets jarsigner
+     * sign the jar once, then removes the signature files written by that
+     * run again so that only the manifest remains in the jar.
+     *
+     * @param jarFileName name of the jar file to modify in place
+     * @throws Exception if updating or signing the jar fails
+     */
+    static void createManifestEntries(String jarFileName) throws Exception {
+        // names of the signature files jarsigner writes for ALIAS
+        final String signatureFileBase = "META-INF/" + ALIAS;
+        final String sfEntryName = signatureFileBase + ".SF";
+        final String dsaEntryName = signatureFileBase + ".DSA";
+
+        JarUtils.updateJarFile(
+                Paths.get(jarFileName), Paths.get("."), Paths.get(MANIFEST_NAME));
+
+        SecurityTools
+                .jarsigner("-keystore", KEYSTORE_FILENAME, "-storepass",
+                        "changeit", "-debug", jarFileName, ALIAS)
+                .shouldHaveExitValue(0);
+
+        // remove the signature files, only manifest is used
+        removeJarEntry(jarFileName, sfEntryName, dsaEntryName);
+    }
+
+    /**
+     * Removes the named entries from a jar file in place.
+     *
+     * The jar is first written to a temporary copy named
+     * {@code "swap-" + jarFileName} with every given entry name mapped to
+     * {@code false}; the copy then replaces the original file and is deleted.
+     *
+     * @param jarFileName name of the jar file to modify
+     * @param entryNames  names of the entries to remove
+     * @throws IOException if writing, copying or deleting a file fails
+     */
+    @SuppressWarnings("deprecation")
+    static void removeJarEntry(String jarFileName, String... entryNames)
+            throws IOException {
+        final String swapFileName = "swap-" + jarFileName;
+
+        // every listed entry is mapped to false
+        JarUtils.updateJar(jarFileName, swapFileName,
+                Arrays.stream(entryNames).collect(
+                        Collectors.toMap(name -> name, name -> false)));
+
+        // put the reduced jar in place of the original one
+        Files.copy(Paths.get(swapFileName), Paths.get(jarFileName),
+                COPY_ATTRIBUTES, REPLACE_EXISTING);
+        Files.delete(Paths.get(swapFileName));
+    }
+
+    /**
+     * Replaces the manifest of the given jar with a copy of itself whose
+     * line breaks have been re-arranged by {@link #rebreak72bytes}.
+     *
+     * The manifest is echoed before and after the re-break. The jar is
+     * rewritten through a temporary copy named
+     * {@code "swap-" + jarFileName}, which replaces the original file and is
+     * deleted afterwards.
+     *
+     * @param jarFileName name of the jar file to modify in place
+     * @throws Exception if reading or rewriting the jar fails
+     */
+    static void rebreakManifest72bytes(String jarFileName) throws Exception {
+        final byte[] originalBytes;
+        try (ZipFile jar = new ZipFile(jarFileName)) {
+            originalBytes = jar.getInputStream(jar.getEntry(MANIFEST_NAME))
+                    .readAllBytes();
+        }
+        Utils.echoManifest(originalBytes, MANIFEST_NAME + " before re-break:");
+
+        final byte[] rebrokenBytes = rebreak72bytes(originalBytes);
+        Utils.echoManifest(rebrokenBytes, MANIFEST_NAME + " after re-break:");
+
+        // a manifest that writes the prepared bytes verbatim instead of
+        // serializing its own attributes
+        final Manifest verbatimManifest = new Manifest() {
+            @Override
+            public void write(OutputStream out) throws IOException {
+                out.write(rebrokenBytes);
+            }
+        };
+
+        final String swapFileName = "swap-" + jarFileName;
+        JarUtils.updateManifest(jarFileName, swapFileName, verbatimManifest);
+        Files.copy(Paths.get(swapFileName), Paths.get(jarFileName),
+                COPY_ATTRIBUTES, REPLACE_EXISTING);
+        Files.delete(Paths.get(swapFileName));
+    }
+
+    /**
+     * Simulates a jar manifest as it would have been created by an earlier
+     * JDK: existing continuation line breaks are removed and new ones are
+     * inserted so that no line holds more than 72 bytes, counted in bytes and
+     * without regard to character boundaries. This breaks the multi-byte
+     * UTF-8 encoded character under test like before resolution of bug
+     * 6202130.
+     *
+     * The first line of a header holds up to 72 bytes; each continuation
+     * line holds its leading space plus up to 71 bytes of content. A space at
+     * the very start of a line that was not recognized as part of a
+     * continuation line break is dropped.
+     *
+     * The returned array is sized exactly to the re-broken content, also when
+     * the number of line breaks differs from the one in {@code mf0}.
+     *
+     * For the manifest under test, the returned manifest is accepted as
+     * unmodified by
+     * {@link jdk.security.jarsigner.JarSigner#updateDigests
+     * (ZipEntry,ZipFile,MessageDigest[],Manifest)} on line 985:
+     *
+     *     if (!mfDigest.equalsIgnoreCase(base64Digests[i])) {
+     *
+     * and therefore left unchanged when the jar is signed and also signature
+     * verification will check it.
+     *
+     * @param mf0 the bytes of the manifest to re-break
+     * @return a new array with the re-broken manifest bytes
+     */
+    static byte[] rebreak72bytes(byte[] mf0) {
+        // at most one line break of 3 bytes is inserted per 71 content bytes
+        byte[] mf1 = new byte[mf0.length + 3 * (mf0.length / 71 + 1)];
+        int i1 = 0;               // next position to write in mf1
+        int c1 = 0;               // bytes in the current output line
+        boolean lineStart = true; // no content since the last line end
+        for (int i0 = 0; i0 < mf0.length; i0++) {
+            byte b = mf0[i0];
+            if (b == '\r' && i0 + 2 < mf0.length
+                    && mf0[i0 + 1] == '\n' && mf0[i0 + 2] == ' ') {
+                // skip line break "\r\n "
+                i0 += 2;
+            } else if (b == '\n' && i0 + 1 < mf0.length
+                    && mf0[i0 + 1] == ' ') {
+                // skip line break "\n "
+                i0 += 1;
+            } else if (b == '\r' || b == '\n') {
+                // a real line end is kept and starts a new line
+                mf1[i1++] = b;
+                c1 = 0;
+                lineStart = true;
+            } else if (b == ' ' && lineStart) {
+                // drop the space without leaving a gap in the output
+            } else {
+                lineStart = false;
+                if (c1 == 72) {
+                    mf1[i1++] = '\r';
+                    mf1[i1++] = '\n';
+                    mf1[i1++] = ' ';
+                    c1 = 1; // the continuation space counts for the line
+                }
+                mf1[i1++] = b;
+                c1++;
+            }
+        }
+        // trim to what was actually written, no trailing zero bytes
+        return Arrays.copyOf(mf1, i1);
+    }
+
}