diff -r f8f2f7ee52cb src/java.base/share/classes/java/util/jar/Attributes.java --- a/src/java.base/share/classes/java/util/jar/Attributes.java Tue Sep 25 11:31:55 2018 -0400 +++ b/src/java.base/share/classes/java/util/jar/Attributes.java Wed Sep 26 07:03:50 2018 +0200 @@ -296,7 +296,6 @@ /* * Writes the current attributes to the specified data output stream. - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ @SuppressWarnings("deprecation") void write(DataOutputStream os) throws IOException { @@ -323,12 +322,9 @@ * Writes the current attributes to the specified data output stream, * make sure to write out the MANIFEST_VERSION or SIGNATURE_VERSION * attributes first. - * - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ @SuppressWarnings("deprecation") - void writeMain(DataOutputStream out) throws IOException - { + void writeMain(DataOutputStream out) throws IOException { // write out the *-Version header first, if it exists String vername = Name.MANIFEST_VERSION.toString(); String version = getValue(vername); @@ -367,7 +363,6 @@ /* * Reads attributes from the specified input stream. - * XXX Need to handle UTF8 values. */ @SuppressWarnings("deprecation") void read(Manifest.FastInputStream is, byte[] lbuf) throws IOException { diff -r f8f2f7ee52cb test/jdk/java/util/jar/Manifest/ValueUtfEncoding.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/jdk/java/util/jar/Manifest/ValueUtfEncoding.java Wed Sep 26 07:03:50 2018 +0200 @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.jar.Attributes; +import java.util.jar.Attributes.Name; +import java.util.jar.Manifest; +import java.util.List; +import java.util.ArrayList; + +import org.testng.annotations.Test; +import static org.testng.Assert.*; + +/** + * @test + * @bug 6202130 + * @run testng ValueUtfEncoding + * @summary Tests complete utf character set manifest values encoding + *

+ * This test writes and reads a manifest that contains every valid utf + * character, grouped into manifest header values with about as many + * bytes as allowed each, resulting in a single big manifest with. + * This way, all possible 1111995 utf characters are covered in one + * manifest. + *

+ * Only header values are tested. Characters for header names are much more + * limited and very simple ones are used just to get valid and different ones. + */ +public class ValueUtfEncoding { + + /** + * From the specifications: + * Implementations should support 65535-byte (not character) header + * values, and 65535 headers per file. They might run out of memory, + * but there should not be hard-coded limits below these values. + * + * @see Notes on Manifest and Signature Files + */ + static final int MIN_VALUE_LENGTH_SUPPORTED = 2 << 16 - 1; + + static final int MAX_UTF_CHARACTER_ENCODED_LENGTH = 4; + + static boolean isValidUtfCharacter(int codePoint) { + if (0xFDD0 <= codePoint && codePoint <= 0xFDEF) { + return false; /* non-characters */ + } + if ((codePoint & 0xFFFE) == 0xFFFE) { + return false; /* byte order marks */ + } + return true; + } + + /** + * returns {@code true} if {@code codePoint} is explicitly forbidden in + * manifest values based on a statement from the specs: + *

otherchar: any UTF-8 character except NUL, CR and LF
+     *
+     * @see Jar File Specification
+     */
+    static boolean isInvalidManifestValueCharacter(int codePoint) {
+        return codePoint == 0 /* NUL */
+            || codePoint == '\r' /* CR */
+            || codePoint == '\n' /* LF */;
+    };
+
+    /**
+     * Produces a list of strings with all known utf characters except those
+     * invalid in manifest header values with at least
+     * {@link #MIN_VALUE_LENGTH_SUPPORTED} utf-8 encoded bytes each
+     * except the last string which contains just the remaining characters.
+     */
+    static List produceValidManifestUtfCharacterValues() {
+        int maxLengthBytes = MIN_VALUE_LENGTH_SUPPORTED +
+                // exceed the specified limit by at least one character
+                MAX_UTF_CHARACTER_ENCODED_LENGTH + 1;
+
+        int numberOfUsedCodePoints = 0;
+        ArrayList values = new ArrayList<>();
+        byte[] valueBuf = new byte[maxLengthBytes];
+        int pos = 0;
+        for (int codePoint = Character.MIN_CODE_POINT;
+                codePoint <= Character.MAX_CODE_POINT; codePoint++) {
+            if (!isValidUtfCharacter(codePoint)) {
+                continue;
+            }
+            if (isInvalidManifestValueCharacter(codePoint)) {
+                continue;
+            }
+            numberOfUsedCodePoints++;
+
+            byte[] charBuf =
+                    new String(Character.toChars(codePoint)).getBytes(UTF_8);
+            if (pos + charBuf.length > valueBuf.length) {
+                values.add(new String(valueBuf, 0, pos, UTF_8));
+                pos = 0;
+            }
+            System.arraycopy(charBuf, 0, valueBuf, pos, charBuf.length);
+            pos += charBuf.length;
+        }
+        if (pos > 0) {
+            values.add(new String(valueBuf, 0, pos, UTF_8));
+        }
+
+        if (numberOfUsedCodePoints !=
+                (17 << 16) /* utf space */
+                - 66 /* non-characters */
+                - 3 /* nul, cr, lf */) {
+            fail("self-test: utf character set not covered exactly");
+        }
+
+        return values;
+    }
+
+    /**
+     * returns simple, valid, short, and distinct manifest header names.
+     * The returned name cannot be "{@code Manifest-Version}" because the
+     * returned string does not contain "{@code -}".
+     *
+     * @param seed seed to produce distinct names
+     */
+    static String azName(int seed) {
+        StringBuffer name = new StringBuffer();
+        do {
+            name.insert(0, (char) (seed % 26 + (seed < 26 ? 'A' : 'a')));
+            seed = seed / 26 - 1;
+        } while (seed >= 0);
+        return name.toString();
+    }
+
+    /**
+     * covers writing and reading of manifests with all known utf characters.
+     *
+     * Because the implementation uses different portions of code depending on
+     * where the value occurs to read or write, each character is tested in
+     * each of the three positions:
    + *
  • main attribute header,
  • + *
  • named section name, and
  • + *
  • named sections header values
  • + *
+ * Implementation of writing the main section headers in + * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the + * one writing named section headers in + * {@link Attributes#write(java.io.DataOutputStream)} regarding the special + * order of {@link Name#MANIFEST_VERSION} and + * {@link Name#SIGNATURE_VERSION} and also + * {@link Manifest#read(java.io.InputStream)} reads main sections + * different than reading named sections names headers in + * {@link Attributes#read(Manifest.FastInputStream, byte[])}. + */ + @Test + public void testValueUtfEncoding() throws IOException { + Manifest mf = new Manifest(); + mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); + + List values = produceValidManifestUtfCharacterValues(); + for (int i = 0; i < values.size(); i++) { + String name = azName(i); + String value = values.get(i); + + mf.getMainAttributes().put(new Name(name), value); + Attributes attributes = new Attributes(); + mf.getEntries().put(value, attributes); + attributes.put(new Name(name), value); + } + + mf = writeAndRead(mf); + + for (int i = 0; i < values.size(); i++) { + String value = values.get(i); + String name = azName(i); + + assertEquals(mf.getMainAttributes().getValue(name), value, + "main attributes header value"); + Attributes attributes = mf.getAttributes(value); + assertNotNull(attributes, "named section not found"); + assertEquals(attributes.getValue(name), value, + "named section attributes value"); + } + } + + static Manifest writeAndRead(Manifest mf) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mf.write(out); + byte[] mfBytes = out.toByteArray(); + + System.out.println("-------------------------------------------" + + "-----------------------------"); + System.out.print(new String(mfBytes, UTF_8)); + System.out.println("-------------------------------------------" + + "-----------------------------"); + + ByteArrayInputStream in = new ByteArrayInputStream(mfBytes); + return new Manifest(in); + } + +}