diff options
Diffstat (limited to 'depends/pack200/src/unpack.cpp')
| -rw-r--r-- | depends/pack200/src/unpack.cpp | 5105 |
1 files changed, 5105 insertions, 0 deletions
diff --git a/depends/pack200/src/unpack.cpp b/depends/pack200/src/unpack.cpp new file mode 100644 index 00000000..722d67b5 --- /dev/null +++ b/depends/pack200/src/unpack.cpp @@ -0,0 +1,5105 @@ +/* + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// -*- C++ -*- +// Program for unpacking specially compressed Java packages. +// John R. Rose + +/* + * When compiling for a 64bit LP64 system (longs and pointers being 64bits), + * the printf format %ld is correct and use of %lld will cause warning + * errors from some compilers (gcc/g++). + * _LP64 can be explicitly set (used on Linux). + * Solaris compilers will define __sparcv9 or __x86_64 on 64bit compilations. + */ +#if defined(_LP64) || defined(__sparcv9) || defined(__x86_64) +#define LONG_LONG_FORMAT "%ld" +#define LONG_LONG_HEX_FORMAT "%lx" +#else +#define LONG_LONG_FORMAT "%lld" +#define LONG_LONG_HEX_FORMAT "%016llx" +#endif + +#include <sys/types.h> + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> +#include <assert.h> +#include <limits.h> +#include <time.h> + +#include "defines.h" +#include "bytes.h" +#include "utils.h" +#include "coding.h" +#include "bands.h" + +#include "constants.h" + +#include "zip.h" + +#include "unpack.h" + +// tags, in canonical order: +static const byte TAGS_IN_ORDER[] = { + CONSTANT_Utf8, CONSTANT_Integer, CONSTANT_Float, CONSTANT_Long, + CONSTANT_Double, CONSTANT_String, CONSTANT_Class, CONSTANT_Signature, + CONSTANT_NameandType, CONSTANT_Fieldref, CONSTANT_Methodref, CONSTANT_InterfaceMethodref}; +#define N_TAGS_IN_ORDER (sizeof TAGS_IN_ORDER) + +// REQUESTED must be -2 for u2 and REQUESTED_LDC must be -1 for u1 +enum +{ + NOT_REQUESTED = 0, + REQUESTED = -2, + REQUESTED_LDC = -1 +}; + +#define NO_INORD ((uint) - 1) + +struct entry +{ + byte tag; + +#if 0 + byte bits; + enum { + //EB_EXTRA = 1, + EB_SUPER = 2 + }; +#endif + unsigned short nrefs; // pack w/ tag + + int outputIndex; + uint inord; // &cp.entries[cp.tag_base[this->tag]+this->inord] == this + + entry **refs; + + // put last to pack best + union + { + bytes b; + int i; + jlong l; + } value; + + void requestOutputIndex(cpool &cp, int req = REQUESTED); + int getOutputIndex() + { + assert(outputIndex > NOT_REQUESTED); + return outputIndex; + } + + entry *ref(int refnum) + { + assert((uint)refnum < nrefs); + return refs[refnum]; + } + + const char *utf8String() + { + assert(tagMatches(CONSTANT_Utf8)); + assert(value.b.len == strlen((const char *)value.b.ptr)); + return (const char *)value.b.ptr; + } + + entry *className() + { + assert(tagMatches(CONSTANT_Class)); + return ref(0); + } + + entry *memberClass() + { + assert(tagMatches(CONSTANT_Member)); + return ref(0); + } + + entry *memberDescr() + { + assert(tagMatches(CONSTANT_Member)); + return ref(1); + } + + entry *descrName() + { + assert(tagMatches(CONSTANT_NameandType)); + return ref(0); + } + + entry *descrType() + { + assert(tagMatches(CONSTANT_NameandType)); + return ref(1); + } + + int typeSize(); + + bytes &asUtf8(); + int asInteger() + { + assert(tag == CONSTANT_Integer); + return value.i; + } + + bool isUtf8(bytes &b) + { + return tagMatches(CONSTANT_Utf8) && value.b.equals(b); + } + + bool isDoubleWord() + { + return tag == CONSTANT_Double || tag == CONSTANT_Long; + } + + bool tagMatches(byte tag2) + { + return (tag2 == tag) || (tag2 == CONSTANT_Utf8 && tag == CONSTANT_Signature); + } +}; + +entry *cpindex::get(uint i) +{ + if (i >= len) + return nullptr; + else if (base1 != nullptr) + // primary index + return &base1[i]; + else + // secondary index + return base2[i]; +} + +inline bytes &entry::asUtf8() +{ + assert(tagMatches(CONSTANT_Utf8)); + return value.b; +} + +int entry::typeSize() +{ + assert(tagMatches(CONSTANT_Utf8)); + const char *sigp = (char *)value.b.ptr; + switch (*sigp) + { + case '(': + sigp++; + break; // skip opening '(' + case 'D': + case 'J': + return 2; // double field + default: + return 1; // field + } + int siglen = 0; + for (;;) + { + int ch = *sigp++; + switch (ch) + { + case 'D': + case 'J': + siglen += 1; + break; + case '[': + // Skip rest of array info. + while (ch == '[') + { + ch = *sigp++; + } + if (ch != 'L') + break; + // else fall through + case 'L': + sigp = strchr(sigp, ';'); + if (sigp == nullptr) + { + unpack_abort("bad data"); + return 0; + } + sigp += 1; + break; + case ')': // closing ')' + return siglen; + } + siglen += 1; + } +} + +inline cpindex *cpool::getFieldIndex(entry *classRef) +{ + assert(classRef->tagMatches(CONSTANT_Class)); + assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]); + return &member_indexes[classRef->inord * 2 + 0]; +} +inline cpindex *cpool::getMethodIndex(entry *classRef) +{ + assert(classRef->tagMatches(CONSTANT_Class)); + assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]); + return &member_indexes[classRef->inord * 2 + 1]; +} + +struct inner_class +{ + entry *inner; + entry *outer; + entry *name; + int flags; + inner_class *next_sibling; + bool requested; +}; + +// Here is where everything gets deallocated: +void unpacker::free() +{ + int i; + assert(infileptr == nullptr); // caller resp. + if (jarout != nullptr) + jarout->reset(); + if (gzin != nullptr) + { + gzin->free(); + gzin = nullptr; + } + if (free_input) + input.free(); + // free everybody ever allocated with U_NEW or (recently) with T_NEW + assert(smallbuf.base() == nullptr || mallocs.contains(smallbuf.base())); + assert(tsmallbuf.base() == nullptr || tmallocs.contains(tsmallbuf.base())); + mallocs.freeAll(); + tmallocs.freeAll(); + smallbuf.init(); + tsmallbuf.init(); + bcimap.free(); + class_fixup_type.free(); + class_fixup_offset.free(); + class_fixup_ref.free(); + code_fixup_type.free(); + code_fixup_offset.free(); + code_fixup_source.free(); + requested_ics.free(); + cur_classfile_head.free(); + cur_classfile_tail.free(); + for (i = 0; i < ATTR_CONTEXT_LIMIT; i++) + attr_defs[i].free(); + + // free CP state + cp.outputEntries.free(); + for (i = 0; i < CONSTANT_Limit; i++) + cp.tag_extras[i].free(); +} + +// input handling +// Attempts to advance rplimit so that (rplimit-rp) is at least 'more'. +// Will eagerly read ahead by larger chunks, if possible. +// Returns false if (rplimit-rp) is not at least 'more', +// unless rplimit hits input.limit(). +bool unpacker::ensure_input(jlong more) +{ + julong want = more - input_remaining(); + if ((jlong)want <= 0) + return true; // it's already in the buffer + if (rplimit == input.limit()) + return true; // not expecting any more + + if (read_input_fn == nullptr) + { + // assume it is already all there + bytes_read += input.limit() - rplimit; + rplimit = input.limit(); + return true; + } + CHECK_0; + + julong remaining = (input.limit() - rplimit); // how much left to read? + byte *rpgoal = (want >= remaining) ? input.limit() : rplimit + (size_t)want; + enum + { + CHUNK_SIZE = (1 << 14) + }; + julong fetch = want; + if (fetch < CHUNK_SIZE) + fetch = CHUNK_SIZE; + if (fetch > remaining * 3 / 4) + fetch = remaining; + // Try to fetch at least "more" bytes. + while ((jlong)fetch > 0) + { + jlong nr = (*read_input_fn)(this, rplimit, fetch, remaining); + if (nr <= 0) + { + return (rplimit >= rpgoal); + } + remaining -= nr; + rplimit += nr; + fetch -= nr; + bytes_read += nr; + assert(remaining == (julong)(input.limit() - rplimit)); + } + return true; +} + +// output handling + +fillbytes *unpacker::close_output(fillbytes *which) +{ + assert(wp != nullptr); + if (which == nullptr) + { + if (wpbase == cur_classfile_head.base()) + { + which = &cur_classfile_head; + } + else + { + which = &cur_classfile_tail; + } + } + assert(wpbase == which->base()); + assert(wplimit == which->end()); + which->setLimit(wp); + wp = nullptr; + wplimit = nullptr; + // wpbase = nullptr; + return which; +} + +// maybe_inline +void unpacker::ensure_put_space(size_t size) +{ + if (wp + size <= wplimit) + return; + // Determine which segment needs expanding. + fillbytes *which = close_output(); + byte *wp0 = which->grow(size); + wpbase = which->base(); + wplimit = which->end(); + wp = wp0; +} + +byte *unpacker::put_space(size_t size) +{ + byte *wp0 = wp; + byte *wp1 = wp0 + size; + if (wp1 > wplimit) + { + ensure_put_space(size); + wp0 = wp; + wp1 = wp0 + size; + } + wp = wp1; + return wp0; +} + +void unpacker::putu2_at(byte *wp, int n) +{ + if (n != (unsigned short)n) + { + unpack_abort(ERROR_OVERFLOW); + return; + } + wp[0] = (n) >> 8; + wp[1] = (n) >> 0; +} + +void unpacker::putu4_at(byte *wp, int n) +{ + wp[0] = (n) >> 24; + wp[1] = (n) >> 16; + wp[2] = (n) >> 8; + wp[3] = (n) >> 0; +} + +void unpacker::putu8_at(byte *wp, jlong n) +{ + putu4_at(wp + 0, (int)((julong)n >> 32)); + putu4_at(wp + 4, (int)((julong)n >> 0)); +} + +void unpacker::putu2(int n) +{ + putu2_at(put_space(2), n); +} + +void unpacker::putu4(int n) +{ + putu4_at(put_space(4), n); +} + +void unpacker::putu8(jlong n) +{ + putu8_at(put_space(8), n); +} + +int unpacker::putref_index(entry *e, int size) +{ + if (e == nullptr) + return 0; + else if (e->outputIndex > NOT_REQUESTED) + return e->outputIndex; + else if (e->tag == CONSTANT_Signature) + return putref_index(e->ref(0), size); + else + { + e->requestOutputIndex(cp, -size); + // Later on we'll fix the bits. + class_fixup_type.addByte(size); + class_fixup_offset.add((int)wpoffset()); + class_fixup_ref.add(e); + return 0; + } +} + +void unpacker::putref(entry *e) +{ + int oidx = putref_index(e, 2); + putu2_at(put_space(2), oidx); +} + +void unpacker::putu1ref(entry *e) +{ + int oidx = putref_index(e, 1); + putu1_at(put_space(1), oidx); +} + +static int total_cp_size[] = {0, 0}; +static int largest_cp_ref[] = {0, 0}; +static int hash_probes[] = {0, 0}; + +// Allocation of small and large blocks. + +enum +{ + CHUNK = (1 << 14), + SMALL = (1 << 9) +}; + +// Call malloc. Try to combine small blocks and free much later. +void *unpacker::alloc_heap(size_t size, bool smallOK, bool temp) +{ + if (!smallOK || size > SMALL) + { + void *res = must_malloc((int)size); + (temp ? &tmallocs : &mallocs)->add(res); + return res; + } + fillbytes &xsmallbuf = *(temp ? &tsmallbuf : &smallbuf); + if (!xsmallbuf.canAppend(size + 1)) + { + xsmallbuf.init(CHUNK); + (temp ? &tmallocs : &mallocs)->add(xsmallbuf.base()); + } + int growBy = (int)size; + growBy += -growBy & 7; // round up mod 8 + return xsmallbuf.grow(growBy); +} + +void unpacker::saveTo(bytes &b, byte *ptr, size_t len) +{ + b.ptr = U_NEW(byte, add_size(len, 1)); + if (aborting()) + { + b.len = 0; + return; + } + b.len = len; + b.copyFrom(ptr, len); +} + +// Read up through band_headers. +// Do the archive_size dance to set the size of the input mega-buffer. +void unpacker::read_file_header() +{ + // Read file header to determine file type and total size. + enum + { + MAGIC_BYTES = 4, + AH_LENGTH_0 = 3, // minver, majver, options are outside of archive_size + AH_LENGTH_0_MAX = AH_LENGTH_0 + 1, // options might have 2 bytes + AH_LENGTH = 26, // maximum archive header length (w/ all fields) + // Length contributions from optional header fields: + AH_FILE_HEADER_LEN = 5, // sizehi/lo/next/modtime/files + AH_ARCHIVE_SIZE_LEN = 2, // sizehi/lo only; part of AH_FILE_HEADER_LEN + AH_CP_NUMBER_LEN = 4, // int/float/long/double + AH_SPECIAL_FORMAT_LEN = 2, // layouts/band-headers + AH_LENGTH_MIN = + AH_LENGTH - (AH_FILE_HEADER_LEN + AH_SPECIAL_FORMAT_LEN + AH_CP_NUMBER_LEN), + ARCHIVE_SIZE_MIN = AH_LENGTH_MIN - (AH_LENGTH_0 + AH_ARCHIVE_SIZE_LEN), + FIRST_READ = MAGIC_BYTES + AH_LENGTH_MIN + }; + + assert(AH_LENGTH_MIN == 15); // # of UNSIGNED5 fields required after archive_magic + assert(ARCHIVE_SIZE_MIN == 10); // # of UNSIGNED5 fields required after archive_size + // An absolute minimum nullptr archive is magic[4], {minver,majver,options}[3], + // archive_size[0], cp_counts[8], class_counts[4], for a total of 19 bytes. + // (Note that archive_size is optional; it may be 0..10 bytes in length.) + // The first read must capture everything up through the options field. + // This happens to work even if {minver,majver,options} is a pathological + // 15 bytes long. Legal pack files limit those three fields to 1+1+2 bytes. + assert(FIRST_READ >= MAGIC_BYTES + AH_LENGTH_0 * B_MAX); + + // Up through archive_size, the largest possible archive header is + // magic[4], {minver,majver,options}[4], archive_size[10]. + // (Note only the low 12 bits of options are allowed to be non-zero.) + // In order to parse archive_size, we need at least this many bytes + // in the first read. Of course, if archive_size_hi is more than + // a byte, we probably will fail to allocate the buffer, since it + // will be many gigabytes long. This is a practical, not an + // architectural limit to Pack200 archive sizes. + assert(FIRST_READ >= MAGIC_BYTES + AH_LENGTH_0_MAX + 2 * B_MAX); + + bool foreign_buf = (read_input_fn == nullptr); + byte initbuf[(int)FIRST_READ + (int)C_SLOP + 200]; // 200 is for JAR I/O + if (foreign_buf) + { + // inbytes is all there is + input.set(inbytes); + rp = input.base(); + rplimit = input.limit(); + } + else + { + // inbytes, if not empty, contains some read-ahead we must use first + // ensure_input will take care of copying it into initbuf, + // then querying read_input_fn for any additional data needed. + // However, the caller must assume that we use up all of inbytes. + // There is no way to tell the caller that we used only part of them. + // Therefore, the caller must use only a bare minimum of read-ahead. + if (inbytes.len > FIRST_READ) + { + abort("too much read-ahead"); + return; + } + input.set(initbuf, sizeof(initbuf)); + input.b.clear(); + input.b.copyFrom(inbytes); + rplimit = rp = input.base(); + rplimit += inbytes.len; + bytes_read += inbytes.len; + } + // Read only 19 bytes, which is certain to contain #archive_options fields, + // but is certain not to overflow past the archive_header. + input.b.len = FIRST_READ; + if (!ensure_input(FIRST_READ)) + abort("EOF reading archive magic number"); + + if (rp[0] == 'P' && rp[1] == 'K') + { + // In the Unix-style program, we simply simulate a copy command. + // Copy until EOF; assume the JAR file is the last segment. + fprintf(stderr, "Copy-mode.\n"); + for (;;) + { + jarout->write_data(rp, (int)input_remaining()); + if (foreign_buf) + break; // one-time use of a passed in buffer + if (input.size() < CHUNK) + { + // Get some breathing room. + input.set(U_NEW(byte, (size_t)CHUNK + C_SLOP), (size_t)CHUNK); + CHECK; + } + rp = rplimit = input.base(); + if (!ensure_input(1)) + break; + } + jarout->closeJarFile(false); + return; + } + + // Read the magic number. + magic = 0; + for (int i1 = 0; i1 < (int)sizeof(magic); i1++) + { + magic <<= 8; + magic += (*rp++ & 0xFF); + } + + // Read the first 3 values from the header. + value_stream hdr; + int hdrVals = 0; + int hdrValsSkipped = 0; // debug only + hdr.init(rp, rplimit, UNSIGNED5_spec); + minver = hdr.getInt(); + majver = hdr.getInt(); + hdrVals += 2; + + if (magic != (int)JAVA_PACKAGE_MAGIC || + (majver != JAVA5_PACKAGE_MAJOR_VERSION && majver != JAVA6_PACKAGE_MAJOR_VERSION) || + (minver != JAVA5_PACKAGE_MINOR_VERSION && minver != JAVA6_PACKAGE_MINOR_VERSION)) + { + char message[200]; + sprintf(message, "@" ERROR_FORMAT ": magic/ver = " + "%08X/%d.%d should be %08X/%d.%d OR %08X/%d.%d\n", + magic, majver, minver, JAVA_PACKAGE_MAGIC, JAVA5_PACKAGE_MAJOR_VERSION, + JAVA5_PACKAGE_MINOR_VERSION, JAVA_PACKAGE_MAGIC, JAVA6_PACKAGE_MAJOR_VERSION, + JAVA6_PACKAGE_MINOR_VERSION); + abort(message); + } + CHECK; + + archive_options = hdr.getInt(); + hdrVals += 1; + assert(hdrVals == AH_LENGTH_0); // first three fields only + +#define ORBIT(bit) | (bit) + int OPTION_LIMIT = (0 ARCHIVE_BIT_DO(ORBIT)); +#undef ORBIT + if ((archive_options & ~OPTION_LIMIT) != 0) + { + fprintf(stderr, "Warning: Illegal archive options 0x%x\n", archive_options); + abort("illegal archive options"); + return; + } + + if ((archive_options & AO_HAVE_FILE_HEADERS) != 0) + { + uint hi = hdr.getInt(); + uint lo = hdr.getInt(); + julong x = band::makeLong(hi, lo); + archive_size = (size_t)x; + if (archive_size != x) + { + // Silly size specified; force overflow. + archive_size = PSIZE_MAX + 1; + } + hdrVals += 2; + } + else + { + hdrValsSkipped += 2; + } + + // Now we can size the whole archive. + // Read everything else into a mega-buffer. + rp = hdr.rp; + int header_size_0 = (int)(rp - input.base()); // used-up header (4byte + 3int) + int header_size_1 = (int)(rplimit - rp); // buffered unused initial fragment + int header_size = header_size_0 + header_size_1; + unsized_bytes_read = header_size_0; + CHECK; + if (foreign_buf) + { + if (archive_size > (size_t)header_size_1) + { + abort("EOF reading fixed input buffer"); + return; + } + } + else if (archive_size != 0) + { + if (archive_size < ARCHIVE_SIZE_MIN) + { + abort("impossible archive size"); // bad input data + return; + } + if (archive_size < header_size_1) + { + abort("too much read-ahead"); // somehow we pre-fetched too much? + return; + } + input.set(U_NEW(byte, add_size(header_size_0, archive_size, C_SLOP)), + (size_t)header_size_0 + archive_size); + CHECK; + assert(input.limit()[0] == 0); + // Move all the bytes we read initially into the real buffer. + input.b.copyFrom(initbuf, header_size); + rp = input.b.ptr + header_size_0; + rplimit = input.b.ptr + header_size; + } + else + { + // It's more complicated and painful. + // A zero archive_size means that we must read until EOF. + input.init(CHUNK * 2); + CHECK; + input.b.len = input.allocated; + rp = rplimit = input.base(); + // Set up input buffer as if we already read the header: + input.b.copyFrom(initbuf, header_size); + CHECK; + rplimit += header_size; + while (ensure_input(input.limit() - rp)) + { + size_t dataSoFar = input_remaining(); + size_t nextSize = add_size(dataSoFar, CHUNK); + input.ensureSize(nextSize); + CHECK; + input.b.len = input.allocated; + rp = rplimit = input.base(); + rplimit += dataSoFar; + } + size_t dataSize = (rplimit - input.base()); + input.b.len = dataSize; + input.grow(C_SLOP); + CHECK; + free_input = true; // free it later + input.b.len = dataSize; + assert(input.limit()[0] == 0); + rp = rplimit = input.base(); + rplimit += dataSize; + rp += header_size_0; // already scanned these bytes... + } + live_input = true; // mark as "do not reuse" + if (aborting()) + { + abort("cannot allocate large input buffer for package file"); + return; + } + + // read the rest of the header fields + ensure_input((AH_LENGTH - AH_LENGTH_0) * B_MAX); + CHECK; + hdr.rp = rp; + hdr.rplimit = rplimit; + + if ((archive_options & AO_HAVE_FILE_HEADERS) != 0) + { + archive_next_count = hdr.getInt(); + CHECK_COUNT(archive_next_count); + archive_modtime = hdr.getInt(); + file_count = hdr.getInt(); + CHECK_COUNT(file_count); + hdrVals += 3; + } + else + { + hdrValsSkipped += 3; + } + + if ((archive_options & AO_HAVE_SPECIAL_FORMATS) != 0) + { + band_headers_size = hdr.getInt(); + CHECK_COUNT(band_headers_size); + attr_definition_count = hdr.getInt(); + CHECK_COUNT(attr_definition_count); + hdrVals += 2; + } + else + { + hdrValsSkipped += 2; + } + + int cp_counts[N_TAGS_IN_ORDER]; + for (int k = 0; k < (int)N_TAGS_IN_ORDER; k++) + { + if (!(archive_options & AO_HAVE_CP_NUMBERS)) + { + switch (TAGS_IN_ORDER[k]) + { + case CONSTANT_Integer: + case CONSTANT_Float: + case CONSTANT_Long: + case CONSTANT_Double: + cp_counts[k] = 0; + hdrValsSkipped += 1; + continue; + } + } + cp_counts[k] = hdr.getInt(); + CHECK_COUNT(cp_counts[k]); + hdrVals += 1; + } + + ic_count = hdr.getInt(); + CHECK_COUNT(ic_count); + default_class_minver = hdr.getInt(); + default_class_majver = hdr.getInt(); + class_count = hdr.getInt(); + CHECK_COUNT(class_count); + hdrVals += 4; + + // done with archive_header + hdrVals += hdrValsSkipped; + assert(hdrVals == AH_LENGTH); + + rp = hdr.rp; + if (rp > rplimit) + abort("EOF reading archive header"); + + // Now size the CP. + cp.init(this, cp_counts); + CHECK; + + default_file_modtime = archive_modtime; + if (default_file_modtime == 0 && !(archive_options & AO_HAVE_FILE_MODTIME)) + default_file_modtime = DEFAULT_ARCHIVE_MODTIME; // taken from driver + if ((archive_options & AO_DEFLATE_HINT) != 0) + default_file_options |= FO_DEFLATE_HINT; + + // meta-bytes, if any, immediately follow archive header + // band_headers.readData(band_headers_size); + ensure_input(band_headers_size); + if (input_remaining() < (size_t)band_headers_size) + { + abort("EOF reading band headers"); + return; + } + bytes band_headers; + // The "1+" allows an initial byte to be pushed on the front. + band_headers.set(1 + U_NEW(byte, 1 + band_headers_size + C_SLOP), band_headers_size); + CHECK; + // Start scanning band headers here: + band_headers.copyFrom(rp, band_headers.len); + rp += band_headers.len; + assert(rp <= rplimit); + meta_rp = band_headers.ptr; + // Put evil meta-codes at the end of the band headers, + // so we are sure to throw an error if we run off the end. + bytes::of(band_headers.limit(), C_SLOP).clear(_meta_error); +} + +void unpacker::finish() +{ + if (verbose >= 1) + { + fprintf(stderr, "A total of " LONG_LONG_FORMAT " bytes were read in %d segment(s).\n", + (bytes_read_before_reset + bytes_read), segments_read_before_reset + 1); + fprintf(stderr, "A total of " LONG_LONG_FORMAT " file content bytes were written.\n", + (bytes_written_before_reset + bytes_written)); + fprintf(stderr, + "A total of %d files (of which %d are classes) were written to output.\n", + files_written_before_reset + files_written, + classes_written_before_reset + classes_written); + } + if (jarout != nullptr) + jarout->closeJarFile(true); +} + +// Cf. PackageReader.readConstantPoolCounts +void cpool::init(unpacker *u_, int counts[NUM_COUNTS]) +{ + this->u = u_; + + // Fill-pointer for CP. + int next_entry = 0; + + // Size the constant pool: + for (int k = 0; k < (int)N_TAGS_IN_ORDER; k++) + { + byte tag = TAGS_IN_ORDER[k]; + int len = counts[k]; + tag_count[tag] = len; + tag_base[tag] = next_entry; + next_entry += len; + // Detect and defend against constant pool size overflow. + // (Pack200 forbids the sum of CP counts to exceed 2^29-1.) + enum + { + CP_SIZE_LIMIT = (1 << 29), + IMPLICIT_ENTRY_COUNT = 1 // empty Utf8 string + }; + if (len >= (1 << 29) || len < 0 || next_entry >= CP_SIZE_LIMIT + IMPLICIT_ENTRY_COUNT) + { + abort("archive too large: constant pool limit exceeded"); + return; + } + } + + // Close off the end of the CP: + nentries = next_entry; + + // place a limit on future CP growth: + int generous = 0; + generous = add_size(generous, u->ic_count); // implicit name + generous = add_size(generous, u->ic_count); // outer + generous = add_size(generous, u->ic_count); // outer.utf8 + generous = add_size(generous, 40); // WKUs, misc + generous = add_size(generous, u->class_count); // implicit SourceFile strings + maxentries = add_size(nentries, generous); + + // Note that this CP does not include "empty" entries + // for longs and doubles. Those are introduced when + // the entries are renumbered for classfile output. + + entries = U_NEW(entry, maxentries); + CHECK; + + first_extra_entry = &entries[nentries]; + + // Initialize the standard indexes. + tag_count[CONSTANT_All] = nentries; + tag_base[CONSTANT_All] = 0; + for (int tag = 0; tag < CONSTANT_Limit; tag++) + { + entry *cpMap = &entries[tag_base[tag]]; + tag_index[tag].init(tag_count[tag], cpMap, tag); + } + + // Initialize hashTab to a generous power-of-two size. + uint pow2 = 1; + uint target = maxentries + maxentries / 2; // 60% full + while (pow2 < target) + pow2 <<= 1; + hashTab = U_NEW(entry *, hashTabLength = pow2); +} + +static byte *store_Utf8_char(byte *cp, unsigned short ch) +{ + if (ch >= 0x001 && ch <= 0x007F) + { + *cp++ = (byte)ch; + } + else if (ch <= 0x07FF) + { + *cp++ = (byte)(0xC0 | ((ch >> 6) & 0x1F)); + *cp++ = (byte)(0x80 | ((ch >> 0) & 0x3F)); + } + else + { + *cp++ = (byte)(0xE0 | ((ch >> 12) & 0x0F)); + *cp++ = (byte)(0x80 | ((ch >> 6) & 0x3F)); + *cp++ = (byte)(0x80 | ((ch >> 0) & 0x3F)); + } + return cp; +} + +static byte *skip_Utf8_chars(byte *cp, int len) +{ + for (;; cp++) + { + int ch = *cp & 0xFF; + if ((ch & 0xC0) != 0x80) + { + if (len-- == 0) + return cp; + if (ch < 0x80 && len == 0) + return cp + 1; + } + } +} + +static int compare_Utf8_chars(bytes &b1, bytes &b2) +{ + int l1 = (int)b1.len; + int l2 = (int)b2.len; + int l0 = (l1 < l2) ? l1 : l2; + byte *p1 = b1.ptr; + byte *p2 = b2.ptr; + int c0 = 0; + for (int i = 0; i < l0; i++) + { + int c1 = p1[i] & 0xFF; + int c2 = p2[i] & 0xFF; + if (c1 != c2) + { + // Before returning the obvious answer, + // check to see if c1 or c2 is part of a 0x0000, + // which encodes as {0xC0,0x80}. The 0x0000 is the + // lowest-sorting Java char value, and yet it encodes + // as if it were the first char after 0x7F, which causes + // strings containing nulls to sort too high. All other + // comparisons are consistent between Utf8 and Java chars. + if (c1 == 0xC0 && (p1[i + 1] & 0xFF) == 0x80) + c1 = 0; + if (c2 == 0xC0 && (p2[i + 1] & 0xFF) == 0x80) + c2 = 0; + if (c0 == 0xC0) + { + assert(((c1 | c2) & 0xC0) == 0x80); // c1 & c2 are extension chars + if (c1 == 0x80) + c1 = 0; // will sort below c2 + if (c2 == 0x80) + c2 = 0; // will sort below c1 + } + return c1 - c2; + } + c0 = c1; // save away previous char + } + // common prefix is identical; return length difference if any + return l1 - l2; +} + +// Cf. PackageReader.readUtf8Bands +void unpacker::read_Utf8_values(entry *cpMap, int len) +{ + // Implicit first Utf8 string is the empty string. + enum + { + // certain bands begin with implicit zeroes + PREFIX_SKIP_2 = 2, + SUFFIX_SKIP_1 = 1 + }; + + int i; + + // First band: Read lengths of shared prefixes. + if (len > PREFIX_SKIP_2) + cp_Utf8_prefix.readData(len - PREFIX_SKIP_2); + + // Second band: Read lengths of unshared suffixes: + if (len > SUFFIX_SKIP_1) + cp_Utf8_suffix.readData(len - SUFFIX_SKIP_1); + + bytes *allsuffixes = T_NEW(bytes, len); + CHECK; + + int nbigsuf = 0; + fillbytes charbuf; // buffer to allocate small strings + charbuf.init(); + + // Third band: Read the char values in the unshared suffixes: + cp_Utf8_chars.readData(cp_Utf8_suffix.getIntTotal()); + for (i = 0; i < len; i++) + { + int suffix = (i < SUFFIX_SKIP_1) ? 0 : cp_Utf8_suffix.getInt(); + if (suffix < 0) + { + abort("bad utf8 suffix"); + return; + } + if (suffix == 0 && i >= SUFFIX_SKIP_1) + { + // chars are packed in cp_Utf8_big_chars + nbigsuf += 1; + continue; + } + bytes &chars = allsuffixes[i]; + uint size3 = suffix * 3; // max Utf8 length + bool isMalloc = (suffix > SMALL); + if (isMalloc) + { + chars.malloc(size3); + } + else + { + if (!charbuf.canAppend(size3 + 1)) + { + assert(charbuf.allocated == 0 || tmallocs.contains(charbuf.base())); + charbuf.init(CHUNK); // Reset to new buffer. + tmallocs.add(charbuf.base()); + } + chars.set(charbuf.grow(size3 + 1), size3); + } + CHECK; + byte *chp = chars.ptr; + for (int j = 0; j < suffix; j++) + { + unsigned short ch = cp_Utf8_chars.getInt(); + chp = store_Utf8_char(chp, ch); + } + // shrink to fit: + if (isMalloc) + { + chars.realloc(chp - chars.ptr); + CHECK; + tmallocs.add(chars.ptr); // free it later + } + else + { + int shrink = (int)(chars.limit() - chp); + chars.len -= shrink; + charbuf.b.len -= shrink; // ungrow to reclaim buffer space + // Note that we did not reclaim the final '\0'. + assert(chars.limit() == charbuf.limit() - 1); + assert(strlen((char *)chars.ptr) == chars.len); + } + } + // cp_Utf8_chars.done(); + + // Fourth band: Go back and size the specially packed strings. + int maxlen = 0; + cp_Utf8_big_suffix.readData(nbigsuf); + cp_Utf8_suffix.rewind(); + for (i = 0; i < len; i++) + { + int suffix = (i < SUFFIX_SKIP_1) ? 0 : cp_Utf8_suffix.getInt(); + int prefix = (i < PREFIX_SKIP_2) ? 0 : cp_Utf8_prefix.getInt(); + if (prefix < 0 || prefix + suffix < 0) + { + abort("bad utf8 prefix"); + return; + } + bytes &chars = allsuffixes[i]; + if (suffix == 0 && i >= SUFFIX_SKIP_1) + { + suffix = cp_Utf8_big_suffix.getInt(); + assert(chars.ptr == nullptr); + chars.len = suffix; // just a momentary hack + } + else + { + assert(chars.ptr != nullptr); + } + if (maxlen < prefix + suffix) + { + maxlen = prefix + suffix; + } + } + // cp_Utf8_suffix.done(); // will use allsuffixes[i].len (ptr!=nullptr) + // cp_Utf8_big_suffix.done(); // will use allsuffixes[i].len + + // Fifth band(s): Get the specially packed characters. + cp_Utf8_big_suffix.rewind(); + for (i = 0; i < len; i++) + { + bytes &chars = allsuffixes[i]; + if (chars.ptr != nullptr) + continue; // already input + int suffix = (int)chars.len; // pick up the hack + uint size3 = suffix * 3; + if (suffix == 0) + continue; // done with empty string + chars.malloc(size3); + byte *chp = chars.ptr; + band saved_band = cp_Utf8_big_chars; + cp_Utf8_big_chars.readData(suffix); + for (int j = 0; j < suffix; j++) + { + unsigned short ch = cp_Utf8_big_chars.getInt(); + chp = store_Utf8_char(chp, ch); + } + chars.realloc(chp - chars.ptr); + CHECK; + tmallocs.add(chars.ptr); // free it later + // cp_Utf8_big_chars.done(); + cp_Utf8_big_chars = saved_band; // reset the band for the next string + } + cp_Utf8_big_chars.readData(0); // zero chars + // cp_Utf8_big_chars.done(); + + // Finally, sew together all the prefixes and suffixes. + bytes bigbuf; + bigbuf.malloc(maxlen * 3 + 1); // max Utf8 length, plus slop for nullptr + CHECK; + int prevlen = 0; // previous string length (in chars) + tmallocs.add(bigbuf.ptr); // free after this block + cp_Utf8_prefix.rewind(); + for (i = 0; i < len; i++) + { + bytes &chars = allsuffixes[i]; + int prefix = (i < PREFIX_SKIP_2) ? 0 : cp_Utf8_prefix.getInt(); + int suffix = (int)chars.len; + byte *fillp; + // by induction, the buffer is already filled with the prefix + // make sure the prefix value is not corrupted, though: + if (prefix > prevlen) + { + abort("utf8 prefix overflow"); + return; + } + fillp = skip_Utf8_chars(bigbuf.ptr, prefix); + // copy the suffix into the same buffer: + fillp = chars.writeTo(fillp); + assert(bigbuf.inBounds(fillp)); + *fillp = 0; // bigbuf must contain a well-formed Utf8 string + int length = (int)(fillp - bigbuf.ptr); + bytes &value = cpMap[i].value.b; + value.set(U_NEW(byte, add_size(length, 1)), length); + value.copyFrom(bigbuf.ptr, length); + CHECK; + // Index all Utf8 strings + entry *&htref = cp.hashTabRef(CONSTANT_Utf8, value); + if (htref == nullptr) + { + // Note that if two identical strings are transmitted, + / |
