2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package com.threerings.getdown.util;
19 import static java.nio.charset.StandardCharsets.US_ASCII;
22 * Utilities for encoding and decoding the Base64 representation of
23 * binary data. See RFCs <a
24 * href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
25 * href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
/**
 * Default values for encoder/decoder flags.
 *
 * <p>The flag constants below are distinct bits; the encoder and decoder
 * test them individually with {@code &}, so they can be combined with
 * {@code |}.
 */
public static final int DEFAULT = 0;

/**
 * Encoder flag bit to omit the padding '=' characters at the end
 * of the output (if any).
 */
public static final int NO_PADDING = 1;

/**
 * Encoder flag bit to omit all line terminators (i.e., the output
 * will be on one long line).
 */
public static final int NO_WRAP = 2;

/**
 * Encoder flag bit to indicate lines should be terminated with a
 * CRLF pair instead of just an LF.  Has no effect if {@code
 * NO_WRAP} is specified as well.
 */
public static final int CRLF = 4;

/**
 * Encoder/decoder flag bit to indicate using the "URL and
 * filename safe" variant of Base64 (see RFC 3548 section 4) where
 * {@code -} and {@code _} are used in place of {@code +} and
 * {@code /}.
 */
public static final int URL_SAFE = 8;

/**
 * Flag to pass to {@code Base64OutputStream} to indicate that it
 * should not close the output stream it is wrapping when it
 * itself is closed.  Neither {@code Encoder} nor {@code Decoder}
 * consults this bit.
 */
public static final int NO_CLOSE = 16;
67 // --------------------------------------------------------
69 // --------------------------------------------------------
/**
 * Common base of {@code Encoder} and {@code Decoder}: holds the output
 * buffer and the number of bytes most recently written to it.
 */
/* package */ static abstract class Coder {
    /** Destination buffer; supplied by the caller. */
    public byte[] output;
    /** Length of the coded data in {@code output} after a call to process(). */
    public int op;

    /**
     * Encode/decode another block of input data.  this.output is
     * provided by the caller, and must be big enough to hold all
     * the coded data.  On exit, this.op will be set to the length
     * of the coded data.
     *
     * @param input the array holding the data to process
     * @param offset the position within input at which to start
     * @param len the number of bytes of input to process
     * @param finish true if this is the final call to process for
     *        this object.  Will finalize the coder state and
     *        include any final bytes in the output.
     *
     * @return true if the input so far is good; false if some
     *         error has been detected in the input stream.
     */
    public abstract boolean process(byte[] input, int offset, int len, boolean finish);

    /**
     * @return the maximum number of bytes a call to process()
     * could produce for the given number of input bytes.  This may
     * be an overestimate.
     */
    public abstract int maxOutputSize(int len);
}
98 // --------------------------------------------------------
100 // --------------------------------------------------------
103 * Decode the Base64-encoded data in input and return the data in
106 * <p>The padding '=' characters at the end are considered optional, but
107 * if any are present, there must be the correct number of them.
109 * @param str the input String to decode, which is converted to
111 * @param flags controls certain features of the decoded output.
112 * Pass {@code DEFAULT} to decode standard Base64.
114 * @throws IllegalArgumentException if the input contains
117 public static byte[] decode(String str, int flags) {
118 return decode(str.getBytes(US_ASCII), flags);
122 * Decode the Base64-encoded data in input and return the data in
125 * <p>The padding '=' characters at the end are considered optional, but
126 * if any are present, there must be the correct number of them.
128 * @param input the input array to decode
129 * @param flags controls certain features of the decoded output.
130 * Pass {@code DEFAULT} to decode standard Base64.
132 * @throws IllegalArgumentException if the input contains
135 public static byte[] decode(byte[] input, int flags) {
136 return decode(input, 0, input.length, flags);
140 * Decode the Base64-encoded data in input and return the data in
143 * <p>The padding '=' characters at the end are considered optional, but
144 * if any are present, there must be the correct number of them.
146 * @param input the data to decode
147 * @param offset the position within the input array at which to start
148 * @param len the number of bytes of input to decode
149 * @param flags controls certain features of the decoded output.
150 * Pass {@code DEFAULT} to decode standard Base64.
152 * @throws IllegalArgumentException if the input contains
155 public static byte[] decode(byte[] input, int offset, int len, int flags) {
156 // Allocate space for the most data the input could represent.
157 // (It could contain less if it contains whitespace, etc.)
158 Decoder decoder = new Decoder(flags, new byte[len*3/4]);
160 if (!decoder.process(input, offset, len, true)) {
161 throw new IllegalArgumentException("bad base-64");
164 // Maybe we got lucky and allocated exactly enough output space.
165 if (decoder.op == decoder.output.length) {
166 return decoder.output;
169 // Need to shorten the array, so allocate a new one of the
170 // right size and copy.
171 byte[] temp = new byte[decoder.op];
172 System.arraycopy(decoder.output, 0, temp, 0, decoder.op);
/**
 * Incremental Base64 decoder.  Input may be fed through any number of
 * process() calls; the partially assembled tuple and the state number are
 * carried between calls in {@code value} and {@code state}.
 */
/* package */ static class Decoder extends Coder {
    /**
     * Lookup table for turning bytes into their position in the
     * Base64 alphabet.  Indexed by the unsigned byte value; entries are
     * 0-63 for alphabet characters, EQUALS (-2) for '=', and SKIP (-1)
     * for every other byte.
     */
    private static final int DECODE[] = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    };

    /**
     * Decode lookup table for the "web safe" variant (RFC 3548
     * sec. 4) where - and _ replace + and /.
     */
    private static final int DECODE_WEBSAFE[] = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    };

    /** Non-data values in the DECODE arrays. */
    private static final int SKIP = -1;
    private static final int EQUALS = -2;

    /**
     * States 0-3 are reading through the next input tuple.
     * State 4 is having read one '=' and expecting exactly
     * one more.
     * State 5 is expecting no more data or padding characters
     * in the input.
     * State 6 is the error state; an error has been detected
     * in the input and no future input can "fix" it.
     */
    private int state;   // state number (0 to 6)
    /** Bits accumulated so far for the tuple currently being read. */
    private int value;

    /** The lookup table in use: DECODE or DECODE_WEBSAFE, chosen by the flags. */
    final private int[] alphabet;

    /**
     * @param flags  only the {@code URL_SAFE} bit is examined; it selects
     *               the web-safe lookup table
     * @param output the buffer decoded bytes are written into
     */
    public Decoder(int flags, byte[] output) {
        this.output = output;

        alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
        state = 0;
        value = 0;
    }

    /**
     * @return an overestimate for the number of bytes {@code
     * len} bytes could decode to.
     */
    public int maxOutputSize(int len) {
        return len * 3/4 + 10;
    }

    /**
     * Decode another block of input data.  Output is written starting at
     * index 0 of this.output on every call, and this.op is set to the
     * number of bytes written by this call.
     *
     * @param input  the array holding the encoded bytes
     * @param offset index of the first byte to read
     * @param len    number of bytes to read
     * @param finish true if no further input will follow; the final
     *               partial tuple is then emitted or rejected
     *
     * @return true if the state machine is still healthy.  false if
     *         bad base-64 data has been detected in the input stream.
     */
    public boolean process(byte[] input, int offset, int len, boolean finish) {
        // Once in the error state, nothing can recover the stream.
        if (this.state == 6) return false;

        int p = offset;
        // From here on len is the exclusive end index, not a count.
        len += offset;

        // Using local variables makes the decoder about 12%
        // faster than if we manipulate the member variables in
        // the loop.  (Even alphabet makes a measurable
        // difference, which is somewhat surprising to me since
        // the member variable is final.)
        int state = this.state;
        int value = this.value;
        int op = 0;
        final byte[] output = this.output;
        final int[] alphabet = this.alphabet;

        while (p < len) {
            // Try the fast path:  we're starting a new tuple and the
            // next four bytes of the input stream are all data
            // bytes.  This corresponds to going through states
            // 0-1-2-3-0.  We expect to use this method for most of
            // the input.
            //
            // If any of the next four bytes of input are non-data
            // (whitespace, etc.), value will end up negative.  (All
            // the non-data values in decode are small negative
            // numbers, so shifting any of them up and or'ing them
            // together will result in a value with its top bit set.)
            //
            // You can remove this whole block and the output should
            // be the same, just slower.
            if (state == 0) {
                while (p+4 <= len &&
                       (value = ((alphabet[input[p] & 0xff] << 18) |
                                 (alphabet[input[p+1] & 0xff] << 12) |
                                 (alphabet[input[p+2] & 0xff] << 6) |
                                 (alphabet[input[p+3] & 0xff]))) >= 0) {
                    output[op+2] = (byte) value;
                    output[op+1] = (byte) (value >> 8);
                    output[op] = (byte) (value >> 16);
                    op += 3;
                    p += 4;
                }
                if (p >= len) break;
            }

            // The fast path isn't available -- either we've read a
            // partial tuple, or the next four input bytes aren't all
            // data, or whatever.  Fall back to the slower state
            // machine implementation.

            int d = alphabet[input[p++] & 0xff];

            switch (state) {
            case 0:
                if (d >= 0) {
                    value = d;
                    ++state;
                } else if (d != SKIP) {
                    // '=' before any data character of the tuple.
                    this.state = 6;
                    return false;
                }
                break;

            case 1:
                if (d >= 0) {
                    value = (value << 6) | d;
                    ++state;
                } else if (d != SKIP) {
                    // '=' after only one data character.
                    this.state = 6;
                    return false;
                }
                break;

            case 2:
                if (d >= 0) {
                    value = (value << 6) | d;
                    ++state;
                } else if (d == EQUALS) {
                    // Emit the last (partial) output tuple;
                    // expect exactly one more padding character.
                    output[op++] = (byte) (value >> 4);
                    state = 4;
                } else if (d != SKIP) {
                    this.state = 6;
                    return false;
                }
                break;

            case 3:
                if (d >= 0) {
                    // Emit the output triple and return to state 0.
                    value = (value << 6) | d;
                    output[op+2] = (byte) value;
                    output[op+1] = (byte) (value >> 8);
                    output[op] = (byte) (value >> 16);
                    op += 3;
                    state = 0;
                } else if (d == EQUALS) {
                    // Emit the last (partial) output tuple;
                    // expect no further data or padding characters.
                    output[op+1] = (byte) (value >> 2);
                    output[op] = (byte) (value >> 10);
                    op += 2;
                    state = 5;
                } else if (d != SKIP) {
                    this.state = 6;
                    return false;
                }
                break;

            case 4:
                if (d == EQUALS) {
                    ++state;
                } else if (d != SKIP) {
                    // A data character where the second '=' was required.
                    this.state = 6;
                    return false;
                }
                break;

            case 5:
                if (d != SKIP) {
                    // Anything but skippable bytes after complete padding.
                    this.state = 6;
                    return false;
                }
                break;
            }
        }

        if (!finish) {
            // We're out of input, but a future call could provide
            // more.
            this.state = state;
            this.value = value;
            this.op = op;
            return true;
        }

        // Done reading input.  Now figure out where we are left in
        // the state machine and finish up.

        switch (state) {
        case 0:
            // Output length is a multiple of three.  Fine.
            break;
        case 1:
            // Read one extra input byte, which isn't enough to
            // make another output byte.  Illegal.
            this.state = 6;
            return false;
        case 2:
            // Read two extra input bytes, enough to emit 1 more
            // output byte.  Fine.
            output[op++] = (byte) (value >> 4);
            break;
        case 3:
            // Read three extra input bytes, enough to emit 2 more
            // output bytes.  Fine.
            output[op++] = (byte) (value >> 10);
            output[op++] = (byte) (value >> 2);
            break;
        case 4:
            // Read one padding '=' when we expected 2.  Illegal.
            this.state = 6;
            return false;
        case 5:
            // Read all the padding '='s we expected and no more.
            // Fine.
            break;
        }

        this.state = state;
        this.op = op;
        return true;
    }
}
441 // --------------------------------------------------------
443 // --------------------------------------------------------
446 * Base64-encode the given data and return a newly allocated
447 * String with the result.
449 * @param input the data to encode
450 * @param flags controls certain features of the encoded output.
451 * Passing {@code DEFAULT} results in output that
452 * adheres to RFC 2045.
454 public static String encodeToString(byte[] input, int flags) {
455 return new String(encode(input, flags), US_ASCII);
459 * Base64-encode the given data and return a newly allocated
460 * String with the result.
462 * @param input the data to encode
463 * @param offset the position within the input array at which to
465 * @param len the number of bytes of input to encode
466 * @param flags controls certain features of the encoded output.
467 * Passing {@code DEFAULT} results in output that
468 * adheres to RFC 2045.
470 public static String encodeToString(byte[] input, int offset, int len, int flags) {
471 return new String(encode(input, offset, len, flags), US_ASCII);
475 * Base64-encode the given data and return a newly allocated
476 * byte[] with the result.
478 * @param input the data to encode
479 * @param flags controls certain features of the encoded output.
480 * Passing {@code DEFAULT} results in output that
481 * adheres to RFC 2045.
483 public static byte[] encode(byte[] input, int flags) {
484 return encode(input, 0, input.length, flags);
488 * Base64-encode the given data and return a newly allocated
489 * byte[] with the result.
491 * @param input the data to encode
492 * @param offset the position within the input array at which to
494 * @param len the number of bytes of input to encode
495 * @param flags controls certain features of the encoded output.
496 * Passing {@code DEFAULT} results in output that
497 * adheres to RFC 2045.
499 public static byte[] encode(byte[] input, int offset, int len, int flags) {
500 Encoder encoder = new Encoder(flags, null);
502 // Compute the exact length of the array we will produce.
503 int output_len = len / 3 * 4;
505 // Account for the tail of the data and the padding bytes, if any.
506 if (encoder.do_padding) {
513 case 1: output_len += 2; break;
514 case 2: output_len += 3; break;
518 // Account for the newlines, if any.
519 if (encoder.do_newline && len > 0) {
520 output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) *
521 (encoder.do_cr ? 2 : 1);
524 encoder.output = new byte[output_len];
525 encoder.process(input, offset, len, true);
527 assert encoder.op == output_len;
529 return encoder.output;
/**
 * Incremental Base64 encoder.  Input may be fed through any number of
 * process() calls; up to two bytes that do not yet form a complete
 * 3-byte group are carried between calls in {@code tail}.
 */
/* package */ static class Encoder extends Coder {
    /**
     * Emit a new line every this many output tuples.  Corresponds to
     * a 76-character line length (the maximum allowable according to
     * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
     */
    public static final int LINE_GROUPS = 19;

    /**
     * Lookup table for turning Base64 alphabet positions (6 bits)
     * into output bytes.
     */
    private static final byte ENCODE[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
    };

    /**
     * Lookup table for turning Base64 alphabet positions (6 bits)
     * into output bytes, for the variant that ends in '-' and '_'
     * instead of '+' and '/'.
     */
    private static final byte ENCODE_WEBSAFE[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
    };

    /** Input bytes left over from the previous process() call (at most 2). */
    final private byte[] tail;
    /** Number of valid bytes in {@code tail}. */
    /* package */ int tailLen;
    /** Output tuples remaining before the next line break; -1 when wrapping is off. */
    private int count;

    /** True unless {@code NO_PADDING} was given. */
    final public boolean do_padding;
    /** True unless {@code NO_WRAP} was given. */
    final public boolean do_newline;
    /** True if {@code CRLF} was given: line breaks are "\r\n" rather than "\n". */
    final public boolean do_cr;
    /** The table in use: ENCODE or ENCODE_WEBSAFE, chosen by {@code URL_SAFE}. */
    final private byte[] alphabet;

    /**
     * @param flags  the NO_PADDING, NO_WRAP, CRLF and URL_SAFE bits are
     *               examined
     * @param output the buffer encoded bytes are written into; may be
     *               assigned later through the {@code output} field
     */
    public Encoder(int flags, byte[] output) {
        this.output = output;

        do_padding = (flags & NO_PADDING) == 0;
        do_newline = (flags & NO_WRAP) == 0;
        do_cr = (flags & CRLF) != 0;
        alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;

        tail = new byte[2];
        tailLen = 0;

        count = do_newline ? LINE_GROUPS : -1;
    }

    /**
     * @return an overestimate for the number of bytes {@code
     * len} bytes could encode to.
     */
    public int maxOutputSize(int len) {
        return len * 8/5 + 10;
    }

    /**
     * Encode another block of input data.  Output is written starting at
     * index 0 of this.output on every call, and this.op is set to the
     * number of bytes written by this call.
     *
     * @param input  the array holding the raw bytes
     * @param offset index of the first byte to read
     * @param len    number of bytes to read
     * @param finish true if no further input will follow; the trailing
     *               partial group, padding and final line break are
     *               then emitted
     *
     * @return always true
     */
    public boolean process(byte[] input, int offset, int len, boolean finish) {
        // Using local variables makes the encoder about 9% faster.
        final byte[] alphabet = this.alphabet;
        final byte[] output = this.output;
        int op = 0;
        int count = this.count;

        int p = offset;
        // From here on len is the exclusive end index, not a count.
        len += offset;
        // -1 means "no 24-bit group assembled from the tail".
        int v = -1;

        // First we need to concatenate the tail of the previous call
        // with any input bytes available now and see if we can empty
        // the tail.

        switch (tailLen) {
            case 0:
                // There was no tail.
                break;

            case 1:
                if (p+2 <= len) {
                    // A 1-byte tail with at least 2 bytes of
                    // input available now.
                    v = ((tail[0] & 0xff) << 16) |
                        ((input[p++] & 0xff) << 8) |
                        (input[p++] & 0xff);
                    tailLen = 0;
                }
                break;

            case 2:
                if (p+1 <= len) {
                    // A 2-byte tail with at least 1 byte of input.
                    v = ((tail[0] & 0xff) << 16) |
                        ((tail[1] & 0xff) << 8) |
                        (input[p++] & 0xff);
                    tailLen = 0;
                }
                break;
        }

        if (v != -1) {
            output[op++] = alphabet[(v >> 18) & 0x3f];
            output[op++] = alphabet[(v >> 12) & 0x3f];
            output[op++] = alphabet[(v >> 6) & 0x3f];
            output[op++] = alphabet[v & 0x3f];
            // With wrapping off count starts at -1 and only decreases,
            // so this test never fires.
            if (--count == 0) {
                if (do_cr) output[op++] = '\r';
                output[op++] = '\n';
                count = LINE_GROUPS;
            }
        }

        // At this point either there is no tail, or there are fewer
        // than 3 bytes of input available.

        // The main loop, turning 3 input bytes into 4 output bytes on
        // each iteration.
        while (p+3 <= len) {
            v = ((input[p] & 0xff) << 16) |
                ((input[p+1] & 0xff) << 8) |
                (input[p+2] & 0xff);
            output[op] = alphabet[(v >> 18) & 0x3f];
            output[op+1] = alphabet[(v >> 12) & 0x3f];
            output[op+2] = alphabet[(v >> 6) & 0x3f];
            output[op+3] = alphabet[v & 0x3f];
            p += 3;
            op += 4;
            if (--count == 0) {
                if (do_cr) output[op++] = '\r';
                output[op++] = '\n';
                count = LINE_GROUPS;
            }
        }

        if (finish) {
            // Finish up the tail of the input.  Note that we need to
            // consume any bytes in tail before any bytes
            // remaining in input; there should be at most two bytes
            // total.

            if (p-tailLen == len-1) {
                // Exactly one byte left, in tail or in input.
                int t = 0;
                v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4;
                tailLen -= t;
                output[op++] = alphabet[(v >> 6) & 0x3f];
                output[op++] = alphabet[v & 0x3f];
                if (do_padding) {
                    output[op++] = '=';
                    output[op++] = '=';
                }
                if (do_newline) {
                    if (do_cr) output[op++] = '\r';
                    output[op++] = '\n';
                }
            } else if (p-tailLen == len-2) {
                // Exactly two bytes left, split between tail and input.
                int t = 0;
                v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) |
                    (((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2);
                tailLen -= t;
                output[op++] = alphabet[(v >> 12) & 0x3f];
                output[op++] = alphabet[(v >> 6) & 0x3f];
                output[op++] = alphabet[v & 0x3f];
                if (do_padding) {
                    output[op++] = '=';
                }
                if (do_newline) {
                    if (do_cr) output[op++] = '\r';
                    output[op++] = '\n';
                }
            } else if (do_newline && op > 0 && count != LINE_GROUPS) {
                // Nothing left over, but the last line is unterminated.
                if (do_cr) output[op++] = '\r';
                output[op++] = '\n';
            }

            assert tailLen == 0;
            assert p == len;
        } else {
            // Save the leftovers in tail to be consumed on the next
            // call to encodeInternal.

            if (p == len-1) {
                tail[tailLen++] = input[p];
            } else if (p == len-2) {
                tail[tailLen++] = input[p];
                tail[tailLen++] = input[p+1];
            }
        }

        this.op = op;
        this.count = count;

        return true;
    }
}
/** Private so that this class cannot be instantiated from outside. */
private Base64() { }   // don't instantiate