Skip to content

Commit 1813044

Browse files
committed
Added text.base64Encode and text.base64Decode
1 parent bbdb6b5 commit 1813044

File tree

3 files changed

+260
-1
lines changed

3 files changed

+260
-1
lines changed

LICENSE

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,20 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2121
THE SOFTWARE.
2222

23+
Code in FS2 is derived in part from scodec. The scodec license is as follows:
24+
25+
Copyright (c) 2013-2014, Michael Pilquist and Paul Chiusano
26+
All rights reserved.
27+
28+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
29+
30+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
31+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
32+
3. Neither the name of the scodec team nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
33+
34+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35+
36+
2337
Code in FS2 is derived in part from Cats. The Cats license is as follows:
2438

2539
Cats Copyright (c) 2015 Erik Osheim.
@@ -70,4 +84,4 @@ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
7084
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
7185
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
7286
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
73-
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
87+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

core/shared/src/main/scala/fs2/text.scala

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package fs2
22

3+
import java.nio.CharBuffer
34
import java.nio.charset.Charset
45

56
import scala.annotation.tailrec
67

8+
import scodec.bits.{Bases, ByteVector}
9+
710
/** Provides utilities for working with streams of text (e.g., encoding byte streams to strings). */
811
object text {
912
private val utf8Charset = Charset.forName("UTF-8")
@@ -201,4 +204,219 @@ object text {
201204

202205
s => go(Vector.empty, false, s).stream
203206
}
207+
208+
/**
  * Decodes a stream of base 64 text into a stream of bytes.
  *
  * The pipe fails with an exception when the text is not valid base 64. Padding
  * characters at the end of the input stream are optional but, when present, must
  * be valid per the base 64 specification. Whitespace characters are ignored.
  *
  * This pipe uses the default base 64 alphabet.
  */
def base64Decode[F[_]: RaiseThrowable]: Pipe[F, String, Byte] = {
  // Delegate to the alphabet-taking overload with the standard alphabet.
  val defaultAlphabet = Bases.Alphabets.Base64
  base64Decode(defaultAlphabet)
}
219+
220+
/**
  * Like [[base64Decode]] but takes a base 64 alphabet. For example,
  * `base64Decode(Bases.Alphabets.Base64Url)` will decode URL compatible base 64.
  *
  * Each input string is decoded incrementally: an incomplete 4-character quantum, together
  * with the number of padding characters seen so far, is carried over to the next string,
  * so quanta and padding may be split across stream elements. Malformed input fails the
  * stream with an `IllegalArgumentException`.
  *
  * @param alphabet base 64 alphabet supplying the padding character, the characters to
  *                 ignore and the character-to-index mapping
  */
def base64Decode[F[_]: RaiseThrowable](alphabet: Bases.Base64Alphabet): Pipe[F, String, Byte] = {
  // Adapted from scodec-bits, licensed under 3-clause BSD

  // Decoder state carried from one input string to the next:
  //  - buffer:  bits of the quantum currently being assembled (6 bits per character)
  //  - mod:     number of characters already consumed in the current quantum (0 to 3)
  //  - padding: number of padding characters seen so far (0 to 2)
  final case class State(buffer: Int, mod: Int, padding: Int)
  val Pad = alphabet.pad
  def paddingError =
    Left(
      "Malformed padding - final quantum may optionally be padded with one or two padding characters such that the quantum is completed"
    )

  // Decodes one input string starting from `state`. Returns the bytes of every quantum
  // completed by this string plus the state to carry forward, or an error message.
  def decode(state: State, str: String): Either[String, (State, Chunk[Byte])] = {
    var buffer = state.buffer
    var mod = state.mod
    var padding = state.padding
    var idx = 0
    var bidx = 0
    // At most (carried characters + str.length) / 4 quanta can complete, and at most 3
    // characters are carried, so this is always large enough.
    val acc = new Array[Byte]((str.length + 3) / 4 * 3)
    while (idx < str.length) {
      str(idx) match {
        case c if alphabet.ignore(c) => // ignore
        case c =>
          // A padding character contributes zero bits; its effect is applied by trimming
          // the decoded output once the quantum is complete.
          val cidx = {
            if (padding == 0) {
              if (c == Pad) {
                if (mod == 2 || mod == 3) {
                  padding += 1
                  0
                } else {
                  return paddingError
                }
              } else {
                try alphabet.toIndex(c)
                catch {
                  case _: IllegalArgumentException =>
                    return Left(s"Invalid base 64 character '$c' at index $idx")
                }
              }
            } else {
              if (c == Pad) {
                if (padding == 1 && mod == 3) {
                  padding += 1
                  0
                } else {
                  return paddingError
                }
              } else {
                return Left(
                  s"Unexpected character '$c' at index $idx after padding character; only '=' and whitespace characters allowed after first padding character"
                )
              }
            }
          }
          mod match {
            case 0 =>
              buffer = cidx & 0x3f
              mod += 1
            case 1 | 2 =>
              buffer = (buffer << 6) | (cidx & 0x3f)
              mod += 1
            case 3 =>
              // Fourth character: the quantum is complete, flush its three bytes.
              buffer = (buffer << 6) | (cidx & 0x3f)
              mod = 0
              acc(bidx) = ((buffer >> 16) & 0x0ff).toByte
              acc(bidx + 1) = ((buffer >> 8) & 0x0ff).toByte
              acc(bidx + 2) = (buffer & 0x0ff).toByte
              bidx += 3
          }
      }
      idx += 1
    }
    // Padding only stands in for bytes of the quantum it belongs to. Trim the output only
    // once that quantum has been flushed (mod == 0); while the padded quantum is still
    // incomplete, every byte in `acc` comes from earlier, unpadded quanta and must be kept.
    // The length is clamped at zero for strings that add no bytes after the padding.
    val paddingInBuffer = if (mod == 0) padding else 0
    val outLength = math.max(0, bidx - paddingInBuffer)
    val out = Chunk.byteVector(ByteVector.view(acc).take(outLength.toLong))
    val carry = State(buffer, mod, padding)
    Right((carry, out))
  }

  // Handles end of input: emits the bytes of a trailing unpadded partial quantum, or
  // reports an error if the input stopped in an invalid position.
  def finish(state: State): Either[String, Chunk[Byte]] =
    if (state.padding != 0 && state.mod != 0) paddingError
    else
      state.mod match {
        case 0 => Right(Chunk.empty)
        case 1 => Left("Final base 64 quantum had only 1 digit - must have at least 2 digits")
        case 2 =>
          // 12 bits buffered: the top 8 form one byte.
          Right(Chunk(((state.buffer >> 4) & 0x0ff).toByte))
        case 3 =>
          // 18 bits buffered: the top 16 form two bytes.
          val buffer = state.buffer
          Right(
            Chunk(
              ((buffer >> 10) & 0x0ff).toByte,
              ((buffer >> 2) & 0x0ff).toByte
            )
          )
      }

  def go(state: State, s: Stream[F, String]): Pull[F, Byte, Unit] =
    s.pull.uncons1.flatMap {
      case Some((hd, tl)) =>
        decode(state, hd) match {
          case Right((newState, out)) =>
            Pull.output(out) >> go(newState, tl)
          case Left(err) => Pull.raiseError(new IllegalArgumentException(err))
        }
      case None =>
        finish(state) match {
          case Right(out) => Pull.output(out)
          case Left(err)  => Pull.raiseError(new IllegalArgumentException(err))
        }
    }

  in => go(State(0, 0, 0), in).stream
}
338+
339+
/**
  * Encodes a stream of bytes into a stream of base 64 text,
  * using the default base 64 alphabet.
  */
def base64Encode[F[_]]: Pipe[F, Byte, String] = {
  // Delegate to the alphabet-taking overload with the standard alphabet.
  val defaultAlphabet = Bases.Alphabets.Base64
  base64Encode(defaultAlphabet)
}
344+
345+
/**
  * Like [[base64Encode]] but takes a base 64 alphabet. For example,
  * `base64Encode(Bases.Alphabets.Base64Url)` will encode URL compatible base 64.
  *
  * One string is emitted per input chunk, covering every complete 3-byte group available;
  * the 1 or 2 bytes left over are carried into the next chunk. At end of input the carried
  * bytes, if any, are emitted as a final padded quantum.
  */
def base64Encode[F[_]](alphabet: Bases.Base64Alphabet): Pipe[F, Byte, String] = {
  // Adapted from scodec-bits, licensed under 3-clause BSD

  // Encodes every complete 3-byte group of `c`; returns the text and the unencoded remainder.
  def encode(c: ByteVector): (String, ByteVector) = {
    val input = c.toArray
    val leftover = input.length % 3
    val limit = input.length - leftover
    val text = CharBuffer.allocate(((input.length + 2) / 3) * 4)
    var pos = 0
    while (pos < limit) {
      // Pack three bytes into 24 bits, then emit them as four 6-bit digits.
      val quantum =
        ((input(pos) & 0x0ff) << 16) | ((input(pos + 1) & 0x0ff) << 8) | (input(pos + 2) & 0x0ff)
      text
        .append(alphabet.toChar(quantum >> 18))
        .append(alphabet.toChar((quantum >> 12) & 0x3f))
        .append(alphabet.toChar((quantum >> 6) & 0x3f))
        .append(alphabet.toChar(quantum & 0x3f))
      pos += 3
    }
    val encoded = text.flip().toString
    val remainder = leftover match {
      case 0 => ByteVector.empty
      case 1 => ByteVector(input(pos))
      case _ => ByteVector(input(pos), input(pos + 1))
    }
    (encoded, remainder)
  }

  def go(carry: ByteVector, s: Stream[F, Byte]): Pull[F, String, Unit] =
    s.pull.uncons.flatMap {
      case Some((hd, tl)) =>
        val (out, newCarry) = encode(carry ++ hd.toByteVector)
        Pull.output1(out) >> go(newCarry, tl)
      case None =>
        carry.size match {
          case 0 => Pull.done
          case 1 =>
            // 8 bits, left-aligned into two digits, followed by two padding characters.
            val bits = (carry(0) & 0x0ff) << 4
            val digits =
              Array(alphabet.toChar(bits >> 6), alphabet.toChar(bits & 0x3f), alphabet.pad, alphabet.pad)
            Pull.output1(new String(digits))
          case 2 =>
            // 16 bits, left-aligned into three digits, followed by one padding character.
            val bits = ((carry(0) & 0x0ff) << 10) | ((carry(1) & 0x0ff) << 2)
            val digits = Array(
              alphabet.toChar(bits >> 12),
              alphabet.toChar((bits >> 6) & 0x3f),
              alphabet.toChar(bits & 0x3f),
              alphabet.pad
            )
            Pull.output1(new String(digits))
          case _ => sys.error("carry must be size 0, 1, or 2")
        }
    }

  in => go(ByteVector.empty, in).stream
}
204422
}

core/shared/src/test/scala/fs2/TextSpec.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package fs2
22

33
import org.scalatest.{Assertion, Succeeded}
4+
5+
import cats.implicits._
6+
import scodec.bits.ByteVector
7+
48
import fs2.text._
59

610
class TextSpec extends Fs2Spec {
@@ -230,5 +234,28 @@ class TextSpec extends Fs2Spec {
230234
}
231235
}
232236
}
237+
238+
"base64Encode" in {
  // Streaming encoder output must match scodec-bits' one-shot encoding of the same bytes.
  forAll { (bs: List[Array[Byte]]) =>
    val expected =
      bs.foldLeft(ByteVector.empty)((acc, arr) => acc ++ ByteVector.view(arr)).toBase64
    val encoded =
      bs.map(Chunk.bytes).foldMap(Stream.chunk).through(text.base64Encode).compile.string
    encoded shouldBe expected
  }
}
244+
245+
"base64Encode andThen base64Decode" in {
  // Round trip: decoding the encoder's output yields the original bytes, however the
  // encoded strings are chunked.
  forAll { (bs: List[Array[Byte]], unchunked: Boolean, rechunkSeed: Long) =>
    val original = bs.foldLeft(ByteVector.empty)((acc, arr) => acc ++ ByteVector.view(arr))
    val decoded = bs
      .map(Chunk.bytes)
      .foldMap(Stream.chunk)
      .through(text.base64Encode)
      .through {
        if (unchunked) _.unchunk
        else _.rechunkRandomlyWithSeed(0.1, 2.0)(rechunkSeed)
      }
      .through(text.base64Decode[Fallible])
      .compile
      .to(ByteVector)
    decoded shouldBe Right(original)
  }
}
233260
}
234261
}

0 commit comments

Comments
 (0)