Skip to content

Commit

Permalink
Group reader/writer into format adapters
Browse files Browse the repository at this point in the history
  • Loading branch information
sagebind committed Sep 19, 2018
1 parent 0787de2 commit 2e2de6a
Show file tree
Hide file tree
Showing 11 changed files with 236 additions and 120 deletions.
58 changes: 58 additions & 0 deletions src/main/java/com/widen/tabitha/formats/FormatAdapter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package com.widen.tabitha.formats;

import com.widen.tabitha.reader.ReaderOptions;
import com.widen.tabitha.reader.RowReader;
import com.widen.tabitha.writer.RowWriter;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Provides factory methods for creating readers and writers of a particular format.
 * <p>
 * Implementations must supply the stream-based factories. The path-based factories default to opening the file and
 * delegating to the stream-based ones, and may be overridden by formats that can work with a file directly.
 */
public interface FormatAdapter {
    /**
     * Create a row reader for a file at the given path.
     * <p>
     * The default implementation opens the file with {@code Files.newInputStream(path)} and delegates to
     * {@link #createReader(InputStream, ReaderOptions)}. If that call fails, the stream opened here is closed before
     * the failure is propagated, since the caller never receives anything it could close.
     *
     * @param path The path of the file to read.
     * @param options Options to pass to the reader.
     * @return A new row reader.
     * @throws IOException if an I/O error occurs.
     */
    default RowReader createReader(Path path, ReaderOptions options) throws IOException {
        InputStream inputStream = Files.newInputStream(path);
        try {
            return createReader(inputStream, options);
        } catch (IOException | RuntimeException | Error e) {
            // No reader was handed out, so nobody else can release the file handle.
            try {
                inputStream.close();
            } catch (IOException | RuntimeException closeFailure) {
                // Keep the original failure as the primary error.
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Create a row reader for an input stream.
     *
     * @param inputStream The input stream to read.
     * @param options Options to pass to the reader.
     * @return A new row reader.
     * @throws IOException if an I/O error occurs.
     */
    RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException;

    /**
     * Create a row writer that writes to the given path.
     * <p>
     * The default implementation opens the file with {@code Files.newOutputStream(path)} and delegates to
     * {@link #createWriter(OutputStream)}. If that call fails, the stream opened here is closed before the failure is
     * propagated, since the caller never receives anything it could close.
     *
     * @param path The path to write to.
     * @return A new row writer.
     * @throws IOException if an I/O error occurs.
     */
    default RowWriter createWriter(Path path) throws IOException {
        OutputStream outputStream = Files.newOutputStream(path);
        try {
            return createWriter(outputStream);
        } catch (IOException | RuntimeException | Error e) {
            // No writer was handed out, so nobody else can release the file handle.
            try {
                outputStream.close();
            } catch (IOException | RuntimeException closeFailure) {
                // Keep the original failure as the primary error.
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Create a row writer that writes to the given output stream.
     *
     * @param outputStream The output stream to write to.
     * @return A new row writer.
     * @throws IOException if an I/O error occurs.
     */
    RowWriter createWriter(OutputStream outputStream) throws IOException;
}
110 changes: 110 additions & 0 deletions src/main/java/com/widen/tabitha/formats/FormatRegistry.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package com.widen.tabitha.formats;

import com.widen.tabitha.formats.delimited.DelimitedFormat;
import com.widen.tabitha.formats.delimited.DelimitedRowReader;
import com.widen.tabitha.formats.delimited.DelimitedRowWriter;
import com.widen.tabitha.formats.excel.WorkbookRowWriter;
import com.widen.tabitha.formats.excel.XLSRowReader;
import com.widen.tabitha.formats.excel.XLSXRowReader;
import com.widen.tabitha.reader.InlineHeaderReader;
import com.widen.tabitha.reader.ReaderOptions;
import com.widen.tabitha.reader.RowReader;
import com.widen.tabitha.writer.RowWriter;
import io.reactivex.Maybe;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

/**
* Manages the adapters for the file formats supported by Tabitha.
* <p>
* You probably want to use {@link com.widen.tabitha.reader.RowReaders} or {@link com.widen.tabitha.writer.RowWriter}
* instead.
*/
public class FormatRegistry {
/**
* Get a format factory for handling the given MIME type.
*
* @param mimeType The format MIME type.
* @return A format adapter, if one could be found.
*/
public static Maybe<FormatAdapter> forMimeType(String mimeType) {
switch (mimeType) {
case "text/csv":
case "text/plain":
return Maybe.just(new FormatAdapter() {
@Override
public RowReader createReader(InputStream inputStream, ReaderOptions options) {
return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options);
}

@Override
public RowWriter createWriter(OutputStream outputStream) {
return new DelimitedRowWriter(outputStream, DelimitedFormat.CSV);
}
});

case "text/tab-separated-values":
return Maybe.just(new FormatAdapter() {
@Override
public RowReader createReader(InputStream inputStream, ReaderOptions options) {
return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options);
}

@Override
public RowWriter createWriter(OutputStream outputStream) {
return new DelimitedRowWriter(outputStream, DelimitedFormat.TSV);
}
});

case "application/vnd.ms-excel":
return Maybe.just(new FormatAdapter() {
@Override
public RowReader createReader(Path path, ReaderOptions options) throws IOException {
return decorateReader(XLSRowReader.open(path, options), options);
}

@Override
public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
return decorateReader(XLSRowReader.open(inputStream, options), options);
}

@Override
public RowWriter createWriter(OutputStream outputStream) {
return WorkbookRowWriter.xls(outputStream);
}
});

case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
case "application/x-tika-ooxml":
return Maybe.just(new FormatAdapter() {
@Override
public RowReader createReader(Path path, ReaderOptions options) throws IOException {
return decorateReader(XLSXRowReader.open(path, options), options);
}

@Override
public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
return decorateReader(XLSXRowReader.open(inputStream, options), options);
}

@Override
public RowWriter createWriter(OutputStream outputStream) {
return WorkbookRowWriter.xlsx(outputStream);
}
});

default:
return Maybe.empty();
}
}

private static RowReader decorateReader(RowReader reader, ReaderOptions options) {
if (options.isInlineHeaders()) {
reader = new InlineHeaderReader(reader);
}
return reader;
}
}
12 changes: 10 additions & 2 deletions src/main/java/com/widen/tabitha/reader/Header.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package com.widen.tabitha.reader;

import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;

/**
* Defines an ordered list of named columns.
Expand Down Expand Up @@ -182,5 +188,7 @@ public DuplicateColumnException(String column) {
}

@Override
public String toString() { return columnsByIndex.toString(); }
public String toString() {
return columnsByIndex.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
/**
* Decorates another reader, interpreting the first row of each page of data as the header for subsequent rows.
*/
class InlineHeaderReader implements RowReader {
public class InlineHeaderReader implements RowReader {
private final RowReader inner;
private Header currentHeader;
private long currentPage = -1;

InlineHeaderReader(RowReader inner) {
public InlineHeaderReader(RowReader inner) {
this.inner = inner;
}

Expand Down
89 changes: 18 additions & 71 deletions src/main/java/com/widen/tabitha/reader/RowReaders.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
package com.widen.tabitha.reader;

import com.widen.tabitha.formats.delimited.DelimitedFormat;
import com.widen.tabitha.formats.delimited.DelimitedRowReader;
import com.widen.tabitha.formats.excel.XLSRowReader;
import com.widen.tabitha.formats.excel.XLSXRowReader;
import com.widen.tabitha.formats.FormatRegistry;
import io.reactivex.Maybe;
import org.apache.tika.Tika;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;

/**
* Helper factory methods for creating row readers.
Expand All @@ -24,7 +19,7 @@ public class RowReaders {
* @param path The file path of the file to open.
* @return A row reader if the file is in a supported format.
*/
public static Optional<RowReader> open(String path) throws Exception {
public static Maybe<RowReader> open(String path) {
return open(Paths.get(path), null);
}

Expand All @@ -34,7 +29,7 @@ public static Optional<RowReader> open(String path) throws Exception {
* @param path The file path of the file to open.
* @return A row reader if the file is in a supported format.
*/
public static Optional<RowReader> open(Path path) throws Exception {
public static Maybe<RowReader> open(Path path) {
return open(path, null);
}

Expand All @@ -45,30 +40,11 @@ public static Optional<RowReader> open(Path path) throws Exception {
* @param options Options to pass to the reader.
* @return A row reader if the file is in a supported format.
*/
public static Optional<RowReader> open(Path path, ReaderOptions options) throws Exception {
if (options == null) {
options = new ReaderOptions();
}

String mimeType = tika.detect(path);

switch (mimeType) {
case "text/csv":
case "text/plain":
return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.CSV), options));

case "text/tab-separated-values":
return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.TSV), options));

case "application/vnd.ms-excel":
return Optional.of(decorate(XLSRowReader.open(path, options), options));

case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
case "application/x-tika-ooxml":
return Optional.of(decorate(XLSXRowReader.open(path, options), options));
}

return Optional.empty();
public static Maybe<RowReader> open(Path path, ReaderOptions options) {
    // Step 1: detect the MIME type of the file; the detection call is wrapped in a callable.
    Maybe<String> detectedType = Maybe.fromCallable(() -> tika.detect(path));

    // Step 2: look up the adapter registered for that type; an unknown type yields an empty Maybe.
    // Step 3: ask the adapter for a reader, substituting default options when none were supplied.
    return detectedType
        .flatMap(mimeType -> FormatRegistry.forMimeType(mimeType))
        .map(adapter -> {
            ReaderOptions effectiveOptions = (options == null) ? new ReaderOptions() : options;
            return adapter.createReader(path, effectiveOptions);
        });
}

/**
Expand All @@ -77,7 +53,7 @@ public static Optional<RowReader> open(Path path, ReaderOptions options) throws
* @param inputStream The input stream to read.
* @return A row reader if the stream is in a supported format.
*/
public static Optional<RowReader> open(InputStream inputStream) throws IOException {
public static Maybe<RowReader> open(InputStream inputStream) {
return open(inputStream, null, null);
}

Expand All @@ -88,7 +64,7 @@ public static Optional<RowReader> open(InputStream inputStream) throws IOExcepti
* @param filename The filename associated with the stream, if known.
* @return A row reader if the stream is in a supported format.
*/
public static Optional<RowReader> open(InputStream inputStream, String filename) throws IOException {
public static Maybe<RowReader> open(InputStream inputStream, String filename) {
return open(inputStream, filename, null);
}

Expand All @@ -99,7 +75,7 @@ public static Optional<RowReader> open(InputStream inputStream, String filename)
* @param options Options to pass to the reader.
* @return A row reader if the stream is in a supported format.
*/
public static Optional<RowReader> open(InputStream inputStream, ReaderOptions options) throws IOException {
public static Maybe<RowReader> open(InputStream inputStream, ReaderOptions options) {
return open(inputStream, null, options);
}

Expand All @@ -111,44 +87,15 @@ public static Optional<RowReader> open(InputStream inputStream, ReaderOptions op
* @param options Options to pass to the reader.
* @return A row reader if the stream is in a supported format.
*/
public static Optional<RowReader> open(
InputStream inputStream,
String filename,
ReaderOptions options
) throws IOException {
if (options == null) {
options = new ReaderOptions();
}

public static Maybe<RowReader> open(InputStream inputStream, String filename, ReaderOptions options) {
// If our input stream supports marks, Tika will rewind the stream back to the start for us after detecting the
// format, so ensure our input stream supports it.
inputStream = createRewindableInputStream(inputStream);
String mimeType = tika.detect(inputStream, filename);

switch (mimeType) {
case "text/csv":
case "text/plain":
return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options));

case "text/tab-separated-values":
return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options));

case "application/vnd.ms-excel":
return Optional.of(decorate(XLSRowReader.open(inputStream, options), options));

case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
case "application/x-tika-ooxml":
return Optional.of(decorate(XLSXRowReader.open(inputStream, options), options));
}

return Optional.empty();
}
InputStream rewindableStream = createRewindableInputStream(inputStream);

private static RowReader decorate(RowReader reader, ReaderOptions options) {
if (options.isInlineHeaders()) {
reader = new InlineHeaderReader(reader);
}
return reader;
return Maybe
.fromCallable(() -> tika.detect(rewindableStream, filename))
.flatMap(FormatRegistry::forMimeType)
.map(formatAdapter -> formatAdapter.createReader(rewindableStream, options != null ? options : new ReaderOptions()));
}

private static InputStream createRewindableInputStream(InputStream inputStream) {
Expand Down
Loading

0 comments on commit 2e2de6a

Please sign in to comment.