Skip to content

Commit d6f0f38

Browse files
authored
WIP : Code refactoring (#77)
* 🔨 simplify the plugin interface. 🔥 no open function, 🔥 no more useless set_type, each plugin knows its type. * This is an auto-commit, updating project meta data, such as changelog.rst, contributors.rst * 🚜 csvr to csv_sheet * 🚜 better naming for the readers * 🤝 update gitignore * 📚 update pyinstaller notes * 🔥 remove python 2 related compatibility code * 💄 update coding style * 🔥 remove test files generated during tests Co-authored-by: chfw <chfw@users.noreply.github.com>
1 parent 4bc78d6 commit d6f0f38

33 files changed

+699
-249
lines changed

.gitignore

+545-7
Large diffs are not rendered by default.

.moban.yml

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ targets:
77
- .travis.yml: custom_travis.yml.jj2
88
- README.rst: io_readme.rst.jj2
99
- "docs/source/index.rst": "docs/source/index.rst"
10+
- .gitignore: gitignore.jj2

CONTRIBUTORS.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
In alphabetical order:
66

7-
* `Craig Anderson <https://api.github.com/users/craiga>`_
8-
* `John Vandenberg <https://api.github.com/users/jayvdb>`_
9-
* `Stephen J. Fuhry <https://api.github.com/users/fuhrysteve>`_
10-
* `Stephen Rauch <https://api.github.com/users/stephenrauch>`_
11-
* `Víctor Antonio Hernández Monroy <https://api.github.com/users/antherkiv>`_
7+
* `Craig Anderson <https://github.com/craiga>`_
8+
* `John Vandenberg <https://github.com/jayvdb>`_
9+
* `Stephen J. Fuhry <https://github.com/fuhrysteve>`_
10+
* `Stephen Rauch <https://github.com/stephenrauch>`_
11+
* `Víctor Antonio Hernández Monroy <https://github.com/antherkiv>`_

MANIFEST.in

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
include README.rst
22
include LICENSE
33
include CHANGELOG.rst
4+
include CONTRIBUTORS.rst
45
recursive-include tests *
5-
<<<<<<< HEAD
6-
=======
76
recursive-include docs *
8-
>>>>>>> master
9-
include docs/source/*
107
include Makefile
118
include test.sh

docs/source/extendedcsv.rst

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ Continue from previous example::
7474
:hide:
7575

7676
>>> import os
77+
>>> if os.path.exists("your_file.csv"):
78+
... os.unlink("your_file.csv")
7779
>>> os.unlink("your_file__Sheet 1__0.csv")
7880
>>> os.unlink("your_file__Sheet 2__1.csv")
7981

docs/source/pagination.rst

+6
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,9 @@ Obviously, you could do both at the same time:
6060
6161
The pagination support is available across all pyexcel-io plugins.
6262

63+
.. testcode::
64+
:hide:
65+
66+
>>> import os
67+
>>> if os.path.exists("your_file.csv"):
68+
... os.unlink("your_file.csv")

docs/source/plaincsv.rst

+1
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,5 @@ When you read it back you will have to specify encoding too.
172172

173173
>>> import os
174174
>>> os.unlink("your_file.csv")
175+
>>> os.unlink("test-utf16-encoding.csv")
175176
>>> os.unlink(test_file)

docs/source/pyinstaller.rst

+7-5
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@ Built-in plugins for pyexcel-io
99

1010
In order to package every built-in plugin of pyexcel-io, you need to specify::
1111

12-
--hidden-import pyexcel_io.readers.csvr
12+
--hidden-import pyexcel_io.readers.csv_in_file
13+
--hidden-import pyexcel_io.readers.csv_in_memory
14+
--hidden-import pyexcel_io.readers.csv_content
1315
--hidden-import pyexcel_io.readers.csvz
1416
--hidden-import pyexcel_io.readers.tsv
1517
--hidden-import pyexcel_io.readers.tsvz
16-
--hidden-import pyexcel_io.writers.csv_file_writer
17-
--hidden-import pyexcel_io.writers.csv_memory_writer
18-
--hidden-import pyexcel_io.writers.tsv_file_writer
19-
--hidden-import pyexcel_io.writers.tsv_memory_writer
18+
--hidden-import pyexcel_io.writers.csv_in_file
19+
--hidden-import pyexcel_io.writers.csv_in_memory
20+
--hidden-import pyexcel_io.writers.tsv_in_file
21+
--hidden-import pyexcel_io.writers.tsv_in_memory
2022
--hidden-import pyexcel_io.writers.csvz_writer
2123
--hidden-import pyexcel_io.writers.tsvz_writer
2224
--hidden-import pyexcel_io.database.importers.django

docs/source/renderer.rst

+7
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,10 @@ And you may want use row_renderer to customize it to string:
4646
>>> data = get_data("your_file.csv", row_renderer=my_renderer)
4747
>>> data['your_file.csv']
4848
[['1', '21', '31'], ['2', '22', '32'], ['3', '23', '33']]
49+
50+
.. testcode::
51+
:hide:
52+
53+
>>> import os
54+
>>> if os.path.exists("your_file.csv"):
55+
... os.unlink("your_file.csv")

pyexcel_io/_compact.py

+5-28
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,7 @@
1616
import sys
1717
import types
1818
import logging
19-
20-
PY2 = sys.version_info[0] == 2
21-
PY3_ABOVE = sys.version_info[0] >= 3
22-
PY26 = PY2 and sys.version_info[1] < 7
23-
PY27 = PY2 and sys.version_info[1] == 7
24-
PY27_ABOVE = PY27 or PY3_ABOVE
25-
26-
if PY26:
27-
from ordereddict import OrderedDict
28-
else:
29-
from collections import OrderedDict
19+
from collections import OrderedDict
3020

3121
try:
3222
from logging import NullHandler
@@ -37,24 +27,11 @@ def emit(self, record):
3727
pass
3828

3929

40-
if PY2:
41-
from cStringIO import StringIO
42-
from cStringIO import StringIO as BytesIO
43-
44-
text_type = unicode
45-
irange = xrange
46-
47-
class Iterator(object):
48-
def next(self):
49-
return type(self).__next__(self)
50-
51-
52-
else:
53-
from io import BytesIO, StringIO
30+
from io import BytesIO, StringIO
5431

55-
text_type = str
56-
Iterator = object
57-
irange = range
32+
text_type = str
33+
Iterator = object
34+
irange = range
5835

5936

6037
def isstream(instance):

pyexcel_io/plugins.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ def get_a_plugin(
176176
):
177177
__file_type = file_type.lower()
178178
plugin = self.load_me_now(f"{location}-{__file_type}", library=library)
179-
handler = plugin()
180-
return handler
179+
return plugin
181180

182181
def raise_exception(self, file_type):
183182
file_type = file_type.split("-")[1]
@@ -224,14 +223,14 @@ def _do_additional_registration_for_new_plugins(plugin_info):
224223
)
225224

226225

227-
class FakeReaders:
226+
class AllReaders:
228227
def get_all_formats(self):
229228
return OLD_READERS.get_all_formats().union(
230229
NEW_READERS.get_all_formats()
231230
)
232231

233232

234-
class FakeWriters:
233+
class AllWriters:
235234
def get_all_formats(self):
236235
return OLD_WRITERS.get_all_formats().union(
237236
NEW_WRITERS.get_all_formats()
@@ -242,8 +241,8 @@ def get_all_formats(self):
242241
OLD_WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS)
243242
NEW_WRITERS = NewIOManager(NEW_WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS)
244243
NEW_READERS = NewIOManager(NEW_READER_PLUGIN, ioutils.AVAILABLE_READERS)
245-
READERS = FakeReaders()
246-
WRITERS = FakeWriters()
244+
READERS = AllReaders()
245+
WRITERS = AllWriters()
247246

248247

249248
def load_plugins(plugin_name_patterns, path, black_list, white_list):

pyexcel_io/reader.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,21 @@ def __init__(self, file_type, library=None):
3333
self.keywords = None
3434

3535
def open(self, file_name, **keywords):
36-
self.reader = NEW_READERS.get_a_plugin(
36+
reader_class = NEW_READERS.get_a_plugin(
3737
self.file_type, location="file", library=self.library
3838
)
3939
self.keywords, native_sheet_keywords = clean_keywords(keywords)
40-
return self.reader.open(file_name, **native_sheet_keywords)
40+
self.reader = reader_class(file_name, **native_sheet_keywords)
41+
return self.reader
4142

4243
def open_content(self, file_content, **keywords):
4344
self.keywords, native_sheet_keywords = clean_keywords(keywords)
4445
try:
45-
self.reader = NEW_READERS.get_a_plugin(
46+
reader_class = NEW_READERS.get_a_plugin(
4647
self.file_type, location="content", library=self.library
4748
)
48-
return self.reader.open(file_content, **native_sheet_keywords)
49+
self.reader = reader_class(file_content, **native_sheet_keywords)
50+
return self.reader
4951
except (
5052
exceptions.NoSupportingPluginFound,
5153
exceptions.SupportingPluginAvailableButNotInstalled,
@@ -57,10 +59,11 @@ def open_content(self, file_content, **keywords):
5759

5860
def open_stream(self, file_stream, **keywords):
5961
self.keywords, native_sheet_keywords = clean_keywords(keywords)
60-
self.reader = NEW_READERS.get_a_plugin(
62+
reader_class = NEW_READERS.get_a_plugin(
6163
self.file_type, location="memory", library=self.library
6264
)
63-
return self.reader.open(file_stream, **native_sheet_keywords)
65+
self.reader = reader_class(file_stream, **native_sheet_keywords)
66+
return self.reader
6467

6568
def read_sheet_by_name(self, sheet_name):
6669
"""

pyexcel_io/readers/__init__.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,17 @@
1010
from pyexcel_io.plugins import NewIOPluginInfoChain
1111

1212
NewIOPluginInfoChain(__name__).add_a_reader(
13-
relative_plugin_class_path="csv_file_reader.FileReader",
13+
relative_plugin_class_path="csv_in_file.FileReader",
1414
location="file",
1515
file_types=["csv"],
1616
stream_type="text",
1717
).add_a_reader(
18-
relative_plugin_class_path="csv_content_reader.ContentReader",
18+
relative_plugin_class_path="csv_content.ContentReader",
1919
location="content",
2020
file_types=["csv"],
2121
stream_type="text",
2222
).add_a_reader(
23-
relative_plugin_class_path="csv_memory_reader.MemoryReader",
23+
relative_plugin_class_path="csv_in_memory.MemoryReader",
2424
location="memory",
2525
file_types=["csv"],
2626
stream_type="text",

pyexcel_io/readers/csv_content.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import mmap
2+
3+
import pyexcel_io.constants as constants
4+
from pyexcel_io.book import _convert_content_to_stream
5+
from pyexcel_io.readers.csv_sheet import CSVMemoryMapIterator
6+
from pyexcel_io.readers.csv_in_memory import MemoryReader
7+
8+
9+
class ContentReader(MemoryReader):
10+
file_type = constants.FILE_FORMAT_CSV
11+
12+
def __init__(self, file_content, **keywords):
13+
file_stream = ContentReader.convert_content_to_stream(
14+
file_content, self.file_type, **keywords
15+
)
16+
super().__init__(file_stream, **keywords)
17+
18+
@staticmethod
19+
def convert_content_to_stream(file_content, file_type, **keywords):
20+
encoding = keywords.get("encoding", "utf-8")
21+
if isinstance(file_content, mmap.mmap):
22+
# load from mmap
23+
file_stream = CSVMemoryMapIterator(file_content, encoding)
24+
else:
25+
if isinstance(file_content, bytes):
26+
file_content = file_content.decode(encoding)
27+
28+
file_stream = _convert_content_to_stream(file_content, file_type)
29+
30+
return file_stream

pyexcel_io/readers/csv_content_reader.py

-21
This file was deleted.

pyexcel_io/readers/csv_file_reader.py renamed to pyexcel_io/readers/csv_in_file.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,18 @@
44

55
from pyexcel_io import constants
66
from pyexcel_io.sheet import NamedContent
7-
from pyexcel_io.readers.csvr import CSVFileReader
7+
from pyexcel_io.readers.csv_sheet import CSVFileReader
88

99
DEFAULT_NEWLINE = "\r\n"
1010

1111

1212
class FileReader(object):
13-
def __init__(self):
14-
self.handles = []
15-
16-
def set_type(self, _):
17-
pass
18-
19-
def open(self, file_name, **keywords):
13+
def __init__(self, file_name, **keywords):
2014
"""Load content from a file
2115
:params str filename: an accessible file path
2216
:returns: a book
2317
"""
18+
self.handles = []
2419
self.keywords = keywords
2520
self.__line_terminator = keywords.get(
2621
constants.KEYWORD_LINE_TERMINATOR, DEFAULT_NEWLINE

pyexcel_io/readers/csv_memory_reader.py renamed to pyexcel_io/readers/csv_in_memory.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,20 @@
33
import pyexcel_io._compact as compact
44
from pyexcel_io import constants
55
from pyexcel_io.sheet import NamedContent
6-
from pyexcel_io.readers.csvr import CSVinMemoryReader
6+
from pyexcel_io.readers.csv_sheet import CSVinMemoryReader
77

88
DEFAULT_SHEET_SEPARATOR_FORMATTER = f"---{constants.DEFAULT_NAME}---%s"
99

1010

1111
class MemoryReader(object):
12-
def __init__(self):
13-
self.handles = []
14-
self.file_type = constants.FILE_FORMAT_CSV
15-
16-
def set_type(self, _):
17-
pass
12+
file_type = constants.FILE_FORMAT_CSV
1813

19-
def open(self, file_stream, multiple_sheets=False, **keywords):
14+
def __init__(self, file_stream, multiple_sheets=False, **keywords):
2015
"""Load content from memory
2116
:params stream file_content: the actual file content in memory
2217
:returns: a book
2318
"""
19+
self.handles = []
2420
self.keywords = keywords
2521
self.__load_from_memory_flag = True
2622
self.__line_terminator = keywords.get(
File renamed without changes.

pyexcel_io/readers/csvz.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,12 @@
1212
import chardet
1313
from pyexcel_io.sheet import NamedContent
1414
from pyexcel_io._compact import StringIO
15-
from pyexcel_io.readers.csvr import CSVinMemoryReader
15+
from pyexcel_io.readers.csv_sheet import CSVinMemoryReader
1616

1717

1818
class FileReader(object):
19-
def __init__(self):
19+
def __init__(self, file_alike_object, **keywords):
2020
self.content_array = []
21-
self.keywords = None
22-
23-
def set_type(self, _):
24-
pass
25-
26-
def open(self, file_alike_object, **keywords):
2721
try:
2822
self.zipfile = zipfile.ZipFile(file_alike_object, "r")
2923
sheets = [

0 commit comments

Comments
 (0)