@@ -63,9 +63,10 @@ PYBIND11_MODULE(fasttext_pybind, m) {
63
63
.value (" softmax" , fasttext::loss_name::softmax)
64
64
.export_values ();
65
65
66
- m.def (" train" , [](fasttext::FastText& ft, fasttext::Args& a) {
67
- ft.train (a);
68
- }, py::call_guard<py::gil_scoped_release>());
66
+ m.def (
67
+ " train" ,
68
+ [](fasttext::FastText& ft, fasttext::Args& a) { ft.train (a); },
69
+ py::call_guard<py::gil_scoped_release>());
69
70
70
71
py::class_<fasttext::Vector>(m, " Vector" , py::buffer_protocol ())
71
72
.def (py::init<ssize_t >())
@@ -120,17 +121,15 @@ PYBIND11_MODULE(fasttext_pybind, m) {
120
121
[](fasttext::FastText& m,
121
122
fasttext::Vector& v,
122
123
const std::string text) {
123
- std::stringstream ioss;
124
- copy (text.begin (), text.end (), std::ostream_iterator<char >(ioss));
124
+ std::stringstream ioss (text);
125
125
m.getSentenceVector (ioss, v);
126
126
})
127
127
.def (
128
128
" tokenize" ,
129
129
[](fasttext::FastText& m, const std::string text) {
130
130
std::vector<std::string> text_split;
131
131
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary ();
132
- std::stringstream ioss;
133
- copy (text.begin (), text.end (), std::ostream_iterator<char >(ioss));
132
+ std::stringstream ioss (text);
134
133
std::string token;
135
134
while (!ioss.eof ()) {
136
135
while (d->readWord (ioss, token)) {
@@ -139,6 +138,28 @@ PYBIND11_MODULE(fasttext_pybind, m) {
139
138
}
140
139
return text_split;
141
140
})
141
+ .def (
142
+ " getLine" ,
143
+ [](fasttext::FastText& m, const std::string text) {
144
+ std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary ();
145
+ std::stringstream ioss (text);
146
+ std::string token;
147
+ std::vector<std::string> words;
148
+ std::vector<std::string> labels;
149
+ while (!ioss.eof ()) {
150
+ while (d->readWord (ioss, token)) {
151
+ fasttext::entry_type type = d->getType (token);
152
+ if (type == fasttext::entry_type::word) {
153
+ words.push_back (token);
154
+ } else {
155
+ labels.push_back (token);
156
+ }
157
+ }
158
+ }
159
+ return std::
160
+ pair<std::vector<std::string>, std::vector<std::string>>(
161
+ words, labels);
162
+ })
142
163
.def (
143
164
" getVocab" ,
144
165
[](fasttext::FastText& m) {
@@ -199,8 +220,7 @@ PYBIND11_MODULE(fasttext_pybind, m) {
199
220
// to exactly mimic the behavior of the cli
200
221
[](fasttext::FastText& m, const std::string text, int32_t k) {
201
222
std::vector<std::pair<fasttext::real, std::string>> predictions;
202
- std::stringstream ioss;
203
- copy (text.begin (), text.end (), std::ostream_iterator<char >(ioss));
223
+ std::stringstream ioss (text);
204
224
m.predict (ioss, k, predictions);
205
225
return predictions;
206
226
})
0 commit comments