Skip to content

Commit

Permalink
fix python
Browse files Browse the repository at this point in the history
  • Loading branch information
sbalandi committed Jan 16, 2025
1 parent 488b83b commit 528b60f
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 37 deletions.
3 changes: 1 addition & 2 deletions samples/python/text_generation/chat_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import openvino_genai


def streamer(subword) -> openvino_genai.StreamerRunningStatus:
def streamer(subword):
print(subword, end='', flush=True)

return openvino_genai.StreamerRunningStatus.RUNNING

def main():
Expand Down
9 changes: 4 additions & 5 deletions samples/python/text_generation/prompt_lookup_decoding_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import argparse
import openvino_genai

def streamer(subword):
print(subword, end='', flush=True)
# Return flag corresponds whether generation should be stopped.
# False means continue generation.
return False
def streamer(subword):
print(subword, end='', flush=True)
# The returned status indicates whether generation should continue or be stopped.
return openvino_genai.StreamerRunningStatus.RUNNING

def main():
parser = argparse.ArgumentParser()
Expand Down
7 changes: 3 additions & 4 deletions samples/python/text_generation/speculative_decoding_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
import threading

def streamer(subword):
print(subword, end='', flush=True)
# Return flag corresponds whether generation should be stopped.
# False means continue generation.
return False
print(subword, end='', flush=True)
# The returned status indicates whether generation should continue or be stopped.
return openvino_genai.StreamerRunningStatus.RUNNING

def main():
parser = argparse.ArgumentParser()
Expand Down
4 changes: 3 additions & 1 deletion samples/python/visual_language_chat/visual_language_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path


def streamer(subword: str) -> bool:
def streamer(subword: str) -> openvino_genai.StreamerRunningStatus:
'''
Args:
Expand All @@ -25,6 +25,8 @@ def streamer(subword: str) -> bool:
# RUNNING is returned because in this example we don't want to stop the generation in this method.

return openvino_genai.StreamerRunningStatus.RUNNING


def read_image(path: str) -> Tensor:
'''
Expand Down
3 changes: 0 additions & 3 deletions src/cpp/src/visual_language/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,6 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
SequenceGroup::Ptr sequence_group = std::make_shared<SequenceGroup>(request_id, prompt_ids, generation_config, block_size);
requests.push_back(sequence_group);

OPENVINO_ASSERT((!m_is_chat_conversation || !std::get_if<std::function<StreamerRunningStatus(std::string)>>(&streamer)),
"For chat mode, please, use Steamer as StreamerBase class or as callback with a bool return value.");

std::shared_ptr<StreamerBase> streamer_ptr = std::visit(overloaded{
[&m_tokenizer = m_tokenizer](
const std::function<bool(std::string)>& callback
Expand Down
19 changes: 10 additions & 9 deletions src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ class LLMPipeline:
"""
This class is used for generation with LLMs
"""
def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
"""
Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
Expand Down Expand Up @@ -1035,7 +1035,7 @@ class LLMPipeline:
"""
def finish_chat(self) -> None:
...
def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
"""
Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
Expand Down Expand Up @@ -1855,7 +1855,7 @@ class VLMPipeline:
def finish_chat(self) -> None:
...
@typing.overload
def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
"""
Generates sequences for VLMs.
Expand All @@ -1868,8 +1868,8 @@ class VLMPipeline:
:param generation_config: generation_config
:type generation_config: GenerationConfig or a Dict
:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param streamer: streamer either as a StreamerBase object or as a lambda returning a StreamerRunningStatus flag that indicates whether generation should be stopped. Please be aware that the CANCELLED status is not supported and works the same as STOP.
:type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
:type : Dict
Expand All @@ -1878,7 +1878,7 @@ class VLMPipeline:
:rtype: VLMDecodedResults
"""
@typing.overload
def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
"""
Generates sequences for VLMs.
Expand All @@ -1891,8 +1891,8 @@ class VLMPipeline:
:param generation_config: generation_config
:type generation_config: GenerationConfig or a Dict
:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param streamer: streamer either as a StreamerBase object or as a lambda returning a StreamerRunningStatus flag that indicates whether generation should be stopped. Please be aware that the CANCELLED status is not supported and works the same as STOP.
:type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
:type : Dict
Expand All @@ -1914,7 +1914,8 @@ class VLMPipeline:
image: ov.Tensor - input image,
images: List[ov.Tensor] - input images,
generation_config: GenerationConfig,
streamer: Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped
streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a StreamerBase object or as a lambda returning a StreamerRunningStatus flag that indicates whether generation should be stopped.
Please be aware that the CANCELLED status is not supported and works the same as STOP.
:return: return results in decoded form
:rtype: VLMDecodedResults
Expand Down
9 changes: 0 additions & 9 deletions src/python/py_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,15 +316,6 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
ov::genai::StreamerVariant streamer = std::monostate();

std::visit(overloaded {
[&streamer](const std::function<bool(py::str)>& py_callback){
// Wrap python streamer with manual utf-8 decoding. Do not rely
// on pybind automatic decoding since it raises exceptions on incomplete strings.
auto callback_wrapped = [py_callback](std::string subword) -> bool {
auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
return py_callback(py::reinterpret_borrow<py::str>(py_str));
};
streamer = callback_wrapped;
},
[&streamer](const std::function<ov::genai::StreamerRunningStatus(py::str)>& py_callback){
// Wrap python streamer with manual utf-8 decoding. Do not rely
// on pybind automatic decoding since it raises exceptions on incomplete strings.
Expand Down
2 changes: 1 addition & 1 deletion src/python/py_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace ov::genai::pybind::utils {

// When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts.
// Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors.
using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::function<bool(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;

template <class... Ts>
struct overloaded : Ts... {
Expand Down
7 changes: 4 additions & 3 deletions src/python/py_vlm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ auto vlm_generate_docstring = R"(
:param generation_config: generation_config
:type generation_config: GenerationConfig or a Dict
:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param streamer: streamer either as a StreamerBase object or as a lambda returning a StreamerRunningStatus flag that indicates whether generation should be stopped. Please be aware that the CANCELLED status is not supported and works the same as STOP.
:type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
:type : Dict
Expand All @@ -53,7 +53,8 @@ auto vlm_generate_kwargs_docstring = R"(
image: ov.Tensor - input image,
images: List[ov.Tensor] - input images,
generation_config: GenerationConfig,
streamer: Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped
streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a StreamerBase object or as a lambda returning a StreamerRunningStatus flag that indicates whether generation should be stopped.
Please be aware that the CANCELLED status is not supported and works the same as STOP.
:return: return results in decoded form
:rtype: VLMDecodedResults
Expand Down

0 comments on commit 528b60f

Please sign in to comment.