diff --git a/README.md b/README.md
index ce0d7f6..de3f5ec 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ contain advertisements (e.g., "Advertentie").
 ],
 ```
 
-To select the most relevant articles:
+The steps to select the most relevant articles and generate the output are as follows:
 
 1. articles are selected based the filters in the config file
 
@@ -135,30 +135,10 @@ such as ```year``` or ```decade```. This categorization is essential for subsequ
 
 3.2. Utilize TF-IDF (the default model), which can be extended to other models.
 
-```commandline
-python3 scripts/filter_articles.py
- --input-dir "path/to/converted/json/compressed/"
-
- --output-dir "output/"
-
- --input-type "delpher_kranten"
-
- --glob "*.gz"
-
- --period-type "decade"
-```
-In our case:
-- The input data consists of compressed JSON files with the .gz extension.
-- The input type is "delpher_kranten".
-- Selected articles are categorized by decade.
+4. Select the final articles based on the criteria defined in [config.json](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json).
 
-
-#### Output
-The output consists of a .csv file for each period, such as one file per decade. Each file contains the ```file_path``` and ```article_id``` of the filtered articles,
-along with an additional column, ```selected```, which indicates the articles labeled as the most relevant by the model (e.g., TF-IDF).
-
-There are different strategies for selecting the final articles. You should specify one of the following criteria in [config.py](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json):
+There are different strategies for selecting the final articles:
 
 - Percentage: Select a percentage of articles with the highest scores.
 
@@ -190,8 +170,8 @@ There are different strategies for selecting the final articles. You should spec
 },
 ```
 
+5. Generate output
 
-### 3. Generate output
 As the final step of the pipeline, the text of the selected articles is saved in a .csv file, which can be used for manual labeling. The user has the option to choose whether the text should be divided into paragraphs or a segmentation of the text. This feature can be set in [config.py](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json).
 
 ```commandline
@@ -206,11 +186,30 @@ OR
 "sentences_per_segment": 10
 ```
 
+To run the pipeline:
+
 ```commandline
-python3 scripts/generate_output.py
---input-dir "output/output_timestamped/”
---output-dir “output/output_results/“
---glob “*.csv”
+python3 dataQuest/filter_articles.py
+
+ --input-dir "path/to/converted/json/compressed/"
+
+ --output-dir "output/"
+
+ --input-type "delpher_kranten"
+
+ --glob "*.gz"
+
+ --period-type "decade"
+```
+In our case:
+- The input data consists of compressed JSON files with the .gz extension.
+- The input type is "delpher_kranten".
+- Selected articles are categorized by decade.
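+
+Equivalently, the pipeline stages can be called directly from Python. Below is a minimal sketch (the paths are illustrative; the keyword arguments mirror the CLI flags above):
+
+```python
+from pathlib import Path
+
+from dataQuest.filter_articles import (
+    filter_articles,
+    categorize_articles,
+    select_final_articles,
+)
+from dataQuest.generate_output import generate_output
+
+config = Path("config.json")
+output_dir = Path("output")
+
+# Steps 1-2: apply the config filters and save one JSON file per article.
+filter_articles(
+    input_dir=Path("path/to/converted/json/compressed/"),
+    glob_pattern="*.gz",
+    config_path=config,
+    input_type="delpher_kranten",
+    output_dir=output_dir / "output_filter",
+)
+
+# Step 3: categorize the filtered articles into one CSV file per decade.
+categorize_articles(
+    input_dir=output_dir / "output_filter",
+    period_type="decade",
+    glob_pattern="*.json",
+    output_dir=output_dir / "output_timestamped",
+)
+
+# Step 4: mark the most relevant articles per period (the "selected" column).
+select_final_articles(
+    input_dir=output_dir / "output_timestamped",
+    glob_pattern="*.csv",
+    config_path=config,
+)
+
+# Step 5: save the text of the selected articles for manual labeling.
+generate_output(
+    input_dir=output_dir / "output_timestamped",
+    glob_pattern="*.csv",
+    config_path=config,
+    output_dir=output_dir / "results",
+)
+```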
+ +OR + +``` +sh scripts/filter_articles.sh ``` ## About the Project **Date**: February 2024 diff --git a/dataQuest/filter_articles.py b/dataQuest/filter_articles.py index 4c8c13f..2486ef6 100644 --- a/dataQuest/filter_articles.py +++ b/dataQuest/filter_articles.py @@ -19,6 +19,7 @@ from dataQuest.utils import get_keywords_from_config from dataQuest.utils import read_config from dataQuest.article_final_selection.process_articles import select_articles +from dataQuest.generate_output import generate_output ARTICLE_SELECTOR_FIELD = "article_selector" OUTPUT_FILE_NAME = 'articles' @@ -238,6 +239,13 @@ def cli(): config_path=args.config_path, ) + generate_output( + input_dir=args.output_dir / "output_timestamped", + glob_pattern="*.csv", + config_path=args.config_path, + output_dir=args.output_dir / "results" + ) + except ValueError as e: parser.error(str(e)) except Exception as e: # pylint: disable=broad-except diff --git a/dataQuest/generate_output.py b/dataQuest/generate_output.py index bb89a8b..794a03d 100644 --- a/dataQuest/generate_output.py +++ b/dataQuest/generate_output.py @@ -1,6 +1,5 @@ """This script reads selected articles from CSV files, and saves their text for manual labeling""" -import argparse import logging from pathlib import Path from typing import Union @@ -139,53 +138,3 @@ def generate_output( df.to_csv(output_file, index=False) except Exception as e: # pylint: disable=broad-except logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for generating final output. - """ - parser = argparse.ArgumentParser("Select final articles.") - - parser.add_argument( - "--input-dir", - type=Path, - required=True, - help="Base directory for reading input files.", - ) - parser.add_argument( - "--glob", - type=str, - default="*.csv", - help="Glob pattern for find input files; e.g. 
'*.csv'.",
-    )
-    parser.add_argument(
-        "--config-path",
-        type=Path,
-        default="config.json",
-        help="File path of config file.",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=Path,
-        required=True,
-        help="The directory for storing output files.",
-    )
-
-    args = parser.parse_args()
-
-    try:
-        generate_output(
-            input_dir=args.input_dir,
-            glob_pattern=args.glob,
-            config_path=args.config_path,
-            output_dir=args.output_dir
-        )
-    except ValueError as e:
-        parser.error(str(e))
-    except Exception as e:  # pylint: disable=broad-except
-        logging.error("Error occurred in CLI: %s", str(e))
-
-
-if __name__ == "__main__":
-    cli()
diff --git a/example/config.json b/example/config.json
new file mode 100644
index 0000000..41ff464
--- /dev/null
+++ b/example/config.json
@@ -0,0 +1,33 @@
+{
+    "filters": [
+        {
+            "type": "AndFilter",
+            "filters": [
+                {
+                    "type": "YearFilter",
+                    "start_year": 1800,
+                    "end_year": 1910
+                },
+                {
+                    "type": "NotFilter",
+                    "filter": {
+                        "type": "ArticleTitleFilter",
+                        "article_title": "Advertentie"
+                    },
+                    "level": "article"
+                },
+                {
+                    "type": "KeywordsFilter",
+                    "keywords": ["dames", "liberalen"]
+                }
+            ]
+        }
+    ],
+    "article_selector":
+    {
+        "type": "percentage",
+        "value": "30"
+    },
+    "output_unit": "segmented_text",
+    "sentences_per_segment": 10
+}
diff --git a/example/data/KRANTEN_KBPERS01_000002100.json.gz b/example/data/KRANTEN_KBPERS01_000002100.json.gz
new file mode 100644
index 0000000..46a061e
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000002100.json.gz differ
diff --git a/example/data/KRANTEN_KBPERS01_000002200.json.gz b/example/data/KRANTEN_KBPERS01_000002200.json.gz
new file mode 100644
index 0000000..43053fb
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000002200.json.gz differ
diff --git a/example/data/KRANTEN_KBPERS01_000003100.json.gz b/example/data/KRANTEN_KBPERS01_000003100.json.gz
new file mode 100644
index 0000000..e63d827
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000003100.json.gz differ
diff --git a/example/getting_started.ipynb b/example/getting_started.ipynb
new file mode 100644
index 0000000..7418d8e
--- /dev/null
+++ b/example/getting_started.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7070b655-e16c-4b29-9a96-8a55055ebc34",
+   "metadata": {},
+   "source": [
+    "# dataQuest pipeline\n",
+    "\n",
+    "This notebook illustrates the complete pipeline of dataQuest, from defining keywords and other metadata to selecting final articles and generating output.\n",
+    "\n",
+    "## Step 0: Install the dataQuest package"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "cd6b3982-49cd-4150-93f3-e9a55210bec5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run the following line to install dataQuest\n",
+    "# %pip install dataQuest"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4f89a52-dcc3-42cb-8631-47d212118733",
+   "metadata": {},
+   "source": [
+    "## Step 1: Convert your corpus to the expected JSON format\n",
+    "\n",
+    "The expected format is a set of JSON files compressed in the .gz format. Each JSON file contains metadata related to a newsletter, magazine, etc., as well as a list of article titles and their corresponding bodies. These files may be organized within different folders or sub-folders.\n",
+    "Below is a snapshot of the JSON file format:\n",
+    "\n",
+    "```commandline\n",
+    "{\n",
+    "    \"newsletter_metadata\": {\n",
+    "        \"title\": \"Newspaper title ..\",\n",
+    "        \"language\": \"NL\",\n",
+    "        \"date\": \"1878-04-29\",\n",
+    "        ...\n",
+    "    },\n",
+    "    \"articles\": {\n",
+    "        \"1\": {\n",
+    "            \"title\": \"title of article1 \",\n",
+    "            \"body\": [\n",
+    "                \"paragraph 1 ....\",\n",
+    "                \"paragraph 2....\"\n",
+    "            ]\n",
+    "        },\n",
+    "        \"2\": {\n",
+    "            \"title\": \"title of article2\",\n",
+    "            \"body\": [\n",
+    "                \"text...\" \n",
+    "            ]\n",
+    "        }\n",
+    "    }\n",
+    "} \n",
+    "```\n",
+    "\n",
+    "You can find sample data in [data](https://github.com/UtrechtUniversity/dataQuest/blob/main/example/data/).\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19685342-cb9f-4439-a2fb-0f22960a94ae",
+   "metadata": {},
+   "source": [
+    "## Step 2: Create a config file\n",
+    "\n",
+    "Create a config file that includes the following:\n",
+    "- filters\n",
+    "- criteria to select final articles\n",
+    "- output format\n",
+    "\n",
+    "```\n",
+    "{\n",
+    "    \"filters\": [\n",
+    "        {\n",
+    "            \"type\": \"AndFilter\",\n",
+    "            \"filters\": [\n",
+    "                {\n",
+    "                    \"type\": \"YearFilter\",\n",
+    "                    \"start_year\": 1800,\n",
+    "                    \"end_year\": 1910\n",
+    "                },\n",
+    "                {\n",
+    "                    \"type\": \"NotFilter\",\n",
+    "                    \"filter\": {\n",
+    "                        \"type\": \"ArticleTitleFilter\",\n",
+    "                        \"article_title\": \"Advertentie\"\n",
+    "                    },\n",
+    "                    \"level\": \"article\"\n",
+    "                },\n",
+    "                {\n",
+    "                    \"type\": \"KeywordsFilter\",\n",
+    "                    \"keywords\": [\"dames\", \"liberalen\"]\n",
+    "                }\n",
+    "            ]\n",
+    "        }\n",
+    "    ],\n",
+    "    \"article_selector\":\n",
+    "    {\n",
+    "        \"type\": \"percentage\",\n",
+    "        \"value\": \"30\"\n",
+    "    },\n",
+    "    \"output_unit\": \"segmented_text\",\n",
+    "    \"sentences_per_segment\": 10\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "You can find a sample [config.json](https://github.com/UtrechtUniversity/dataQuest/blob/main/example/config.json)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d7f423b2-4a94-409c-bbc0-ec9248cfa838",
+   "metadata": {},
+   "source": [
+    "## Step 3: Run the pipeline\n",
+    "Run the following command:\n",
+    "\n",
+    "```\n",
+    "filter-articles\n",
+    "--input-dir \"data/\"\n",
+    "--output-dir \"output/\"\n",
+    "--input-type \"delpher_kranten\"\n",
+    "--glob \"*.gz\"\n",
+    "--config-path \"config.json\"\n",
+    "--period-type \"decade\"\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee3390dd-4e89-4a8f-90aa-0f7fe4a72bb7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/scripts/filter_articles.py b/scripts/filter_articles.py
deleted file mode 100644
index 4c8c13f..0000000
--- a/scripts/filter_articles.py
+++ /dev/null
@@ -1,248 +0,0 @@
-"""
-This script filter articles from input files according to
-specified configurations.
-""" - -import argparse -import logging -from pathlib import Path -from typing import Iterable, List -import pandas as pd -from tqdm import tqdm - -from dataQuest.filter import INPUT_FILE_TYPES -from dataQuest.filter.input_file import InputFile -from dataQuest.utils import load_filters_from_config -from dataQuest.utils import save_filtered_articles -from dataQuest.temporal_categorization import PERIOD_TYPES -from dataQuest.temporal_categorization.timestamped_data import TimestampedData -from dataQuest.utils import get_keywords_from_config -from dataQuest.utils import read_config -from dataQuest.article_final_selection.process_articles import select_articles - -ARTICLE_SELECTOR_FIELD = "article_selector" -OUTPUT_FILE_NAME = 'articles' -FILENAME_COLUMN = 'file_path' -ARTICLE_ID_COLUMN = 'article_id' - - -def filter_articles( - input_dir: Path, - glob_pattern: str, - config_path: Path, - input_type: str, - output_dir: Path, -): - """ - Core functionality to process files, filter articles, and save results. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to match input files. - config_path (Path): Path to the configuration file. - input_type (str): File format of the input files. - output_dir (Path): Directory to save filtered articles. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - input_file_class = INPUT_FILE_TYPES[input_type] - input_files: Iterable[InputFile] = [ - input_file_class(path) for path in input_dir.rglob(glob_pattern) - ] - - output_dir.mkdir(parents=True, exist_ok=True) - - compound_filter = load_filters_from_config(config_path) - - for input_file in tqdm(input_files, desc="Filtering articles", unit="file"): - for article in input_file.selected_articles(compound_filter): - save_filtered_articles(input_file, article.id, output_dir) - - -def categorize_articles( - input_dir: Path, - period_type: str, - glob_pattern: str, - output_dir: Path, -): - """ - Core functionality to categorize articles by timestamp. - - Args: - input_dir (Path): Directory containing input files. - period_type (str): Type of time period to use for categorization. - glob_pattern (str): Glob pattern to find input files (e.g., '*.json'). - output_dir (Path): Directory to save categorized files. 
- """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - time_period_class = PERIOD_TYPES[period_type] - timestamped_objects: Iterable[TimestampedData] = [ - time_period_class(path) for path in input_dir.rglob(glob_pattern) - ] - - output_dir.mkdir(parents=True, exist_ok=True) - - for timestamped_object in tqdm(timestamped_objects, - desc="Categorize by timestamp", - unit="file"): - try: - timestamp = timestamped_object.categorize() - timestamp_file_name = output_dir / f"{OUTPUT_FILE_NAME}_{timestamp}.csv" - - if timestamp_file_name.exists(): - df = pd.read_csv(timestamp_file_name) - else: - df = pd.DataFrame(columns=[FILENAME_COLUMN, ARTICLE_ID_COLUMN]) - - new_row = { - FILENAME_COLUMN: str(timestamped_object.data()[FILENAME_COLUMN]), - ARTICLE_ID_COLUMN: str(timestamped_object.data()[ARTICLE_ID_COLUMN]), - } - df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) - - df.to_csv(timestamp_file_name, index=False) - - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing timestamped object: %s", str(e)) - - -def update_selected_indices_in_file(filepath: str, - indices_selected: List[int]) -> None: - """ - Update selected indices in a CSV file. - - Args: - filepath (str): The path to the CSV file. - indices_selected (List[int]): A list of indices to be marked - as selected. - - Raises: - ValueError: If indices_selected is empty or contains - non-negative integers. - - """ - try: - if indices_selected and all(isinstance(idx, int) and idx >= 0 - for idx in indices_selected): - df = pd.read_csv(filepath) - df['selected'] = 0 - df.loc[indices_selected, 'selected'] = 1 - df.to_csv(filepath, index=False) - else: - raise ValueError("Invalid indices_selected") - except Exception as e: # pylint: disable=W0718 - logging.error("Error updating selected indices in file: %s", - e) - - -def select_final_articles( - input_dir: Path, - glob_pattern: str, - config_path: Path, -): - """ - Core functionality to select final articles based on keywords and configuration. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to match input files (e.g., '*.csv'). - config_path (Path): Path to the configuration file. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - keywords = get_keywords_from_config(config_path) - config_article_selector = read_config(config_path, ARTICLE_SELECTOR_FIELD) - - if len(keywords) > 0 and config_article_selector: - for articles_filepath in tqdm( - input_dir.rglob(glob_pattern), - desc="Processing articles", - unit="file", - ): - try: - selected_indices = select_articles( - str(articles_filepath), keywords, config_article_selector - ) - - update_selected_indices_in_file(str(articles_filepath), selected_indices) - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for filter articles. - """ - parser = argparse.ArgumentParser("Filter articles from input files.") - - parser.add_argument( - "--input-dir", - type=Path, - help="Base directory for reading input files. ", - ) - parser.add_argument( - "--glob", - type=str, - required=True, - help="Glob pattern for find input files; e.g. 
'*.gz' ", - ) - parser.add_argument( - "--config-path", - type=Path, - default="config.json", - help="File path of config file.", - ) - parser.add_argument( - "--input-type", - type=str, - required=True, - choices=list(INPUT_FILE_TYPES.keys()), - help="Input file format.", - ) - parser.add_argument( - "--output-dir", - type=Path, - help="The directory for storing output files.", - ) - parser.add_argument( - "--period-type", - type=str, - required=True, - choices=list(PERIOD_TYPES.keys()), - help="Time periods", - ) - args = parser.parse_args() - - try: - filter_articles( - input_dir=args.input_dir, - glob_pattern=args.glob, - config_path=args.config_path, - input_type=args.input_type, - output_dir=args.output_dir / "output_filter", - ) - categorize_articles( - input_dir=args.output_dir / "output_filter", - period_type=args.period_type, - glob_pattern="*.json", - output_dir=args.output_dir / "output_timestamped", - ) - - select_final_articles( - input_dir=args.output_dir / "output_timestamped", - glob_pattern="*.csv", - config_path=args.config_path, - ) - - except ValueError as e: - parser.error(str(e)) - except Exception as e: # pylint: disable=broad-except - logging.error("Error occurred in CLI: %s", str(e)) - - -if __name__ == "__main__": - cli() diff --git a/scripts/filter_articles.sh b/scripts/filter_articles.sh new file mode 100644 index 0000000..765f21f --- /dev/null +++ b/scripts/filter_articles.sh @@ -0,0 +1 @@ +filter-articles --input-dir "../dataQuest_data/transfered_data" --output-dir "../output/" --input-type "delpher_kranten" --glob "*.gz" --config-path "config.json" --period-type "decade" \ No newline at end of file diff --git a/scripts/generate_output.py b/scripts/generate_output.py deleted file mode 100644 index bb89a8b..0000000 --- a/scripts/generate_output.py +++ /dev/null @@ -1,191 +0,0 @@ -"""This script reads selected articles from CSV files, -and saves their text for manual labeling""" -import argparse -import logging -from pathlib import Path -from typing import Union -import pandas as pd -from pandas import DataFrame -from spacy.language import Language -from dataQuest.settings import SPACY_MODEL -from dataQuest.article_final_selection.process_article import ArticleProcessor -from dataQuest.utils import read_config, get_file_name_without_extension -from dataQuest.output_generator.text_formater import (TextFormatter, - SEGMENTED_TEXT_FORMATTER) - - -FILE_PATH_FIELD = "file_path" -TITLE_FIELD = "title" -ARTICLE_ID_FIELD = "article_id" -BODY_FIELD = "body" -LABEL_FIELD = "label" -SELECTED_FIELD = "selected" -DATE_FIELD = "date" - -OUTPUT_UNIT_KEY = "output_unit" -SENTENCE_PER_SEGMENT_KEY = "sentences_per_segment" - - -def read_article(row: pd.Series, formatter: TextFormatter) -> DataFrame: - """ - Read article from row and return DataFrame of articles. - - Args: - row (pd.Series): A row from a DataFrame. - formatter (TextFormatter): An object of TextFormatter to format - output text. Defaults to False. - - Returns: - DataFrame: DataFrame containing article information. 
- """ - file_path = row[FILE_PATH_FIELD] - article_id = row[ARTICLE_ID_FIELD] - article_processor = ArticleProcessor(file_path, article_id) - title, body, date = article_processor.read_article_from_gzip() - - body_formatted = formatter.format_output(body) - - dates = [date] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [date] - titles = [title] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [title] - files_path = [file_path] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [file_path] - articles_id = ([article_id] * len(body_formatted)) \ - if (not formatter.is_fulltext) and body_formatted is not None \ - else [article_id] - label = [''] * len(body_formatted) \ - if (not formatter.is_fulltext) and body_formatted is not None \ - else [''] - return pd.DataFrame({FILE_PATH_FIELD: files_path, - DATE_FIELD: dates, - ARTICLE_ID_FIELD: articles_id, - TITLE_FIELD: titles, - BODY_FIELD: body_formatted, - LABEL_FIELD: label}) - - -def find_articles_in_file(filepath: str, formatter: TextFormatter) -> ( - Union)[DataFrame, None]: - """ - Find selected articles in a CSV file and return DataFrame of articles. - - Args: - filepath (str): Path to the CSV file. - formatter (TextFormatter): An object of TextFormatter to format - output text. - - Returns: - DataFrame: DataFrame containing selected articles information. - """ - try: - df_articles = pd.read_csv(filepath) - df_selected = df_articles.loc[df_articles[SELECTED_FIELD] == 1] - - result = pd.concat([read_article(row, formatter) - for _, row in df_selected.iterrows()], - axis=0, ignore_index=True) - return result - except Exception as e: # pylint: disable=W0718 - logging.error("Error reading selected indices in file: %s", e) - return None - - -def generate_output( - input_dir: Path, - glob_pattern: str, - config_path: Path, - output_dir: Path, - spacy_model: Union[str, Language] = SPACY_MODEL, -): - """ - Core functionality to select final articles and save them to output files. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to find input files (e.g., '*.csv'). - config_path (Path): Path to the configuration file. - output_dir (Path): Directory to save output files. - spacy_model (Union[str, Language]): SpaCy model to use for text processing. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - output_dir.mkdir(parents=True, exist_ok=True) - - output_unit = read_config(config_path, OUTPUT_UNIT_KEY) - sentences_per_segment = '0' - - if output_unit == SEGMENTED_TEXT_FORMATTER: - sentences_per_segment = str(read_config(config_path, SENTENCE_PER_SEGMENT_KEY)) - - text_formatter = TextFormatter( - str(output_unit), - int(sentences_per_segment), - spacy_model=spacy_model, - ) - - for articles_filepath in input_dir.rglob(glob_pattern): - try: - df = find_articles_in_file(str(articles_filepath), text_formatter) - if df is None: - continue - - file_name = get_file_name_without_extension(str(articles_filepath)) - output_file = output_dir / f"to_label_{file_name}.csv" - df.to_csv(output_file, index=False) - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for generating final output. 
- """ - parser = argparse.ArgumentParser("Select final articles.") - - parser.add_argument( - "--input-dir", - type=Path, - required=True, - help="Base directory for reading input files.", - ) - parser.add_argument( - "--glob", - type=str, - default="*.csv", - help="Glob pattern for find input files; e.g. '*.csv'.", - ) - parser.add_argument( - "--config-path", - type=Path, - default="config.json", - help="File path of config file.", - ) - parser.add_argument( - "--output-dir", - type=Path, - required=True, - help="The directory for storing output files.", - ) - - args = parser.parse_args() - - try: - generate_output( - input_dir=args.input_dir, - glob_pattern=args.glob, - config_path=args.config_path, - output_dir=args.output_dir - ) - except ValueError as e: - parser.error(str(e)) - except Exception as e: # pylint: disable=broad-except - logging.error("Error occurred in CLI: %s", str(e)) - - -if __name__ == "__main__": - cli()