diff --git a/README.md b/README.md
index ce0d7f6..de3f5ec 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ contain advertisements (e.g., "Advertentie").
 ],
 ```
 
-To select the most relevant articles:
+The steps to select the most relevant articles and generate the output are as follows:
 
 1. articles are selected based the filters in the config file
 
@@ -135,30 +135,10 @@ such as ```year``` or ```decade```. This categorization is essential for subsequ
 
 3.2. Utilize TF-IDF (the default model), which can be extended to other models.
 
-```commandline
-python3 scripts/filter_articles.py
- --input-dir "path/to/converted/json/compressed/"
-
- --output-dir "output/"
-
- --input-type "delpher_kranten"
-
- --glob "*.gz"
-
- --period-type "decade"
-```
-In our case:
-- The input data consists of compressed JSON files with the .gz extension.
-- The input type is "delpher_kranten".
-- Selected articles are categorized by decade.
+4. Select the final articles based on the criteria defined in [config.json](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json).
 
-
-#### Output
-The output consists of a .csv file for each period, such as one file per decade. Each file contains the ```file_path``` and ```article_id``` of the filtered articles,
-along with an additional column, ```selected```, which indicates the articles labeled as the most relevant by the model (e.g., TF-IDF).
-
-There are different strategies for selecting the final articles. You should specify one of the following criteria in [config.py](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json):
+There are different strategies for selecting the final articles:
 
 - Percentage: Select a percentage of articles with the highest scores.
 
@@ -190,8 +170,8 @@ There are different strategies for selecting the final articles. You should spec
 },
 ```
 
+5. Generate output
 
-### 3. Generate output
 As the final step of the pipeline, the text of the selected articles is saved in a .csv file, which can be used for manual labeling. The user has the option to choose whether the text should be divided into paragraphs or a segmentation of the text. This feature can be set in [config.py](https://github.com/UtrechtUniversity/dataQuest/blob/main/config.json).
 
 ```commandline
@@ -206,11 +186,30 @@ OR
 "sentences_per_segment": 10
 ```
 
+To run the pipeline:
+
 ```commandline
-python3 scripts/generate_output.py
---input-dir "output/output_timestamped/”
---output-dir “output/output_results/“
---glob “*.csv”
+python3 dataQuest/filter_articles.py
+
+ --input-dir "path/to/converted/json/compressed/"
+
+ --output-dir "output/"
+
+ --input-type "delpher_kranten"
+
+ --glob "*.gz"
+
+ --period-type "decade"
+```
+In our case:
+- The input data consists of compressed JSON files with the .gz extension.
+- The input type is "delpher_kranten".
+- Selected articles are categorized by decade.
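+
+Equivalently, the pipeline stages can be called directly from Python. Below is a minimal sketch (the paths are illustrative; the keyword arguments mirror the CLI flags above):
+
+```python
+from pathlib import Path
+
+from dataQuest.filter_articles import (
+    filter_articles,
+    categorize_articles,
+    select_final_articles,
+)
+from dataQuest.generate_output import generate_output
+
+config = Path("config.json")
+output_dir = Path("output")
+
+# Steps 1-2: apply the config filters and save one JSON file per article.
+filter_articles(
+    input_dir=Path("path/to/converted/json/compressed/"),
+    glob_pattern="*.gz",
+    config_path=config,
+    input_type="delpher_kranten",
+    output_dir=output_dir / "output_filter",
+)
+
+# Step 3: categorize the filtered articles into one CSV file per decade.
+categorize_articles(
+    input_dir=output_dir / "output_filter",
+    period_type="decade",
+    glob_pattern="*.json",
+    output_dir=output_dir / "output_timestamped",
+)
+
+# Step 4: mark the most relevant articles per period (the "selected" column).
+select_final_articles(
+    input_dir=output_dir / "output_timestamped",
+    glob_pattern="*.csv",
+    config_path=config,
+)
+
+# Step 5: save the text of the selected articles for manual labeling.
+generate_output(
+    input_dir=output_dir / "output_timestamped",
+    glob_pattern="*.csv",
+    config_path=config,
+    output_dir=output_dir / "results",
+)
+```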
+ +OR + +``` +sh scripts/filter_articles.sh ``` ## About the Project **Date**: February 2024 diff --git a/dataQuest/filter_articles.py b/dataQuest/filter_articles.py index 4c8c13f..2486ef6 100644 --- a/dataQuest/filter_articles.py +++ b/dataQuest/filter_articles.py @@ -19,6 +19,7 @@ from dataQuest.utils import get_keywords_from_config from dataQuest.utils import read_config from dataQuest.article_final_selection.process_articles import select_articles +from dataQuest.generate_output import generate_output ARTICLE_SELECTOR_FIELD = "article_selector" OUTPUT_FILE_NAME = 'articles' @@ -238,6 +239,13 @@ def cli(): config_path=args.config_path, ) + generate_output( + input_dir=args.output_dir / "output_timestamped", + glob_pattern="*.csv", + config_path=args.config_path, + output_dir=args.output_dir / "results" + ) + except ValueError as e: parser.error(str(e)) except Exception as e: # pylint: disable=broad-except diff --git a/dataQuest/generate_output.py b/dataQuest/generate_output.py index bb89a8b..794a03d 100644 --- a/dataQuest/generate_output.py +++ b/dataQuest/generate_output.py @@ -1,6 +1,5 @@ """This script reads selected articles from CSV files, and saves their text for manual labeling""" -import argparse import logging from pathlib import Path from typing import Union @@ -139,53 +138,3 @@ def generate_output( df.to_csv(output_file, index=False) except Exception as e: # pylint: disable=broad-except logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for generating final output. - """ - parser = argparse.ArgumentParser("Select final articles.") - - parser.add_argument( - "--input-dir", - type=Path, - required=True, - help="Base directory for reading input files.", - ) - parser.add_argument( - "--glob", - type=str, - default="*.csv", - help="Glob pattern for find input files; e.g. 
'*.csv'.",
-    )
-    parser.add_argument(
-        "--config-path",
-        type=Path,
-        default="config.json",
-        help="File path of config file.",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=Path,
-        required=True,
-        help="The directory for storing output files.",
-    )
-
-    args = parser.parse_args()
-
-    try:
-        generate_output(
-            input_dir=args.input_dir,
-            glob_pattern=args.glob,
-            config_path=args.config_path,
-            output_dir=args.output_dir
-        )
-    except ValueError as e:
-        parser.error(str(e))
-    except Exception as e:  # pylint: disable=broad-except
-        logging.error("Error occurred in CLI: %s", str(e))
-
-
-if __name__ == "__main__":
-    cli()
diff --git a/example/config.json b/example/config.json
new file mode 100644
index 0000000..41ff464
--- /dev/null
+++ b/example/config.json
@@ -0,0 +1,33 @@
+{
+    "filters": [
+        {
+            "type": "AndFilter",
+            "filters": [
+                {
+                    "type": "YearFilter",
+                    "start_year": 1800,
+                    "end_year": 1910
+                },
+                {
+                    "type": "NotFilter",
+                    "filter": {
+                        "type": "ArticleTitleFilter",
+                        "article_title": "Advertentie"
+                    },
+                    "level": "article"
+                },
+                {
+                    "type": "KeywordsFilter",
+                    "keywords": ["dames", "liberalen"]
+                }
+            ]
+        }
+    ],
+    "article_selector":
+    {
+        "type": "percentage",
+        "value": "30"
+    },
+    "output_unit": "segmented_text",
+    "sentences_per_segment": 10
+}
diff --git a/example/data/KRANTEN_KBPERS01_000002100.json.gz b/example/data/KRANTEN_KBPERS01_000002100.json.gz
new file mode 100644
index 0000000..46a061e
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000002100.json.gz differ
diff --git a/example/data/KRANTEN_KBPERS01_000002200.json.gz b/example/data/KRANTEN_KBPERS01_000002200.json.gz
new file mode 100644
index 0000000..43053fb
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000002200.json.gz differ
diff --git a/example/data/KRANTEN_KBPERS01_000003100.json.gz b/example/data/KRANTEN_KBPERS01_000003100.json.gz
new file mode 100644
index 0000000..e63d827
Binary files /dev/null and b/example/data/KRANTEN_KBPERS01_000003100.json.gz differ
diff --git a/example/getting_started.ipynb b/example/getting_started.ipynb
new file mode 100644
index 0000000..7418d8e
--- /dev/null
+++ b/example/getting_started.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7070b655-e16c-4b29-9a96-8a55055ebc34",
+   "metadata": {},
+   "source": [
+    "# dataQuest pipeline\n",
+    "\n",
+    "This notebook illustrates the complete pipeline of dataQuest, from defining keywords and other metadata to selecting final articles and generating output.\n",
+    "\n",
+    "## Step 0: Install the dataQuest package"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "cd6b3982-49cd-4150-93f3-e9a55210bec5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run the following line to install dataQuest\n",
+    "# %pip install dataQuest"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4f89a52-dcc3-42cb-8631-47d212118733",
+   "metadata": {},
+   "source": [
+    "## Step 1: Convert your corpus to the expected JSON format\n",
+    "\n",
+    "The expected format is a set of JSON files compressed in the .gz format. Each JSON file contains metadata related to a newsletter, magazine, etc., as well as a list of article titles and their corresponding bodies. These files may be organized within different folders or sub-folders.\n",
+    "Below is a snapshot of the JSON file format:\n",
+    "\n",
+    "```commandline\n",
+    "{\n",
+    "    \"newsletter_metadata\": {\n",
+    "        \"title\": \"Newspaper title ..\",\n",
+    "        \"language\": \"NL\",\n",
+    "        \"date\": \"1878-04-29\",\n",
+    "        ...\n",
+    "    },\n",
+    "    \"articles\": {\n",
+    "        \"1\": {\n",
+    "            \"title\": \"title of article1 \",\n",
+    "            \"body\": [\n",
+    "                \"paragraph 1 ....\",\n",
+    "                \"paragraph 2....\"\n",
+    "            ]\n",
+    "        },\n",
+    "        \"2\": {\n",
+    "            \"title\": \"title of article2\",\n",
+    "            \"body\": [\n",
+    "                \"text...\" \n",
+    "            ]\n",
+    "        }\n",
+    "    }\n",
+    "} \n",
+    "```\n",
+    "\n",
+    "You can find sample data in [data](https://github.com/UtrechtUniversity/dataQuest/blob/main/example/data/).\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19685342-cb9f-4439-a2fb-0f22960a94ae",
+   "metadata": {},
+   "source": [
+    "## Step 2: Create a config file\n",
+    "\n",
+    "Create a config file that includes the following:\n",
+    "- filters\n",
+    "- criteria to select final articles\n",
+    "- output format\n",
+    "\n",
+    "```\n",
+    "{\n",
+    "    \"filters\": [\n",
+    "        {\n",
+    "            \"type\": \"AndFilter\",\n",
+    "            \"filters\": [\n",
+    "                {\n",
+    "                    \"type\": \"YearFilter\",\n",
+    "                    \"start_year\": 1800,\n",
+    "                    \"end_year\": 1910\n",
+    "                },\n",
+    "                {\n",
+    "                    \"type\": \"NotFilter\",\n",
+    "                    \"filter\": {\n",
+    "                        \"type\": \"ArticleTitleFilter\",\n",
+    "                        \"article_title\": \"Advertentie\"\n",
+    "                    },\n",
+    "                    \"level\": \"article\"\n",
+    "                },\n",
+    "                {\n",
+    "                    \"type\": \"KeywordsFilter\",\n",
+    "                    \"keywords\": [\"dames\", \"liberalen\"]\n",
+    "                }\n",
+    "            ]\n",
+    "        }\n",
+    "    ],\n",
+    "    \"article_selector\":\n",
+    "    {\n",
+    "        \"type\": \"percentage\",\n",
+    "        \"value\": \"30\"\n",
+    "    },\n",
+    "    \"output_unit\": \"segmented_text\",\n",
+    "    \"sentences_per_segment\": 10\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "You can find a sample [config.json](https://github.com/UtrechtUniversity/dataQuest/blob/main/example/config.json)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d7f423b2-4a94-409c-bbc0-ec9248cfa838",
+   "metadata": {},
+   "source": [
+    "## Step 3: Run the pipeline\n",
+    "Run the following command:\n",
+    "\n",
+    "```\n",
+    "filter-articles\n",
+    "--input-dir \"data/\"\n",
+    "--output-dir \"output/\"\n",
+    "--input-type \"delpher_kranten\"\n",
+    "--glob \"*.gz\"\n",
+    "--config-path \"config.json\"\n",
+    "--period-type \"decade\"\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee3390dd-4e89-4a8f-90aa-0f7fe4a72bb7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/scripts/filter_articles.py b/scripts/filter_articles.py
deleted file mode 100644
index 4c8c13f..0000000
--- a/scripts/filter_articles.py
+++ /dev/null
@@ -1,248 +0,0 @@
-"""
-This script filter articles from input files according to
-specified configurations.
-""" - -import argparse -import logging -from pathlib import Path -from typing import Iterable, List -import pandas as pd -from tqdm import tqdm - -from dataQuest.filter import INPUT_FILE_TYPES -from dataQuest.filter.input_file import InputFile -from dataQuest.utils import load_filters_from_config -from dataQuest.utils import save_filtered_articles -from dataQuest.temporal_categorization import PERIOD_TYPES -from dataQuest.temporal_categorization.timestamped_data import TimestampedData -from dataQuest.utils import get_keywords_from_config -from dataQuest.utils import read_config -from dataQuest.article_final_selection.process_articles import select_articles - -ARTICLE_SELECTOR_FIELD = "article_selector" -OUTPUT_FILE_NAME = 'articles' -FILENAME_COLUMN = 'file_path' -ARTICLE_ID_COLUMN = 'article_id' - - -def filter_articles( - input_dir: Path, - glob_pattern: str, - config_path: Path, - input_type: str, - output_dir: Path, -): - """ - Core functionality to process files, filter articles, and save results. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to match input files. - config_path (Path): Path to the configuration file. - input_type (str): File format of the input files. - output_dir (Path): Directory to save filtered articles. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - input_file_class = INPUT_FILE_TYPES[input_type] - input_files: Iterable[InputFile] = [ - input_file_class(path) for path in input_dir.rglob(glob_pattern) - ] - - output_dir.mkdir(parents=True, exist_ok=True) - - compound_filter = load_filters_from_config(config_path) - - for input_file in tqdm(input_files, desc="Filtering articles", unit="file"): - for article in input_file.selected_articles(compound_filter): - save_filtered_articles(input_file, article.id, output_dir) - - -def categorize_articles( - input_dir: Path, - period_type: str, - glob_pattern: str, - output_dir: Path, -): - """ - Core functionality to categorize articles by timestamp. - - Args: - input_dir (Path): Directory containing input files. - period_type (str): Type of time period to use for categorization. - glob_pattern (str): Glob pattern to find input files (e.g., '*.json'). - output_dir (Path): Directory to save categorized files. 
- """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - time_period_class = PERIOD_TYPES[period_type] - timestamped_objects: Iterable[TimestampedData] = [ - time_period_class(path) for path in input_dir.rglob(glob_pattern) - ] - - output_dir.mkdir(parents=True, exist_ok=True) - - for timestamped_object in tqdm(timestamped_objects, - desc="Categorize by timestamp", - unit="file"): - try: - timestamp = timestamped_object.categorize() - timestamp_file_name = output_dir / f"{OUTPUT_FILE_NAME}_{timestamp}.csv" - - if timestamp_file_name.exists(): - df = pd.read_csv(timestamp_file_name) - else: - df = pd.DataFrame(columns=[FILENAME_COLUMN, ARTICLE_ID_COLUMN]) - - new_row = { - FILENAME_COLUMN: str(timestamped_object.data()[FILENAME_COLUMN]), - ARTICLE_ID_COLUMN: str(timestamped_object.data()[ARTICLE_ID_COLUMN]), - } - df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) - - df.to_csv(timestamp_file_name, index=False) - - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing timestamped object: %s", str(e)) - - -def update_selected_indices_in_file(filepath: str, - indices_selected: List[int]) -> None: - """ - Update selected indices in a CSV file. - - Args: - filepath (str): The path to the CSV file. - indices_selected (List[int]): A list of indices to be marked - as selected. - - Raises: - ValueError: If indices_selected is empty or contains - non-negative integers. - - """ - try: - if indices_selected and all(isinstance(idx, int) and idx >= 0 - for idx in indices_selected): - df = pd.read_csv(filepath) - df['selected'] = 0 - df.loc[indices_selected, 'selected'] = 1 - df.to_csv(filepath, index=False) - else: - raise ValueError("Invalid indices_selected") - except Exception as e: # pylint: disable=W0718 - logging.error("Error updating selected indices in file: %s", - e) - - -def select_final_articles( - input_dir: Path, - glob_pattern: str, - config_path: Path, -): - """ - Core functionality to select final articles based on keywords and configuration. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to match input files (e.g., '*.csv'). - config_path (Path): Path to the configuration file. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - keywords = get_keywords_from_config(config_path) - config_article_selector = read_config(config_path, ARTICLE_SELECTOR_FIELD) - - if len(keywords) > 0 and config_article_selector: - for articles_filepath in tqdm( - input_dir.rglob(glob_pattern), - desc="Processing articles", - unit="file", - ): - try: - selected_indices = select_articles( - str(articles_filepath), keywords, config_article_selector - ) - - update_selected_indices_in_file(str(articles_filepath), selected_indices) - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for filter articles. - """ - parser = argparse.ArgumentParser("Filter articles from input files.") - - parser.add_argument( - "--input-dir", - type=Path, - help="Base directory for reading input files. ", - ) - parser.add_argument( - "--glob", - type=str, - required=True, - help="Glob pattern for find input files; e.g. 
'*.gz' ", - ) - parser.add_argument( - "--config-path", - type=Path, - default="config.json", - help="File path of config file.", - ) - parser.add_argument( - "--input-type", - type=str, - required=True, - choices=list(INPUT_FILE_TYPES.keys()), - help="Input file format.", - ) - parser.add_argument( - "--output-dir", - type=Path, - help="The directory for storing output files.", - ) - parser.add_argument( - "--period-type", - type=str, - required=True, - choices=list(PERIOD_TYPES.keys()), - help="Time periods", - ) - args = parser.parse_args() - - try: - filter_articles( - input_dir=args.input_dir, - glob_pattern=args.glob, - config_path=args.config_path, - input_type=args.input_type, - output_dir=args.output_dir / "output_filter", - ) - categorize_articles( - input_dir=args.output_dir / "output_filter", - period_type=args.period_type, - glob_pattern="*.json", - output_dir=args.output_dir / "output_timestamped", - ) - - select_final_articles( - input_dir=args.output_dir / "output_timestamped", - glob_pattern="*.csv", - config_path=args.config_path, - ) - - except ValueError as e: - parser.error(str(e)) - except Exception as e: # pylint: disable=broad-except - logging.error("Error occurred in CLI: %s", str(e)) - - -if __name__ == "__main__": - cli() diff --git a/scripts/filter_articles.sh b/scripts/filter_articles.sh new file mode 100644 index 0000000..765f21f --- /dev/null +++ b/scripts/filter_articles.sh @@ -0,0 +1 @@ +filter-articles --input-dir "../dataQuest_data/transfered_data" --output-dir "../output/" --input-type "delpher_kranten" --glob "*.gz" --config-path "config.json" --period-type "decade" \ No newline at end of file diff --git a/scripts/generate_output.py b/scripts/generate_output.py deleted file mode 100644 index bb89a8b..0000000 --- a/scripts/generate_output.py +++ /dev/null @@ -1,191 +0,0 @@ -"""This script reads selected articles from CSV files, -and saves their text for manual labeling""" -import argparse -import logging -from pathlib import Path -from typing import Union -import pandas as pd -from pandas import DataFrame -from spacy.language import Language -from dataQuest.settings import SPACY_MODEL -from dataQuest.article_final_selection.process_article import ArticleProcessor -from dataQuest.utils import read_config, get_file_name_without_extension -from dataQuest.output_generator.text_formater import (TextFormatter, - SEGMENTED_TEXT_FORMATTER) - - -FILE_PATH_FIELD = "file_path" -TITLE_FIELD = "title" -ARTICLE_ID_FIELD = "article_id" -BODY_FIELD = "body" -LABEL_FIELD = "label" -SELECTED_FIELD = "selected" -DATE_FIELD = "date" - -OUTPUT_UNIT_KEY = "output_unit" -SENTENCE_PER_SEGMENT_KEY = "sentences_per_segment" - - -def read_article(row: pd.Series, formatter: TextFormatter) -> DataFrame: - """ - Read article from row and return DataFrame of articles. - - Args: - row (pd.Series): A row from a DataFrame. - formatter (TextFormatter): An object of TextFormatter to format - output text. Defaults to False. - - Returns: - DataFrame: DataFrame containing article information. 
- """ - file_path = row[FILE_PATH_FIELD] - article_id = row[ARTICLE_ID_FIELD] - article_processor = ArticleProcessor(file_path, article_id) - title, body, date = article_processor.read_article_from_gzip() - - body_formatted = formatter.format_output(body) - - dates = [date] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [date] - titles = [title] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [title] - files_path = [file_path] * len(body_formatted) \ - if ((not formatter.is_fulltext) and body_formatted is not None) \ - else [file_path] - articles_id = ([article_id] * len(body_formatted)) \ - if (not formatter.is_fulltext) and body_formatted is not None \ - else [article_id] - label = [''] * len(body_formatted) \ - if (not formatter.is_fulltext) and body_formatted is not None \ - else [''] - return pd.DataFrame({FILE_PATH_FIELD: files_path, - DATE_FIELD: dates, - ARTICLE_ID_FIELD: articles_id, - TITLE_FIELD: titles, - BODY_FIELD: body_formatted, - LABEL_FIELD: label}) - - -def find_articles_in_file(filepath: str, formatter: TextFormatter) -> ( - Union)[DataFrame, None]: - """ - Find selected articles in a CSV file and return DataFrame of articles. - - Args: - filepath (str): Path to the CSV file. - formatter (TextFormatter): An object of TextFormatter to format - output text. - - Returns: - DataFrame: DataFrame containing selected articles information. - """ - try: - df_articles = pd.read_csv(filepath) - df_selected = df_articles.loc[df_articles[SELECTED_FIELD] == 1] - - result = pd.concat([read_article(row, formatter) - for _, row in df_selected.iterrows()], - axis=0, ignore_index=True) - return result - except Exception as e: # pylint: disable=W0718 - logging.error("Error reading selected indices in file: %s", e) - return None - - -def generate_output( - input_dir: Path, - glob_pattern: str, - config_path: Path, - output_dir: Path, - spacy_model: Union[str, Language] = SPACY_MODEL, -): - """ - Core functionality to select final articles and save them to output files. - - Args: - input_dir (Path): Directory containing input files. - glob_pattern (str): Glob pattern to find input files (e.g., '*.csv'). - config_path (Path): Path to the configuration file. - output_dir (Path): Directory to save output files. - spacy_model (Union[str, Language]): SpaCy model to use for text processing. - """ - if not input_dir.is_dir(): - raise ValueError(f"Not a directory: '{str(input_dir.absolute())}'") - - output_dir.mkdir(parents=True, exist_ok=True) - - output_unit = read_config(config_path, OUTPUT_UNIT_KEY) - sentences_per_segment = '0' - - if output_unit == SEGMENTED_TEXT_FORMATTER: - sentences_per_segment = str(read_config(config_path, SENTENCE_PER_SEGMENT_KEY)) - - text_formatter = TextFormatter( - str(output_unit), - int(sentences_per_segment), - spacy_model=spacy_model, - ) - - for articles_filepath in input_dir.rglob(glob_pattern): - try: - df = find_articles_in_file(str(articles_filepath), text_formatter) - if df is None: - continue - - file_name = get_file_name_without_extension(str(articles_filepath)) - output_file = output_dir / f"to_label_{file_name}.csv" - df.to_csv(output_file, index=False) - except Exception as e: # pylint: disable=broad-except - logging.error("Error processing file %s: %s", articles_filepath, str(e)) - - -def cli(): - """ - Command-line interface for generating final output. 
- """ - parser = argparse.ArgumentParser("Select final articles.") - - parser.add_argument( - "--input-dir", - type=Path, - required=True, - help="Base directory for reading input files.", - ) - parser.add_argument( - "--glob", - type=str, - default="*.csv", - help="Glob pattern for find input files; e.g. '*.csv'.", - ) - parser.add_argument( - "--config-path", - type=Path, - default="config.json", - help="File path of config file.", - ) - parser.add_argument( - "--output-dir", - type=Path, - required=True, - help="The directory for storing output files.", - ) - - args = parser.parse_args() - - try: - generate_output( - input_dir=args.input_dir, - glob_pattern=args.glob, - config_path=args.config_path, - output_dir=args.output_dir - ) - except ValueError as e: - parser.error(str(e)) - except Exception as e: # pylint: disable=broad-except - logging.error("Error occurred in CLI: %s", str(e)) - - -if __name__ == "__main__": - cli()