From b5a602ce3dcb5a5776cb5a7d92dd375a496f9384 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9a=20Buend=C3=A9?=
<46668755+leabuende@users.noreply.github.com>
Date: Thu, 19 Oct 2023 14:34:51 +0200
Subject: [PATCH] feat: initializing qa on sql databases with langchain
notebook
---
...angChain_Perform_QA_on_SQL_Databases.ipynb | 382 ++++++++++++++++++
1 file changed, 382 insertions(+)
create mode 100644 LangChain/LangChain_Perform_QA_on_SQL_Databases.ipynb
diff --git a/LangChain/LangChain_Perform_QA_on_SQL_Databases.ipynb b/LangChain/LangChain_Perform_QA_on_SQL_Databases.ipynb
new file mode 100644
index 0000000000..f667f4888c
--- /dev/null
+++ b/LangChain/LangChain_Perform_QA_on_SQL_Databases.ipynb
@@ -0,0 +1,382 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ef942509-6981-40b7-894d-4f6c33607de2",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca4c2cd5-6acc-49c1-9336-72eb54c5ec83",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "# Langchain - QA on SQL Database"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df7190c7-9f84-4ef9-afed-cbeec9189bdf",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Tags**: #langchain #toolkit #database #qa #python #sql"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cda2f1ea-c2df-45c5-9329-d832077dadfb",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Author:** [Léa Buendé](https://www.linkedin.com/in/leabuende)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b607685-d026-4f09-8724-013227fcf2c5",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Last update:** 2023-10-19 (Created: 2023-10-08) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09056bc2-135e-46e8-a422-4ef3dfabf41e",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Description:** This notebook demonstrates how to perform queries on a SQL Database, as well as how to generate SQL queries from questions or text."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a250dcaf-66c0-40a0-a7f5-c2ff8d50a4c6",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**References:** \n",
+ "- [Langchain - SQL](https://python.langchain.com/docs/use_cases/qa_structured/sql)\n",
+ "- [Langsmith - Text-to-SQL](https://smith.langchain.com/hub/rlm/text-to-sql)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "70cb3b03-1af8-47b0-a299-2c798e4e0452",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Input"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "90208573-1f14-4e2b-8900-e304035a2570",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Import libraries\n",
+ "Libraries to be used within this notebook.
\n",
+ "\n",
+ "Note: You may need to restart the kernel to use updated packages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de70f076-2e23-4d17-9e65-d7812366aad5",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "try:\n",
+ " import langchain\n",
+ " import langchain_experimental\n",
+ " import openai\n",
+ "except: \n",
+ " !pip install langchain langchain-experimental openai --user\n",
+ "\n",
+ "from langchain.utilities import SQLDatabase\n",
+ "from langchain.llms import OpenAI\n",
+ "import naas"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e4b4668-1cfa-4aa8-b453-2c9f863b6f29",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Setup variables\n",
+ "\n",
+ "- `openai_api_key`: [Get your API key here](https://openai.com/docs/api-overview/).\n",
+ "- `temp`: Default value is 0 but preferred to have 0.7. You can change this value according to your requirements\n",
+ "- `database_uri`: URI of your database. We will create one for the purpose of this notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "54158c1d-3ddf-45ed-8989-9f1fd2240b6f",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "openai_api_key = naas.secret.get(\"OPENAI_API_KEY\") or \"Paste-your-key-here\"\n",
+ "temp = 0\n",
+ "sql_uri = \"sqlite:///Chinook.db\" # Replace with the URI of your database"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f7ab5dc",
+ "metadata": {},
+ "source": [
+ "#### Creating a mock database (Optional)\n",
+ "The Chinook database is a sample database for you to run quick tests, such as trying out this notebook. It represents a digital media store, including tables for artists, albums, media tracks, invoices and customers.\n",
+ "\n",
+ "To install the Chinook database :\n",
+ "- Download the [Chinook.sql file](https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql) in the same directory as this notebook, and save it as Chinook_Sqlite.sql\n",
+ "- In your terminal, run ```sqlite3 Chinook.db``` to connect to SQLite\n",
+ "- Then, run ```.read Chinook_Sqlite.sql``` to execute the script from the file\n",
+ "\n",
+ "Your database is now initialized."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3ba70fc1",
+ "metadata": {},
+ "source": [
+ "## *Text to SQL query*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b1db6dd",
+ "metadata": {},
+ "source": [
+ "## Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "86ba23e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chat_models import ChatOpenAI\n",
+ "from langchain.chains import create_sql_query_chain"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4dbca1bc",
+ "metadata": {},
+ "source": [
+ "#### Build an SQL query using Create_sql_query_chain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f66d1a9b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "db = SQLDatabase.from_uri(sql_uri) \n",
+ "chain = create_sql_query_chain(ChatOpenAI(temperature=temp, openai_api_key=openai_api_key), db)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4443a2ae",
+ "metadata": {},
+ "source": [
+ "## Output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1eecee9",
+ "metadata": {},
+ "source": [
+ "#### Display result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c3e94abb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "response = chain.invoke({\"question\":\"How many songs are in the Grunge playlist ?\"})\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebfef2e5",
+ "metadata": {},
+ "source": [
+ "You can test out the SQL query by running :"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfb1d20e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "db.run(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4923704",
+ "metadata": {},
+ "source": [
+ "## *Text to SQL query and execution*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4abf9e57",
+ "metadata": {},
+ "source": [
+ "## Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "df536f67",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_experimental.sql import SQLDatabaseChain"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3caef398",
+ "metadata": {},
+ "source": [
+ "#### Build and run an SQL query using SQLDatabaseChain "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "9cc14e45",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "db = SQLDatabase.from_uri(sql_uri) \n",
+ "llm = OpenAI(temperature=temp, verbose=True, openai_api_key=openai_api_key)\n",
+ "db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20a4e159",
+ "metadata": {},
+ "source": [
+ "## Output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ea4deda",
+ "metadata": {},
+ "source": [
+ "#### Display result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eb795f63",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "db_chain.run(\"Who is the artist with the most albums ?\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.11.3 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ },
+ "naas": {
+ "notebook_id": "658a8244b01dfe532c4faaf8423ce808aca1020f8afdf491d99810b8e1cb4ba1",
+ "notebook_path": "LangChain/LangChain_CSV_Agent.ipynb"
+ },
+ "papermill": {
+ "default_parameters": {},
+ "environment_variables": {},
+ "parameters": {},
+ "version": "2.4.0"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+ }
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {},
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}