diff --git a/RegEx/RegEx_Remove_emojis_from_text.ipynb b/RegEx/RegEx_Remove_emojis_from_text.ipynb index 2bed7cc18d..a7d59b8e81 100644 --- a/RegEx/RegEx_Remove_emojis_from_text.ipynb +++ b/RegEx/RegEx_Remove_emojis_from_text.ipynb @@ -52,7 +52,7 @@ "tags": [] }, "source": [ - "**Last update:** 2023-12-04 (Created: 2023-12-04)" + "**Last update:** 2023-12-05 (Created: 2023-12-05)" ] }, { @@ -63,7 +63,7 @@ "tags": [] }, "source": [ - "**Description:** This notebook will show how to remove emojis from a text using RegEx and Python. It is usefull for organizations that need to clean text from emojis." + "**Description:** This notebook will show how to remove emojis from a text using RegEx and Python." ] }, { @@ -74,7 +74,9 @@ "tags": [] }, "source": [ - "**References:**\n- [Regular Expressions - Python Documentation](https://docs.python.org/3/library/re.html)\n- [Remove Emojis from Text - Stack Overflow](https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python)" + "**References:**\n", + "- [Regular Expressions - Python Documentation](https://docs.python.org/3/library/re.html)\n", + "- [Remove Emojis from Text - Stack Overflow](https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python)" ] }, { @@ -107,8 +109,10 @@ "papermill": {}, "tags": [] }, - "source": "import re", - "outputs": [] + "outputs": [], + "source": [ + "import re" + ] }, { "cell_type": "markdown", @@ -118,7 +122,8 @@ "tags": [] }, "source": [ - "### Setup variables\n- `text`: Text containing emojis" + "### Setup variables\n", + "- `text`: Text containing emojis" ] }, { @@ -129,8 +134,10 @@ "papermill": {}, "tags": [] }, - "source": "text = \"This is a text with emojis \ud83d\ude0a\ud83d\ude0a\ud83d\ude0a\"", - "outputs": [] + "outputs": [], + "source": [ + "text = \"This is a text with emojis 😊😊😊\"" + ] }, { "cell_type": "markdown", @@ -173,8 +180,34 @@ "papermill": {}, "tags": [] }, - "source": "def remove_emojis(text):\n return 
re.sub(r\"[^\\w\\s]\", \"\", text)", - "outputs": [] + "outputs": [], + "source": [ + "def remove_emojis(text):\n", + " \"\"\"Strip emoji and pictographic symbols from text, then trim outer whitespace.\"\"\"\n", + " emoji_pattern = re.compile(\"[\"\n", + " u\"\\U0001F600-\\U0001F64F\" # emoticons\n", + " u\"\\U0001F300-\\U0001F5FF\" # symbols & pictographs\n", + " u\"\\U0001F680-\\U0001F6FF\" # transport & map symbols\n", + " u\"\\U0001F1E0-\\U0001F1FF\" # flags (iOS)\n", + " u\"\\U00002500-\\U00002BEF\" # box drawing, shapes, misc symbols, dingbats, arrows\n", + " u\"\\U00002702-\\U000027B0\" # dingbats\n", + " u\"\\U000E0020-\\U000E007F\" # tag characters used in subdivision-flag sequences\n", + " u\"\\U000024C2\" # circled M only: a range up to U+1F251 would also delete CJK, kana and Hangul\n", + " u\"\\U0001f926-\\U0001f937\"\n", + " u\"\\U0001F000-\\U0001FAFF\" # supplementary symbol blocks only, not every character above U+FFFF\n", + " u\"\\u2640-\\u2642\"\n", + " u\"\\u2600-\\u2B55\"\n", + " u\"\\u200d\" # zero-width joiner that glues emoji sequences\n", + " u\"\\u23cf\"\n", + " u\"\\u23e9\"\n", + " u\"\\u231a\"\n", + " u\"\\ufe0f\" # variation selector-16 (emoji presentation)\n", + " u\"\\u3030\\u303d\\u3297\\u3299\" # wavy dash, part alternation mark, circled ideograph emoji\n", + " \"]+\", flags=re.UNICODE)\n", + " # Remove emojis from the text\n", + " text = emoji_pattern.sub(r'', text)\n", + " return text.strip()" + ] }, { "cell_type": "markdown", @@ -206,8 +239,10 @@ "papermill": {}, "tags": [] }, - "source": "print(remove_emojis(text))", - "outputs": [] + "outputs": [], + "source": [ + "print(remove_emojis(text))" + ] }, { "cell_type": "markdown", @@ -249,4 +284,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/template.ipynb b/template.ipynb index 4a816365f5..63580c3239 100644 --- a/template.ipynb +++ b/template.ipynb @@ -2,20 +2,13 @@ "cells": [ { "cell_type": "markdown", - "id": "latin-packing", + "id": "88c104cc-bf08-4242-821b-b3a40908152a", "metadata": { - "execution": { - "iopub.execute_input": "2021-02-23T14:22:16.610471Z", - "iopub.status.busy": "2021-02-23T14:22:16.610129Z", - "iopub.status.idle": "2021-02-23T14:22:16.627784Z", - "shell.execute_reply": "2021-02-23T14:22:16.626866Z", - "shell.execute_reply.started": "2021-02-23T14:22:16.610384Z" - }, "papermill": {}, "tags": [] }, "source": [ - "\"Naas.png\"" + "\"Naas.png\"" ] }, {