diff --git a/RegEx/RegEx_Remove_emojis_from_text.ipynb b/RegEx/RegEx_Remove_emojis_from_text.ipynb
index 2bed7cc18d..a7d59b8e81 100644
--- a/RegEx/RegEx_Remove_emojis_from_text.ipynb
+++ b/RegEx/RegEx_Remove_emojis_from_text.ipynb
@@ -52,7 +52,7 @@
"tags": []
},
"source": [
- "**Last update:** 2023-12-04 (Created: 2023-12-04)"
+ "**Last update:** 2023-12-05 (Created: 2023-12-05)"
]
},
{
@@ -63,7 +63,7 @@
"tags": []
},
"source": [
- "**Description:** This notebook will show how to remove emojis from a text using RegEx and Python. It is usefull for organizations that need to clean text from emojis."
+ "**Description:** This notebook will show how to remove emojis from a text using RegEx and Python."
]
},
{
@@ -74,7 +74,9 @@
"tags": []
},
"source": [
- "**References:**\n- [Regular Expressions - Python Documentation](https://docs.python.org/3/library/re.html)\n- [Remove Emojis from Text - Stack Overflow](https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python)"
+ "**References:**\n",
+ "- [Regular Expressions - Python Documentation](https://docs.python.org/3/library/re.html)\n",
+ "- [Remove Emojis from Text - Stack Overflow](https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python)"
]
},
{
@@ -107,8 +109,10 @@
"papermill": {},
"tags": []
},
- "source": "import re",
- "outputs": []
+ "outputs": [],
+ "source": [
+ "import re"
+ ]
},
{
"cell_type": "markdown",
@@ -118,7 +122,8 @@
"tags": []
},
"source": [
- "### Setup variables\n- `text`: Text containing emojis"
+ "### Setup variables\n",
+ "- `text`: Text containing emojis"
]
},
{
@@ -129,8 +134,10 @@
"papermill": {},
"tags": []
},
- "source": "text = \"This is a text with emojis \ud83d\ude0a\ud83d\ude0a\ud83d\ude0a\"",
- "outputs": []
+ "outputs": [],
+ "source": [
+ "text = \"This is a text with emojis 😊😊😊\""
+ ]
},
{
"cell_type": "markdown",
@@ -173,8 +180,34 @@
"papermill": {},
"tags": []
},
- "source": "def remove_emojis(text):\n return re.sub(r\"[^\\w\\s]\", \"\", text)",
- "outputs": []
+ "outputs": [],
+ "source": [
+ "def remove_emojis(text):\n",
+ " # Emoji pattern\n",
+ " emoji_pattern = re.compile(\"[\"\n",
+ " u\"\\U0001F600-\\U0001F64F\" # emoticons\n",
+ " u\"\\U0001F300-\\U0001F5FF\" # symbols & pictographs\n",
+ " u\"\\U0001F680-\\U0001F6FF\" # transport & map symbols\n",
+ " u\"\\U0001F1E0-\\U0001F1FF\" # flags (iOS)\n",
+ " u\"\\U00002500-\\U00002BEF\" # box drawing, shapes, misc symbols, dingbats, arrows\n",
+ " u\"\\U00002702-\\U000027B0\"\n",
+ " u\"\\U00002702-\\U000027B0\"\n",
+ " u\"\\U000024C2-\\U0001F251\" # NOTE: very broad range, also matches CJK, kana and Hangul text\n",
+ " u\"\\U0001f926-\\U0001f937\"\n",
+ " u\"\\U00010000-\\U0010ffff\"\n",
+ " u\"\\u2640-\\u2642\"\n",
+ " u\"\\u2600-\\u2B55\"\n",
+ " u\"\\u200d\"\n",
+ " u\"\\u23cf\"\n",
+ " u\"\\u23e9\"\n",
+ " u\"\\u231a\"\n",
+ " u\"\\ufe0f\" # variation selector-16 (emoji presentation)\n",
+ " u\"\\u3030\"\n",
+ " \"]+\", flags=re.UNICODE)\n",
+ " # Remove emojis from the text\n",
+ " text = emoji_pattern.sub(r'', text)\n",
+ " return text.strip()"
+ ]
},
{
"cell_type": "markdown",
@@ -206,8 +239,10 @@
"papermill": {},
"tags": []
},
- "source": "print(remove_emojis(text))",
- "outputs": []
+ "outputs": [],
+ "source": [
+ "print(remove_emojis(text))"
+ ]
},
{
"cell_type": "markdown",
@@ -249,4 +284,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/template.ipynb b/template.ipynb
index 4a816365f5..63580c3239 100644
--- a/template.ipynb
+++ b/template.ipynb
@@ -2,20 +2,13 @@
"cells": [
{
"cell_type": "markdown",
- "id": "latin-packing",
+ "id": "88c104cc-bf08-4242-821b-b3a40908152a",
"metadata": {
- "execution": {
- "iopub.execute_input": "2021-02-23T14:22:16.610471Z",
- "iopub.status.busy": "2021-02-23T14:22:16.610129Z",
- "iopub.status.idle": "2021-02-23T14:22:16.627784Z",
- "shell.execute_reply": "2021-02-23T14:22:16.626866Z",
- "shell.execute_reply.started": "2021-02-23T14:22:16.610384Z"
- },
"papermill": {},
"tags": []
},
"source": [
- ""
+ ""
]
},
{