diff --git a/Instagram/Instagram_Get_comments_from_post.ipynb b/Instagram/Instagram_Get_comments_from_post.ipynb
new file mode 100644
index 0000000000..3c82de40ba
--- /dev/null
+++ b/Instagram/Instagram_Get_comments_from_post.ipynb
@@ -0,0 +1,506 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "rocky-cardiff",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "judicial-headline",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "# Instagram - Get comments from post\n",
+ "Give Feedback | Bug report"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1cef8cab-e783-4589-b2c4-c21ee380c773",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Tags:** #instagram #likes #comments #snippet #content"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "naas-author",
+ "metadata": {
+ "papermill": {},
+ "tags": [
+ "naas"
+ ]
+ },
+ "source": [
+ "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8edddd04-a2af-47f7-82aa-a9108cdcd3d4",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Last update:** 2024-07-10 (Created: 2024-07-10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "naas-description",
+ "metadata": {
+ "papermill": {},
+ "tags": [
+ "description"
+ ]
+ },
+ "source": [
+ "**Description:** This notebook allows users to extract comments from an Instagram post."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "88ed8bb2-2694-4848-a3ef-afc0f4e65e07",
+ "metadata": {},
+ "source": [
+ "### How to retrieve your API token with Apify"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0fca1344-877b-417d-94f0-1f024a029523",
+ "metadata": {},
+ "source": [
+ "1. Go to https://apify.com.\n",
+ "2. Click \"Sign up for free\" and use your Google account to sign up.\n",
+ "3. Once your account has been created, navigate to \"Settings\" on the left panel of the screen.\n",
+ "4. Click the \"Integrations\" tab; your personal API token, which was generated automatically at sign-up, is shown there.\n",
+ "5. Copy that token and use it to extract data!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "input_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Input"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "import_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Import libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:17:53.144020Z",
+ "iopub.status.busy": "2024-07-24T09:17:53.143601Z",
+ "iopub.status.idle": "2024-07-24T09:17:53.884427Z",
+ "shell.execute_reply": "2024-07-24T09:17:53.883642Z",
+ "shell.execute_reply.started": "2024-07-24T09:17:53.143947Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import pandas as pd\n",
+ "import json\n",
+ "import time"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5c3c12ca-5f3e-411a-aa54-c2b4b612a91d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2022-03-17T10:12:43.371273Z",
+ "iopub.status.busy": "2022-03-17T10:12:43.371011Z",
+ "iopub.status.idle": "2022-03-17T10:12:43.374551Z",
+ "shell.execute_reply": "2022-03-17T10:12:43.373882Z",
+ "shell.execute_reply.started": "2022-03-17T10:12:43.371208Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Setup variables\n",
+ "- `apify_token`: your personal Apify API token, used to authenticate requests\n",
+ "- `post_url`: link to the Instagram post\n",
+ "- `output_csv`: name of the CSV file the comments are saved to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ce903236-60d1-4087-a31e-9321f2df6112",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:23:44.112883Z",
+ "iopub.status.busy": "2024-07-24T09:23:44.112639Z",
+ "iopub.status.idle": "2024-07-24T09:23:44.115836Z",
+ "shell.execute_reply": "2024-07-24T09:23:44.115219Z",
+ "shell.execute_reply.started": "2024-07-24T09:23:44.112859Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "apify_token = \"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU84xxxxx\"\n",
+ "post_url = \"https://www.instagram.com/p/Cn0cUc7KelU/\"\n",
+ "output_csv = f\"{post_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_post_comments.csv\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "model_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d87a82f5-cb30-4f63-84e0-01ebe1b3fc7e",
+ "metadata": {},
+ "source": [
+ "### Scrape post comments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "aa4ffac8-3c12-483a-a312-0eb7ff17ffa1",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:17:53.890462Z",
+ "iopub.status.busy": "2024-07-24T09:17:53.890084Z",
+ "iopub.status.idle": "2024-07-24T09:18:11.543588Z",
+ "shell.execute_reply": "2024-07-24T09:18:11.542798Z",
+ "shell.execute_reply.started": "2024-07-24T09:17:53.890430Z"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Define the input for the Instagram Comment Scraper actor\n",
+ "input_data = {\n",
+ " \"directUrls\": [post_url],\n",
+ " \"resultsType\": \"comments\",\n",
+ "}\n",
+ "\n",
+ "# Make a request to start the actor\n",
+ "start_actor_url = f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs?token={apify_token}\"\n",
+ "response = requests.post(start_actor_url, json=input_data)\n",
+ "run_details = response.json()\n",
+ "\n",
+ "# Extract the run ID\n",
+ "run_id = run_details['data']['id']\n",
+ "\n",
+ "# Define the URL to fetch the actor run status\n",
+ "run_status_url = f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs/{run_id}?token={apify_token}\"\n",
+ "\n",
+ "# Wait for the actor to finish\n",
+ "while True:\n",
+ " status_response = requests.get(run_status_url)\n",
+ " status_data = status_response.json()\n",
+ " if status_data['data']['status'] in ['SUCCEEDED', 'FAILED', 'ABORTED']:\n",
+ " break\n",
+ " time.sleep(5) # Wait for 5 seconds before checking again\n",
+ "\n",
+ "if status_data['data']['status'] == 'SUCCEEDED':\n",
+ " # Define the URL to fetch the results\n",
+ " dataset_id = status_data['data']['defaultDatasetId']\n",
+ " dataset_url = f\"https://api.apify.com/v2/datasets/{dataset_id}/items?token={apify_token}&format=json\"\n",
+ "\n",
+ " # Fetch the comments\n",
+ " comments_response = requests.get(dataset_url)\n",
+ " comments_data = comments_response.json()\n",
+ "\n",
+ "else:\n",
+ " print(f\"Actor run did not succeed. Status: {status_data['data']['status']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d771a70-1245-4702-9014-324ae540d8ec",
+ "metadata": {},
+ "source": [
+ "### Dataframe structure function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "01deb84d-abd7-4975-ab77-973fe84acf0f",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:18:11.547048Z",
+ "iopub.status.busy": "2024-07-24T09:18:11.546861Z",
+ "iopub.status.idle": "2024-07-24T09:18:11.551670Z",
+ "shell.execute_reply": "2024-07-24T09:18:11.551119Z",
+ "shell.execute_reply.started": "2024-07-24T09:18:11.547027Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def get_comments(\n",
+ " cid,\n",
+ " text,\n",
+ " username,\n",
+ " profile_picture,\n",
+ " timestamp,\n",
+ " likes_count\n",
+ "):\n",
+ " return {\n",
+ " \"ID\": cid,\n",
+ " \"TEXT\": text,\n",
+ " \"USERNAME\": username,\n",
+ " \"PROFILE_PICTURE\": profile_picture,\n",
+ " \"TIMESTAMP\": timestamp,\n",
+ " \"LIKES_COUNT\": likes_count\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "output_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "display_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Display output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:18:11.552842Z",
+ "iopub.status.busy": "2024-07-24T09:18:11.552615Z",
+ "iopub.status.idle": "2024-07-24T09:18:11.873573Z",
+ "shell.execute_reply": "2024-07-24T09:18:11.873017Z",
+ "shell.execute_reply.started": "2024-07-24T09:18:11.552814Z"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " TEXT | \n",
+ " USERNAME | \n",
+ " PROFILE_PICTURE | \n",
+ " TIMESTAMP | \n",
+ " LIKES_COUNT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 17858772584879006 | \n",
+ " Promote it on @writing._.skill | \n",
+ " aditya__.7443 | \n",
+ " https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n",
+ " 2023-01-25T01:20:12.000Z | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 17842757270932646 | \n",
+ " Promote at @Thewriters_heaven | \n",
+ " skylarsrwriter | \n",
+ " https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n",
+ " 2023-01-25T01:22:15.000Z | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 17945160482350602 | \n",
+ " @Its_chetram_4444 | \n",
+ " skylarsrwriter | \n",
+ " https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n",
+ " 2023-01-25T01:22:22.000Z | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 18007951324553277 | \n",
+ " Promote at @TheAuthors.World 💫 | \n",
+ " author__mack16 | \n",
+ " https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n",
+ " 2023-01-25T01:24:59.000Z | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID TEXT USERNAME \\\n",
+ "0 17858772584879006 Promote it on @writing._.skill aditya__.7443 \n",
+ "1 17842757270932646 Promote at @Thewriters_heaven skylarsrwriter \n",
+ "2 17945160482350602 @Its_chetram_4444 skylarsrwriter \n",
+ "3 18007951324553277 Promote at @TheAuthors.World 💫 author__mack16 \n",
+ "\n",
+ " PROFILE_PICTURE \\\n",
+ "0 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n",
+ "1 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n",
+ "2 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n",
+ "3 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n",
+ "\n",
+ " TIMESTAMP LIKES_COUNT \n",
+ "0 2023-01-25T01:20:12.000Z 0 \n",
+ "1 2023-01-25T01:22:15.000Z 0 \n",
+ "2 2023-01-25T01:22:22.000Z 0 \n",
+ "3 2023-01-25T01:24:59.000Z 0 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = []\n",
+ "\n",
+ "for comment in comments_data:\n",
+ " data_comment = get_comments(\n",
+ " comment[\"id\"],\n",
+ " comment[\"text\"],\n",
+ " comment[\"ownerUsername\"],\n",
+ " comment[\"ownerProfilePicUrl\"],\n",
+ " comment[\"timestamp\"],\n",
+ " comment[\"likesCount\"]\n",
+ " )\n",
+ " data.append(data_comment)\n",
+ " \n",
+ "df = pd.DataFrame(data)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "636d22d3-beac-4de3-9ea8-6232e9cdcc6c",
+ "metadata": {},
+ "source": [
+ "### Save dataframe to csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "be26e796-6c38-4152-a0b8-49f51b617a6f",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-24T09:18:11.875551Z",
+ "iopub.status.busy": "2024-07-24T09:18:11.875052Z",
+ "iopub.status.idle": "2024-07-24T09:18:12.151858Z",
+ "shell.execute_reply": "2024-07-24T09:18:12.151261Z",
+ "shell.execute_reply.started": "2024-07-24T09:18:11.875517Z"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df.to_csv(output_csv, index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ },
+ "naas": {
+ "notebook_id": "38c44121d518d242dcfd1209fca1b300a11475f5836b8ae8f214c0b4524816a9",
+ "notebook_path": "Instagram/Instagram_Post_image_and_caption.ipynb"
+ },
+ "papermill": {
+ "default_parameters": {},
+ "environment_variables": {},
+ "parameters": {},
+ "version": "2.3.3"
+ },
+ "toc-autonumbering": false
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Twitter/Twitter_Get_tweets_by_username.ipynb b/Twitter/Twitter_Get_tweets_by_username.ipynb
new file mode 100644
index 0000000000..d36bca2a16
--- /dev/null
+++ b/Twitter/Twitter_Get_tweets_by_username.ipynb
@@ -0,0 +1,873 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "naas-logo",
+ "metadata": {
+ "papermill": {},
+ "tags": [
+ "naas"
+ ]
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22b3e6a7-eaf5-49ab-93f6-d5b5b0ac4bb7",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "# Twitter - Get tweets by username\n",
+ "Give Feedback | Bug report"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32c26cef-6a0b-4e42-b821-24731046d65c",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Tags:** #twitter #username #snippet #content #dataframe"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "naas-author",
+ "metadata": {
+ "papermill": {},
+ "tags": [
+ "naas"
+ ]
+ },
+ "source": [
+ "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9207fc0-4e6f-4722-b48f-88e8f5fff2ea",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "**Last update:** 2024-07-22 (Created: 2024-07-17)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "naas-description",
+ "metadata": {
+ "papermill": {},
+ "tags": [
+ "description"
+ ]
+ },
+ "source": [
+ "**Description:** This notebook allows users to get tweets posted by a given Twitter username."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06f7ea29-5966-46c1-8472-2cf7042003be",
+ "metadata": {},
+ "source": [
+ "### How to retrieve your API token with Apify"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ca4c18d-156e-463d-a3ab-d19d06ea1915",
+ "metadata": {},
+ "source": [
+ "1. Go to https://apify.com.\n",
+ "2. Click \"Sign up for free\" and use your Google account to sign up.\n",
+ "3. Once your account has been created, navigate to \"Settings\" on the left panel of the screen.\n",
+ "4. Click the \"Integrations\" tab; your personal API token, which was generated automatically at sign-up, is shown there.\n",
+ "5. Copy that token and use it to extract data!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7331465c-f134-4411-b2c2-0b0c3e0a688c",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Input"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "import_cell",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Import libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6e4a6247-513e-42de-af1c-70b9a87bdcff",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-22T13:48:06.202830Z",
+ "iopub.status.busy": "2024-07-22T13:48:06.202562Z",
+ "iopub.status.idle": "2024-07-22T13:48:08.377143Z",
+ "shell.execute_reply": "2024-07-22T13:48:08.376550Z",
+ "shell.execute_reply.started": "2024-07-22T13:48:06.202760Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import time\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60f6074a-9b9a-4869-b34a-b5ecc1857d79",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Setup variables"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bce3b3b0-d296-4ac6-bc69-be0f94ece1eb",
+ "metadata": {},
+ "source": [
+ "- `APIFY_API_TOKEN`: your personal Apify API token, used to authenticate requests\n",
+ "- `TWITTER_URL`: link to the Twitter account\n",
+ "- `OUTPUT_CSV`: name of the CSV file the tweets are saved to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "45564994-08dc-4551-9da3-10c5a755ec74",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-22T13:48:09.433374Z",
+ "iopub.status.busy": "2024-07-22T13:48:09.433151Z",
+ "iopub.status.idle": "2024-07-22T13:48:09.436489Z",
+ "shell.execute_reply": "2024-07-22T13:48:09.435724Z",
+ "shell.execute_reply.started": "2024-07-22T13:48:09.433352Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "APIFY_API_TOKEN = 'apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU84xxxxx'\n",
+ "TWITTER_URL = 'https://twitter.com/Spotify'\n",
+ "OUTPUT_CSV = f\"{TWITTER_URL.split('https://twitter.com/')[1].replace('/', '_')}_tweets.csv\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "663c610e-1558-479a-9143-300fcdfc9af6",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa36a148-2182-4f28-9420-13fe49623392",
+ "metadata": {},
+ "source": [
+ "### Setup apify actor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "25193cda-fb05-4477-9c13-7c9d0a600102",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-18T12:08:47.840248Z",
+ "iopub.status.busy": "2024-07-18T12:08:47.840018Z",
+ "iopub.status.idle": "2024-07-18T12:08:48.637761Z",
+ "shell.execute_reply": "2024-07-18T12:08:48.636664Z",
+ "shell.execute_reply.started": "2024-07-18T12:08:47.840218Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Prepare the actor input\n",
+ "actor_input = {\n",
+ " \"startUrls\": [{\"url\": TWITTER_URL}],\n",
+ " \"resultsLimit\": 100, # Adjust the limit as needed\n",
+ " \"mode\": \"own\" # Options: \"own\", \"replies\", \"both\"\n",
+ "}\n",
+ "\n",
+ "# Run the actor\n",
+ "run_actor_url = f'https://api.apify.com/v2/acts/quacker~twitter-scraper/runs?token={APIFY_API_TOKEN}'\n",
+ "run_response = requests.post(run_actor_url, json=actor_input)\n",
+ "run_response.raise_for_status() # Raise an error if the request failed\n",
+ "run = run_response.json()\n",
+ "run_id = run['data']['id']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "27aa7c81-ebed-44b8-b811-7dac6100c285",
+ "metadata": {},
+ "source": [
+ "### Get tweets data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "bf443879-ea6d-4078-8e1b-e628cf465dab",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-18T12:08:48.639234Z",
+ "iopub.status.busy": "2024-07-18T12:08:48.638993Z",
+ "iopub.status.idle": "2024-07-18T12:10:12.522278Z",
+ "shell.execute_reply": "2024-07-18T12:10:12.521647Z",
+ "shell.execute_reply.started": "2024-07-18T12:08:48.639203Z"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "while True:\n",
+ " run_details_url = f'https://api.apify.com/v2/actor-runs/{run_id}?token={APIFY_API_TOKEN}'\n",
+ " run_details_response = requests.get(run_details_url)\n",
+ " run_details_response.raise_for_status()\n",
+ " run_details = run_details_response.json()\n",
+ " if run_details['data']['status'] in ['SUCCEEDED', 'FAILED', 'ABORTED']:\n",
+ " break\n",
+ " time.sleep(10) # Wait for 10 seconds before checking the status again\n",
+ "\n",
+ "# Fetch the results\n",
+ "dataset_id = run_details['data']['defaultDatasetId']\n",
+ "dataset_items_url = f'https://api.apify.com/v2/datasets/{dataset_id}/items?token={APIFY_API_TOKEN}'\n",
+ "dataset_items_response = requests.get(dataset_items_url)\n",
+ "dataset_items_response.raise_for_status()\n",
+ "dataset_items = dataset_items_response.json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "38f2f926-f52c-4b48-bec1-f8fc028a6dbf",
+ "metadata": {},
+ "source": [
+ "### Dataframe structure function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "a0f65554-5ca0-4f9c-bbd8-0772e9a8efd1",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-18T12:10:23.038935Z",
+ "iopub.status.busy": "2024-07-18T12:10:23.038692Z",
+ "iopub.status.idle": "2024-07-18T12:10:23.042967Z",
+ "shell.execute_reply": "2024-07-18T12:10:23.042236Z",
+ "shell.execute_reply.started": "2024-07-18T12:10:23.038911Z"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def get_tweets(\n",
+ " tid,\n",
+ " text,\n",
+ " reply_count,\n",
+ " retweet_count,\n",
+ " favorites_count,\n",
+ " hashtags,\n",
+ " user_mentions,\n",
+ " url,\n",
+ " created_at,\n",
+ " view_count\n",
+ "):\n",
+ " return {\n",
+ " \"ID\": tid,\n",
+ " \"TEXT\": text,\n",
+ " \"REPLY_COUNT\": reply_count,\n",
+ " \"RETWEET_COUNT\": retweet_count,\n",
+ " \"FAVORITES_COUNT\": favorites_count,\n",
+ " \"HASHTAGS\": hashtags,\n",
+ " \"USER_MENTIONS\": user_mentions,\n",
+ " \"URL\": url,\n",
+ " \"CREATED_AT\": created_at,\n",
+ " \"VIEW_COUNT\": view_count\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f7d86d62-9cdd-4737-a5de-2da8bc191e30",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "## Output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f6fe9aff-4c2d-4e2a-b862-006106354bb5",
+ "metadata": {
+ "papermill": {},
+ "tags": []
+ },
+ "source": [
+ "### Display"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "5f00ab06-24f8-4334-a14e-ba53a3773141",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-18T12:11:05.010264Z",
+ "iopub.status.busy": "2024-07-18T12:11:05.010026Z",
+ "iopub.status.idle": "2024-07-18T12:11:05.043314Z",
+ "shell.execute_reply": "2024-07-18T12:11:05.042743Z",
+ "shell.execute_reply.started": "2024-07-18T12:11:05.010240Z"
+ },
+ "papermill": {},
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " TEXT | \n",
+ " REPLY_COUNT | \n",
+ " RETWEET_COUNT | \n",
+ " FAVORITES_COUNT | \n",
+ " HASHTAGS | \n",
+ " USER_MENTIONS | \n",
+ " URL | \n",
+ " CREATED_AT | \n",
+ " VIEW_COUNT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1813743054395998316 | \n",
+ " 🚨New Childish Gambino on Friday. Check out the... | \n",
+ " 17 | \n",
+ " 47 | \n",
+ " 304 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18137430543... | \n",
+ " 2024-07-18T01:10:16.000Z | \n",
+ " 67334 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1813740475398164714 | \n",
+ " Please don’t ask “what’s new?” Just listen to ... | \n",
+ " 30 | \n",
+ " 47 | \n",
+ " 274 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18137404753... | \n",
+ " 2024-07-18T01:00:01.000Z | \n",
+ " 57701 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1813690603894677650 | \n",
+ " we've been blessed once again | \n",
+ " 31 | \n",
+ " 140 | \n",
+ " 628 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18136906038... | \n",
+ " 2024-07-17T21:41:51.000Z | \n",
+ " 112142 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1813660598695268471 | \n",
+ " Spanish DJ is available for Premium users only... | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18136605986... | \n",
+ " 2024-07-17T19:42:37.000Z | \n",
+ " 44484 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1813651095455162566 | \n",
+ " DJ is now available to use in Spanish. Search ... | \n",
+ " 24 | \n",
+ " 13 | \n",
+ " 136 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18136510954... | \n",
+ " 2024-07-17T19:04:51.000Z | \n",
+ " 108677 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1813636045529092378 | \n",
+ " Munchkins, get in formation. Y2K! arrives July... | \n",
+ " 11 | \n",
+ " 15 | \n",
+ " 108 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18136360455... | \n",
+ " 2024-07-17T18:05:03.000Z | \n",
+ " 57946 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1813287485922853263 | \n",
+ " Crush’s Spotify follower count just went up by... | \n",
+ " 47 | \n",
+ " 44 | \n",
+ " 380 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18132874859... | \n",
+ " 2024-07-16T19:00:00.000Z | \n",
+ " 109026 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1813260012132966871 | \n",
+ " https://t.co/mfcYjTY39D | \n",
+ " 10 | \n",
+ " 14 | \n",
+ " 179 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18132600121... | \n",
+ " 2024-07-16T17:10:50.000Z | \n",
+ " 75601 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1813259964745719939 | \n",
+ " It’s the best time of the week (when the top c... | \n",
+ " 197 | \n",
+ " 1554 | \n",
+ " 4949 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18132599647... | \n",
+ " 2024-07-16T17:10:38.000Z | \n",
+ " 268949 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1813246671054381297 | \n",
+ " Now Presenting The Gold Standard, an exhibit i... | \n",
+ " 60 | \n",
+ " 74 | \n",
+ " 363 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18132466710... | \n",
+ " 2024-07-16T16:17:49.000Z | \n",
+ " 124731 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 1813196890344731043 | \n",
+ " You’re building the ultimate summer playlist w... | \n",
+ " 987 | \n",
+ " 170 | \n",
+ " 1336 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18131968903... | \n",
+ " 2024-07-16T13:00:00.000Z | \n",
+ " 446499 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 1812945278628209138 | \n",
+ " Which summer mix matches your current mood? ☀️... | \n",
+ " 52 | \n",
+ " 19 | \n",
+ " 192 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18129452786... | \n",
+ " 2024-07-15T20:20:11.000Z | \n",
+ " 105070 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 1812897635453530347 | \n",
+ " Spotify Presents The Gold Standard Exhibition,... | \n",
+ " 15 | \n",
+ " 13 | \n",
+ " 137 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18128976354... | \n",
+ " 2024-07-15T17:10:52.000Z | \n",
+ " 132064 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 1812894069804183850 | \n",
+ " Your last played artist is your new best frien... | \n",
+ " 1515 | \n",
+ " 130 | \n",
+ " 2166 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18128940698... | \n",
+ " 2024-07-15T16:56:42.000Z | \n",
+ " 280762 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 1812603287210020926 | \n",
+ " 🏆 | \n",
+ " 24 | \n",
+ " 75 | \n",
+ " 726 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18126032872... | \n",
+ " 2024-07-14T21:41:14.000Z | \n",
+ " 213146 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 1811943008600023174 | \n",
+ " Favorite song to listen to while you skate on ... | \n",
+ " 199 | \n",
+ " 66 | \n",
+ " 619 | \n",
+ " [SkateNoise] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18119430086... | \n",
+ " 2024-07-13T01:57:32.000Z | \n",
+ " 149877 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 1811883240661352956 | \n",
+ " Screenshot the playlist title that only makes ... | \n",
+ " 579 | \n",
+ " 104 | \n",
+ " 1321 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18118832406... | \n",
+ " 2024-07-12T22:00:02.000Z | \n",
+ " 247966 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 1811871617678778458 | \n",
+ " Moses Sumney takes us back to 1993 in the late... | \n",
+ " 13 | \n",
+ " 18 | \n",
+ " 185 | \n",
+ " [] | \n",
+ " [] | \n",
+ " https://twitter.com/Spotify/status/18118716176... | \n",
+ " 2024-07-12T21:13:51.000Z | \n",
+ " 99306 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 1811838698197254366 | \n",
+ " COUNTER//CULTURE - Vol 15 🤝 @CageTheElephant h... | \n",
+ " 15 | \n",
+ " 14 | \n",
+ " 187 | \n",
+ " [] | \n",
+ " [{'id_str': '19341413', 'name': 'Cage The Elep... | \n",
+ " https://twitter.com/Spotify/status/18118386981... | \n",
+ " 2024-07-12T19:03:02.000Z | \n",
+ " 83954 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 1811837762028617876 | \n",
+ " RT @SpotifyUSA: Welcome to Artificial Paradise... | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [{'id_str': '213687893', 'name': 'Spotify USA'... | \n",
+ " https://twitter.com/Spotify/status/18118377620... | \n",
+ " 2024-07-12T18:59:19.000Z | \n",
+ " 93429 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID TEXT \\\n",
+ "0 1813743054395998316 🚨New Childish Gambino on Friday. Check out the... \n",
+ "1 1813740475398164714 Please don’t ask “what’s new?” Just listen to ... \n",
+ "2 1813690603894677650 we've been blessed once again \n",
+ "3 1813660598695268471 Spanish DJ is available for Premium users only... \n",
+ "4 1813651095455162566 DJ is now available to use in Spanish. Search ... \n",
+ "5 1813636045529092378 Munchkins, get in formation. Y2K! arrives July... \n",
+ "6 1813287485922853263 Crush’s Spotify follower count just went up by... \n",
+ "7 1813260012132966871 https://t.co/mfcYjTY39D \n",
+ "8 1813259964745719939 It’s the best time of the week (when the top c... \n",
+ "9 1813246671054381297 Now Presenting The Gold Standard, an exhibit i... \n",
+ "10 1813196890344731043 You’re building the ultimate summer playlist w... \n",
+ "11 1812945278628209138 Which summer mix matches your current mood? ☀️... \n",
+ "12 1812897635453530347 Spotify Presents The Gold Standard Exhibition,... \n",
+ "13 1812894069804183850 Your last played artist is your new best frien... \n",
+ "14 1812603287210020926 🏆 \n",
+ "15 1811943008600023174 Favorite song to listen to while you skate on ... \n",
+ "16 1811883240661352956 Screenshot the playlist title that only makes ... \n",
+ "17 1811871617678778458 Moses Sumney takes us back to 1993 in the late... \n",
+ "18 1811838698197254366 COUNTER//CULTURE - Vol 15 🤝 @CageTheElephant h... \n",
+ "19 1811837762028617876 RT @SpotifyUSA: Welcome to Artificial Paradise... \n",
+ "\n",
+ " REPLY_COUNT RETWEET_COUNT FAVORITES_COUNT HASHTAGS \\\n",
+ "0 17 47 304 [] \n",
+ "1 30 47 274 [] \n",
+ "2 31 140 628 [] \n",
+ "3 4 4 64 [] \n",
+ "4 24 13 136 [] \n",
+ "5 11 15 108 [] \n",
+ "6 47 44 380 [] \n",
+ "7 10 14 179 [] \n",
+ "8 197 1554 4949 [] \n",
+ "9 60 74 363 [] \n",
+ "10 987 170 1336 [] \n",
+ "11 52 19 192 [] \n",
+ "12 15 13 137 [] \n",
+ "13 1515 130 2166 [] \n",
+ "14 24 75 726 [] \n",
+ "15 199 66 619 [SkateNoise] \n",
+ "16 579 104 1321 [] \n",
+ "17 13 18 185 [] \n",
+ "18 15 14 187 [] \n",
+ "19 0 60 0 [] \n",
+ "\n",
+ " USER_MENTIONS \\\n",
+ "0 [] \n",
+ "1 [] \n",
+ "2 [] \n",
+ "3 [] \n",
+ "4 [] \n",
+ "5 [] \n",
+ "6 [] \n",
+ "7 [] \n",
+ "8 [] \n",
+ "9 [] \n",
+ "10 [] \n",
+ "11 [] \n",
+ "12 [] \n",
+ "13 [] \n",
+ "14 [] \n",
+ "15 [] \n",
+ "16 [] \n",
+ "17 [] \n",
+ "18 [{'id_str': '19341413', 'name': 'Cage The Elep... \n",
+ "19 [{'id_str': '213687893', 'name': 'Spotify USA'... \n",
+ "\n",
+ " URL \\\n",
+ "0 https://twitter.com/Spotify/status/18137430543... \n",
+ "1 https://twitter.com/Spotify/status/18137404753... \n",
+ "2 https://twitter.com/Spotify/status/18136906038... \n",
+ "3 https://twitter.com/Spotify/status/18136605986... \n",
+ "4 https://twitter.com/Spotify/status/18136510954... \n",
+ "5 https://twitter.com/Spotify/status/18136360455... \n",
+ "6 https://twitter.com/Spotify/status/18132874859... \n",
+ "7 https://twitter.com/Spotify/status/18132600121... \n",
+ "8 https://twitter.com/Spotify/status/18132599647... \n",
+ "9 https://twitter.com/Spotify/status/18132466710... \n",
+ "10 https://twitter.com/Spotify/status/18131968903... \n",
+ "11 https://twitter.com/Spotify/status/18129452786... \n",
+ "12 https://twitter.com/Spotify/status/18128976354... \n",
+ "13 https://twitter.com/Spotify/status/18128940698... \n",
+ "14 https://twitter.com/Spotify/status/18126032872... \n",
+ "15 https://twitter.com/Spotify/status/18119430086... \n",
+ "16 https://twitter.com/Spotify/status/18118832406... \n",
+ "17 https://twitter.com/Spotify/status/18118716176... \n",
+ "18 https://twitter.com/Spotify/status/18118386981... \n",
+ "19 https://twitter.com/Spotify/status/18118377620... \n",
+ "\n",
+ " CREATED_AT VIEW_COUNT \n",
+ "0 2024-07-18T01:10:16.000Z 67334 \n",
+ "1 2024-07-18T01:00:01.000Z 57701 \n",
+ "2 2024-07-17T21:41:51.000Z 112142 \n",
+ "3 2024-07-17T19:42:37.000Z 44484 \n",
+ "4 2024-07-17T19:04:51.000Z 108677 \n",
+ "5 2024-07-17T18:05:03.000Z 57946 \n",
+ "6 2024-07-16T19:00:00.000Z 109026 \n",
+ "7 2024-07-16T17:10:50.000Z 75601 \n",
+ "8 2024-07-16T17:10:38.000Z 268949 \n",
+ "9 2024-07-16T16:17:49.000Z 124731 \n",
+ "10 2024-07-16T13:00:00.000Z 446499 \n",
+ "11 2024-07-15T20:20:11.000Z 105070 \n",
+ "12 2024-07-15T17:10:52.000Z 132064 \n",
+ "13 2024-07-15T16:56:42.000Z 280762 \n",
+ "14 2024-07-14T21:41:14.000Z 213146 \n",
+ "15 2024-07-13T01:57:32.000Z 149877 \n",
+ "16 2024-07-12T22:00:02.000Z 247966 \n",
+ "17 2024-07-12T21:13:51.000Z 99306 \n",
+ "18 2024-07-12T19:03:02.000Z 83954 \n",
+ "19 2024-07-12T18:59:19.000Z 93429 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = []\n",
+ "\n",
+ "for tweet in dataset_items:\n",
+ " data_tweet = get_tweets(\n",
+ " tweet[\"id\"],\n",
+ " tweet[\"full_text\"],\n",
+ " tweet[\"reply_count\"],\n",
+ " tweet[\"retweet_count\"],\n",
+ " tweet[\"favorite_count\"],\n",
+ " tweet[\"hashtags\"],\n",
+ " tweet[\"user_mentions\"],\n",
+ " tweet[\"url\"],\n",
+ " tweet[\"created_at\"],\n",
+ " tweet[\"view_count\"]\n",
+ " )\n",
+ " data.append(data_tweet)\n",
+ " \n",
+ "df = pd.DataFrame(data)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4a53832-0571-4cbb-980f-9fcb32309a56",
+ "metadata": {},
+ "source": [
+ "### Save to csv file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "50e4be8c-6e69-492e-bf62-2174f1ad0111",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-07-18T12:11:53.635934Z",
+ "iopub.status.busy": "2024-07-18T12:11:53.635684Z",
+ "iopub.status.idle": "2024-07-18T12:11:53.665887Z",
+ "shell.execute_reply": "2024-07-18T12:11:53.665267Z",
+ "shell.execute_reply.started": "2024-07-18T12:11:53.635909Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "df.to_csv(OUTPUT_CSV, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "38751e48-3b9f-487a-bbd2-7f114d0457f8",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ },
+ "naas": {
+ "notebook_id": "d50dc5991bf4fe05f69298b0b37857587e63f47d21cab0b07fbf0c4f6d4bf0b7",
+ "notebook_path": "Twitter/Twitter_Get_tweets_from_search.ipynb"
+ },
+ "papermill": {
+ "default_parameters": {},
+ "environment_variables": {},
+ "parameters": {},
+ "version": "2.3.3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {},
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}