diff --git a/Instagram/Instagram_Get_comments_from_post.ipynb b/Instagram/Instagram_Get_comments_from_post.ipynb new file mode 100644 index 0000000000..3c82de40ba --- /dev/null +++ b/Instagram/Instagram_Get_comments_from_post.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "rocky-cardiff", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Instagram.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "judicial-headline", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Instagram - Get comments from post\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "1cef8cab-e783-4589-b2c4-c21ee380c773", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #instagram #likes #comments #snippet #content" + ] + }, + { + "cell_type": "markdown", + "id": "naas-author", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "8edddd04-a2af-47f7-82aa-a9108cdcd3d4", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-10 (Created: 2024-07-10)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to extract comments from an Instagram post." + ] + }, + { + "cell_type": "markdown", + "id": "88ed8bb2-2694-4848-a3ef-afc0f4e65e07", + "metadata": {}, + "source": [ + "### How to retrieve your API key with Apify" + ] + }, + { + "cell_type": "markdown", + "id": "0fca1344-877b-417d-94f0-1f024a029523", + "metadata": {}, + "source": [ + "1. Go to https://apify.com.\n", + "2. Click \"Sign up for free\" and use your Google account to sign up.\n", + "3. 
Once your account has been created, navigate to \"Settings\" on the left panel of the screen.\n", + "4. Click the tab labeled \"Integrations\", where you will find the personal API token that was automatically generated when you signed up.\n", + "5. Copy that token and use it to extract data!" + ] + }, + { + "cell_type": "markdown", + "id": "input_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "import_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.144020Z", + "iopub.status.busy": "2024-07-24T09:17:53.143601Z", + "iopub.status.idle": "2024-07-24T09:17:53.884427Z", + "shell.execute_reply": "2024-07-24T09:17:53.883642Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.143947Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import json\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "id": "5c3c12ca-5f3e-411a-aa54-c2b4b612a91d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-03-17T10:12:43.371273Z", + "iopub.status.busy": "2022-03-17T10:12:43.371011Z", + "iopub.status.idle": "2022-03-17T10:12:43.374551Z", + "shell.execute_reply": "2022-03-17T10:12:43.373882Z", + "shell.execute_reply.started": "2022-03-17T10:12:43.371208Z" + }, + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- `apify_token`: personal API token generated by Apify to access data\n", + "- `post_url`: link to the Instagram post\n", + "- `output_csv`: name of the CSV file the comments are saved to" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ce903236-60d1-4087-a31e-9321f2df6112", + "metadata": { + "execution": { + "iopub.execute_input": 
"2024-07-24T09:23:44.112883Z", + "iopub.status.busy": "2024-07-24T09:23:44.112639Z", + "iopub.status.idle": "2024-07-24T09:23:44.115836Z", + "shell.execute_reply": "2024-07-24T09:23:44.115219Z", + "shell.execute_reply.started": "2024-07-24T09:23:44.112859Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "apify_token = \"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU84xxxxx\"\n", + "post_url = \"https://www.instagram.com/p/Cn0cUc7KelU/\"\n", + "output_csv = f\"{post_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_post_comments.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "model_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "d87a82f5-cb30-4f63-84e0-01ebe1b3fc7e", + "metadata": {}, + "source": [ + "### Scrape post comments" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "aa4ffac8-3c12-483a-a312-0eb7ff17ffa1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.890462Z", + "iopub.status.busy": "2024-07-24T09:17:53.890084Z", + "iopub.status.idle": "2024-07-24T09:18:11.543588Z", + "shell.execute_reply": "2024-07-24T09:18:11.542798Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.890430Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Define the input for the Instagram Comment Scraper actor\n", + "input_data = {\n", + " \"directUrls\": [post_url],\n", + " \"resultsType\": \"comments\",\n", + "}\n", + "\n", + "# Make a request to start the actor\n", + "start_actor_url = f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs?token={apify_token}\"\n", + "response = requests.post(start_actor_url, json=input_data)\n", + "run_details = response.json()\n", + "\n", + "# Extract the run ID\n", + "run_id = run_details['data']['id']\n", + "\n", + "# Define the URL to fetch the actor run status\n", + "run_status_url = 
f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs/{run_id}?token={apify_token}\"\n", + "\n", + "# Wait for the actor to finish\n", + "while True:\n", + " status_response = requests.get(run_status_url)\n", + " status_data = status_response.json()\n", + " if status_data['data']['status'] in ['SUCCEEDED', 'FAILED', 'ABORTED']:\n", + " break\n", + " time.sleep(5) # Wait for 5 seconds before checking again\n", + "\n", + "if status_data['data']['status'] == 'SUCCEEDED':\n", + " # Define the URL to fetch the results\n", + " dataset_id = status_data['data']['defaultDatasetId']\n", + " dataset_url = f\"https://api.apify.com/v2/datasets/{dataset_id}/items?token={apify_token}&format=json\"\n", + "\n", + " # Fetch the comments\n", + " comments_response = requests.get(dataset_url)\n", + " comments_data = comments_response.json()\n", + "\n", + "else:\n", + " print(f\"Actor run did not succeed. Status: {status_data['data']['status']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3d771a70-1245-4702-9014-324ae540d8ec", + "metadata": {}, + "source": [ + "### Dataframe structure function" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "01deb84d-abd7-4975-ab77-973fe84acf0f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.547048Z", + "iopub.status.busy": "2024-07-24T09:18:11.546861Z", + "iopub.status.idle": "2024-07-24T09:18:11.551670Z", + "shell.execute_reply": "2024-07-24T09:18:11.551119Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.547027Z" + } + }, + "outputs": [], + "source": [ + "def get_comments(\n", + " cid,\n", + " text,\n", + " username,\n", + " profile_picture,\n", + " timestamp,\n", + " likes_count\n", + "):\n", + " return {\n", + " \"ID\": cid,\n", + " \"TEXT\": text,\n", + " \"USERNAME\": username,\n", + " \"PROFILE_PICTURE\": profile_picture,\n", + " \"TIMESTAMP\": timestamp,\n", + " \"LIKES_COUNT\": likes_count\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": 
"output_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "display_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.552842Z", + "iopub.status.busy": "2024-07-24T09:18:11.552615Z", + "iopub.status.idle": "2024-07-24T09:18:11.873573Z", + "shell.execute_reply": "2024-07-24T09:18:11.873017Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.552814Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDTEXTUSERNAMEPROFILE_PICTURETIMESTAMPLIKES_COUNT
017858772584879006Promote it on @writing._.skilladitya__.7443https://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:20:12.000Z0
117842757270932646Promote at @Thewriters_heavenskylarsrwriterhttps://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:22:15.000Z0
217945160482350602@Its_chetram_4444skylarsrwriterhttps://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:22:22.000Z0
318007951324553277Promote at @TheAuthors.World 💫author__mack16https://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:24:59.000Z0
\n", + "
" + ], + "text/plain": [ + " ID TEXT USERNAME \\\n", + "0 17858772584879006 Promote it on @writing._.skill aditya__.7443 \n", + "1 17842757270932646 Promote at @Thewriters_heaven skylarsrwriter \n", + "2 17945160482350602 @Its_chetram_4444 skylarsrwriter \n", + "3 18007951324553277 Promote at @TheAuthors.World 💫 author__mack16 \n", + "\n", + " PROFILE_PICTURE \\\n", + "0 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "1 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "2 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "3 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "\n", + " TIMESTAMP LIKES_COUNT \n", + "0 2023-01-25T01:20:12.000Z 0 \n", + "1 2023-01-25T01:22:15.000Z 0 \n", + "2 2023-01-25T01:22:22.000Z 0 \n", + "3 2023-01-25T01:24:59.000Z 0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "\n", + "for comment in comments_data:\n", + " data_comment = get_comments(\n", + " comment[\"id\"],\n", + " comment[\"text\"],\n", + " comment[\"ownerUsername\"],\n", + " comment[\"ownerProfilePicUrl\"],\n", + " comment[\"timestamp\"],\n", + " comment[\"likesCount\"]\n", + " )\n", + " data.append(data_comment)\n", + " \n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "636d22d3-beac-4de3-9ea8-6232e9cdcc6c", + "metadata": {}, + "source": [ + "### Save dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "be26e796-6c38-4152-a0b8-49f51b617a6f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.875551Z", + "iopub.status.busy": "2024-07-24T09:18:11.875052Z", + "iopub.status.idle": "2024-07-24T09:18:12.151858Z", + "shell.execute_reply": "2024-07-24T09:18:12.151261Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.875517Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "df.to_csv(output_csv, index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "38c44121d518d242dcfd1209fca1b300a11475f5836b8ae8f214c0b4524816a9", + "notebook_path": "Instagram/Instagram_Post_image_and_caption.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Twitter/Twitter_Get_tweets_by_username.ipynb b/Twitter/Twitter_Get_tweets_by_username.ipynb new file mode 100644 index 0000000000..d36bca2a16 --- /dev/null +++ b/Twitter/Twitter_Get_tweets_by_username.ipynb @@ -0,0 +1,873 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "naas-logo", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "\"Twitter.jpeg\"" + ] + }, + { + "cell_type": "markdown", + "id": "22b3e6a7-eaf5-49ab-93f6-d5b5b0ac4bb7", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Twitter - Get tweets by username\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "32c26cef-6a0b-4e42-b821-24731046d65c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #twitter #username #snippet #content #dataframe" + ] + }, + { + "cell_type": "markdown", + "id": "naas-author", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "f9207fc0-4e6f-4722-b48f-88e8f5fff2ea", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-22 (Created: 2024-07-17)" + ] + }, + 
{ + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to get the tweets posted by a given Twitter username." + ] + }, + { + "cell_type": "markdown", + "id": "06f7ea29-5966-46c1-8472-2cf7042003be", + "metadata": {}, + "source": [ + "### How to retrieve your API key with Apify" + ] + }, + { + "cell_type": "markdown", + "id": "9ca4c18d-156e-463d-a3ab-d19d06ea1915", + "metadata": {}, + "source": [ + "1. Go to https://apify.com.\n", + "2. Click \"Sign up for free\" and use your Google account to sign up.\n", + "3. Once your account has been created, navigate to \"Settings\" on the left panel of the screen.\n", + "4. Click the tab labeled \"Integrations\", where you will find the personal API token that was automatically generated when you signed up.\n", + "5. Copy that token and use it to extract data!" + ] + }, + { + "cell_type": "markdown", + "id": "7331465c-f134-4411-b2c2-0b0c3e0a688c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "import_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6e4a6247-513e-42de-af1c-70b9a87bdcff", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-22T13:48:06.202830Z", + "iopub.status.busy": "2024-07-22T13:48:06.202562Z", + "iopub.status.idle": "2024-07-22T13:48:08.377143Z", + "shell.execute_reply": "2024-07-22T13:48:08.376550Z", + "shell.execute_reply.started": "2024-07-22T13:48:06.202760Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import time\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "60f6074a-9b9a-4869-b34a-b5ecc1857d79", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup 
variables" + ] + }, + { + "cell_type": "markdown", + "id": "bce3b3b0-d296-4ac6-bc69-be0f94ece1eb", + "metadata": {}, + "source": [ + "- `APIFY_API_TOKEN`: personal API token generated by Apify to access data\n", + "- `TWITTER_URL`: link to the Twitter account\n", + "- `OUTPUT_CSV`: name of the CSV file the tweets are saved to" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "45564994-08dc-4551-9da3-10c5a755ec74", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-22T13:48:09.433374Z", + "iopub.status.busy": "2024-07-22T13:48:09.433151Z", + "iopub.status.idle": "2024-07-22T13:48:09.436489Z", + "shell.execute_reply": "2024-07-22T13:48:09.435724Z", + "shell.execute_reply.started": "2024-07-22T13:48:09.433352Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "APIFY_API_TOKEN = 'apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU84xxxxx'\n", + "TWITTER_URL = 'https://twitter.com/Spotify'\n", + "OUTPUT_CSV = f\"{TWITTER_URL.split('https://twitter.com/')[1].replace('/', '_')}_tweets.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "663c610e-1558-479a-9143-300fcdfc9af6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "fa36a148-2182-4f28-9420-13fe49623392", + "metadata": {}, + "source": [ + "### Setup apify actor" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "25193cda-fb05-4477-9c13-7c9d0a600102", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-18T12:08:47.840248Z", + "iopub.status.busy": "2024-07-18T12:08:47.840018Z", + "iopub.status.idle": "2024-07-18T12:08:48.637761Z", + "shell.execute_reply": "2024-07-18T12:08:48.636664Z", + "shell.execute_reply.started": "2024-07-18T12:08:47.840218Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "# Prepare the actor input\n", + "actor_input = {\n", + " \"startUrls\": [{\"url\": TWITTER_URL}],\n", + " \"resultsLimit\": 100, # Adjust the limit as needed\n", + " \"mode\": 
\"own\" # Options: \"own\", \"replies\", \"both\"\n", + "}\n", + "\n", + "# Run the actor\n", + "run_actor_url = f'https://api.apify.com/v2/acts/quacker~twitter-scraper/runs?token={APIFY_API_TOKEN}'\n", + "run_response = requests.post(run_actor_url, json=actor_input)\n", + "run_response.raise_for_status() # Raise an error if the request failed\n", + "run = run_response.json()\n", + "run_id = run['data']['id']" + ] + }, + { + "cell_type": "markdown", + "id": "27aa7c81-ebed-44b8-b811-7dac6100c285", + "metadata": {}, + "source": [ + "### Get tweets data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bf443879-ea6d-4078-8e1b-e628cf465dab", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-18T12:08:48.639234Z", + "iopub.status.busy": "2024-07-18T12:08:48.638993Z", + "iopub.status.idle": "2024-07-18T12:10:12.522278Z", + "shell.execute_reply": "2024-07-18T12:10:12.521647Z", + "shell.execute_reply.started": "2024-07-18T12:08:48.639203Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "while True:\n", + " run_details_url = f'https://api.apify.com/v2/actor-runs/{run_id}?token={APIFY_API_TOKEN}'\n", + " run_details_response = requests.get(run_details_url)\n", + " run_details_response.raise_for_status()\n", + " run_details = run_details_response.json()\n", + " if run_details['data']['status'] in ['SUCCEEDED', 'FAILED', 'ABORTED']:\n", + " break\n", + " time.sleep(10) # Wait for 10 seconds before checking the status again\n", + "\n", + "# Fetch the results\n", + "dataset_id = run_details['data']['defaultDatasetId']\n", + "dataset_items_url = f'https://api.apify.com/v2/datasets/{dataset_id}/items?token={APIFY_API_TOKEN}'\n", + "dataset_items_response = requests.get(dataset_items_url)\n", + "dataset_items_response.raise_for_status()\n", + "dataset_items = dataset_items_response.json()" + ] + }, + { + "cell_type": "markdown", + "id": "38f2f926-f52c-4b48-bec1-f8fc028a6dbf", + "metadata": {}, + "source": [ + "### Dataframe structure 
function" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a0f65554-5ca0-4f9c-bbd8-0772e9a8efd1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-18T12:10:23.038935Z", + "iopub.status.busy": "2024-07-18T12:10:23.038692Z", + "iopub.status.idle": "2024-07-18T12:10:23.042967Z", + "shell.execute_reply": "2024-07-18T12:10:23.042236Z", + "shell.execute_reply.started": "2024-07-18T12:10:23.038911Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_tweets(\n", + " tid,\n", + " text,\n", + " reply_count,\n", + " retweet_count,\n", + " favorites_count,\n", + " hashtags,\n", + " user_mentions,\n", + " url,\n", + " created_at,\n", + " view_count\n", + "):\n", + " return {\n", + " \"ID\": tid,\n", + " \"TEXT\": text,\n", + " \"REPLY_COUNT\": reply_count,\n", + " \"RETWEET_COUNT\": retweet_count,\n", + " \"FAVORITES_COUNT\": favorites_count,\n", + " \"HASHTAGS\": hashtags,\n", + " \"USER_MENTIONS\": user_mentions,\n", + " \"URL\": url,\n", + " \"CREATED_AT\": created_at,\n", + " \"VIEW_COUNT\": view_count\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "f7d86d62-9cdd-4737-a5de-2da8bc191e30", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "f6fe9aff-4c2d-4e2a-b862-006106354bb5", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5f00ab06-24f8-4334-a14e-ba53a3773141", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-18T12:11:05.010264Z", + "iopub.status.busy": "2024-07-18T12:11:05.010026Z", + "iopub.status.idle": "2024-07-18T12:11:05.043314Z", + "shell.execute_reply": "2024-07-18T12:11:05.042743Z", + "shell.execute_reply.started": "2024-07-18T12:11:05.010240Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDTEXTREPLY_COUNTRETWEET_COUNTFAVORITES_COUNTHASHTAGSUSER_MENTIONSURLCREATED_ATVIEW_COUNT
01813743054395998316🚨New Childish Gambino on Friday. Check out the...1747304[][]https://twitter.com/Spotify/status/18137430543...2024-07-18T01:10:16.000Z67334
11813740475398164714Please don’t ask “what’s new?” Just listen to ...3047274[][]https://twitter.com/Spotify/status/18137404753...2024-07-18T01:00:01.000Z57701
21813690603894677650we've been blessed once again31140628[][]https://twitter.com/Spotify/status/18136906038...2024-07-17T21:41:51.000Z112142
31813660598695268471Spanish DJ is available for Premium users only...4464[][]https://twitter.com/Spotify/status/18136605986...2024-07-17T19:42:37.000Z44484
41813651095455162566DJ is now available to use in Spanish. Search ...2413136[][]https://twitter.com/Spotify/status/18136510954...2024-07-17T19:04:51.000Z108677
51813636045529092378Munchkins, get in formation. Y2K! arrives July...1115108[][]https://twitter.com/Spotify/status/18136360455...2024-07-17T18:05:03.000Z57946
61813287485922853263Crush’s Spotify follower count just went up by...4744380[][]https://twitter.com/Spotify/status/18132874859...2024-07-16T19:00:00.000Z109026
71813260012132966871https://t.co/mfcYjTY39D1014179[][]https://twitter.com/Spotify/status/18132600121...2024-07-16T17:10:50.000Z75601
81813259964745719939It’s the best time of the week (when the top c...19715544949[][]https://twitter.com/Spotify/status/18132599647...2024-07-16T17:10:38.000Z268949
91813246671054381297Now Presenting The Gold Standard, an exhibit i...6074363[][]https://twitter.com/Spotify/status/18132466710...2024-07-16T16:17:49.000Z124731
101813196890344731043You’re building the ultimate summer playlist w...9871701336[][]https://twitter.com/Spotify/status/18131968903...2024-07-16T13:00:00.000Z446499
111812945278628209138Which summer mix matches your current mood? ☀️...5219192[][]https://twitter.com/Spotify/status/18129452786...2024-07-15T20:20:11.000Z105070
121812897635453530347Spotify Presents The Gold Standard Exhibition,...1513137[][]https://twitter.com/Spotify/status/18128976354...2024-07-15T17:10:52.000Z132064
131812894069804183850Your last played artist is your new best frien...15151302166[][]https://twitter.com/Spotify/status/18128940698...2024-07-15T16:56:42.000Z280762
141812603287210020926🏆2475726[][]https://twitter.com/Spotify/status/18126032872...2024-07-14T21:41:14.000Z213146
151811943008600023174Favorite song to listen to while you skate on ...19966619[SkateNoise][]https://twitter.com/Spotify/status/18119430086...2024-07-13T01:57:32.000Z149877
161811883240661352956Screenshot the playlist title that only makes ...5791041321[][]https://twitter.com/Spotify/status/18118832406...2024-07-12T22:00:02.000Z247966
171811871617678778458Moses Sumney takes us back to 1993 in the late...1318185[][]https://twitter.com/Spotify/status/18118716176...2024-07-12T21:13:51.000Z99306
181811838698197254366COUNTER//CULTURE - Vol 15 🤝 @CageTheElephant h...1514187[][{'id_str': '19341413', 'name': 'Cage The Elep...https://twitter.com/Spotify/status/18118386981...2024-07-12T19:03:02.000Z83954
191811837762028617876RT @SpotifyUSA: Welcome to Artificial Paradise...0600[][{'id_str': '213687893', 'name': 'Spotify USA'...https://twitter.com/Spotify/status/18118377620...2024-07-12T18:59:19.000Z93429
\n", + "
" + ], + "text/plain": [ + " ID TEXT \\\n", + "0 1813743054395998316 🚨New Childish Gambino on Friday. Check out the... \n", + "1 1813740475398164714 Please don’t ask “what’s new?” Just listen to ... \n", + "2 1813690603894677650 we've been blessed once again \n", + "3 1813660598695268471 Spanish DJ is available for Premium users only... \n", + "4 1813651095455162566 DJ is now available to use in Spanish. Search ... \n", + "5 1813636045529092378 Munchkins, get in formation. Y2K! arrives July... \n", + "6 1813287485922853263 Crush’s Spotify follower count just went up by... \n", + "7 1813260012132966871 https://t.co/mfcYjTY39D \n", + "8 1813259964745719939 It’s the best time of the week (when the top c... \n", + "9 1813246671054381297 Now Presenting The Gold Standard, an exhibit i... \n", + "10 1813196890344731043 You’re building the ultimate summer playlist w... \n", + "11 1812945278628209138 Which summer mix matches your current mood? ☀️... \n", + "12 1812897635453530347 Spotify Presents The Gold Standard Exhibition,... \n", + "13 1812894069804183850 Your last played artist is your new best frien... \n", + "14 1812603287210020926 🏆 \n", + "15 1811943008600023174 Favorite song to listen to while you skate on ... \n", + "16 1811883240661352956 Screenshot the playlist title that only makes ... \n", + "17 1811871617678778458 Moses Sumney takes us back to 1993 in the late... \n", + "18 1811838698197254366 COUNTER//CULTURE - Vol 15 🤝 @CageTheElephant h... \n", + "19 1811837762028617876 RT @SpotifyUSA: Welcome to Artificial Paradise... 
\n", + "\n", + " REPLY_COUNT RETWEET_COUNT FAVORITES_COUNT HASHTAGS \\\n", + "0 17 47 304 [] \n", + "1 30 47 274 [] \n", + "2 31 140 628 [] \n", + "3 4 4 64 [] \n", + "4 24 13 136 [] \n", + "5 11 15 108 [] \n", + "6 47 44 380 [] \n", + "7 10 14 179 [] \n", + "8 197 1554 4949 [] \n", + "9 60 74 363 [] \n", + "10 987 170 1336 [] \n", + "11 52 19 192 [] \n", + "12 15 13 137 [] \n", + "13 1515 130 2166 [] \n", + "14 24 75 726 [] \n", + "15 199 66 619 [SkateNoise] \n", + "16 579 104 1321 [] \n", + "17 13 18 185 [] \n", + "18 15 14 187 [] \n", + "19 0 60 0 [] \n", + "\n", + " USER_MENTIONS \\\n", + "0 [] \n", + "1 [] \n", + "2 [] \n", + "3 [] \n", + "4 [] \n", + "5 [] \n", + "6 [] \n", + "7 [] \n", + "8 [] \n", + "9 [] \n", + "10 [] \n", + "11 [] \n", + "12 [] \n", + "13 [] \n", + "14 [] \n", + "15 [] \n", + "16 [] \n", + "17 [] \n", + "18 [{'id_str': '19341413', 'name': 'Cage The Elep... \n", + "19 [{'id_str': '213687893', 'name': 'Spotify USA'... \n", + "\n", + " URL \\\n", + "0 https://twitter.com/Spotify/status/18137430543... \n", + "1 https://twitter.com/Spotify/status/18137404753... \n", + "2 https://twitter.com/Spotify/status/18136906038... \n", + "3 https://twitter.com/Spotify/status/18136605986... \n", + "4 https://twitter.com/Spotify/status/18136510954... \n", + "5 https://twitter.com/Spotify/status/18136360455... \n", + "6 https://twitter.com/Spotify/status/18132874859... \n", + "7 https://twitter.com/Spotify/status/18132600121... \n", + "8 https://twitter.com/Spotify/status/18132599647... \n", + "9 https://twitter.com/Spotify/status/18132466710... \n", + "10 https://twitter.com/Spotify/status/18131968903... \n", + "11 https://twitter.com/Spotify/status/18129452786... \n", + "12 https://twitter.com/Spotify/status/18128976354... \n", + "13 https://twitter.com/Spotify/status/18128940698... \n", + "14 https://twitter.com/Spotify/status/18126032872... \n", + "15 https://twitter.com/Spotify/status/18119430086... 
\n", + "16 https://twitter.com/Spotify/status/18118832406... \n", + "17 https://twitter.com/Spotify/status/18118716176... \n", + "18 https://twitter.com/Spotify/status/18118386981... \n", + "19 https://twitter.com/Spotify/status/18118377620... \n", + "\n", + " CREATED_AT VIEW_COUNT \n", + "0 2024-07-18T01:10:16.000Z 67334 \n", + "1 2024-07-18T01:00:01.000Z 57701 \n", + "2 2024-07-17T21:41:51.000Z 112142 \n", + "3 2024-07-17T19:42:37.000Z 44484 \n", + "4 2024-07-17T19:04:51.000Z 108677 \n", + "5 2024-07-17T18:05:03.000Z 57946 \n", + "6 2024-07-16T19:00:00.000Z 109026 \n", + "7 2024-07-16T17:10:50.000Z 75601 \n", + "8 2024-07-16T17:10:38.000Z 268949 \n", + "9 2024-07-16T16:17:49.000Z 124731 \n", + "10 2024-07-16T13:00:00.000Z 446499 \n", + "11 2024-07-15T20:20:11.000Z 105070 \n", + "12 2024-07-15T17:10:52.000Z 132064 \n", + "13 2024-07-15T16:56:42.000Z 280762 \n", + "14 2024-07-14T21:41:14.000Z 213146 \n", + "15 2024-07-13T01:57:32.000Z 149877 \n", + "16 2024-07-12T22:00:02.000Z 247966 \n", + "17 2024-07-12T21:13:51.000Z 99306 \n", + "18 2024-07-12T19:03:02.000Z 83954 \n", + "19 2024-07-12T18:59:19.000Z 93429 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "\n", + "for tweet in dataset_items:\n", + " data_tweet = get_tweets(\n", + " tweet[\"id\"],\n", + " tweet[\"full_text\"],\n", + " tweet[\"reply_count\"],\n", + " tweet[\"retweet_count\"],\n", + " tweet[\"favorite_count\"],\n", + " tweet[\"hashtags\"],\n", + " tweet[\"user_mentions\"],\n", + " tweet[\"url\"],\n", + " tweet[\"created_at\"],\n", + " tweet[\"view_count\"]\n", + " )\n", + " data.append(data_tweet)\n", + " \n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "a4a53832-0571-4cbb-980f-9fcb32309a56", + "metadata": {}, + "source": [ + "### Save to csv file" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "50e4be8c-6e69-492e-bf62-2174f1ad0111", + "metadata": { + 
"execution": { + "iopub.execute_input": "2024-07-18T12:11:53.635934Z", + "iopub.status.busy": "2024-07-18T12:11:53.635684Z", + "iopub.status.idle": "2024-07-18T12:11:53.665887Z", + "shell.execute_reply": "2024-07-18T12:11:53.665267Z", + "shell.execute_reply.started": "2024-07-18T12:11:53.635909Z" + } + }, + "outputs": [], + "source": [ + "df.to_csv(OUTPUT_CSV, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38751e48-3b9f-487a-bbd2-7f114d0457f8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "d50dc5991bf4fe05f69298b0b37857587e63f47d21cab0b07fbf0c4f6d4bf0b7", + "notebook_path": "Twitter/Twitter_Get_tweets_from_search.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}