import pandas as pd

# The three raw customer extracts, in the order they are bound to df1, df2, df3.
SOURCE_URLS = (
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv",
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv",
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv",
)

df1, df2, df3 = (pd.read_csv(url) for url in SOURCE_URLS)

# Trim and lower-case every header so the three frames can be lined up by name.
for frame in (df1, df2, df3):
    frame.columns = frame.columns.str.strip().str.lower()

# Only the first two files get the "st" -> "state" rename; df3 is left as loaded.
df1 = df1.rename(columns={"st": "state"})
df2 = df2.rename(columns={"st": "state"})
# Show the header of each file to confirm they expose the same column names
# (the column order differs between files; concat aligns by name).
for frame in (df1, df2, df3):
    print(frame.columns)

# Stack the three extracts into one frame with a fresh 0..n-1 index.
df = pd.concat([df1, df2, df3], ignore_index=True)
df.shape

# snake_case the headers: every space becomes an underscore.
df.columns = df.columns.str.replace(" ", "_", regex=False)
df.columns

# Number of rows that repeat an earlier row exactly.
df.duplicated().sum()
# Canonical gender codes. Spellings that are not listed here (and missing
# values) come out of .map() as NaN and are labelled "Unknown" below.
gender_mapping = {
    "M": "M",
    "F": "F",
    "Male": "M",
    "female": "F",
    "Femal": "F",
}

# Assign the result back to the column instead of calling
# df["gender"].fillna(..., inplace=True): that chained in-place call works on
# an intermediate Series, raises a FutureWarning today and, per that warning,
# will never update `df` in pandas 3.0.
df["gender"] = df["gender"].map(gender_mapping).fillna("Unknown")
# Abbreviated / informal state labels and the full names they stand for.
state_mapping = {
    "Cali": "California",
    "AZ": "Arizona",
    "WA": "Washington",
}

# One replace() with a mapping, assigned back to the column. The original
# three df["state"].replace(..., inplace=True) calls were chained in-place
# operations on an intermediate Series: they emit a FutureWarning and will
# stop modifying `df` in pandas 3.0.
df["state"] = df["state"].replace(state_mapping)

# Sanity check: only full state names (and NaN) should remain.
df["state"].unique()
# Vehicle classes that are collapsed into the single "Luxury" category.
LUXURY_CLASSES = frozenset({"Sports Car", "Luxury SUV", "Luxury Car"})


def strip_percent(value):
    """Remove '%' from string values; return anything else unchanged.

    The combined frame holds customer lifetime value both as text (with a
    trailing '%') and as real numbers. Using the `.str` accessor on such a
    mixed column turns every non-string entry into NaN, which silently
    discarded the numeric values; this helper leaves them intact.
    """
    if isinstance(value, str):
        return value.replace("%", "")
    return value


def convert_luxury(car):
    """Return "Luxury" for sports/luxury vehicle classes, else `car` as given."""
    return "Luxury" if car in LUXURY_CLASSES else car


# Assign back instead of the chained df["education"].replace(..., inplace=True),
# which warns today and will not modify `df` in pandas 3.0.
df["education"] = df["education"].replace("Bachelors", "Bachelor")

# Clean the text entries, keep numeric entries, then convert the whole column.
# pd.to_numeric still raises on any string that is not a number.
df["customer_lifetime_value"] = pd.to_numeric(
    df["customer_lifetime_value"].map(strip_percent)
)

df["vehicle_class"] = df["vehicle_class"].apply(convert_luxury)

# Missing values per column before dropping the empty rows.
df.isnull().sum()

# Keep rows with at least 6 non-null values. The empty rows already carry the
# "Unknown" gender placeholder, so dropna(how="all") would not remove them.
df = df.dropna(thresh=6)
df.isnull().sum()

df.duplicated().value_counts()

df = df.drop_duplicates()
df.duplicated().value_counts()

# drop=True discards the old row labels; without it reset_index() inserts them
# as an extra "index" column and changes the frame again on every re-run.
df = df.reset_index(drop=True)
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
# Pre-cleaned marketing dataset used for the pivot-table exercises.
MARKETING_URL = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv"

marketing_df = pd.read_csv(MARKETING_URL)
marketing_df.head()

# Column dtypes and non-null counts.
marketing_df.info()

# Missing values per column.
marketing_df.isna().sum()

# Number of fully duplicated rows.
marketing_df.duplicated().sum()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
# Sum of total_claim_amount for each sales channel.
pivot_df = pd.pivot_table(
    marketing_df,
    index="sales_channel",
    values="total_claim_amount",
    aggfunc="sum",
)

# Display rounded to two decimals; pivot_df itself keeps full precision.
pivot_df.round(2)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
# Mean customer lifetime value: one row per gender, one column per education level.
avg_customer_liftime_value = pd.pivot_table(
    marketing_df,
    index="gender",
    columns="education",
    values="customer_lifetime_value",
    aggfunc="mean",
)
avg_customer_liftime_value
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
# Largest number_of_open_complaints value seen in each month (months as columns).
highest_complaints = pd.pivot_table(
    marketing_df,
    columns="month",
    values="number_of_open_complaints",
    aggfunc="max",
)
highest_complaints