diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..999ee6d 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,14 +36,400 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"id": "492d06e3-92c7-4105-ac72-536db98d3244",
"metadata": {
"id": "492d06e3-92c7-4105-ac72-536db98d3244"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "import pandas as pd\n",
+ "\n",
+ "# Read the three raw customer files in one pass and bind them in file order.\n",
+ "df1, df2, df3 = map(\n",
+ "    pd.read_csv,\n",
+ "    (\n",
+ "        \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\",\n",
+ "        \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\",\n",
+ "        \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\",\n",
+ "    ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "fe258bb4-cbf3-4d12-a0cc-aaadbaafa392",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Trim and lower-case the header names of all three frames the same way.\n",
+ "for frame in (df1, df2, df3):\n",
+ "    frame.columns = frame.columns.str.strip().str.lower()\n",
+ "\n",
+ "# The first two frames label the state column \"st\"; give it the full name.\n",
+ "df1 = df1.rename(columns={\"st\": \"state\"})\n",
+ "df2 = df2.rename(columns={\"st\": \"state\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "dece8c2d-4b35-49cc-a2af-55ab4863c6ef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer lifetime value',\n",
+ " 'income', 'monthly premium auto', 'number of open complaints',\n",
+ " 'policy type', 'vehicle class', 'total claim amount'],\n",
+ " dtype='object')\n",
+ "Index(['customer', 'state', 'gender', 'education', 'customer lifetime value',\n",
+ " 'income', 'monthly premium auto', 'number of open complaints',\n",
+ " 'total claim amount', 'policy type', 'vehicle class'],\n",
+ " dtype='object')\n",
+ "Index(['customer', 'state', 'customer lifetime value', 'education', 'gender',\n",
+ " 'income', 'monthly premium auto', 'number of open complaints',\n",
+ " 'policy type', 'total claim amount', 'vehicle class'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Show each frame's header so the column sets can be compared before concatenating.\n",
+ "for frame in (df1, df2, df3):\n",
+ "    print(frame.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "4e21e48a-74a7-40f1-afd0-ad46fca2114f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(12074, 11)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.concat([df1, df2, df3], ignore_index=True)\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "841c003d-96fb-44b0-adeb-56d64b37a5f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns = [column.replace(\" \", \"_\") for column in df.columns]\n",
+ "\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "490500ef-f69f-4162-ab39-4fab87ce5c28",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2939"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.duplicated().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "99ab7bd2-b289-4198-a4c2-4e6c8b86f43f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\crist\\AppData\\Local\\Temp\\ipykernel_31220\\2421099744.py:7: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " df[\"gender\"].fillna(\"Unknown\", inplace=True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Canonical M/F code for every spelling variant this notebook handles.\n",
+ "gender_mapping = {\n",
+ "    \"M\": \"M\",\n",
+ "    \"F\": \"F\",\n",
+ "    \"Male\": \"M\",\n",
+ "    \"female\": \"F\",\n",
+ "    \"Femal\": \"F\",\n",
+ "}\n",
+ "# Values absent from the mapping (missing ones included) come out of .map as\n",
+ "# NaN and are labelled \"Unknown\". The result is assigned back to the column\n",
+ "# because fillna(inplace=True) on df[\"gender\"] operates on a temporary copy.\n",
+ "df[\"gender\"] = df[\"gender\"].map(gender_mapping).fillna(\"Unknown\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "9518002d-5775-416a-8545-329d0ff72f60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\crist\\AppData\\Local\\Temp\\ipykernel_31220\\2702104280.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " df[\"state\"].replace(\"Cali\", \"California\", inplace=True)\n",
+ "C:\\Users\\crist\\AppData\\Local\\Temp\\ipykernel_31220\\2702104280.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " df[\"state\"].replace(\"AZ\", \"Arizona\",inplace=True)\n",
+ "C:\\Users\\crist\\AppData\\Local\\Temp\\ipykernel_31220\\2702104280.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " df[\"state\"].replace(\"WA\", \"Washington\",inplace=True)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array(['Washington', 'Arizona', 'Nevada', 'California', 'Oregon', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Expand the abbreviated state names in a single pass. The result is assigned\n",
+ "# back to the column because replace(inplace=True) on df[\"state\"] operates on\n",
+ "# a temporary copy and stops taking effect in pandas 3.0.\n",
+ "df[\"state\"] = df[\"state\"].replace(\n",
+ "    {\"Cali\": \"California\", \"AZ\": \"Arizona\", \"WA\": \"Washington\"}\n",
+ ")\n",
+ "\n",
+ "df[\"state\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "0743cb66-ffe2-45d6-a25a-d22ddf2f0fea",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\crist\\AppData\\Local\\Temp\\ipykernel_31220\\2476333873.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " df[\"education\"].replace(\"Bachelors\", \"Bachelor\",inplace=True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Merge the \"Bachelors\" spelling into \"Bachelor\". Assigned back to the column\n",
+ "# because replace(inplace=True) on df[\"education\"] operates on a temporary copy.\n",
+ "df[\"education\"] = df[\"education\"].replace(\"Bachelors\", \"Bachelor\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "33ef5b50-3b5d-4b47-90be-7c8475bfe9a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# After the concat this column appears to mix '%'-suffixed text with plain\n",
+ "# numbers. The .str accessor returns NaN for every non-string element, so\n",
+ "# stripping '%' directly would blank out the numeric rows; casting to str\n",
+ "# first keeps them.\n",
+ "clv_as_text = df['customer_lifetime_value'].astype(str).str.replace('%', '', regex=False)\n",
+ "# Missing entries are now the text 'nan', which to_numeric parses back to NaN.\n",
+ "df['customer_lifetime_value'] = pd.to_numeric(clv_as_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "8891a27b-2703-4241-9e2c-bbebf8db9c20",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def convert_luxury(car):\n",
+ "    \"\"\"Collapse Sports Car, Luxury SUV and Luxury Car into \"Luxury\"; return any other value unchanged.\"\"\"\n",
+ "    return \"Luxury\" if car in (\"Sports Car\", \"Luxury SUV\", \"Luxury Car\") else car\n",
+ "\n",
+ "df[\"vehicle_class\"] = df[\"vehicle_class\"].map(convert_luxury)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "0880609b-be9a-4c44-bb93-1a65603b2767",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"customer_lifetime_value\"] = pd.to_numeric(df[\"customer_lifetime_value\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "66758554-04c4-4d4c-bbe9-855d9994bca3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 2937\n",
+ "state 2937\n",
+ "gender 0\n",
+ "education 2937\n",
+ "customer_lifetime_value 10014\n",
+ "income 2937\n",
+ "monthly_premium_auto 2937\n",
+ "number_of_open_complaints 2937\n",
+ "policy_type 2937\n",
+ "vehicle_class 2937\n",
+ "total_claim_amount 2937\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "2fb340f3-57d0-4b20-89e4-709af4145eb6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "gender 0\n",
+ "education 0\n",
+ "customer_lifetime_value 7077\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "policy_type 0\n",
+ "vehicle_class 0\n",
+ "total_claim_amount 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dropna(thresh=6, inplace=True) \n",
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "cefdc694-d386-427a-b8a2-9cf4a8663e36",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False 9134\n",
+ "True 3\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.duplicated().value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "3ffb74c0-fc6a-4fc9-83a5-a2f287aa0840",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False 9134\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.drop_duplicates(inplace=True)\n",
+ "df.duplicated().value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "1889052a-254f-486d-9ad7-a0ef96bb4755",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Renumber the rows 0..n-1 after the drops above. drop=True discards the old\n",
+ "# labels; without it reset_index inserts them as an extra \"index\" column.\n",
+ "df = df.reset_index(drop=True)"
]
},
{
@@ -72,14 +458,349 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
"metadata": {
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n",
+ "marketing_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "0a843116-b120-4a36-894e-ef640253a8e7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 10910 entries, 0 to 10909\n",
+ "Data columns (total 27 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 unnamed:_0 10910 non-null int64 \n",
+ " 1 customer 10910 non-null object \n",
+ " 2 state 10910 non-null object \n",
+ " 3 customer_lifetime_value 10910 non-null float64\n",
+ " 4 response 10910 non-null object \n",
+ " 5 coverage 10910 non-null object \n",
+ " 6 education 10910 non-null object \n",
+ " 7 effective_to_date 10910 non-null object \n",
+ " 8 employmentstatus 10910 non-null object \n",
+ " 9 gender 10910 non-null object \n",
+ " 10 income 10910 non-null int64 \n",
+ " 11 location_code 10910 non-null object \n",
+ " 12 marital_status 10910 non-null object \n",
+ " 13 monthly_premium_auto 10910 non-null int64 \n",
+ " 14 months_since_last_claim 10910 non-null float64\n",
+ " 15 months_since_policy_inception 10910 non-null int64 \n",
+ " 16 number_of_open_complaints 10910 non-null float64\n",
+ " 17 number_of_policies 10910 non-null int64 \n",
+ " 18 policy_type 10910 non-null object \n",
+ " 19 policy 10910 non-null object \n",
+ " 20 renew_offer_type 10910 non-null object \n",
+ " 21 sales_channel 10910 non-null object \n",
+ " 22 total_claim_amount 10910 non-null float64\n",
+ " 23 vehicle_class 10910 non-null object \n",
+ " 24 vehicle_size 10910 non-null object \n",
+ " 25 vehicle_type 10910 non-null object \n",
+ " 26 month 10910 non-null int64 \n",
+ "dtypes: float64(4), int64(6), object(17)\n",
+ "memory usage: 2.2+ MB\n"
+ ]
+ }
+ ],
"source": [
- "# Your code goes here"
+ "marketing_df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "1045a6ed-0173-4915-9a0c-8a54aba21ea3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unnamed:_0 0\n",
+ "customer 0\n",
+ "state 0\n",
+ "customer_lifetime_value 0\n",
+ "response 0\n",
+ "coverage 0\n",
+ "education 0\n",
+ "effective_to_date 0\n",
+ "employmentstatus 0\n",
+ "gender 0\n",
+ "income 0\n",
+ "location_code 0\n",
+ "marital_status 0\n",
+ "monthly_premium_auto 0\n",
+ "months_since_last_claim 0\n",
+ "months_since_policy_inception 0\n",
+ "number_of_open_complaints 0\n",
+ "number_of_policies 0\n",
+ "policy_type 0\n",
+ "policy 0\n",
+ "renew_offer_type 0\n",
+ "sales_channel 0\n",
+ "total_claim_amount 0\n",
+ "vehicle_class 0\n",
+ "vehicle_size 0\n",
+ "vehicle_type 0\n",
+ "month 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "e6e36267-91fe-495b-9811-063b944af8c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_df.duplicated().sum()"
]
},
{
@@ -93,6 +814,80 @@
"Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "a7cf5c17-410b-402d-856d-722e0224f4b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " sales_channel | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Agent | \n",
+ " 1810226.82 | \n",
+ "
\n",
+ " \n",
+ " Branch | \n",
+ " 1301204.00 | \n",
+ "
\n",
+ " \n",
+ " Call Center | \n",
+ " 926600.82 | \n",
+ "
\n",
+ " \n",
+ " Web | \n",
+ " 706600.04 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_claim_amount\n",
+ "sales_channel \n",
+ "Agent 1810226.82\n",
+ "Branch 1301204.00\n",
+ "Call Center 926600.82\n",
+ "Web 706600.04"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sum of total_claim_amount for each sales channel, displayed to 2 decimals.\n",
+ "pivot_df = marketing_df.pivot_table(\n",
+ "    values=\"total_claim_amount\",\n",
+ "    index=\"sales_channel\",\n",
+ "    aggfunc=\"sum\",\n",
+ ")\n",
+ "pivot_df.round(2)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +898,91 @@
"2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "0303333f-2263-44fc-b7fd-d5c832e99bae",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " education | \n",
+ " Bachelor | \n",
+ " College | \n",
+ " Doctor | \n",
+ " High School or Below | \n",
+ " Master | \n",
+ "
\n",
+ " \n",
+ " gender | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " F | \n",
+ " 7874.269478 | \n",
+ " 7748.823325 | \n",
+ " 7328.508916 | \n",
+ " 8675.220201 | \n",
+ " 8157.053154 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 7703.601675 | \n",
+ " 8052.459288 | \n",
+ " 7415.333638 | \n",
+ " 8149.687783 | \n",
+ " 8168.832659 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "education Bachelor College Doctor High School or Below \\\n",
+ "gender \n",
+ "F 7874.269478 7748.823325 7328.508916 8675.220201 \n",
+ "M 7703.601675 8052.459288 7415.333638 8149.687783 \n",
+ "\n",
+ "education Master \n",
+ "gender \n",
+ "F 8157.053154 \n",
+ "M 8168.832659 "
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "avg_customer_liftime_value = marketing_df.pivot_table(index=\"gender\",columns=\"education\", values=\"customer_lifetime_value\", aggfunc =\"mean\")\n",
+ "avg_customer_liftime_value"
+ ]
+ },
{
"cell_type": "markdown",
"id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -130,14 +1010,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 71,
"id": "3a069e0b-b400-470e-904d-d17582191be4",
"metadata": {
"id": "3a069e0b-b400-470e-904d-d17582191be4"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " month | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " number_of_open_complaints | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "month 1 2\n",
+ "number_of_open_complaints 5.0 5.0"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code goes here"
+ "highest_complaints = marketing_df.pivot_table(columns=\"month\", values=\"number_of_open_complaints\", aggfunc =\"max\")\n",
+ "highest_complaints"
]
}
],
@@ -160,7 +1086,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.12.4"
}
},
"nbformat": 4,