diff --git a/Project_2_Haoyue b/Project_2_Haoyue
deleted file mode 100644
index 4e646a1..0000000
--- a/Project_2_Haoyue
+++ /dev/null
@@ -1,93 +0,0 @@
-
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-
-# Data loading and cleaning
-airing = pd.read_csv('/Users/zhanghaoyue/Downloads/airing.csv')
-ads = pd.read_csv('/Users/zhanghaoyue/Downloads/ads.csv')
-airing['message'] = airing['message'].replace('mix', 'mixed')
-ads = ads[ads['air_count'] != 0]
-
-# Analyze airtime and reference count
-airing['start_time'] = pd.to_datetime(airing['start_time'])
-airing['end_time'] = pd.to_datetime(airing['end_time'])
-
-airing['airtime'] = (airing['end_time'] - airing['start_time']).dt.total_seconds()
-
-print(airing.groupby('wp_identifier')['airtime'].agg(lambda x: x.max() - x.min()).sort_values(ascending=False))  # airtime range per ad
-
-airtime_avg = airing.groupby('wp_identifier')[['airtime']].agg(['mean', 'sum']).reset_index()
-airtime_avg.columns = ['_'.join(col).strip('_') for col in airtime_avg.columns.to_flat_index()]  # flatten MultiIndex columns to airtime_mean / airtime_sum
-
-airtime_ref = pd.merge(airtime_avg, ads, how='left', on='wp_identifier')
-print(airtime_ref[['wp_identifier', 'airtime_mean', 'airtime_sum', 'reference_count']])
-
-
-airtime_ref.plot(kind='scatter', x='airtime_mean', y='reference_count')
-plt.xlabel('Airtime (seconds)', fontsize=14)
-plt.ylabel('Reference Count', fontsize=14)
-plt.title('Single Airing Duration and Reference Count', fontsize=16)
-plt.show()
-
-airtime_ref.plot(kind='scatter', x='airtime_sum', y='reference_count')
-plt.xlabel('Airtime (total seconds)', fontsize=14)
-plt.ylabel('Reference Count', fontsize=14)
-plt.title('Total Airing Time and Reference Count', fontsize=16)
-plt.show()
-
-# Pivot and summarize ad tone for each candidate
-airing['cand_list'] = airing['candidates'].str.split(', ')
-candidate_explode = airing.explode('cand_list', ignore_index=True)
-candidate_loc = candidate_explode.groupby('cand_list')[['location']].nunique()
-print(candidate_loc.sort_values(by='location'))
-
-candidate_tone = candidate_explode.groupby(['cand_list', 'message'])[['wp_identifier']].count().reset_index()
-candidate_tone_pivot = candidate_tone.pivot(index='cand_list', columns='message', values='wp_identifier')
-candidate_tone_pivot_perc = candidate_tone_pivot.div(candidate_tone_pivot.sum(axis=1), axis=0) * 100
-print(candidate_tone_pivot)
-print(candidate_tone_pivot_perc)
-
-# Relationship between tone and geographic reach
-loc_tone = pd.merge(candidate_tone_pivot_perc, candidate_loc, on='cand_list', how='left')
-loc_tone.plot(x='location', y='con', kind='scatter')
-plt.xlabel('Count of Cities', fontsize=14)
-plt.ylabel('Negative Message %', fontsize=14)
-plt.title('Geographic Reach and Negative Message %', fontsize=16)
-plt.show()
-
-cand_tone_top_10 = loc_tone.sort_values(by='location', ascending=False).head(10)
-
-# Getting subjects of candidates
-candidate_explode['subjects'] = candidate_explode['subjects'].str.split(', ')
-candidate_explode_content = candidate_explode.explode('subjects', ignore_index=True)
-candidate_content = candidate_explode_content.groupby(['cand_list', 'subjects'])[['wp_identifier']].count().reset_index()
-print(candidate_content.sort_values(by='wp_identifier', ascending=False))
-
-# Analysis of geographic reach and subjects
-loc_content = pd.merge(candidate_loc, candidate_content, on='cand_list', how='left')
-
-loc_content_count = loc_content.groupby('cand_list').agg({
-    'location': 'mean',    # one value per candidate, repeated across rows; mean carries it through
-    'subjects': 'nunique'  # count of distinct subjects per candidate
-})
-loc_content_count.plot(x='location', y='subjects', kind='scatter')
-plt.xlabel('Count of Cities', fontsize=14)
-plt.ylabel('Count of Subjects', fontsize=14)
-plt.show()
-
-hillary_data = loc_content[loc_content['cand_list'] == 'Hillary Clinton'].nlargest(10, 'wp_identifier')
-roy_data = loc_content[loc_content['cand_list'] == 'Roy Cooper'].nlargest(10, 'wp_identifier')
-
-
-plt.barh(hillary_data['subjects'], hillary_data['wp_identifier'], color='skyblue')
-plt.xlabel('Airing Count', fontsize=14)
-plt.ylabel('Subjects', fontsize=14)
-plt.title('Hillary Clinton', fontsize=14)
-plt.show()
-
-plt.barh(roy_data['subjects'], roy_data['wp_identifier'], color='skyblue')
-plt.xlabel('Airing Count', fontsize=14)
-plt.ylabel('Subjects', fontsize=14)
-plt.title('Roy Cooper', fontsize=14)
-plt.show()
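
Note: the split/explode/pivot tone summary is the least obvious step in the deleted script, so below is a minimal standalone sketch of just that step. The three-row frame is hypothetical toy data; only the column names (wp_identifier, candidates, message) are taken from the script, and the tone percentages it prints are illustrative only.

# Hypothetical toy frame; only the column names come from the deleted script.
import pandas as pd

toy = pd.DataFrame({
    'wp_identifier': ['ad1', 'ad2', 'ad3'],
    'candidates': ['Hillary Clinton, Roy Cooper', 'Hillary Clinton', 'Roy Cooper'],
    'message': ['pro', 'con', 'con'],
})

# One row per (ad, candidate) pair, mirroring airing.explode('cand_list').
toy['cand_list'] = toy['candidates'].str.split(', ')
exploded = toy.explode('cand_list', ignore_index=True)

# Count ads per candidate and tone, then pivot tones into columns.
tone_counts = exploded.groupby(['cand_list', 'message'])['wp_identifier'].count().reset_index()
tone_pivot = tone_counts.pivot(index='cand_list', columns='message',
                               values='wp_identifier').fillna(0)

# Row-normalize to percentages so candidates with different ad volumes are comparable.
tone_pct = tone_pivot.div(tone_pivot.sum(axis=1), axis=0) * 100
print(tone_pct)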