Skip to content

Commit

Permalink
streamlit update
Browse files Browse the repository at this point in the history
  • Loading branch information
oyounis19 committed Jun 3, 2024
1 parent 073c898 commit 6f44cab
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 43 deletions.
8 changes: 4 additions & 4 deletions app/model/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,7 @@
" 'genres': ['Children', 'Comedy', 'Animation'],\n",
"}\n",
"\n",
"user_id, user, weights = Utils.preprocess_user(\n",
"user_id, user, weights, _ = Utils.preprocess_user(\n",
" user=new_user,\n",
" num_items=items_data_og.shape[0],\n",
" users=users_data.drop_duplicates(inplace=False).values,\n",
Expand Down Expand Up @@ -1364,7 +1364,7 @@
"}\n",
"\n",
"# preprocess the old user\n",
"user_id, user, _ = Utils.preprocess_user(\n",
"user_id, user, _, _ = Utils.preprocess_user(\n",
" user=old_user,\n",
" num_items=items_data_og.shape[0],\n",
" users=users_data.drop_duplicates(inplace=False).values,\n",
Expand Down Expand Up @@ -2282,7 +2282,7 @@
" 'genres': ['Children', 'Comedy', 'Animation'],\n",
"}\n",
"\n",
"user_id, user, weights = Utils.preprocess_user(\n",
"user_id, user, weights, _ = Utils.preprocess_user(\n",
" user=new_user,\n",
" num_items=items_data_og.shape[0],\n",
" users=users_data.drop_duplicates(inplace=False).values,\n",
Expand Down Expand Up @@ -2533,7 +2533,7 @@
"}\n",
"\n",
"# preprocess the old user\n",
"user_id, user, _ = Utils.preprocess_user(\n",
"user_id, user, _, _ = Utils.preprocess_user(\n",
" user=old_user,\n",
" num_items=items_data_og.shape[0],\n",
" users=users_data.drop_duplicates(inplace=False).values,\n",
Expand Down
73 changes: 62 additions & 11 deletions app/model/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,16 @@ def move_column(df: pd.DataFrame, col: list[str], pos: int) -> pd.DataFrame:
return df[cols]

@staticmethod
def preprocess_user(user: dict, num_items: int, users: np.ndarray, weights: list[np.ndarray]=None, topk: int=3, verbose=False) -> tuple[torch.IntTensor, torch.FloatTensor, Union[list[np.ndarray], None]]:
def preprocess_user(user: dict, num_items: int, users: np.ndarray, weights: list[np.ndarray]=None, topk: int=3, verbose=False) -> tuple[torch.IntTensor, torch.FloatTensor, Union[list[np.ndarray], None], Union[np.ndarray, None]]:
'''
Preprocesses user data for model input
'''
if 'age' not in user or user['age'] == None:
if 'age' not in user or not user['age']:
user_ = users[user['id'] - 1]
user_ = np.insert(user_, 0, user['id'])
print(f"User id: {user['id']} top {topk} genres: {np.array(genre)[np.argsort(user_[-18:])[-topk:][::-1]]}") if verbose else None
user_ = np.tile(user_, (num_items, 1))
return torch.IntTensor(user_[:, 0]), torch.FloatTensor(user_[:, 1:]), None
return torch.IntTensor(user_[:, 0]), torch.FloatTensor(user_[:, 1:]), None, np.array(genre)[np.argsort(user_[0, -18:])[-topk:][::-1]]

user_ = np.zeros(31, dtype=float)

Expand Down Expand Up @@ -190,7 +190,7 @@ def preprocess_user(user: dict, num_items: int, users: np.ndarray, weights: list
mf_weights = weights[1][similar_users_ids].mean(axis=0)

user_ = np.tile(user_, (num_items, 1))
return torch.IntTensor(user_[:, 0]), torch.FloatTensor(user_[:, 1:]), [mlp_weights, mf_weights]
return torch.IntTensor(user_[:, 0]), torch.FloatTensor(user_[:, 1:]), [mlp_weights, mf_weights], None

@staticmethod
def preprocess_items(items: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -289,20 +289,19 @@ def ndcg_hit_ratio(y_preds, X_test_users, y_true, k=10) -> tuple[float]:
return ndcg, hit_ratio

@staticmethod
def pipeline(request: any, model: nn.Module, weights: list[np.ndarray], users: np.ndarray, movies: pd.DataFrame, movies_og: pd.DataFrame, ratings: pd.DataFrame, mode: str):
def pipeline(request: any, model: nn.Module, weights: list[np.ndarray], users: np.ndarray, movies: pd.DataFrame, movies_og: pd.DataFrame, ratings: pd.DataFrame, mode: str) -> tuple[list[dict], Union[np.ndarray, None]]:
'''
Pipeline for inference
'''
num_items = 200 # Number of items to retrieve
num_items = 300 # Number of items to retrieve
request = request if isinstance(request, dict) else request.model_dump()

# preprocess the old user
user_id, user, weights = Utils.preprocess_user(
user_id, user, weights, top_n_genres = Utils.preprocess_user(
user=request,
num_items=num_items,
users=users,
weights=weights,
verbose=True
weights=weights
)
user_id, user = user_id.to(model.device), user.to(model.device)

Expand Down Expand Up @@ -331,7 +330,7 @@ def pipeline(request: any, model: nn.Module, weights: list[np.ndarray], users: n

movies_retrieved = movies_og[movies_og['movie_id'].isin(movie_ids.cpu().numpy())].sort_values(by='movie_id', key=lambda x: pd.Categorical(x, categories=movie_ids.cpu().numpy(), ordered=True))

return Utils.order(y_pred, movies_retrieved, mode, top_k=request['top_k']).to_dict(orient='records')
return Utils.order(y_pred, movies_retrieved, mode, top_k=request['top_k']).to_dict(orient='records'), top_n_genres

@staticmethod
def retrieve(movies: pd.DataFrame, user: np.ndarray, k: int, num_genres: int=3, random_state: int=42) -> pd.DataFrame:
Expand Down Expand Up @@ -502,4 +501,56 @@ def save_checkpoint(self, val_loss, model: nn.Module) -> None:
'ratings': ['user_id', 'movie_id', 'rating', 'timestamp'],
'users': ['user_id', 'gender', 'age', 'occupation', 'zip_code'],
'items': ['movie_id', 'title', 'genre'],
}
}

# Raw <style> block shared with the Streamlit front end (imported there as `css`).
# It defines the recommendation-card layout classes (.card-container, .card,
# .card-title, .card-text) and a .footer class pinned to the bottom of the viewport.
# The string is emitted verbatim as HTML, so edit it as CSS, not as Python.
css = """
<style>
.card-container {
display: flex;
flex-direction: row;
justify-content: center;
align-items: start;
gap: 20px;
flex-wrap: wrap;
margin: 20px 0;
}
.card {
width: 100%;
max-width: 300px;
border: 1px solid #ddd;
border-radius: 8px;
padding: 16px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
background-color: #eee;
transition: transform 0.2s ease-in-out;
}
.card:hover {
transform: scale(1.05);
}
.card-title {
font-size: 1.25em;
margin-bottom: 8px;
color: #333;
}
.card-text {
font-size: 1em;
margin-bottom: 8px;
color: #555;
}
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: rgb(45, 38, 48);
color: #fff;
text-align: center;
padding: 10px;
}
</style>
"""
105 changes: 77 additions & 28 deletions streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,30 @@
import streamlit as st

from utils.model import NCF, __model_version__
from utils.utils import Utils, cols_dict, occupation, genre
from utils.utils import Utils, cols_dict, occupation, genre, css

# Load data
users_exp = pd.read_csv(abs_path + 'data/users_exp.csv').values
users_imp = pd.read_csv(abs_path + 'data/users_imp.csv').values
movies = pd.read_csv(abs_path + 'data/movies.csv')
movies_og = pd.read_csv(abs_path + 'data/movies.dat', sep='::', names=cols_dict['items'], encoding='latin-1', engine='python')
ratings = pd.read_csv(abs_path + 'data/ratings.dat', sep='::', names=cols_dict['ratings'], engine='python')
@st.cache_data
def load_data():
users_exp = pd.read_csv(abs_path + 'data/users_exp.csv').values
users_imp = pd.read_csv(abs_path + 'data/users_imp.csv').values
movies = pd.read_csv(abs_path + 'data/movies.csv')
movies_og = pd.read_csv(abs_path + 'data/movies.dat', sep='::', names=cols_dict['items'], encoding='latin-1', engine='python')
ratings = pd.read_csv(abs_path + 'data/ratings.dat', sep='::', names=cols_dict['ratings'], engine='python')

model_exp = NCF('explicit', gpu=False)
model_exp.load_weights(abs_path + 'weights/explicit.pth', eval=True)
return users_exp, users_imp, movies, movies_og, ratings

model_imp = NCF('implicit', gpu=False)
model_imp.load_weights(abs_path + 'weights/implicit.pth', eval=True)
@st.cache_resource
def load_models():
    '''
    Build the explicit and implicit NCF models and load their saved weights.

    Wrapped in st.cache_resource so Streamlit can hand back the same model
    objects instead of rebuilding them each time the script reruns.

    Returns an (explicit_model, implicit_model) tuple, in that order.
    '''
    def _restore(mode, weights_file):
        # Both variants are built the same way: gpu=False, weights loaded with eval=True
        net = NCF(mode, gpu=False)
        net.load_weights(abs_path + weights_file, eval=True)
        return net

    explicit_net = _restore('explicit', 'weights/explicit.pth')
    implicit_net = _restore('implicit', 'weights/implicit.pth')
    return explicit_net, implicit_net

users_exp, users_imp, movies, movies_og, ratings = load_data()
model_exp, model_imp = load_models()

# GUI
st.title('NCF Recommender System')
Expand All @@ -28,33 +38,36 @@

model_type = st.radio('Select model type', ['Implicit', 'Explicit'])

new_user = st.checkbox('New user? (no user ID needed)', value=True)

# Input number of recommendations
top_k = st.number_input('Number of recommendations', min_value=1, max_value=20, value=10, step=1)

# Input user ID
user_id = st.number_input('User ID (old user <= 6040)', min_value=1, max_value=10_000, value=1, step=1)
# User ID input
user_id = st.number_input('User ID (MAX: 6040)', min_value=1, max_value=6040, value=3000, step=1, disabled=new_user)

# New user Input user data
st.write('If new user:')
# New user inputs
user_gender = st.selectbox('Gender', ['M', 'F'], disabled=not new_user)
user_age = st.number_input('Age', min_value=1, max_value=99, value=25, step=1, disabled=not new_user)
user_occupation = st.selectbox('Job', occupation, disabled=not new_user, help='Select your job', index=17)
user_genres = st.multiselect('Favourite genres', genre, disabled=not new_user, help='Select at least 3 genres', default=['Comedy', 'Children', 'Animation'], max_selections=5)

user_gender = st.selectbox('Gender (optional)', ['M', 'F'])
user_age = st.number_input('Age (optional)', min_value=1, max_value=99, value=25, step=1)
user_occupation = st.selectbox('Job (optional)', occupation)
user_genres = st.multiselect('Favourite genres (optional)', genre)
# Get recommendations button
recommend = st.button('Get Recommendations')

# create the user dict
user = {
'top_k': top_k,
'id': user_id,
'age': user_age,
'gender': user_gender,
'occupation': user_occupation,
'genres': user_genres
'id': user_id if not new_user else 9000,
'age': user_age if new_user else None,
'gender': user_gender if new_user else None,
'occupation': user_occupation if new_user else None,
'genres': user_genres if new_user else None
}

# Get recommendations
if st.button('Get Recommendations'):
pred_movies = Utils.pipeline(
if recommend and 5 >= len(user_genres) >= 3:
pred_movies, top_n_genres = Utils.pipeline(
request=user,
model=model_exp if model_type == 'Explicit' else model_imp,
users=users_exp if model_type == 'Explicit' else users_imp,
Expand All @@ -66,6 +79,42 @@
)

# Display recommendations
st.write(f'Top {top_k} recommendations for user {user_id}:')
st.write(pred_movies, unsafe_allow_html=True)
st.write(f'Top {top_k} recommendations for user {user_id}:' if not new_user else f'Top {top_k} recommendations for new user:')
# st.write(pred_movies, unsafe_allow_html=True)
if not new_user:
st.write(f'Top genres user with ID {user_id} like: {", ".join(top_n_genres)}')

pred = 'rating' if model_type == 'Explicit' else 'score'

html = """<div class="card-container">"""
for i, movie in enumerate(pred_movies):
# create the movie card
html += f"""<div class="card">
<h5 class="card-title">{i + 1}</h5>
<p class="card-text">Title: <b style="font-size: 1.2em;">{movie['title']}</b></p>
<p class="card-text">Genres: {movie['genre']}</p>
<p class="card-text">Predicted {pred}: {movie['predicted_score'] if model_type == 'Implicit' else movie['predicted_rating']}</p>
</div>"""

st.markdown(
html + '</div>',
unsafe_allow_html=True
)

elif recommend and len(user_genres) < 3:
st.write('Please select 3 to 5 genres')

# Inject the shared stylesheet (`css`, imported from utils.utils) into the page;
# unsafe_allow_html is required so the <style> tag is rendered rather than escaped
st.markdown(
css,
unsafe_allow_html=True
)

# Fixed footer: relies on the .footer rule from the stylesheet injected just above
st.markdown(
"""
<div class="footer">
Made with ❤️ by <a href="https://www.linkedin.com/in/omar-younis-3b57a8230">Omar Younis</a>
</div>
""",
unsafe_allow_html=True
)

0 comments on commit 6f44cab

Please sign in to comment.