-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
101 lines (77 loc) · 3.6 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
pd.set_option('display.float_format', '{:.2f}'.format)
from functions import *
import streamlit as st
import plotly.express as px
# Page-wide settings (wide layout, browser-tab title and icon). Issued before
# any other Streamlit element in this script is created.
st.set_page_config(
    layout="wide",
    page_title="DS&ML SIG Houston ",
    page_icon="🖖",
)

# Banner container: holds the dashboard title and the short description that
# are rendered ahead of the controls and plots created further down.
header = st.container()
with header:
    st.title("Primary Rock Types Cluster Dashboard")
    st.subheader("Data Analysis for 1 well in the Williston Basin")
@st.cache
def get_data():
    """Load the well log and return the cleaned curves used for clustering.

    Reads ``well_log.las`` with ``lasio`` and converts it to a DataFrame, then:

    * keeps only the ``GR``, ``RHOZ`` and ``NPHI`` columns
      (presumably gamma ray, bulk density and neutron porosity - confirm
      against the LAS header);
    * keeps rows whose ``RHOZ`` lies in the closed interval [1.8, 3.2];
    * keeps rows whose index value is greater than 2000 (the rest of the
      script slices this index with the user-selected depths);
    * drops every row that still contains a missing value.

    The result is memoised by ``st.cache`` so the file is not re-read on each
    Streamlit rerun.

    Returns:
        pandas.DataFrame: the filtered log curves, indexed as in the LAS file.
    """
    curves = lasio.read("well_log.las").df()[['GR', 'RHOZ', 'NPHI']]

    # Apply the two row filters one after the other, exactly as separate steps.
    rhoz_ok = curves['RHOZ'].between(1.8, 3.2)
    curves = curves[rhoz_ok]
    below_cutoff = curves.index > 2000
    curves = curves[below_cutoff]

    return curves.dropna()
# Cleaned log curves (cached by get_data, so this is cheap on reruns).
d = get_data()

# Two-column layout: controls and the log plot on the left (col1),
# cluster scatter plots on the right (col2).
col1, col2 = st.columns(2)

with col1:
    # ---- Depth interval selection -------------------------------------
    st.subheader("Select the limits for your Well Data")
    basin = st.selectbox('Select a Formation',
                         ('None - User Selection', 'Bakken Petroleum System'))
    # NOTE: named min_depth/max_depth (not min/max) so the builtins are not
    # shadowed for the remainder of the script.
    if basin == 'Bakken Petroleum System':
        min_depth = 9208
        max_depth = 9267
        st.warning(f"You have selected a particular formation located between "
                   f"{min_depth} ft. and {max_depth} ft.")
    else:
        min_depth = st.number_input('Select the minimum depth', 2000, 10000, value=2000)
        max_depth = st.number_input('Select the maximum depth', 2000, 10752, value=10752)

    # Work on a copy so the cached frame returned by get_data is never touched.
    data = d.copy()
    data = data.loc[min_depth:max_depth, :]
    if data.empty:
        # Happens e.g. when the minimum depth is set above the maximum depth;
        # stop here with a readable message instead of failing inside the
        # scaling / clustering helpers.
        st.error("No log samples fall inside the selected depth interval - "
                 "Please select another depth range")
        st.stop()
    df_scale = scale_data(data)

    # ---- Clustering model selection -----------------------------------
    st.subheader("Clustering")
    al = st.selectbox('Select an Unsupervised Learning Model',
                      ['K-means', 'Gaussian Mixture',
                       'Agglomerative Clustering', 'MeanShift'])

    if al != 'MeanShift':
        # K-means, Gaussian Mixture and Agglomerative Clustering all take the
        # number of clusters directly from the user.
        k = st.slider('Select number of Clusters', 2, 10)
        # Fit once and reuse both outputs (element 0: clustered data,
        # element 1: silhouette score).
        fitted = creating_model(df_scale, data, al, k)
        cluster_model = fitted[0]
        ss = fitted[1]
        st.info(f"The silhouette score is {round(ss,2)}")
        n_clusters = k
    else:
        q = st.slider('Select a Quantile', min_value=0.1,
                      max_value=1.0, value=0.4, step=0.1, format="%.1f")
        st.info('MeanShift uses the Quantile to automatically detect the bandwith')
        # Fit once; element 1 is the number of clusters MeanShift found,
        # element 0 the clustered data. The same result feeds every plot below.
        fitted = creating_model_ms(df_scale, al, q, data)
        n_clusters = fitted[1]
        if n_clusters == 1 or n_clusters > 10:
            st.error(f"The value setup in the quantile variable yields a cluster number "
                     f" of {n_clusters}, you need a minimum of 2"
                     f" clusters and no more than 10 for this analysis - Please select another quantile value")
            st.stop()
        cluster_model = fitted[0]

with col2:
    st.subheader(f"3D Cluster Plot for {al} algorithm")
    st.plotly_chart(plot_model(cluster_model, n_clusters))

with col1:
    if al == "MeanShift":
        # Only MeanShift derives its cluster count, so report it next to the
        # controls (q exists only on this branch).
        st.info(f'The number of Clusters for the {al} algorithm is {n_clusters} for'
                f' a {q} quantile')
    st.subheader(f"Log plot with depths between {min_depth} ft - {max_depth} ft")
    st.pyplot(make_plot(cluster_model, min_depth, max_depth))

with col2:
    st.subheader("2D Cluster Plot")
    # Build the 2D figures once and show the first three, in order.
    figures_2d = Plot_2D(cluster_model, n_clusters)
    for position in range(3):
        st.plotly_chart(figures_2d[position])