-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathonpage-seo-analysis.py
145 lines (117 loc) · 5.63 KB
/
onpage-seo-analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
import requests
from bs4 import BeautifulSoup
import json
import subprocess
from urllib.parse import urlparse
# Function to check if a URL is accessible
def check_url(url):
    """Report whether a GET request to *url* answers with HTTP 200.

    The request uses a 5-second timeout. Any
    ``requests.exceptions.RequestException`` raised while performing it is
    swallowed and reported as ``False`` instead of propagating.
    """
    try:
        status = requests.get(url, timeout=5).status_code
    except requests.exceptions.RequestException:
        return False
    return status == 200
# Function to fetch the HTML content of the page
def get_html_content(url):
    """Download *url* and return the response body as text.

    The request uses a 5-second timeout. The status code is not checked and
    request errors are not caught here, so they propagate to the caller.
    """
    return requests.get(url, timeout=5).text
# SEO Element Analysis function
def analyze_seo_elements(url):
    """Extract basic on-page SEO signals from the page at *url*.

    The page is fetched with ``get_html_content`` and parsed with
    BeautifulSoup's ``lxml`` parser.

    Returns:
        A dict with these keys:

        * ``title`` -- text of the ``<title>`` tag, or a placeholder message.
        * ``meta_description`` -- ``content`` of the description meta tag,
          or a placeholder message.
        * ``h1`` -- text of the first ``<h1>``, or a placeholder message.
        * ``h2`` -- list with the text of every ``<h2>``, or a one-item
          placeholder list.
        * ``images_without_alt`` -- list of ``src`` values of images whose
          ``alt`` is missing or empty, or ``["None"]`` when there are none.
        * ``robots_txt`` / ``sitemap_xml`` -- booleans from ``check_url`` for
          ``/robots.txt`` and ``/sitemap.xml`` on the page's origin.
    """
    soup = BeautifulSoup(get_html_content(url), "lxml")

    # get_text() is used instead of .string: .string is None as soon as the
    # tag contains nested markup (e.g. <h1><span>..</span></h1>), which made
    # the old ``.string.strip()`` raise AttributeError.
    title = soup.title.get_text(strip=True) if soup.title else "No title tag found"

    # A description meta tag without a ``content`` attribute is treated the
    # same as a missing tag instead of raising KeyError.
    meta_tag = soup.find("meta", attrs={"name": "description"})
    meta_description_content = meta_tag.get("content") if meta_tag else None
    if meta_description_content is None:
        meta_description_content = "No meta description found"

    h1_tag = soup.find("h1")
    h1_content = h1_tag.get_text(strip=True) if h1_tag else "No H1 tag found"

    h2_contents = [h2.get_text(strip=True) for h2 in soup.find_all("h2")]
    if not h2_contents:
        h2_contents = ["No H2 tags found"]

    # Images may lack ``src`` (e.g. lazy-loaded via data attributes); use a
    # placeholder so the list stays joinable as strings.
    images_without_alt = [
        img.get("src", "(no src attribute)")
        for img in soup.find_all("img")
        if not img.get("alt")
    ]

    # Parse once and keep netloc (not hostname) so an explicit port survives
    # in the robots.txt / sitemap.xml probes.
    parsed = urlparse(url)
    site_root = f"{parsed.scheme}://{parsed.netloc}"

    return {
        "title": title,
        "meta_description": meta_description_content,
        "h1": h1_content,
        "h2": h2_contents,
        "images_without_alt": images_without_alt or ["None"],
        "robots_txt": check_url(f"{site_root}/robots.txt"),
        "sitemap_xml": check_url(f"{site_root}/sitemap.xml"),
    }
# Function to analyze page performance using Lighthouse
def analyze_lighthouse(url):
    """Run the Lighthouse CLI against *url* and return three category scores.

    Returns:
        A ``(performance, seo, mobile_friendly)`` tuple scaled to 0-100.
        ``mobile_friendly`` is read from the ``pwa`` category and is ``0``
        when that category or its score is absent. The tuple is
        ``(None, None, None)`` when the ``lighthouse`` executable is not
        found, cannot be run, exits non-zero, exceeds the 30-second timeout,
        or produces output without usable performance/SEO scores.
    """
    import shutil  # local import: only this function needs it

    failure = (None, None, None)

    # Resolve the executable explicitly so no shell is needed to locate it.
    executable = shutil.which("lighthouse")
    if executable is None:
        return failure

    # Argument list with the default shell=False: *url* comes from user
    # input, and interpolating it into a shell string allowed arbitrary
    # command injection (e.g. "http://x; rm -rf ~").
    command = [executable, url, "--quiet", "--output=json", "--output-path=stdout"]
    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=30)
    except (OSError, ValueError, subprocess.SubprocessError):
        # Launch failure, undecodable output / bad argument, or timeout.
        return failure
    if result.returncode != 0:
        return failure

    try:
        categories = json.loads(result.stdout)["categories"]
        performance_score = categories["performance"]["score"] * 100
        seo_score = categories["seo"]["score"] * 100
        # The pwa category is optional and its score may be null; fall back
        # to 0 rather than discarding the other two scores.
        pwa_score = (categories.get("pwa") or {}).get("score") or 0
    except (ValueError, KeyError, TypeError, AttributeError):
        # Invalid JSON or a report lacking the expected structure.
        return failure

    return performance_score, seo_score, pwa_score * 100
# Function to generate and save the SEO report
def _format_score(score):
    """Render a 0-100 score as a whole-number percentage, or "N/A" if falsy."""
    # Both None and 0 map to "N/A": analyze_lighthouse yields 0 for an absent
    # PWA category, so 0 cannot be told apart from "not measured" here.
    # round() removes float artefacts such as 56.99999999999999.
    return f"{round(score)}%" if score else "N/A"


def generate_seo_report(url):
    """Build the SEO report for *url* and save it as JSON text on disk.

    Returns:
        ``(report, report_filename)`` on success, where ``report`` is a dict
        of display-ready strings and ``report_filename`` is
        ``seo_report_<hostname>.txt`` written to the current working
        directory. When ``check_url`` fails for *url*, returns
        ``("Error: Unable to access the URL.", None)`` and writes nothing.
    """
    if not check_url(url):
        return "Error: Unable to access the URL.", None

    seo_elements = analyze_seo_elements(url)
    performance_score, seo_score, mobile_friendly_score = analyze_lighthouse(url)

    report = {
        "Title": seo_elements['title'],
        "Meta Description": seo_elements['meta_description'],
        "H1": seo_elements['h1'],
        "H2 Tags": ", ".join(seo_elements['h2']),
        "Images without ALT": ", ".join(seo_elements['images_without_alt']),
        "Robots.txt Available": "Yes" if seo_elements['robots_txt'] else "No",
        "Sitemap.xml Available": "Yes" if seo_elements['sitemap_xml'] else "No",
        "Performance Score": _format_score(performance_score),
        "SEO Score": _format_score(seo_score),
        "Mobile Friendly Score": _format_score(mobile_friendly_score),
    }

    report_filename = f"seo_report_{urlparse(url).hostname}.txt"
    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # pins the on-disk format independent of the platform default.
    with open(report_filename, "w", encoding="utf-8") as file:
        file.write(json.dumps(report, indent=4))
    return report, report_filename
# Streamlit UI code starts here
def run_streamlit_ui():
    """Render the page: URL input, Analyze button, results and report download.

    Nothing beyond the title, input and hint is drawn until the Analyze
    button is pressed. An empty URL or a non-dict result from
    ``generate_seo_report`` is shown as an error message.
    """
    st.title("🔍 Onpage SEO Analysis Tool")
    url_to_analyze = st.text_input("Enter the URL to analyze:", "")
    if not url_to_analyze:
        st.info("Please enter a URL to get started.")

    # Guard clauses: bail out early instead of nesting the happy path.
    if not st.button('Analyze'):
        return
    if not url_to_analyze:
        st.error("Please provide a valid URL.")
        return

    st.write(f"🔎 **Analyzing SEO elements for:** {url_to_analyze}")
    outcome, report_path = generate_seo_report(url_to_analyze)
    if not isinstance(outcome, dict):
        # On failure generate_seo_report returns an error string instead.
        st.error(outcome)
        return

    st.subheader("📊 SEO Analysis Results")
    # (label shown to the user, key in the report dict)
    element_rows = (
        ("Title Tag", "Title"),
        ("Meta Description", "Meta Description"),
        ("H1 Tag", "H1"),
        ("H2 Tags", "H2 Tags"),
        ("Images without ALT Text", "Images without ALT"),
        ("Robots.txt Available", "Robots.txt Available"),
        ("Sitemap.xml Available", "Sitemap.xml Available"),
    )
    for label, key in element_rows:
        st.write(f"**{label}:** `{outcome[key]}`")

    # Lighthouse scores go into a two-column table.
    st.subheader("📈 Lighthouse Scores")
    score_metrics = ["Performance Score", "SEO Score", "Mobile Friendly Score"]
    st.table({
        "Metric": score_metrics,
        "Value": [outcome[metric] for metric in score_metrics],
    })

    # Offer the report file written by generate_seo_report for download.
    with open(report_path, "r") as report_file:
        st.download_button(
            label="📥 Download SEO Report",
            data=report_file,
            file_name=report_path,
            mime="text/plain",
        )
# Run the Streamlit app
if __name__ == "__main__":
    # Build the UI only when this file is executed as the main script;
    # importing the module just defines the helper functions.
    run_streamlit_ui()