# main.py
import os

import requests
from bs4 import BeautifulSoup
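# Scraper for ApplyWeb course evaluations: getEvalURLs pages through the
# evaluated-course list for a term and school and writes data.csv, then
# processEachComments fetches each report page and pulls out the free-text
# comments with BeautifulSoup.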
def authHeaders(cookieBlob):
    # Browser-like headers plus the raw session cookie string.
    return {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
        "Cookie": cookieBlob,
    }
# Example arguments: termID=181, collegeID="CS"
def getEvalURLs(auth, termID, collegeID):
    # Start from a clean slate so old rows are not appended to.
    try:
        os.remove("data.csv")
    except FileNotFoundError:
        print("didn't remove data.csv")
    with open("data.csv", "a") as x:
        x.write("courseID,teacherID,teacherName,commentsURL\n")
    # Walk the paginated course list until a page comes back empty.
    go = True
    pge = 1
    while go:
        call = getEvalClassesInternal(auth, termID, collegeID, pge)
        if call is None:
            go = False
        pge += 1
def getEvalClassesInternal(auth, termID, collegeID, page):
    data = {
        "page": page,
        "rpp": 15,
        "termIds": [termID],
        "schoolCodes": [collegeID],
        "excludeTA": False,
        "sort": None,
    }
    print(data)
    r = requests.post(
        "https://www.applyweb.com/eval/new/reportbrowser/evaluatedCourses",
        json=data,
        headers=auth,
    )
    if r.status_code != 200:
        raise ValueError("non-200 status")
    if "json" not in r.headers["Content-Type"]:
        raise ValueError("response is not JSON")
    api = r.json()
    # An empty data array marks the end of pagination.
    if len(api["data"]) == 0:
        return None
    for course in api["data"]:
        courseID = course["courseId"]
        teacherID = course["instructorId"]
        teacher = course["instructorFirstName"] + "-" + course["instructorLastName"]
        url = f"https://www.applyweb.com/eval/new/showreport?c={courseID}&i={teacherID}&t={termID}&r=9&d=true"
        with open("data.csv", "a") as d:
            d.write(f"{courseID},{teacherID},{teacher},{url}\n")
    return "ok"
def processEachComments(csvFile, auth):
    # Assumes the CSV is in the format written by getEvalURLs:
    # courseID,teacherID,teacherName,commentsURL
    results = {}
    with open(csvFile, "r") as file:
        for line in file:
            l = line.strip()
            if not l.startswith("courseID"):  # skip the header row
                data = l.split(",")
                results[(data[0], data[1])] = scrapeCommentsPage(data[3], auth)
    return results
def scrapeCommentsPage(url, auth):
    print("scraping url " + url)
    auth["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    r = requests.get(url, headers=auth)
    soup = BeautifulSoup(r.text, "html.parser")
    # "Q: What were the strengths of this course and/or this instructor?"
    good_comments = soup.find("div", {"id": "11_3_402"})
    good_comments_table = good_comments.find("table")
    # "Q: What could the instructor do to make this course better?"
    bad_comments = soup.find("div", {"id": "11_3_403"})
    bad_comments_table = bad_comments.find("table")
    # Comment cells are the <td> elements that contain an anchor tag.
    good = []
    for td in good_comments_table.find_all("td"):
        if td.find("a") is not None:
            good.append(td.text.strip())
    bad = []
    for td in bad_comments_table.find_all("td"):
        if td.find("a") is not None:
            bad.append(td.text.strip())
    return good, bad
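# Usage sketch, tying the functions above together. Assumptions: you have a
# valid ApplyWeb session cookie (APPLYWEB_COOKIE is a hypothetical environment
# variable name, not part of the original script), and 181 / "CS" are a real
# term ID and school code, as the example comment above getEvalURLs suggests.
if __name__ == "__main__":
    cookie = os.environ.get("APPLYWEB_COOKIE", "")
    headers = authHeaders(cookie)
    getEvalURLs(headers, 181, "CS")  # writes data.csv
    comments = processEachComments("data.csv", headers)
    print(f"scraped comments for {len(comments)} course/instructor pairs")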