Skip to content

Commit f3826ed

Browse files
committed
Refactored into functions to allow scraping multiple pages (inspirezonetech#4)
1 parent 341c022 commit f3826ed

File tree

1 file changed

+57
-16
lines changed

1 file changed

+57
-16
lines changed

job-search-web-scraping.py

+57-16
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,76 @@
22
from selenium.webdriver.common.keys import Keys
33

44

5-
def indeed_job_search():
6-
7-
PATH_TO_DRIVER = './geckodriver'
5+
# Page that hosts the search form used to start a job search.
INDEED_URL: str = "https://www.indeed.com/worldwide"

# Location of the geckodriver executable handed to the Firefox driver.
PATH_TO_DRIVER: str = "./geckodriver"

# Value passed to the browser's implicitly_wait() calls.
WAIT_TIME: int = 5

# Number of result pages requested by the script entry point.
PAGES: int = 10
89

9-
browser = webdriver.Firefox(executable_path=PATH_TO_DRIVER)
1010

11-
browser.get('https://www.indeed.com/worldwide')
11+
def initial_search(search_term, driver_path):
    """Start Firefox, open Indeed and submit a search for ``search_term``.

    Args:
        search_term: Text typed into the search box (the element named "q").
        driver_path: Path to the geckodriver executable.

    Returns:
        The Firefox WebDriver instance after the search has been submitted.

    Raises:
        Exception: Any error raised while loading the page or submitting the
            search is re-raised after the browser has been shut down.
    """
    browser = webdriver.Firefox(executable_path=driver_path)
    try:
        browser.get(INDEED_URL)

        # implicitly_wait() configures how long element lookups keep
        # retrying for the rest of the session; it is not a pause, so one
        # call is enough and it does not need repeating after the search.
        browser.implicitly_wait(WAIT_TIME)

        search_bar = browser.find_element_by_name("q")
        search_bar.send_keys(search_term)
        search_bar.send_keys(Keys.ENTER)
    except Exception:
        # Do not leave an orphaned Firefox/geckodriver behind on failure.
        browser.quit()
        raise
    return browser
2024

21-
search_results = browser.find_elements_by_xpath('//h2/a')
2225

23-
file = open("job_search.txt", 'a')
26+
def initialize_file(search_term):
    """Open the results file for ``search_term`` in append mode.

    The file is named ``job_search_<term>.txt`` in the current working
    directory, with spaces in the term replaced by underscores. A newline is
    written first so a new run starts on its own line.

    Args:
        search_term: The search text used to build the file name.

    Returns:
        The open text file object; the caller is responsible for closing it.
    """
    file_name = f"job_search_{search_term.replace(' ', '_')}.txt"
    # Explicit UTF-8 so job titles with non-ASCII characters are written the
    # same way on every platform instead of depending on the locale default.
    file = open(file_name, "a", encoding="utf-8")
    file.write("\n")
    return file
30+
31+
32+
def write_job(job_element, file):
    """Append one job entry, formatted as ``<title> | link: <href>``, to ``file``.

    Args:
        job_element: Element exposing ``.text`` and ``get_attribute("href")``.
        file: Writable text file object receiving the line.
    """
    title = job_element.text
    link = job_element.get_attribute("href")
    file.write(f"{title} | link: {link} \n")
37+
2538

26-
for job_element in search_results:
39+
def get_jobs(browser):
    """Return the elements on the current page matching the XPath ``//h2/a``.

    Args:
        browser: Driver object providing ``find_elements_by_xpath``.

    Returns:
        Whatever ``browser.find_elements_by_xpath`` returns for that XPath.
    """
    job_links = browser.find_elements_by_xpath("//h2/a")
    return job_links
2741

28-
job_title = job_element.text
29-
job_link = job_element.get_attribute('href')
3042

31-
file.write("%s | link: %s \n" %(job_title, job_link))
43+
def close_popup_if_present(browser):
    """Click the popup's close button if one can be found; otherwise do nothing.

    This is deliberately best-effort: a missing popup or a failed click is
    ignored so that scraping can continue.

    Args:
        browser: Driver object providing ``find_element_by_class_name``.
    """
    try:
        popup_cross = browser.find_element_by_class_name("popover-x-button-close")
        popup_cross.click()
    except Exception:
        # Only ordinary errors are ignored. A bare ``except:`` would also
        # swallow KeyboardInterrupt/SystemExit and make Ctrl-C unreliable.
        pass
3249

50+
51+
def clean_up(browser, file):
    """Close the browser and the results file.

    Args:
        browser: Object whose ``close()`` method is called first.
        file: File object to close.

    Raises:
        Exception: Whatever ``browser.close()`` raises is propagated, but
            only after ``file`` has been closed.
    """
    try:
        browser.close()
    finally:
        # Close the file even when closing the browser fails, so buffered
        # results are flushed and the handle is not leaked.
        file.close()
54+
55+
56+
def indeed_job_search(search_term, pages=10, driver_path="./geckodriver"):
    """Search Indeed for ``search_term`` and save the job links page by page.

    Each result page's jobs are written to the results file, then the
    "Next" link is followed. Scraping stops once ``pages`` pages have been
    written, or earlier when no "Next" link can be found or clicked. The
    browser and the file are always closed before returning.

    Args:
        search_term: Text to search for; also used to name the results file.
        pages: Page number after which scraping stops. The check is an
            equality test, so a value below 1 never matches and scraping
            runs until there is no "Next" link.
        driver_path: Path to the geckodriver executable.
    """
    browser = initial_search(search_term, driver_path)
    file = None
    try:
        file = initialize_file(search_term)
        page_number = 1
        while True:
            for job_element in get_jobs(browser):
                write_job(job_element, file)

            if page_number == pages:
                break

            try:
                next_button = browser.find_element_by_xpath("//a[@aria-label='Next']")
                next_button.click()
            except Exception:
                # No "Next" link (last page reached) or it could not be
                # clicked: there is nothing further to scrape.
                break

            page_number += 1
            browser.implicitly_wait(WAIT_TIME)
            close_popup_if_present(browser)
    finally:
        # Single exit path: resources are released exactly once, whether the
        # loop finished normally or an error escaped.
        if file is None:
            browser.close()
        else:
            clean_up(browser, file)
74+
3475

3576
# Script entry point: run a sample search using the module-level settings.
if __name__ == "__main__":
    indeed_job_search(
        "machine learning",
        pages=PAGES,
        driver_path=PATH_TO_DRIVER,
    )

0 commit comments

Comments
 (0)