Chapter.py — 30 lines (23 loc) · 978 Bytes
import scrapy
from scrapy.crawler import CrawlerProcess
class ChaptersSpider(scrapy.Spider):
    """Crawl DataCamp search results and record course and chapter titles.

    Starting from the DataCamp search page, follows every course link found
    there and appends each course's title plus its chapter titles to
    ``Chapters.txt``.
    """
    name = 'chapters'

    def start_requests(self):
        """Kick off the crawl at the DataCamp search page."""
        url = "https://www.datacamp.com/search"
        yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract course links from the search results and follow each one."""
        course_links = response.css("div.dc-global-search-result__content>a::attr(href)").getall()
        for link in course_links:
            yield response.follow(url=link, callback=self.parse1)

    def parse1(self, response):
        """Append the course title and its chapter titles to Chapters.txt.

        NOTE: the ``set()`` de-duplicates chapter titles but does not
        preserve their on-page order.
        """
        # .getall() replaces the deprecated .extract(), matching parse() above.
        course_titles = response.css("h1.header-hero__title::text").getall()
        chapter_titles = set(response.css("h4.chapter__title::text").getall())
        # Explicit utf-8 avoids UnicodeEncodeError on platforms whose default
        # encoding cannot represent non-ASCII title characters.
        with open("Chapters.txt", "a", encoding="utf-8") as f:
            for title in course_titles:
                f.write(title + ':\n')
            for chapter in chapter_titles:
                f.write(chapter)
            f.write('\n\n')
# Run the spider only when executed as a script, not when this module is
# imported (importing would otherwise start a crawl as a side effect).
if __name__ == "__main__":
    process = CrawlerProcess()
    process.crawl(ChaptersSpider)
    process.start()  # blocks until the crawl finishes