针对无限滚动网站的采集

模拟滚动

import time

from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Launch Chrome and load the target page.
# NOTE: `url` is not defined in this snippet; it must be set before this point.
browser = webdriver.Chrome()
browser.get(url)

# Poll for up to 10 seconds until the 'project-detail' element is displayed.
# The generic find_element(By.CLASS_NAME, ...) form is used because the
# find_element_by_* helpers were removed in Selenium 4.
wait = WebDriverWait(browser, 10)
wait.until(lambda dr: dr.find_element(By.CLASS_NAME, 'project-detail').is_displayed())

# Keep scrolling until the very bottom of the page is reached.
js1 = 'return document.body.scrollHeight'
js2 = 'window.scrollTo(0, document.body.scrollHeight)'
old_scroll_height = 0
while True:
    scroll_height = browser.execute_script(js1)
    # Stop once the page height no longer grows: no new content was loaded,
    # so the bottom has been reached. (Comparing with >= would never
    # terminate, because the height stays equal once nothing more loads.)
    if scroll_height <= old_scroll_height:
        break
    old_scroll_height = scroll_height
    browser.execute_script(js2)
    # Give the page a moment to load the next batch of content.
    time.sleep(1)

接下来就可以进行采集了