1、需要设置代理
2、滚屏加载更多歌曲
3、解析歌曲完整下载路径
import json
import os
import time

import requests
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Every SoundCloud request (API calls, downloads, and the browser itself)
# is routed through this proxy.
PROXY_URL = "http://192.168.1.88:1088"
proxies = {
    "http": PROXY_URL,
    "https": PROXY_URL,
}

CLIENT_ID = "LvWovRaJZlWCHql0bISuum8Bd2KX79mb"
RESOLVE_API = "https://api.soundcloud.com/resolve"
STREAMS_API = "https://api.soundcloud.com/i1/tracks/{sound_id}/streams"

MUSIC_DIR = "music"
# Files at or below this size are reported as failed downloads.
MIN_VALID_SIZE = 1 * 1000 * 1000
# Seconds to wait on a stalled connection before giving up.
REQUEST_TIMEOUT = 30
CHUNK_SIZE = 64 * 1024


def music_download(url):
    """Download the file at *url* into the ``music/`` directory.

    The file name is the last path segment of *url* (query string removed).
    The outcome is reported on stdout: success when the saved file is larger
    than ``MIN_VALID_SIZE`` bytes, failure otherwise. Network and HTTP errors
    are reported as a failure instead of being raised.
    """
    file_name = url.split('?')[0].split('/')[-1]
    if not file_name:
        print(f"{file_name} download fail.")
        return

    os.makedirs(MUSIC_DIR, exist_ok=True)
    file_path = os.path.join(MUSIC_DIR, file_name)

    try:
        with requests.get(url, stream=True, proxies=proxies,
                          timeout=REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(file_path, 'wb') as f:
                # Stream to disk in chunks instead of buffering the whole
                # track in memory.
                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
    except requests.RequestException as exc:
        print(f"{file_name} download fail. ({exc})")
        return

    # Check the file that was actually written (inside MUSIC_DIR), not a file
    # of the same name in the current working directory.
    if os.path.exists(file_path) and os.path.getsize(file_path) > MIN_VALID_SIZE:
        print(f"{file_name} download success, file size: {os.path.getsize(file_path)/1000/1000}M")
    else:
        print(f"{file_name} download fail.")


def _get_json(api_url, params):
    """GET *api_url* through the proxy and return the decoded JSON body.

    Returns ``None`` (after printing the reason) when the request fails or
    the body is not a JSON object.
    """
    try:
        r = requests.get(api_url, params=params, proxies=proxies,
                         timeout=REQUEST_TIMEOUT)
        data = json.loads(r.text)
    except (requests.RequestException, ValueError) as exc:
        print(f"request to {api_url} failed: {exc}")
        return None
    if not isinstance(data, dict):
        print(f"unexpected response from {api_url}")
        return None
    return data


def music_index(url):
    """Resolve a track page *url* to its stream URL and download it.

    Calls the ``resolve`` endpoint to obtain the track id, then the
    ``streams`` endpoint to obtain ``http_mp3_128_url``, and hands that URL
    to ``music_download``. Any failure is printed and the track is skipped,
    so one bad track does not abort the whole run.
    """
    # Passing the track URL via ``params`` lets requests URL-encode it.
    json_r = _get_json(RESOLVE_API, {"url": url, "client_id": CLIENT_ID})
    if json_r is None:
        return

    sound_id = json_r.get('id')
    if sound_id is None:
        print(f"no track id resolved for {url}")
        return
    print(f"当前任务ID: {sound_id}")

    json_sound_r = _get_json(STREAMS_API.format(sound_id=sound_id),
                             {"client_id": CLIENT_ID})
    if json_sound_r is None:
        return

    music_download_url = json_sound_r.get('http_mp3_128_url')
    if music_download_url:
        music_download(music_download_url)
    else:
        print(f"no http_mp3_128_url for track {sound_id}")


def _scroll_to_bottom(browser, pause=1):
    """Scroll the page down until its height stops growing.

    After each scroll the function sleeps *pause* seconds so lazily loaded
    content can be appended, then compares the new document height with the
    previous one.
    """
    read_height = 'return document.body.scrollHeight'
    scroll_down = 'window.scrollTo(0, document.body.scrollHeight)'

    last_height = 0
    while True:
        height = browser.execute_script(read_height)
        # Strict comparison: an unchanged height means nothing more was
        # loaded, so stop (a ``>=`` test here would loop forever).
        if height <= last_height:
            break
        last_height = height
        browser.execute_script(scroll_down)
        time.sleep(pause)


def soundcloud_index():
    """Open the artist page in Chrome, load all tracks and download each one."""
    url = "https://soundcloud.com/beyond-synth"

    chrome_options = webdriver.ChromeOptions()
    # Route the browser through the same proxy as the API requests.
    chrome_options.add_argument(f"--proxy-server={PROXY_URL}")
    browser = webdriver.Chrome(options=chrome_options)

    try:
        browser.get(url)

        # Keep scrolling until the lazy-loaded track list is complete.
        _scroll_to_bottom(browser)

        # Process the page: every cover-art element links to one track.
        covers = browser.find_elements(By.CLASS_NAME, 'sound__coverArt')
        track_urls = [cover.get_attribute('href') for cover in covers]

        for count, single_url in enumerate(track_urls, start=1):
            if single_url:
                music_index(single_url)
            time.sleep(1)
            print(f"当前第 {count} 条")
    finally:
        # Always close the browser, even if a step above raised.
        browser.quit()


if __name__ == "__main__":
    soundcloud_index()
原创文章,作者:Tina,如若转载,请注明出处:https://python.01314.cn/201810513.html