import os
import re
import sys
import time
import urllib.request

import requests
from bs4 import BeautifulSoup
from scrapy.selector import Selector


class wangyiyun():
    """Download the songs of a music.163.com playlist as mp3 files.

    Typical use is ``wangyiyun().work(playlist_id, count, target_dir)``.
    Paths of the files written by this instance are kept in
    ``self.song_list`` (download index -> file path).
    """

    # Characters that are not allowed in Windows file names.
    _ILLEGAL_FILENAME_CHARS = '\\/:*?"<>|'
    # Files smaller than this many bytes (1 MiB) are treated as failed
    # downloads by fileSize().
    _MIN_SIZE_BYTES = 1024 * 1024
    # Seconds to wait for the server before giving up on a request, so a
    # stalled connection cannot hang the whole run.
    _TIMEOUT = 30

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/55.0.2883.87 Safari/537.36',
            'Referer': 'http://music.163.com/'}
        self.main_url = 'http://music.163.com/'
        self.session = requests.Session()
        self.session.headers = self.headers
        self.song_list = {}

    def get_songurls(self, playlist, long):
        """Return the relative URLs of the first ``long`` songs of a playlist.

        Args:
            playlist: Integer playlist id (the ``id`` query parameter of the
                playlist page URL).
            long: Maximum number of songs to return; values below zero are
                treated as zero.

        Returns:
            A list of hrefs such as
            ``['/song?id=64006', '/song?id=63959', '/song?id=25642714']``.
        """
        url = self.main_url + 'playlist?id=%d' % playlist
        # Fetch the page through the session so the shared headers are sent.
        response = self.session.get(url, timeout=self._TIMEOUT)
        # Parsed with scrapy's Selector rather than BeautifulSoup.
        sel = Selector(text=response.text)
        songurls = sel.xpath('//ul[@class="f-hide"]/li/a/@href').extract()
        # Keep only the first `long` entries; clamp so a negative count does
        # not silently turn into "everything except the last n".
        return songurls[:max(long, 0)]

    def get_songinfo(self, songurl):
        """Fetch a song page and return its id and a file-system-safe name.

        Args:
            songurl: Relative song URL as returned by get_songurls(),
                e.g. ``'/song?id=64006'``.

        Returns:
            A tuple ``(song_id, songname)`` of strings where ``songname`` is
            ``'<singers joined by &> - <title>'`` with the characters in
            ``_ILLEGAL_FILENAME_CHARS`` removed.

        Raises:
            ValueError: If no song title is found on the page.
        """
        # main_url already ends with '/', so drop the href's leading slash
        # instead of producing 'http://music.163.com//song?...'.
        url = self.main_url + songurl.lstrip('/')
        response = self.session.get(url, timeout=self._TIMEOUT)
        sel = Selector(text=response.text)
        song_id = url.split('=')[1]
        song_name = sel.xpath("//em[@class='f-ff2']/text()").extract_first()
        if song_name is None:
            # extract_first() yields None when the node is missing; fail with
            # a clear message instead of a TypeError on concatenation.
            raise ValueError('no song title found at ' + url)
        singer = '&'.join(
            sel.xpath("//p[@class='des s-fc4']/span/a/text()").extract())
        songname = singer + ' - ' + song_name
        # Strip every forbidden character in a single pass.
        songname = songname.translate(
            str.maketrans('', '', self._ILLEGAL_FILENAME_CHARS))
        return str(song_id), songname

    def download_song(self, i, songurl, dir_path):
        """Download one song as ``<dir_path>/<songname>.mp3``.

        An already existing file is left untouched. Any failure is reported
        on stdout and swallowed so that one bad song does not stop the rest
        of the playlist.

        Args:
            i: Index of the song, used for progress output and as the key
                in ``self.song_list``.
            songurl: Relative song URL, e.g. ``'/song?id=64006'``.
            dir_path: Existing directory the mp3 file is written to.
        """
        # Label for the error message; replaced by the real song name as
        # soon as it is known, so the except branch never hits an unbound
        # variable.
        label = songurl
        try:
            # Resolve the id and display name from the song page.
            song_id, songname = self.get_songinfo(songurl)
            label = songname
            song_url = ('http://music.163.com/song/media/outer/url?id=%s.mp3'
                        % song_id)
            path = os.path.join(dir_path, songname + '.mp3')  # target file
            if os.path.exists(path):
                print(str(i), songname + ' exists!', sep='. ')
            else:
                song = requests.get(song_url, headers=self.headers,
                                    timeout=self._TIMEOUT)
                # Do not save an HTTP error page as an mp3 file.
                song.raise_for_status()
                with open(path, 'wb') as f:  # write the downloaded bytes
                    print(str(i), songname + '.mp3', sep='. ')
                    f.write(song.content)
                self.song_list[i] = path
        except Exception as error_info:
            # Deliberate best-effort boundary: report and move on.
            print('Error! ======= %s (%s)' % (label, error_info))

    def fileSize(self):
        """Delete downloaded files that are smaller than 1 MiB.

        Every path recorded in ``self.song_list`` is checked. Files below
        the threshold are removed from disk, and entries whose file is
        deleted (or already missing) are dropped from ``self.song_list`` so
        the method can safely be called again.
        """
        # Iterate over a snapshot because entries are removed in the loop.
        for num, file in list(self.song_list.items()):
            try:
                fsize = os.path.getsize(file)
            except OSError:
                # The file is gone already; forget about it.
                del self.song_list[num]
                continue
            if fsize < self._MIN_SIZE_BYTES:
                os.unlink(file)
                del self.song_list[num]
                print(str(num),
                      os.path.basename(file)
                      + ' Small than 1M. Has been deleted.',
                      sep='. ')

    def work(self, playlist, long, path):
        """Download the first ``long`` songs of ``playlist`` into ``path``.

        Args:
            playlist: Integer playlist id.
            long: Number of songs to download.
            path: Target directory; created if it does not exist.
        """
        os.makedirs(path, exist_ok=True)
        # Resolve the playlist id to the URLs of its songs.
        songurls = self.get_songurls(playlist, long)
        for i, songurl in enumerate(songurls, start=1):
            self.download_song(i, songurl, path)  # download each song
        self.fileSize()


if __name__ == '__main__':
    long = int(input("How many songs do you want to download?\n"))
    path = r'D:\备份\新建文件夹'
    d = wangyiyun()
    # 400931460 is the value of the `id` parameter in the playlist's web URL.
    d.work(400931460, long, path)
# 以上内容来自网络,如有侵权,请联系删除。
# 原文:https://www.cnblogs.com/wztshine/p/12365231.html