抓取QQ音乐歌单
1.通过分析歌曲下载路径来确定所需参数: 经过比较, 下载url中只有歌曲文件名部分与参数vkey是可变的,
歌曲文件名中可变的值是请求歌单时返回的歌曲数据中的strMediaMid字段, 而vkey需要用歌曲数据中的songmid字段再请求另一个url才能得到
2.通过分析请求歌单的url来确定所需参数: disstid
请求参数disstid的值取自歌单列表(diss)接口返回数据中的dissid字段
import os
import re
import requests
from urllib.parse import urlencode
# Headers attached to every HTTP request made by this script: a desktop
# Chrome user-agent plus origin/referer values pointing at y.qq.com.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
    'origin': 'https://y.qq.com',
    'referer': 'https://y.qq.com/portal/playlist.html',
}
def fetch_url(url, timeout=10):
    """Fetch *url* with the shared request headers and decode the JSON body.

    Args:
        url: Fully built request URL (query string already appended).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON payload when the response status is 200 or 201;
        ``None`` for any other status, for a network failure, or when the
        body is not valid JSON. Failures are printed, not raised.
    """
    try:
        # Without an explicit timeout, requests blocks indefinitely on a
        # stalled connection.
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code in (200, 201):
            return r.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: transport-level failure (DNS, connect, timeout).
        # ValueError: the response body could not be decoded as JSON.
        print(e)
    return None
def down_song(path, strMediaMid, vkey, timeout=30):
    """Download one song as an .m4a file and write it to *path*.

    Args:
        path: Destination file path; its parent directory must exist.
        strMediaMid: Media id inserted into the file name part of the URL.
        vkey: Download key sent as the ``vkey`` query parameter.
        timeout: Seconds to wait for the server (connect / between reads).

    Returns:
        ``True`` when the file was written, ``False`` when the server
        answered with a status other than 200/201 (no file is created).

    Raises:
        requests.RequestException: on network failures.
        OSError: when *path* cannot be written.
    """
    params = {
        'guid': '5300386295',
        'vkey': vkey,
        'uin': '0',
        'fromtag': '66'
    }
    url = 'http://222.73.132.154/amobile.music.tc.qq.com/C400{}.m4a?'.format(strMediaMid)
    url += urlencode(params)
    # stream=True avoids holding the whole audio file in memory; the
    # context manager releases the connection when we are done.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as r:
        if r.status_code not in (200, 201):
            return False
        saved = False
        try:
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
            saved = True
        finally:
            # Do not leave a truncated file behind if the transfer or the
            # write failed part-way through.
            if not saved and os.path.exists(path):
                os.remove(path)
    return True
def get_vkey(songmid):
    """Request the download key (vkey) for the song identified by *songmid*.

    Args:
        songmid: The song's ``songmid`` value as returned with the playlist.

    Returns:
        The ``vkey`` string taken from the first ``midurlinfo`` entry of the
        response, or ``None`` when the request failed or the response did
        not contain that entry.
    """
    # Local import: json is needed only here to build the request payload.
    import json

    url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?'
    # Build the payload as a structure and serialise it, so songmid is
    # always correctly quoted/escaped inside the JSON document.
    payload = {
        'req_0': {
            'module': 'vkey.GetVkeyServer',
            'method': 'CgiGetVkey',
            'param': {
                'guid': '5300386295',
                'songmid': [str(songmid)],
                'songtype': [0],
                'uin': '0',
                'loginflag': 1,
                'platform': '20',
            },
        },
        'comm': {'uin': 0, 'format': 'json', 'ct': 24, 'cv': 0},
    }
    params = {
        '-': 'getplaysongvkey7256617694143965',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        # Compact separators reproduce the whitespace-free JSON form.
        'data': json.dumps(payload, separators=(',', ':')),
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        return result['req_0']['data']['midurlinfo'][0]['vkey']
    except (KeyError, IndexError, TypeError):
        # TypeError covers result being None (fetch_url failed); the other
        # two cover a response without the expected midurlinfo entry.
        print('get_vkey: no vkey in response for songmid {}'.format(songmid))
        return None
def get_song_info(disstid):
    """Yield the songs of one playlist.

    Args:
        disstid: Playlist id sent as the ``disstid`` query parameter.

    Yields:
        ``(strMediaMid, songmid, songname)`` tuples, one per entry of the
        first ``cdlist`` element's ``songlist``. Nothing is yielded when
        the request failed or the response carries no playlist data.
    """
    url = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
    params = {
        'type': '1',
        'json': '1',
        'utf8': '1',
        'onlysong': '0',
        'disstid': disstid,
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    url += urlencode(params)
    result = fetch_url(url)
    # fetch_url returns None on failure, and cdlist may be missing or empty;
    # in both cases there is simply nothing to yield.
    if not isinstance(result, dict):
        return
    cdlist = result.get('cdlist') or []
    if not cdlist:
        return
    for song in cdlist[0].get('songlist') or []:
        yield song['strMediaMid'], song['songmid'], song['songname']
def get_dist_info(page):
    """Yield the playlists listed on one page of the category listing.

    Args:
        page: 1-based page number (anything accepted by ``int()``); each
            page covers 30 entries, requested through ``sin``/``ein``.

    Yields:
        ``(dissid, dissname)`` tuples from ``data.list`` of the response.
        Nothing is yielded when the request failed or the list is absent.
    """
    url = 'https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?'
    # Index of the first entry on the requested page (page 1 -> 0).
    first = (int(page) - 1) * 30
    params = {
        'picmid': '1',
        'rnd': '0.15993662911508766',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'categoryId': '10000000',
        'sortId': '5',
        'sin': first,
        'ein': first + 29,
    }
    url += urlencode(params)
    result = fetch_url(url)
    # fetch_url returns None on failure; treat that, or a response without
    # data/list, as an empty page instead of crashing on a subscript.
    if not isinstance(result, dict):
        return
    disslist = (result.get('data') or {}).get('list') or []
    for diss in disslist:
        yield diss['dissid'], diss['dissname']
def main(page, save_dir='d://data'):
    """Download every song of every playlist on listing page *page*.

    Songs are stored as ``<save_dir>/<playlist name>/<song name>.m4a``;
    characters in the regex below are replaced by a space in both names.

    Args:
        page: 1-based listing page number passed to ``get_dist_info``.
        save_dir: Root directory for the downloads.
    """
    # Compiled once instead of once per song.
    pattern = re.compile(r'[\\/::*?"<>|\r\n]+')
    for dissid, dissname in get_dist_info(page):
        dissname = pattern.sub(" ", dissname)
        folder = '{}/{}/'.format(save_dir, dissname)
        for strMediaMid, songMid, songname in get_song_info(dissid):
            songname = pattern.sub(" ", songname)
            vkey = get_vkey(songMid)
            if not vkey:
                # Without a download key the request cannot succeed.
                print("跳过(未获取到vkey):{}".format(songname))
                continue
            # makedirs also creates the missing save_dir root, which
            # os.mkdir would fail on; exist_ok makes repeat calls harmless.
            os.makedirs(folder, exist_ok=True)
            path = '{0}{1}.m4a'.format(folder, songname)
            print("正在下载:{}".format(songname))
            down_song(path, strMediaMid, vkey)
            # Report success only if a file is actually present on disk.
            if os.path.isfile(path):
                print("下载完成:{}".format(songname))
            else:
                print("下载失败:{}".format(songname))
if __name__ == '__main__':
    # Script entry point: crawl the first page of the playlist listing.
    page = 1
    main(page)
手机扫一扫
移动阅读更方便
你可能感兴趣的文章