码迷,mamicode.com
首页 > 编程语言 > 详细

·「python爬虫入门」网易云音乐下载

时间:2015-06-02 17:36:03      阅读:382      评论:0      收藏:0      [点我收藏+]

标签:

2015-6-2

今天把昨天Git上看的一个下载网易云音乐歌单歌曲的脚本尝试看懂并修改

Git地址:https://github.com/keli/netease-music

技术分享
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import urllib2
import json
import os
import sys
import md5
import string
import random

# Set cookie
# install_opener() makes this opener the default for urllib2.urlopen(), so the
# requests issued below carry the Cookie and Referer headers appended here.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.append(('Cookie', 'appver=2.0.2'))
cookie_opener.addheaders.append(('Referer', 'http://music.163.com'))
urllib2.install_opener(cookie_opener)

def encrypted_id(id):
    """Return the URL-safe base64 MD5 token derived from a dfsId string.

    Each byte of *id* is XOR-ed with a fixed key (the key repeats when *id*
    is longer than it), the result is MD5-hashed, and the 16-byte digest is
    base64-encoded with '+' replaced by '-' and '/' replaced by '_'.

    Args:
        id: The dfsId as a byte string or a text string; text is encoded
            as UTF-8 before processing.

    Returns:
        The 24-character encoded digest as a native ``str``.
    """
    # hashlib/base64 replace the deprecated ``md5`` module and the
    # Python-2-only 'base64' str codec; imported locally so the function
    # does not depend on the file-level import block.
    import base64
    import hashlib

    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    if not isinstance(id, bytes):
        id = id.encode('utf-8')
    data = bytearray(id)
    key_len = len(key)
    for i, value in enumerate(data):
        data[i] = value ^ key[i % key_len]
    digest = hashlib.md5(bytes(data)).digest()
    # urlsafe_b64encode performs the same '+' -> '-' and '/' -> '_'
    # substitution and emits no trailing newline that would need stripping.
    token = base64.urlsafe_b64encode(digest)
    return token if isinstance(token, str) else token.decode('ascii')

def get_playlist(playlist_id):
    """Fetch a playlist's detail JSON and return its 'result' object.

    Args:
        playlist_id: Playlist id, substituted into the API URL with ``%s``.

    Returns:
        The value stored under the 'result' key of the decoded response.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    return data['result']

def save_track(track, folder, position):
    """Download one playlist track into *folder* unless the file exists.

    The file is named '<name>.<extension>' using the track's 'hMusic' entry,
    with any '/' in the name replaced by '_'.

    Args:
        track: One entry of the playlist's 'tracks' list. Must contain
            'mp3Url' and an 'hMusic' dict with 'name', 'extension' and
            'dfsId'; a track without them raises KeyError.
        folder: Directory the file is written to.
        position: Index of the track in the playlist.
    """
    name = track['hMusic']['name']

    # NOTE: pos is only referenced by the commented-out naming scheme below.
    if position < 10:
        pos = "0%d" % position
    else:
        pos = "%d" % position

    #fname = pos + ' ' + name + track['hMusic']['extension']
    fname = name + '.' + track['hMusic']['extension']
    # A '/' in the title would be treated as a path separator.
    fname = fname.replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))

    # Skip tracks that were already downloaded.
    if os.path.exists(fpath):
        return

    print("Downloading %s ..." % fpath)

    dfsId = str(track['hMusic']['dfsId'])
    # NOTE: this URL is built but never requested; the download below uses
    # track['mp3Url'] instead.
    url = 'http://m%d.music.126.net/%s/%s.%s' % (
        random.randrange(1, 3), encrypted_id(dfsId), dfsId,
        track['hMusic']['extension'])
    resp = urllib2.urlopen(track['mp3Url'])
    data = resp.read()
    resp.close()

    with open(fpath, 'wb') as mp3:
        mp3.write(data)

def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into a folder named after it.

    Args:
        playlist_id: Playlist id passed to get_playlist().
        folder: Parent directory; the playlist's 'name' is appended to it
            and the resulting directory is created when missing.
    """
    playlist = get_playlist(playlist_id)

    name = playlist['name']
    folder = os.path.join(folder, name)

    if not os.path.exists(folder):
        os.makedirs(folder)

    for idx, track in enumerate(playlist['tracks']):
        # save_track receives a 1-based position.
        save_track(track, folder, idx+1)

# Script entry point: expects the playlist id as the first argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: %s <playlist id>" % sys.argv[0])
        sys.exit(1)
    download_playlist(sys.argv[1])
View Code

这边是对cookie的处理,addheaders的方法之前没有看到过,help查询居然也没有查到,但是有用

# install_opener() makes this opener the default for urllib2.urlopen(), so
# later requests carry the Cookie and Referer headers appended here.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.append(('Cookie', 'appver=2.0.2'))
cookie_opener.addheaders.append(('Referer', 'http://music.163.com'))
urllib2.install_opener(cookie_opener)

这一段其实是没有用的,但是挺好奇这段代码里面函数的作用,过段时间再去学一下

def encrypted_id(id):
    """Return the URL-safe base64 MD5 token derived from a dfsId string.

    Each byte of *id* is XOR-ed with a fixed key (the key repeats when *id*
    is longer than it), the result is MD5-hashed, and the 16-byte digest is
    base64-encoded with '+' replaced by '-' and '/' replaced by '_'.

    Args:
        id: The dfsId as a byte string or a text string; text is encoded
            as UTF-8 before processing.

    Returns:
        The 24-character encoded digest as a native ``str``.
    """
    # hashlib/base64 replace the deprecated ``md5`` module and the
    # Python-2-only 'base64' str codec; imported locally so the function
    # does not depend on the file-level import block.
    import base64
    import hashlib

    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    if not isinstance(id, bytes):
        id = id.encode('utf-8')
    data = bytearray(id)
    key_len = len(key)
    for i, value in enumerate(data):
        data[i] = value ^ key[i % key_len]
    digest = hashlib.md5(bytes(data)).digest()
    # urlsafe_b64encode performs the same '+' -> '-' and '/' -> '_'
    # substitution and emits no trailing newline that would need stripping.
    token = base64.urlsafe_b64encode(digest)
    return token if isinstance(token, str) else token.decode('ascii')

 

下面这段代码是发挥主要作用的,但是这个脚本是原作者三个月前写的,网易云音乐应该有一些变化

def get_playlist(playlist_id):
    """Fetch a playlist's detail JSON and return its 'result' object.

    Args:
        playlist_id: Playlist id, substituted into the API URL with ``%s``.

    Returns:
        The value stored under the 'result' key of the decoded response.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    return data['result']

def save_track(track, folder, position):
    """Download one playlist track into *folder* unless the file exists.

    The file is named '<name>.<extension>' using the track's 'hMusic' entry,
    with any '/' in the name replaced by '_'.

    Args:
        track: One entry of the playlist's 'tracks' list. Must contain
            'mp3Url' and an 'hMusic' dict with 'name', 'extension' and
            'dfsId'; a track without them raises KeyError.
        folder: Directory the file is written to.
        position: Index of the track in the playlist.
    """
    name = track['hMusic']['name']

    # NOTE: pos is only referenced by the commented-out naming scheme below.
    if position < 10:
        pos = "0%d" % position
    else:
        pos = "%d" % position

    #fname = pos + ' ' + name + track['hMusic']['extension']
    fname = name + '.' + track['hMusic']['extension']
    # A '/' in the title would be treated as a path separator.
    fname = fname.replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))

    # Skip tracks that were already downloaded.
    if os.path.exists(fpath):
        return

    print("Downloading %s ..." % fpath)

    dfsId = str(track['hMusic']['dfsId'])
    # NOTE: this URL is built but never requested; the download below uses
    # track['mp3Url'] instead.
    url = 'http://m%d.music.126.net/%s/%s.%s' % (
        random.randrange(1, 3), encrypted_id(dfsId), dfsId,
        track['hMusic']['extension'])
    resp = urllib2.urlopen(track['mp3Url'])
    data = resp.read()
    resp.close()

    with open(fpath, 'wb') as mp3:
        mp3.write(data)

def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into a folder named after it.

    Args:
        playlist_id: Playlist id passed to get_playlist().
        folder: Parent directory; the playlist's 'name' is appended to it
            and the resulting directory is created when missing.
    """
    playlist = get_playlist(playlist_id)

    name = playlist['name']
    folder = os.path.join(folder, name)

    if not os.path.exists(folder):
        os.makedirs(folder)

    for idx, track in enumerate(playlist['tracks']):
        # save_track receives a 1-based position.
        save_track(track, folder, idx+1)

首先是JSON,我之前并没有学过JSON,XML也只是昨天看了一小会儿

http://music.163.com/api/playlist/detail?id=4566307 打开准备下载的歌单

用Firebug看Json挺清晰,结构都很清楚,chrome就感觉有点一堆凑一起

看代码也知道歌曲的链接在tracks里面

我按着原作者的代码调试 发现一直卡在

name = track['hMusic']['name']

后来调了半天,才去JSON文件里看了,发现tracks里面不是所有歌曲都有 hMusic 这个 属性

后来直接改成  name = track['name']

而且再仔细看JSON 发现代码有好几处不对的地方 最后改成了这样

def get_playlist(playlist_id):
    """Fetch a playlist's detail JSON, print its name, and return 'result'.

    Args:
        playlist_id: Playlist id, substituted into the API URL with ``%s``.

    Returns:
        The value stored under the 'result' key of the decoded response.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read())
    finally:
        # Release the connection even when reading or decoding fails.
        resp.close()
    print(data['result']['name'])
    return data['result']

def save_track(track, folder, position):
    """Download one playlist track into *folder*, skipping it on failure.

    The file is named '<name><position>.mp3' with any '/' replaced by '_'.
    Nothing is downloaded when that file already exists. If the request
    raises urllib2.URLError or socket.timeout, the exception type is printed
    and no file is written.

    Args:
        track: One entry of the playlist's 'tracks' list; its 'name' and
            'mp3Url' keys are read.
        folder: Directory the file is written to.
        position: Index of the track in the playlist; part of the file name.
    """
    # socket.timeout is named in an except clause below, but the script's
    # import block does not import socket.
    import socket

    name = track['name'] #name = track['hMusic']['name']
    print(name)
    # NOTE: pos is only referenced by the commented-out naming scheme below.
    if position < 10:
        pos = "0%d" % position
    else:
        pos = "%d" % position

    #fname = pos + ' ' + name + track['hMusic']['extension']
    fname = name + str(position) + '.mp3' #fname = name + str(position) + '.' + track['hMusic']['extension']
    # A '/' in the title would be treated as a path separator.
    fname = fname.replace('/', '_')
    fpath = os.path.normpath(os.path.join(folder, fname))

    if os.path.exists(fpath):
        return
    print("Downloading %s ..." % fpath)

    # dfsId = str(track['hMusic']['dfsId'])
    # url = 'http://m%d.music.126.net/%s/%s.%s' % (random.randrange(1, 3), encrypted_id(dfsId), dfsId, track['hMusic']['extension'])
    try:
        resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
        data = resp.read()
        resp.close()
    except urllib2.URLError as e:
        print(type(e))    # not caught
        pass
    except socket.timeout as e:
        print(type(e))    # caught
        pass
    else:
        # Only reached when the download completed, so data is always bound.
        with open(fpath, 'wb') as mp3:
            mp3.write(data)


def download_playlist(playlist_id, folder='.'):
    """Download every track of a playlist into a folder named after it.

    Args:
        playlist_id: Playlist id passed to get_playlist().
        folder: Parent directory; the playlist's 'name' is appended to it
            and the resulting directory is created when missing.
    """
    playlist = get_playlist(playlist_id)

    name = playlist['name']
    folder = os.path.join(folder, name)

    if not os.path.exists(folder):
        os.makedirs(folder)

    for idx, track in enumerate(playlist['tracks']):
        print('begin save')
        # save_track receives a 1-based position.
        save_track(track, folder, idx+1)

注释掉的部分就是更改的地方

在下载歌曲的时候,经常会卡在一个地方,然后知道urlopen可以设置timeout 就是超时时间,

然后我的想法是超时的话就报错然后继续下载下一首

但是不是很熟悉try except 试了好久

 试过好几个版本

 # Attempt 1: the file is written after the try block no matter what, so
 # `data` is unbound whenever one of the except branches ran.
 try:
     resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
     data = resp.read()
     resp.close()
 except urllib2.URLError as e:
     print(type(e))    # not caught
     pass
 except socket.timeout as e:
     print(type(e))    # caught
     pass

 with open(fpath, 'wb') as mp3:
     mp3.write(data)

 # Attempt 2: the write moved into `else`, so it only runs when no
 # exception was raised.
 try:
     resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
     data = resp.read()
     resp.close()
 except urllib2.URLError as e:
     print(type(e))    # not caught
 except socket.timeout as e:
     print(type(e))    # caught
 else:
     with open(fpath, 'wb') as mp3:
         mp3.write(data)

 # Attempt 3: only urlopen() is guarded; read() runs outside the try, and
 # `resp` is unbound whenever urlopen() raised.
 try:
     resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
 except urllib2.URLError as e:
     print(type(e))    # not caught
     pass
 except socket.timeout as e:
     print(type(e))    # caught
     pass
 data = resp.read()
 resp.close()
 with open(fpath, 'wb') as mp3:
     mp3.write(data)

总之都是代码报错,报错了也没有继续运行下去

后来改成这样

    # Download with a 5-second timeout; on failure print the exception type
    # and fall through without writing a file.
    try:
        resp = urllib2.urlopen(track['mp3Url'], timeout = 5)
        data = resp.read()
        resp.close()
    except urllib2.URLError as e:
        print(type(e))    # not caught
        pass
    except socket.timeout as e:
        print(type(e))    # caught
        pass
    else:
        # Only reached when the download completed, so data is always bound.
        with open(fpath, 'wb') as mp3:
            mp3.write(data)

就行了=。=, 还是得再好好看看异常处理那一块

 

总之这就是一下午的学习,虽然下歌什么的对我并没有什么卵用

 

·「python爬虫入门」网易云音乐下载

标签:

原文地址:http://www.cnblogs.com/weicon9/p/4546893.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!