自动下载百度Mp3之Python脚本
[code]
#!/usr/bin/python
#coding=utf8
"""
getsong is a tool for downloading mp3 files automatically; with getsong you
can download mp3 files in a flash.
Usage:
Download mp3 file which matches given artist and/or title.
-h --help show this help message.
-d --songsdir dir of your songs repo.
-1 --100 download Baidu Top100 new songs.
-5 --500 download Baidu Top500 new songs.
-a --artist singer
-t --title song name
-v --version show version info
"""
复制内容到剪贴板代码:
import re
import os
import sys
import time
import glob
import string
import socket
import getopt
import urllib
import urllib2
import threading
from sgmllib import SGMLParser
#############################################################################
#
# self-defined exception classes
#
#############################################################################
class ConnectionError(Exception):
    """Script-specific error type for connection problems.

    Declared as a direct subclass of Exception; nothing in the visible part
    of this file raises it.
    """
class URLUnreachable(Exception):
    """Raised by MyHttpGet when the size of the remote file comes back as 0."""
class CanotDownload(Exception):
    """Script-specific error type for a download that cannot be performed.

    Declared as a direct subclass of Exception; nothing in the visible part
    of this file raises it.
    """
#############################################################################
#
# multiple threads download module starts here
#
#############################################################################
class HttpGetThread(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a part file.

    Data is appended to ``filename``; whatever the file already contains is
    treated as the beginning of the range, so an interrupted download is
    resumed rather than restarted.

    Attributes updated while running (read by the progress display):
        downloaded: number of bytes of this range present in the part file.
        percent: ``downloaded`` as a percentage of ``totalLength``.
        time: whole seconds spent reading on the current connection; only
            set once data has been received.
    """

    # Number of connection attempts run() makes before giving up.
    maxRetries = 10

    def __init__(self, name, url, filename, range=0):
        """Prepare the worker; no network access happens here.

        Arguments:
            name: thread name, passed through to threading.Thread.
            url: address to download from.
            filename: path of the part file that receives the data.
            range: ``(first_byte, last_byte)`` pair, both ends inclusive.
                The default of 0 is kept for interface compatibility but is
                not usable; a two-element sequence is required.
        """
        threading.Thread.__init__(self, name=name)
        self.url = url
        self.filename = filename
        self.range = range
        # Both ends are inclusive, hence the + 1.
        self.totalLength = range[1] - range[0] + 1
        self._readProgress()
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        self.bufferSize = 8192

    def _readProgress(self):
        """Set downloaded/percent from the current size of the part file."""
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0
        self.percent = self.downloaded / float(self.totalLength) * 100

    def run(self):
        """Download the remaining bytes, retrying up to maxRetries times.

        Every attempt re-requests only the bytes still missing. Failures are
        swallowed: after the last failed attempt the method simply returns
        and leaves a short part file behind.
        """
        self._readProgress()
        self.bufferSize = 8192
        if self.downloaded >= self.totalLength:
            # The part file is already complete. Requesting an empty or
            # inverted byte range would only fail on every retry.
            return
        for _ in range(self.maxRetries):
            try:
                # Resume after the bytes that are already on disk.
                self.headerrange = (self.range[0] + self.downloaded,
                                    self.range[1])
                request = urllib2.Request(self.url)
                request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
                conn = urllib2.urlopen(request)
                try:
                    startTime = time.time()
                    data = conn.read(self.bufferSize)
                    while data:
                        # Append chunk by chunk so the file size on disk
                        # always reflects the progress made so far.
                        with open(self.filename, 'ab') as f:
                            f.write(data)
                        self.time = int(time.time() - startTime)
                        self.downloaded += len(data)
                        self.percent = (self.downloaded /
                                        float(self.totalLength) * 100)
                        data = conn.read(self.bufferSize)
                finally:
                    conn.close()
                return
            except Exception:
                # Best effort: wait a moment, then retry from the current
                # offset.
                time.sleep(1)
def Split(size, blocks):
    """Divide ``size`` bytes into ``blocks`` consecutive inclusive ranges.

    Arguments:
        size: total number of bytes.
        blocks: number of ranges to produce; must be at least 1.

    Returns:
        A list of ``(first_byte, last_byte)`` tuples. All ranges hold
        ``size // blocks`` bytes except the last, which also takes the
        remainder and always ends at ``size - 1``.

    Raises:
        ZeroDivisionError: if ``blocks`` is 0.
    """
    # Floor division keeps the boundaries integral on every Python version.
    blocksize = size // blocks
    ranges = [(i * blocksize, i * blocksize + blocksize - 1)
              for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges
def GetHttpFileSize(url):
    """Return the size in bytes that the server reports for ``url``.

    The value is taken from the last response header line containing
    'Length'.

    Arguments:
        url: address to query.

    Returns:
        The length as an int, or 0 when the request fails, no such header is
        present, or its value is not a number.
    """
    try:
        conn = urllib.urlopen(url)
        try:
            length = 0
            for header in conn.info().headers:
                if header.find('Length') != -1:
                    length = int(header.split(':')[-1].strip())
            return length
        finally:
            conn.close()
    except Exception:
        # Best effort: callers treat 0 as "unreachable".
        return 0
def hasLive(ts):
    """Return True if at least one thread in ``ts`` is still running.

    Arguments:
        ts: iterable of threading.Thread objects.

    Returns:
        True as soon as a live thread is found, False otherwise (including
        for an empty iterable).
    """
    # is_alive() exists since Python 2.6; the isAlive() alias was removed in
    # Python 3.9.
    return any(t.is_alive() for t in ts)
def MyHttpGet(url, output=None, connections=4):
    """Download ``url`` with several parallel range requests.

    The file is split into ``connections`` byte ranges. Each range is fetched
    by its own HttpGetThread into a part file named ``<filename>_<index>``.
    A progress line is refreshed on stdout while the threads run. Afterwards
    the parts are concatenated into the target file and removed.

    Arguments:
        url: address to download, in GBK encoding.
        output: target file name, used as given (no encoding conversion).
            When omitted, the last path component of ``url`` is used.
        connections: number of parallel download threads.

    Raises:
        URLUnreachable: if the remote file size cannot be determined.
        SystemExit: with status 1 when interrupted from the keyboard; the
            part files are deleted first.
    """
    length = GetHttpFileSize(url)
    if length == 0:
        raise URLUnreachable
    # Whole KiB converted to MiB; only used for the progress display.
    mb = length // 1024 / 1024.0
    blocks = connections
    if output:
        filename = output
    else:
        filename = url.split('/')[-1]
    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]
    ts = []
    for i in range(blocks):
        t = HttpGetThread(i, url, names[i], ranges[i])
        # Daemon threads do not keep the process alive after Ctrl-C.
        t.daemon = True
        t.start()
        ts.append(t)
    live = hasLive(ts)
    # Bytes already on disk from an earlier run must not count towards the
    # transfer rate of this run.
    startSize = sum([t.downloaded for t in ts])
    startTime = time.time()
    etime = 0
    rate = 0.0
    while live:
        try:
            etime = time.time() - startTime
            downloaded = sum([t.downloaded for t in ts])
            d = downloaded / float(length) * 100
            downloadedThistime = downloaded - startSize
            try:
                rate = downloadedThistime / float(etime) / 1024
            except ZeroDivisionError:
                # No measurable time has elapsed yet.
                rate = 100.0
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            live = hasLive(ts)
            time.sleep(0.2)
        except KeyboardInterrupt:
            print("Exit...")
            for n in names:
                try:
                    os.remove(n)
                except OSError:
                    pass
            sys.exit(1)
    minutes, seconds = divmod(int(etime), 60)
    print(u'耗时: %d:%d, 平均速度:%.2fKB/s' % (minutes, seconds, rate))
    # NOTE(review): a worker that ran out of retries leaves a short part
    # file, and the merged result is then silently incomplete.
    with open(filename, 'wb') as f:
        for n in names:
            with open(n, 'rb') as part:
                f.write(part.read())
            try:
                os.remove(n)
            except OSError:
                pass
#############################################################################
#
# get artist-title pairs from baidu top songs list
#
#############################################################################
class SongParser(SGMLParser):
    """Collect ranked song entries from the text nodes of a chart page.

    A text node of the form ``<1-3 digits>.`` opens the entry for that rank.
    Every following non-empty text node is appended to the entry until a
    node consisting of ``)`` has been appended. Text outside an open entry
    is ignored.

    After feeding, ``songs`` maps rank (int) to the concatenated text.
    """

    def reset(self):
        """Reset the base parser state and clear the collected songs."""
        SGMLParser.reset(self)
        self.songs = {}
        self.cursong = ''
        self.insong = False
        self.newsong = False
        self.name = ''

    def handle_data(self, text):
        """Process one text node; whitespace-only nodes are skipped."""
        txt = text.strip()
        if txt == '':
            return
        res = re.search(r'^(\d{1,3})\.$', txt)
        if res:
            # A rank marker opens a new entry.
            rank = int(res.groups()[0])
            self.cursong = rank
            self.songs[rank] = ''
            self.insong = True
            self.name = 'artist'
        elif self.insong:
            self.songs[self.cursong] = self.songs[self.cursong] + txt
            if txt == ')':
                # The closing parenthesis ends the current entry.
                self.insong = False
def GetArtistAndTitle(url):
    """Fetch a chart page and return its songs as artist/title records.

    The page is decoded from GBK, re-encoded as UTF-8 and run through
    SongParser. Each collected entry is split at its first '(': the text
    before it is the title, and the text between it and the final character
    is the artist, with '/' replaced by '_'.

    Arguments:
        url: address of the chart page.

    Returns:
        A dict mapping rank (int) to ``{'artist': ..., 'title': ...}``, both
        values decoded from UTF-8. Entries without '(' get an empty artist.
    """
    conn = urllib.urlopen(url)
    try:
        html = conn.read()
    finally:
        conn.close()
    html = html.decode('gbk', 'ignore').encode('utf8')
    parser = SongParser()
    parser.feed(html)
    songs = parser.songs
    # Iterate over a snapshot because the values are replaced in the loop.
    for k, v in list(songs.items()):
        pos = v.find('(')
        if pos != -1:
            title = v[:pos]
            # Drop the opening parenthesis and the final character.
            artist = v[pos + 1:-1]
            artist = artist.replace('/', '_')
        else:
            title = v
            artist = ''
        artist = artist.decode('utf8', 'ignore')
        title = title.decode('utf8', 'ignore')
        songs[k] = {'artist': artist, 'title': title}
    return songs
#############################################################################
#
# mp3 download module starts here
#
#############################################################################
class URLChecker(threading.Thread):
    """Thread that measures how quickly a candidate mp3 URL responds.

    Results are left on the instance:
        url: real URL resolved from the fake one (falsy when unresolved).
        etime: time to open the URL in milliseconds, or 200000 when the
            candidate is unusable.
        length: size reported by the server in bytes, 0 on failure.
    """

    # etime value marking a candidate as unusable.
    UNUSABLE = 200000
    # Responses smaller than this many bytes are rejected.
    MIN_LENGTH = 2 * 1024 * 1024

    def __init__(self, fakeurl):
        """Resolve ``fakeurl`` right away, in the calling thread."""
        threading.Thread.__init__(self, name='')
        self.etime = 0
        self.length = 0
        self.url = GetRealMp3URL(fakeurl)

    def run(self):
        """Open the URL once and record response time and reported size."""
        if not self.url:
            self.etime = self.UNUSABLE
            return
        # NOTE: this changes the timeout for every socket in the process.
        socket.setdefaulttimeout(10)
        try:
            start = time.time()
            conn = urllib2.urlopen(self.url)
            try:
                end = time.time()
                etime = int((end - start) * 1000)
                # Stays 0 when no Length header is present, which rejects
                # the candidate below.
                length = 0
                for header in conn.info().headers:
                    if 'Length' in header:
                        length = int(header.split(':')[-1])
            finally:
                conn.close()
            self.etime = etime
            self.length = length
            if self.length < self.MIN_LENGTH:
                self.etime = self.UNUSABLE
        except Exception:
            # Any failure (timeout, HTTP error, bad header) disqualifies
            # the candidate.
            self.etime = self.UNUSABLE
            self.length = 0
def GetBestUrl(urls):
    """Probe all candidate URLs in parallel and return the fastest one.

    One URLChecker thread is started per entry. The function then polls
    until all of them have finished.

    Arguments:
        urls: iterable of fake URLs, each handed to URLChecker.

    Returns:
        The resolved URL of the checker with the smallest ``etime``, or ''
        when no checker finished with an ``etime`` below 100000. That
        threshold excludes every candidate URLChecker marked as unusable.
    """
    cthreads = []
    for url in urls:
        t = URLChecker(url)
        cthreads.append(t)
        t.start()
    # Poll instead of blocking in join() so that Ctrl-C stays responsive.
    live = hasLive(cthreads)
    while live:
        live = hasLive(cthreads)
        time.sleep(0.3)
    besturl = ''
    setime = 100000
    for t in cthreads:
        if t.etime < setime:
            besturl = t.url
            setime = t.etime
    return besturl
# [/code]
# Forum text that followed the code (translated from Chinese): "That is quite
# long, thanks for the effort. I want to write an mp3 downloader in Python for
# [url]www.xiaoyaya.com[/url], so I searched the web first, found this one and
# posted it here."
页:
[1]