Attachment 5908 Details for Bug 32935

Use httplib instead of urllib2

32935_app-pipelining.py (text/x-python), 4.38 KB, created by Philipp Hahn on 2014-05-12 14:17 CEST

(hide)

Description:

Filename:

MIME Type:

Creator: Philipp Hahn

Created: 2014-05-12 14:17 CEST

Size: 4.38 KB

patch

obsolete

>#!/usr/bin/python
>
>import sys
>from time import time
>from cStringIO import StringIO
>from gzip import GzipFile
>from simplejson import loads
>from urlparse import urlsplit
>
>URL = "http://appcenter.test.software-univention.de/meta-inf/3.2/index.json.gz"
>
>
>def u2():
>	"""Using urllib2"""
>	import urllib2
>
>	compressed = urllib2.urlopen(URL, timeout=60).read()
>	stream = StringIO(compressed)
>	content = GzipFile(mode='rb', fileobj=stream).read()
>	json = loads(content)
>	for appname, appinfo in json.iteritems():
>		for appfile, appfileinfo in appinfo.iteritems():
>			url = appfileinfo['url']
>			content = urllib2.urlopen(url, timeout=60).read()
>			print 'url=%s len=%d' % (url, len(content))
>
>
>def h2():
>	"""Using httplib"""
>	from httplib import HTTPConnection
>
>	parts = urlsplit(URL)
>	if parts.port:
>		port = parts.port
>	elif parts.scheme == 'https':
>		port = 443
>	else:
>		port = 80
>	conn = HTTPConnection(parts.hostname, port, timeout=60)
>
>	# print "hostname=%s port=%d path=%s" % (parts.hostname, port, parts.path)
>	# conn.set_debuglevel(65535)
>	conn.connect()
>	headers = {"Connection:": "Keep-alive"}
>	try:
>		conn.request("GET", parts.path, headers=headers)
>		res = conn.getresponse().read()
>
>		stream = StringIO(res)
>		content = GzipFile(mode='rb', fileobj=stream).read()
>		json = loads(content)
>		for appname, appinfo in json.iteritems():
>			for appfile, appfileinfo in appinfo.iteritems():
>				url = appfileinfo['url']
>				parts = urlsplit(url)
>				conn.request("GET", parts.path, headers=headers)
>				content = conn.getresponse().read()
>				print 'url=%s len=%d' % (url, len(content))
>	finally:
>		conn.close()
>
>
>def curl1():
>	"""Using pycurl single"""
>	import pycurl
>
>	c = pycurl.Curl()
>	# c.setopt(pycurl.VERBOSE, 1)
>	c.setopt(pycurl.FOLLOWLOCATION, 1)
>	c.setopt(pycurl.MAXREDIRS, 5)
>	c.setopt(pycurl.CONNECTTIMEOUT, 5)
>	c.setopt(pycurl.TIMEOUT, 60)
>	# c.setopt(pycurl.NOSIGNAL, 1)
>
>	buf = StringIO()
>	c.setopt(pycurl.URL, URL)
>	c.setopt(pycurl.WRITEFUNCTION, buf.write)
>	c.perform()
>	buf.seek(0)
>
>	content = GzipFile(mode='rb', fileobj=buf).read()
>	json = loads(content)
>	for appname, appinfo in json.iteritems():
>		for appfile, appfileinfo in appinfo.iteritems():
>			url = appfileinfo['url']
>			buf = StringIO()
>			c.setopt(pycurl.URL, url)
>			c.setopt(pycurl.WRITEFUNCTION, buf.write)
>			c.perform()
>			buf.seek(0)
>
>			print 'url=%s len=%d' % (url, len(buf.getvalue()))
>
>	c.close()
>
>
>def curl2(max=2):
>	"""Using pycurl multi 2 threads"""
>	# /usr/share/doc/python-pycurl/examples/retriever-multi.py
>	import pycurl
>
>	m = pycurl.CurlMulti()
>	m.handles = []
>	m.setopt(pycurl.M_PIPELINING, 1)
>	try:  # cURL 7.30.0
>		m.setopt(pycurl.M_MAX_HOST_CONNECTIONS, 2)
>		m.setopt(pycurl.M_MAX_PIPELINE_LENGTH, 5)
>	except AttributeError:
>		pass
>
>	for i in range(max):
>		c = pycurl.Curl()
>		c.fp = None
>		c.setopt(pycurl.FOLLOWLOCATION, 1)
>		c.setopt(pycurl.MAXREDIRS, 5)
>		c.setopt(pycurl.CONNECTTIMEOUT, 5)
>		c.setopt(pycurl.TIMEOUT, 60)
>		# c.setopt(pycurl.NOSIGNAL, 1)
>		m.handles.append(c)
>
>	buf = StringIO()
>	c.setopt(pycurl.URL, URL)
>	c.setopt(pycurl.WRITEFUNCTION, buf.write)
>	c.perform()
>	buf.seek(0)
>	content = GzipFile(mode='rb', fileobj=buf).read()
>	json = loads(content)
>
>	queue = []
>	for appname, appinfo in json.iteritems():
>		for appfile, appfileinfo in appinfo.iteritems():
>			url = appfileinfo['url']
>			queue.append(url)
>	num_processed = 0
>	num_urls = len(queue)
>
>	freelist = m.handles[:]
>	while num_processed < num_urls:
>		# Assign URLs to workers
>		while queue and freelist:
>			url = queue.pop()
>			c = freelist.pop()
>			buf = StringIO()
>			c.setopt(pycurl.URL, url)
>			c.setopt(pycurl.WRITEFUNCTION, buf.write)
>			m.add_handle(c)
>			c.url = url
>			c.buf = buf
>		while True:
>			ret, num_handlers = m.perform()
>			if ret != pycurl.E_CALL_MULTI_PERFORM:
>				break
>		while True:
>			num_q, ok_list, err_list = m.info_read()
>			for c in ok_list:
>				buf = c.buf
>				buf.seek(0)
>				m.remove_handle(c)
>				print 'url=%s len=%d' % (c.url, len(buf.getvalue()))
>				freelist.append(c)
>			for c, errno, errmsg in err_list:
>				m.remove_handle(c)
>				print >> sys.stderr, 'FAILED: %s' % (c.url,)
>			num_processed += len(ok_list) + len(err_list)
>			if num_q == 0:
>				break
>		m.select(1.0)
>
>	for c in m.handles:
>		c.close()
>	m.close()
>
>
>def curl2_5(max=5):
>	"""Using pycurl multi 5 threads"""
>	curl2(5)
>
>
>if __name__ == '__main__':
>	for f in (u2, h2, curl1, curl2, curl2_5):
>		start = time()
>		f()
>		end = time()
>		diff = end - start
>		print >> sys.stderr, "Time: %f %s" % (diff, f.__doc__)

Actions: View

Attachments on bug 32935: 5903 | 5908 | 5982