Univention Bugzilla – Attachment 5908 Details for
Bug 32935
AppCenter starts N threads
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
Use httplib instead of urllib2
32935_app-pipelining.py (text/x-python), 4.38 KB, created by
Philipp Hahn
on 2014-05-12 14:17 CEST
(
hide
)
Description:
Use httplib instead of urllib2
Filename:
MIME Type:
Creator:
Philipp Hahn
Created:
2014-05-12 14:17 CEST
Size:
4.38 KB
patch
obsolete
>#!/usr/bin/python > >import sys >from time import time >from cStringIO import StringIO >from gzip import GzipFile >from simplejson import loads >from urlparse import urlsplit > >URL = "http://appcenter.test.software-univention.de/meta-inf/3.2/index.json.gz" > > >def u2(): > """Using urllib2""" > import urllib2 > > compressed = urllib2.urlopen(URL, timeout=60).read() > stream = StringIO(compressed) > content = GzipFile(mode='rb', fileobj=stream).read() > json = loads(content) > for appname, appinfo in json.iteritems(): > for appfile, appfileinfo in appinfo.iteritems(): > url = appfileinfo['url'] > content = urllib2.urlopen(url, timeout=60).read() > print 'url=%s len=%d' % (url, len(content)) > > >def h2(): > """Using httplib""" > from httplib import HTTPConnection > > parts = urlsplit(URL) > if parts.port: > port = parts.port > elif parts.scheme == 'https': > port = 443 > else: > port = 80 > conn = HTTPConnection(parts.hostname, port, timeout=60) > > # print "hostname=%s port=%d path=%s" % (parts.hostname, port, parts.path) > # conn.set_debuglevel(65535) > conn.connect() > headers = {"Connection:": "Keep-alive"} > try: > conn.request("GET", parts.path, headers=headers) > res = conn.getresponse().read() > > stream = StringIO(res) > content = GzipFile(mode='rb', fileobj=stream).read() > json = loads(content) > for appname, appinfo in json.iteritems(): > for appfile, appfileinfo in appinfo.iteritems(): > url = appfileinfo['url'] > parts = urlsplit(url) > conn.request("GET", parts.path, headers=headers) > content = conn.getresponse().read() > print 'url=%s len=%d' % (url, len(content)) > finally: > conn.close() > > >def curl1(): > """Using pycurl single""" > import pycurl > > c = pycurl.Curl() > # c.setopt(pycurl.VERBOSE, 1) > c.setopt(pycurl.FOLLOWLOCATION, 1) > c.setopt(pycurl.MAXREDIRS, 5) > c.setopt(pycurl.CONNECTTIMEOUT, 5) > c.setopt(pycurl.TIMEOUT, 60) > # c.setopt(pycurl.NOSIGNAL, 1) > > buf = StringIO() > c.setopt(pycurl.URL, URL) > c.setopt(pycurl.WRITEFUNCTION, buf.write) > c.perform() > buf.seek(0) > > content = GzipFile(mode='rb', fileobj=buf).read() > json = loads(content) > for appname, appinfo in json.iteritems(): > for appfile, appfileinfo in appinfo.iteritems(): > url = appfileinfo['url'] > buf = StringIO() > c.setopt(pycurl.URL, url) > c.setopt(pycurl.WRITEFUNCTION, buf.write) > c.perform() > buf.seek(0) > > print 'url=%s len=%d' % (url, len(buf.getvalue())) > > c.close() > > >def curl2(max=2): > """Using pycurl multi 2 threads""" > # /usr/share/doc/python-pycurl/examples/retriever-multi.py > import pycurl > > m = pycurl.CurlMulti() > m.handles = [] > m.setopt(pycurl.M_PIPELINING, 1) > try: # cURL 7.30.0 > m.setopt(pycurl.M_MAX_HOST_CONNECTIONS, 2) > m.setopt(pycurl.M_MAX_PIPELINE_LENGTH, 5) > except AttributeError: > pass > > for i in range(max): > c = pycurl.Curl() > c.fp = None > c.setopt(pycurl.FOLLOWLOCATION, 1) > c.setopt(pycurl.MAXREDIRS, 5) > c.setopt(pycurl.CONNECTTIMEOUT, 5) > c.setopt(pycurl.TIMEOUT, 60) > # c.setopt(pycurl.NOSIGNAL, 1) > m.handles.append(c) > > buf = StringIO() > c.setopt(pycurl.URL, URL) > c.setopt(pycurl.WRITEFUNCTION, buf.write) > c.perform() > buf.seek(0) > content = GzipFile(mode='rb', fileobj=buf).read() > json = loads(content) > > queue = [] > for appname, appinfo in json.iteritems(): > for appfile, appfileinfo in appinfo.iteritems(): > url = appfileinfo['url'] > queue.append(url) > num_processed = 0 > num_urls = len(queue) > > freelist = m.handles[:] > while num_processed < num_urls: > # Assign URLs to workers > while queue and freelist: > url = queue.pop() > c = freelist.pop() > buf = StringIO() > c.setopt(pycurl.URL, url) > c.setopt(pycurl.WRITEFUNCTION, buf.write) > m.add_handle(c) > c.url = url > c.buf = buf > while True: > ret, num_handlers = m.perform() > if ret != pycurl.E_CALL_MULTI_PERFORM: > break > while True: > num_q, ok_list, err_list = m.info_read() > for c in ok_list: > buf = c.buf > buf.seek(0) > m.remove_handle(c) > print 'url=%s len=%d' % (c.url, len(buf.getvalue())) > freelist.append(c) > for c, errno, errmsg in err_list: > m.remove_handle(c) > print >> sys.stderr, 'FAILED: %s' % (c.url,) > num_processed += len(ok_list) + len(err_list) > if num_q == 0: > break > m.select(1.0) > > for c in m.handles: > c.close() > m.close() > > >def curl2_5(max=5): > """Using pycurl multi 5 threads""" > curl2(5) > > >if __name__ == '__main__': > for f in (u2, h2, curl1, curl2, curl2_5): > start = time() > f() > end = time() > diff = end - start > print >> sys.stderr, "Time: %f %s" % (diff, f.__doc__)
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
Actions:
View
Attachments on
bug 32935
:
5903
| 5908 |
5982