Examples
All examples are also available on GitHub.
"""Find and show 10 working HTTP(S) proxies."""
import asyncio
from proxybroker import Broker
async def show(proxies):
    """Print every proxy taken from the queue until ``None`` is received."""
    proxy = await proxies.get()
    while proxy is not None:
        print('Found proxy: %s' % proxy)
        proxy = await proxies.get()
# Queue shared by the broker and show().
proxies = asyncio.Queue()
broker = Broker(proxies)

# Search for 10 HTTP(S) proxies and print them, both running concurrently.
find_proxies = broker.find(types=['HTTP', 'HTTPS'], limit=10)
print_proxies = show(proxies)
tasks = asyncio.gather(find_proxies, print_proxies)

loop = asyncio.get_event_loop()
loop.run_until_complete(tasks)
"""Find 10 working HTTP(S) proxies and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write proxies from the queue to ``filename`` as ``scheme://host:port`` lines.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    Reading stops when ``None`` is taken from the queue. The scheme is
    ``https`` when ``'HTTPS'`` is among ``proxy.types`` and ``http`` otherwise.
    """
    with open(filename, 'w') as out:
        proxy = await proxies.get()
        while proxy is not None:
            scheme = 'http'
            if 'HTTPS' in proxy.types:
                scheme = 'https'
            out.write('%s://%s:%d\n' % (scheme, proxy.host, proxy.port))
            proxy = await proxies.get()
def main():
    """Find 10 working HTTP(S) proxies and save them to ``proxies.txt``."""
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    # The search and the file writer share the queue and run concurrently.
    finder = broker.find(types=['HTTP', 'HTTPS'], limit=10)
    writer = save(proxies, filename='proxies.txt')
    tasks = asyncio.gather(finder, writer)
    asyncio.get_event_loop().run_until_complete(tasks)


if __name__ == '__main__':
    main()
"""Find working proxies and use them concurrently.
Note: Consider using Broker.serve() instead of the code listed below.
It may be more useful and easier to use.
"""
import asyncio
from urllib.parse import urlparse
import aiohttp
from proxybroker import Broker, ProxyPool
from proxybroker.errors import NoProxyError
async def get_pages(urls, proxy_pool, timeout=10, loop=None):
    """Fetch every URL via ``fetch`` and print each result as it completes.

    Only the first 30 characters of each response body are printed.
    """
    pending = []
    for target in urls:
        pending.append(fetch(target, proxy_pool, timeout, loop))
    for finished in asyncio.as_completed(pending):
        url, content = await finished
        print('Done! url: %s; content: %.30s' % (url, content))
async def fetch(url, proxy_pool, timeout, loop):
    """Download ``url`` through a proxy taken from ``proxy_pool``.

    A proxy matching the URL scheme is requested from the pool and, if one
    was obtained, is put back into the pool whether or not the request
    succeeded. The listed connection, timeout and no-proxy errors are
    reported on stdout instead of being raised.

    :param url: address to request.
    :param proxy_pool: object providing ``get(scheme=...)`` and ``put(proxy)``.
    :param timeout: value passed to ``aiohttp.Timeout``.
    :param loop: event loop passed to ``aiohttp.Timeout`` and the session.
    :return: ``(url, body)``; ``body`` is ``None`` when the request failed.
    """
    resp, proxy = None, None
    try:
        proxy = await proxy_pool.get(scheme=urlparse(url).scheme)
        proxy_url = 'http://%s:%d' % (proxy.host, proxy.port)
        with aiohttp.Timeout(timeout, loop=loop):
            async with aiohttp.ClientSession(loop=loop) as session:
                async with session.get(url, proxy=proxy_url) as response:
                    resp = await response.read()
    except (aiohttp.errors.ClientOSError, aiohttp.errors.ClientResponseError,
            aiohttp.errors.ServerDisconnectedError, asyncio.TimeoutError,
            NoProxyError) as e:
        # Interpolate with ``%``: extra positional arguments to print() are
        # not substituted into the format string.
        print('Error. url: %s; error: %r' % (url, e))
    finally:
        if proxy:
            proxy_pool.put(proxy)
    # Returned after the try statement so an unexpected exception is not
    # discarded by a return inside ``finally``.
    return (url, resp)
def main():
    """Find working proxies and use them concurrently to fetch test URLs."""
    loop = asyncio.get_event_loop()
    proxies = asyncio.Queue(loop=loop)
    proxy_pool = ProxyPool(proxies)

    broker = Broker(
        proxies,
        timeout=8,
        max_conn=200,
        max_tries=3,
        verify_ssl=False,
        judges=[
            'http://httpbin.org/get?show_env',
            'https://httpbin.org/get?show_env',
        ],
        providers=[
            'http://www.proxylists.net/',
            'http://fineproxy.org/eng/fresh-proxies/',
        ],
        loop=loop,
    )

    # Search criteria handed to broker.find().
    finder = broker.find(
        types=[('HTTP', ('Anonymous', 'High'))],
        countries=['US', 'DE', 'FR'],
        strict=True,
        limit=10,
    )

    target_urls = [
        'http://httpbin.org/get',
        'http://httpbin.org/redirect/1',
        'http://httpbin.org/anything',
        'http://httpbin.org/status/404',
    ]
    fetcher = get_pages(target_urls, proxy_pool, loop=loop)

    # Run the proxy search and the page downloads side by side.
    loop.run_until_complete(asyncio.gather(finder, fetcher))
    # broker.show_stats(verbose=True)


if __name__ == '__main__':
    main()
"""Find 10 working proxies supporting the CONNECT method
to port 25 (SMTP) and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write proxies from the queue to ``filename`` as ``smtp://host:port`` lines.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    Reading stops when ``None`` is taken from the queue.
    """
    with open(filename, 'w') as out:
        proxy = await proxies.get()
        while proxy is not None:
            line = 'smtp://%s:%d\n' % (proxy.host, proxy.port)
            out.write(line)
            proxy = await proxies.get()
def main():
    """Find 10 proxies supporting CONNECT to port 25 and save them to ``proxies.txt``."""
    proxies = asyncio.Queue()
    broker = Broker(proxies, judges=['smtp://smtp.gmail.com'], max_tries=1)

    # Check each proxy against spam databases (DNSBL). Disabled by default.
    # More databases: http://www.dnsbl.info/dnsbl-database-check.php
    dnsbl = [
        'bl.spamcop.net',
        'cbl.abuseat.org',
        'dnsbl.sorbs.net',
        'zen.spamhaus.org',
        'bl.mcafee.com',
        'spam.spamrats.com',
    ]

    finder = broker.find(types=['CONNECT:25'], dnsbl=dnsbl, limit=10)
    writer = save(proxies, filename='proxies.txt')
    tasks = asyncio.gather(finder, writer)
    asyncio.get_event_loop().run_until_complete(tasks)


if __name__ == '__main__':
    main()
"""Gather proxies from the providers without
checking and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write proxies from the queue to ``filename`` as ``host:port`` lines.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    Reading stops when ``None`` is taken from the queue.
    """
    with open(filename, 'w') as out:
        proxy = await proxies.get()
        while proxy is not None:
            line = '%s:%d\n' % (proxy.host, proxy.port)
            out.write(line)
            proxy = await proxies.get()
def main():
    """Gather 10 US/GB proxies from the providers, unchecked, into ``proxies.txt``."""
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    # grab() and the file writer share the queue and run concurrently.
    grabber = broker.grab(countries=['US', 'GB'], limit=10)
    writer = save(proxies, filename='proxies.txt')
    tasks = asyncio.gather(grabber, writer)
    asyncio.get_event_loop().run_until_complete(tasks)


if __name__ == '__main__':
    main()
"""Run a local proxy server that distributes
incoming requests to external proxies."""
import asyncio
import aiohttp
from proxybroker import Broker
async def get_pages(urls, proxy_url):
    """Fetch every URL via ``fetch`` and print each result as it completes.

    Only the first 100 characters of each response body are printed.
    """
    pending = []
    for target in urls:
        pending.append(fetch(target, proxy_url))
    for finished in asyncio.as_completed(pending):
        url, content = await finished
        print('Done! url: %s; content: %.100s' % (url, content))
async def fetch(url, proxy_url):
    """Download ``url`` through the proxy at ``proxy_url``.

    Best effort: a failed request is reported on stdout and yields ``None``
    as the body instead of raising.

    :param url: address to request.
    :param proxy_url: proxy address passed to ``session.get(proxy=...)``.
    :return: ``(url, body)``; ``body`` is ``None`` when the request failed.
    """
    resp = None
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, proxy=proxy_url) as response:
                resp = await response.read()
    except Exception as e:
        # Handle failures explicitly rather than returning from ``finally``:
        # a return there would also silently discard cancellation and
        # KeyboardInterrupt, and would hide unexpected errors entirely.
        print('Error. url: %s; error: %r' % (url, e))
    return (url, resp)
def main():
    """Run a local proxy server and fetch test URLs through it."""
    host, port = '127.0.0.1', 8888  # by default
    loop = asyncio.get_event_loop()

    broker = Broker(max_tries=1, loop=loop)

    # Broker.serve() also accepts every argument supported by the
    # Broker.find() method: data, countries, post, strict, dnsbl.
    serve_options = {
        'host': host,
        'port': port,
        'types': [('HTTP', 'High'), 'HTTPS', 'CONNECT:80'],
        'limit': 10,
        'max_tries': 3,
        'prefer_connect': True,
        'min_req_proxy': 5,
        'max_error_rate': 0.5,
        'max_resp_time': 8,
        'http_allowed_codes': [200, 301, 302],
        'backlog': 100,
    }
    broker.serve(**serve_options)

    target_urls = [
        'http://httpbin.org/get',
        'https://httpbin.org/get',
        'http://httpbin.org/redirect/1',
        'http://httpbin.org/status/404',
    ]
    local_proxy = 'http://%s:%d' % (host, port)
    loop.run_until_complete(get_pages(target_urls, local_proxy))

    broker.stop()


if __name__ == '__main__':
    main()