Examples

All examples are also available on GitHub.

"""Find and show 10 working HTTP(S) proxies."""

import asyncio
from proxybroker import Broker

async def show(proxies):
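    """Print proxies as they are found."""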
    while True:
        proxy = await proxies.get()
        if proxy is None:
            break
        print('Found proxy: %s' % proxy)

proxies = asyncio.Queue()
broker = Broker(proxies)
tasks = asyncio.gather(
    broker.find(types=['HTTP', 'HTTPS'], limit=10),
    show(proxies))

loop = asyncio.get_event_loop()
loop.run_until_complete(tasks)

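These examples use the pre-Python 3.10 pattern of creating an event loop with asyncio.get_event_loop() and driving it with run_until_complete(). On newer interpreters, where get_event_loop() no longer creates a loop implicitly, the same flow can be wrapped in a single coroutine and run with asyncio.run(). A minimal sketch of the first example in that style, assuming proxybroker itself runs on your Python version and its Broker API is unchanged:

"""Find and show 10 working HTTP(S) proxies (asyncio.run() variant)."""

import asyncio
from proxybroker import Broker


async def show(proxies):
    """Print proxies as they are found."""
    while True:
        proxy = await proxies.get()
        if proxy is None:
            break
        print('Found proxy: %s' % proxy)


async def main():
    # Create the queue inside the running loop so it binds to it.
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    await asyncio.gather(
        broker.find(types=['HTTP', 'HTTPS'], limit=10),
        show(proxies))


asyncio.run(main())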


"""Find 10 working HTTP(S) proxies and save them to a file."""

import asyncio
from proxybroker import Broker


async def save(proxies, filename):
    """Save proxies to a file."""
    with open(filename, 'w') as f:
        while True:
            proxy = await proxies.get()
            if proxy is None:
                break
            proto = 'https' if 'HTTPS' in proxy.types else 'http'
            row = '%s://%s:%d\n' % (proto, proxy.host, proxy.port)
            f.write(row)


def main():
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    tasks = asyncio.gather(broker.find(types=['HTTP', 'HTTPS'], limit=10),
                           save(proxies, filename='proxies.txt'))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(tasks)


if __name__ == '__main__':
    main()



"""Find working proxies and use them concurrently.

Note: Consider using Broker.serve() instead of the code below;
      it may be more convenient and friendlier.
"""

import asyncio
from urllib.parse import urlparse

import aiohttp

from proxybroker import Broker, ProxyPool
from proxybroker.errors import NoProxyError


async def get_pages(urls, proxy_pool, timeout=10, loop=None):
    tasks = [fetch(url, proxy_pool, timeout, loop) for url in urls]
    for task in asyncio.as_completed(tasks):
        url, content = await task
        print('Done! url: %s; content: %.30s' % (url, content))


async def fetch(url, proxy_pool, timeout, loop):
    resp, proxy = None, None
    try:
        proxy = await proxy_pool.get(scheme=urlparse(url).scheme)
        proxy_url = 'http://%s:%d' % (proxy.host, proxy.port)
        with aiohttp.Timeout(timeout, loop=loop):
            async with aiohttp.ClientSession(loop=loop) as session:
                async with session.get(url, proxy=proxy_url) as response:
                    resp = await response.read()
    except (aiohttp.errors.ClientOSError, aiohttp.errors.ClientResponseError,
            aiohttp.errors.ServerDisconnectedError, asyncio.TimeoutError,
            NoProxyError) as e:
        print('Error. url: %s; error: %r' % (url, e))
    finally:
        if proxy:
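            # Return the proxy to the pool so other requests can reuse it.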
            proxy_pool.put(proxy)
        return (url, resp)


def main():
    loop = asyncio.get_event_loop()

    proxies = asyncio.Queue(loop=loop)
    proxy_pool = ProxyPool(proxies)

    judges = ['http://httpbin.org/get?show_env',
              'https://httpbin.org/get?show_env']
    providers = ['http://www.proxylists.net/', 'http://fineproxy.org/eng/fresh-proxies/']

    broker = Broker(
        proxies, timeout=8, max_conn=200, max_tries=3, verify_ssl=False,
        judges=judges, providers=providers, loop=loop)

    types = [('HTTP', ('Anonymous', 'High')), ]
    countries = ['US', 'DE', 'FR']

    urls = ['http://httpbin.org/get', 'http://httpbin.org/redirect/1',
            'http://httpbin.org/anything', 'http://httpbin.org/status/404']

    tasks = asyncio.gather(
        broker.find(types=types, countries=countries, strict=True, limit=10),
        get_pages(urls, proxy_pool, loop=loop))
    loop.run_until_complete(tasks)

    # broker.show_stats(verbose=True)


if __name__ == '__main__':
    main()



"""Find 10 working proxies supporting CONNECT method
   to 25 port (SMTP) and save them to a file."""

import asyncio
from proxybroker import Broker


async def save(proxies, filename):
    """Save proxies to a file."""
    with open(filename, 'w') as f:
        while True:
            proxy = await proxies.get()
            if proxy is None:
                break
            f.write('smtp://%s:%d\n' % (proxy.host, proxy.port))


def main():
    proxies = asyncio.Queue()
    broker = Broker(proxies, judges=['smtp://smtp.gmail.com'], max_tries=1)

    # Check found proxies against spam databases (DNSBL). Disabled by default.
    # More databases: http://www.dnsbl.info/dnsbl-database-check.php
    dnsbl = ['bl.spamcop.net', 'cbl.abuseat.org', 'dnsbl.sorbs.net',
             'zen.spamhaus.org', 'bl.mcafee.com', 'spam.spamrats.com']

    tasks = asyncio.gather(
        broker.find(types=['CONNECT:25'], dnsbl=dnsbl, limit=10),
        save(proxies, filename='proxies.txt'))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(tasks)

if __name__ == '__main__':
    main()

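For background on the dnsbl argument above: a DNSBL lookup reverses the octets of an IPv4 address and resolves them as a subdomain of the blocklist zone; any answer means the address is listed. A minimal standalone sketch of the idea (this is not proxybroker's internal implementation):

import socket


def is_listed(ip, zone='zen.spamhaus.org'):
    """Return True if `ip` is listed in the given DNSBL zone."""
    # e.g. 203.0.113.7 -> 7.113.0.203.zen.spamhaus.org
    query = '.'.join(reversed(ip.split('.'))) + '.' + zone
    try:
        socket.gethostbyname(query)
        return True   # any A record means the address is listed
    except socket.gaierror:
        return False  # no record: not listed (or the lookup failed)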


"""Gather proxies from the providers without
   checking and save them to a file."""

import asyncio
from proxybroker import Broker


async def save(proxies, filename):
    """Save proxies to a file."""
    with open(filename, 'w') as f:
        while True:
            proxy = await proxies.get()
            if proxy is None:
                break
            f.write('%s:%d\n' % (proxy.host, proxy.port))


def main():
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    tasks = asyncio.gather(broker.grab(countries=['US', 'GB'], limit=10),
                           save(proxies, filename='proxies.txt'))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(tasks)


if __name__ == '__main__':
    main()

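The same gathering is also available from the command line; the project README shows, for example: proxybroker grab --countries US --limit 10 --outfile ./proxies.txt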


"""Run a local proxy server that distributes
   incoming requests to external proxies."""

import asyncio
import aiohttp

from proxybroker import Broker


async def get_pages(urls, proxy_url):
    tasks = [fetch(url, proxy_url) for url in urls]
    for task in asyncio.as_completed(tasks):
        url, content = await task
        print('Done! url: %s; content: %.100s' % (url, content))


async def fetch(url, proxy_url):
    resp = None
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, proxy=proxy_url) as response:
                resp = await response.read()
    except (aiohttp.errors.ClientOSError, aiohttp.errors.ClientResponseError,
            aiohttp.errors.ServerDisconnectedError) as e:
        print('Error. url: %s; error: %r' % (url, e))
    finally:
        return (url, resp)


def main():
    host, port = '127.0.0.1', 8888  # by default

    loop = asyncio.get_event_loop()

    types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80']
    codes = [200, 301, 302]

    broker = Broker(max_tries=1, loop=loop)

    # Broker.serve() also supports all arguments that are accepted
    # by the Broker.find() method: data, countries, post, strict, dnsbl.
    broker.serve(host=host, port=port, types=types, limit=10, max_tries=3,
                 prefer_connect=True, min_req_proxy=5, max_error_rate=0.5,
                 max_resp_time=8, http_allowed_codes=codes, backlog=100)

    urls = ['http://httpbin.org/get', 'https://httpbin.org/get',
            'http://httpbin.org/redirect/1', 'http://httpbin.org/status/404']

    proxy_url = 'http://%s:%d' % (host, port)
    loop.run_until_complete(get_pages(urls, proxy_url))

    broker.stop()


if __name__ == '__main__':
    main()

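The proxy server can also be started from the command line; the project README shows, for example: proxybroker serve --host 127.0.0.1 --port 8888 --types HTTP HTTPS --lvl High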