# Source code for pyEX.stocks.batch

# *****************************************************************************
#
# Copyright (c) 2020, the pyEX authors.
#
# This file is part of the pyEX library, distributed under the terms of
# the Apache License 2.0.  The full license can be found in the LICENSE file.
#
import itertools
from multiprocessing.pool import ThreadPool

import pandas as pd

from ..common import (
    _BATCH_TYPES,
    _TIMEFRAME_CHART,
    PyEXception,
    _get,
    _quoteSymbols,
    _raiseIfNotStr,
    _strOrDate,
    _toDatetime,
    json_normalize,
)
from .fundamentals import _dividendsToDF, _earningsToDF, _financialsToDF, _splitsToDF
from .news import _newsToDF
from .prices import _bookToDF, _chartToDF, chart
from .profiles import _companyToDF, _peersToDF
from .research import _statsToDF

# Per-field converters applied to raw batch payloads, keyed by batch field
# name (presumably each one builds a DataFrame, going by the `*ToDF` names).
# `batchDF` falls back to `json_normalize` for fields that have no entry here.
_MAPPING = {
    "book": _bookToDF,
    "chart": _chartToDF,
    "company": _companyToDF,
    "dividends": _dividendsToDF,
    "earnings": _earningsToDF,
    "financials": _financialsToDF,
    "stats": _statsToDF,
    "news": _newsToDF,
    "peers": _peersToDF,
    "splits": _splitsToDF,
}


def batch(
    symbols,
    fields=None,
    range_="1m",
    last=10,
    token="",
    version="stable",
    filter="",
    format="json",
):
    """Fetch several data types for one or more symbols in a single request.

    When `fields` is empty or omitted, only `quote` is requested.

    https://iexcloud.io/docs/api/#batch-requests


    Args:
        symbols (str or list): Ticker, or list of tickers, to request
        fields (str or list): Field name(s) to request; a string may hold
            several names separated by commas
        range_ (str): Date range for chart; must be in `_TIMEFRAME_CHART`
        last (int): Value sent as the `last` query parameter
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        dict: results in json

    Raises:
        PyEXception: if `symbols` is neither a string nor a list, a field is
            not in `_BATCH_TYPES`, `range_` is not in `_TIMEFRAME_CHART`, or
            more than 100 symbols are requested
    """
    fields = fields or "quote"

    # `[].__class__` is the list type spelled without the name `list` --
    # presumably because that name is shadowed elsewhere in the package;
    # confirm before simplifying.
    if not isinstance(symbols, (str, [].__class__)):
        raise PyEXception(
            "batch expects string or list of strings for symbols argument"
        )

    # A string without commas splits into a one-element list, so a single
    # split covers both the "one field" and the "a,b,c" spellings.
    if isinstance(fields, str):
        fields = fields.split(",")

    for requested in fields:
        if requested in _BATCH_TYPES:
            continue
        raise PyEXception("Unrecognized batch request field: {}".format(requested))

    if range_ not in _TIMEFRAME_CHART:
        raise PyEXception("Range must be in %s" % str(_TIMEFRAME_CHART))

    quoted = _quoteSymbols(symbols)
    if len(quoted.split(",")) > 100:
        raise PyEXception("IEX will only handle up to 100 symbols at a time!")

    types = ",".join(fields)
    if "," in quoted:
        # Several symbols: use the market-wide route with a symbols list.
        route = "stock/market/batch?symbols={}&types={}&range={}&last={}".format(
            quoted, types, range_, last
        )
    else:
        # One symbol: it goes into the path instead of the query string.
        route = "stock/{}/batch?types={}&range={}&last={}".format(
            quoted, types, range_, last
        )

    return _get(route, token=token, version=version, filter=filter, format=format)


def batchDF(
    symbols,
    fields=None,
    range_="1m",
    last=10,
    token="",
    version="stable",
    filter="",
    format="json",
):
    """Run `batch` and convert each returned field with its `_MAPPING` entry.

    Fields without a `_MAPPING` entry are converted with `json_normalize`.

    https://iexcloud.io/docs/api/#batch-requests


    Args:
        symbols (str or list): Ticker, or list of tickers, to request
        fields (str or list): Field name(s) to request
        range_ (str): Date range for chart
        last (int): Value sent as the `last` query parameter
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        dict: converted results keyed by field name; when several symbols are
            requested, each field's rows are concatenated across symbols and
            carry a `symbol` column
    """
    quoted = _quoteSymbols(symbols)
    # NOTE(review): `batch` applies `_quoteSymbols` to its input as well, so
    # the symbols are quoted twice -- confirm the helper is idempotent.
    raw = batch(
        quoted,
        fields,
        range_,
        last,
        token=token,
        version=version,
        filter=filter,
        format=format,
    )

    if "," not in quoted:
        # Single symbol: the response is keyed directly by field name.
        return {
            name: _MAPPING.get(name, json_normalize)(payload)
            for name, payload in raw.items()
        }

    # Several symbols: the response is keyed by symbol, then by field name.
    frames = {}
    for ticker, per_field in raw.items():
        for name, payload in per_field.items():
            accumulated = frames[name] if name in frames else pd.DataFrame()

            converted = _MAPPING.get(name, json_normalize)(payload)
            converted["symbol"] = ticker

            frames[name] = pd.concat([accumulated, converted], sort=True)
    return frames


def bulkBatch(
    symbols,
    fields=None,
    range_="1m",
    last=10,
    token="",
    version="stable",
    filter="",
    format="json",
):
    """Run `batch` over an arbitrarily long symbol list, 99 symbols per request.

    The chunks are requested concurrently on a thread pool and the responses
    are merged into one dict keyed by symbol. Symbols that a chunk's response
    does not contain are filled in with empty placeholders.

    https://iexcloud.io/docs/api/#batch-requests


    Args:
        symbols (list): List of tickers to request
        fields (str or list): Field name(s) to request; defaults to every
            entry of `_BATCH_TYPES`
        range_ (str): Date range for chart
        last (int): Value sent as the `last` query parameter
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        dict: results in json, keyed by symbol

    Raises:
        PyEXception: if `symbols` is not a list
    """
    fields = fields or _BATCH_TYPES

    # `[].__class__` is the list type spelled without the name `list` --
    # presumably because that name is shadowed elsewhere in the package;
    # confirm before simplifying.
    if not isinstance(symbols, [].__class__):
        raise PyEXception("Symbols must be of type list")

    # Nothing to request: skip spinning up worker threads.
    if not symbols:
        return {}

    args = [
        (symbols[i : i + 99], fields, range_, last, token, version, filter, format)
        for i in range(0, len(symbols), 99)
    ]

    # The context manager tears the pool down even when a request raises;
    # `starmap` blocks until every chunk is done, so no work is cut short.
    with ThreadPool(min(20, len(args))) as pool:
        rets = pool.starmap(batch, args)

    ret = {}
    empty_data = []

    for chunk_args, d in zip(args, rets):
        symbols_subset = chunk_args[0]

        if len(symbols_subset) == 1:
            # `batch` uses the single-symbol route for a lone symbol, whose
            # response is keyed by field rather than by symbol. Re-key it so
            # it merges like the multi-symbol responses instead of leaking
            # field names into the top level of the result.
            d = {symbols_subset[0]: d}

        if len(d) != len(symbols_subset):
            # Keep input order so the placeholder order is deterministic.
            empty_data.extend(s for s in symbols_subset if s not in d)
        ret.update(d)

    for k in empty_data:
        if k not in ret:
            if isinstance(fields, str):
                ret[k] = {}
            else:
                ret[k] = {x: {} for x in fields}
    return ret


def bulkBatchDF(
    symbols,
    fields=None,
    range_="1m",
    last=10,
    token="",
    version="stable",
    filter="",
    format="json",
):
    """Run `bulkBatch` and convert each returned field with its `_MAPPING` entry.

    Fields without a `_MAPPING` entry are converted with `json_normalize`,
    matching `batchDF`.

    https://iexcloud.io/docs/api/#batch-requests


    Args:
        symbols (list): List of tickers to request
        fields (str or list): Field name(s) to request
        range_ (str): Date range for chart
        last (int): Value sent as the `last` query parameter
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        dict: converted results keyed by field name; each field's rows are
            concatenated across symbols and carry a `symbol` column
    """
    dat = bulkBatch(
        symbols,
        fields,
        range_,
        last,
        token=token,
        version=version,
        filter=filter,
        format=format,
    )
    ret = {}
    for symbol in dat:
        for field in dat[symbol]:
            if field not in ret:
                ret[field] = pd.DataFrame()

            d = dat[symbol][field]
            # Not every batch field has a dedicated converter (e.g. `quote`);
            # fall back to `json_normalize` instead of raising KeyError.
            d = _MAPPING.get(field, json_normalize)(d)
            d["symbol"] = symbol
            ret[field] = pd.concat([ret[field], d], sort=True)

    return ret


def bulkMinuteBars(symbol, dates, token="", version="stable", filter="", format="json"):
    """Fetch many dates worth of minute-bars for a given symbol.

    One `chart` request with the "1d" timeframe is issued per date, run
    concurrently on a thread pool, and the per-date results are joined into
    a single list in the order of `dates`.

    Args:
        symbol (str): Ticker to request
        dates (iterable): Dates to fetch; each is passed through `_strOrDate`
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        list: the items of every per-date `chart` result, concatenated
    """
    _raiseIfNotStr(symbol)

    args = [
        (symbol, "1d", _strOrDate(date), token, version, filter, format)
        for date in dates
    ]

    # Nothing to request: skip spinning up worker threads.
    if not args:
        return []

    # The context manager tears the pool down even when a request raises;
    # `starmap` blocks until every date is done, so no work is cut short.
    with ThreadPool(min(20, len(args))) as pool:
        rets = pool.starmap(chart, args)

    return [*itertools.chain.from_iterable(rets)]


def bulkMinuteBarsDF(
    symbol, dates, token="", version="stable", filter="", format="json"
):
    """Fetch many dates worth of minute-bars for a given symbol as a DataFrame.

    Args:
        symbol (str): Ticker to request
        dates (iterable): Dates to fetch
        token (str): Access token
        version (str): API version
        filter (str): filters: https://iexcloud.io/docs/api/#filter-results
        format (str): return format, defaults to json

    Returns:
        DataFrame: the `bulkMinuteBars` result indexed by `date` and `minute`,
            or an empty DataFrame when nothing was returned
    """
    frame = pd.DataFrame(
        bulkMinuteBars(
            symbol, dates, token=token, version=version, filter=filter, format=format
        )
    )
    if not frame.empty:
        # Return value is discarded, so `_toDatetime` presumably converts the
        # frame in place -- confirm against its definition.
        _toDatetime(frame)
        frame.set_index(["date", "minute"], inplace=True)
    return frame