Source code for nxviz.utils

from collections import Counter

import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from palettable.colorbrewer import diverging, qualitative, sequential


def is_data_homogenous(data_container):
    """
    Check whether every item in the container has the same Python type.

    Types are compared exactly via ``type(item)``, so subclasses count as
    different types (e.g. ``True`` and ``1`` are *not* homogenous). An empty
    container holds no type at all and therefore yields ``False``.

    :param data_container: A generic container of data points.
    :type data_container: `iterable`
    :returns: ``True`` if exactly one distinct type is present.
    :rtype: `bool`
    """
    data_types = {type(item) for item in data_container}
    return len(data_types) == 1


def infer_data_type(data_container):
    """
    For a given container of data, infer the type of data as one of
    continuous, categorical, or ordinal.

    Data with exactly two distinct values are always reported as
    categorical, whatever their Python type. Otherwise the mapping is
    one-to-one on the type of the first element:

    - str:   categorical
    - int:   ordinal
    - float: continuous

    There may be better ways that are not currently implemented below. For
    example, with a list of numbers, we can check whether the number of unique
    entries is less than or equal to 12, but has over 10000+ entries. This
    would be a good candidate for floats being categorical.

    :param data_container: A generic container of data points.
    :type data_container: `list` or `tuple`
    :returns: One of ``"categorical"``, ``"ordinal"`` or ``"continuous"``.
    :rtype: `str`
    :raises AssertionError: if the container is not a list/tuple, holds
        fewer than two distinct values, or mixes several Python types.
    :raises ValueError: if the element type is not str, int or float.
    """
    # Defensive programming checks. They raise AssertionError explicitly
    # (instead of using ``assert``) so that the exception type callers see is
    # unchanged while the checks still run under ``python -O``.
    # 0. Ensure that we are dealing with lists or tuples, and nothing else.
    if not isinstance(data_container, (list, tuple)):
        raise AssertionError("data_container should be a list or tuple.")
    # 1. Don't want to deal with only single values.
    unique_values = set(data_container)
    if len(unique_values) <= 1:
        raise AssertionError(
            "There should be more than one value in the data container."
        )
    # 2. Don't want to deal with mixed data.
    if not is_data_homogenous(data_container):
        raise AssertionError("Data are not of a homogenous type!")

    # The container is homogenous, so the first element is representative of
    # every element's type.
    datum = data_container[0]

    # Treat binomial (two-valued) data as categorical.
    # TODO: make tests for this.
    if len(unique_values) == 2:
        return "categorical"
    if isinstance(datum, str):
        return "categorical"
    if isinstance(datum, int):
        return "ordinal"
    if isinstance(datum, float):
        return "continuous"
    raise ValueError("Not possible to tell what the data type is.")


def is_data_diverging(data_container):
    """
    Check whether the data are diverging, i.e. contain both strictly
    negative and strictly positive values.

    This is a simple check, can be made much more sophisticated.

    :param data_container: A generic container of data points.
    :type data_container: `iterable`
    :returns: ``True`` if at least one value is below zero and at least one
        is above zero.
    :rtype: `bool`
    :raises AssertionError: if the data are not inferred to be ordinal or
        continuous.
    """
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with ``-O``; the exception type is unchanged.
    if infer_data_type(data_container) not in ("ordinal", "continuous"):
        raise AssertionError("Data type should be ordinal or continuous")

    # Zeros count as neither negative nor positive.
    has_negative = any(value < 0 for value in data_container)
    has_positive = any(value > 0 for value in data_container)
    return has_negative and has_positive


def is_groupable(data_container):
    """
    Returns whether the data container is a "groupable" container or not.

    By "groupable", we mean it is a 'categorical' or 'ordinal' variable, as
    reported by :func:`infer_data_type`.

    :param data_container: A generic container of data points.
    :type data_container: `iterable`
    :returns: ``True`` if the inferred data type is categorical or ordinal.
    :rtype: `bool`
    """
    return infer_data_type(data_container) in ("categorical", "ordinal")


def num_discrete_groups(data_container):
    """
    Returns the number of discrete groups present in a data container.

    A "group" is a distinct value, so the items must be hashable.

    :param data_container: A generic container of data points.
    :type data_container: `iterable`
    :returns: The count of distinct values in the container.
    :rtype: `int`
    """
    return len(set(data_container))


def items_in_groups(data_container):
    """
    Returns the discrete groups present in a data container together with
    the number of items in each group.

    :param data_container: A generic container of data points.
    :type data_container: `iterable`
    :returns: A mapping from each distinct value to its number of
        occurrences.
    :rtype: `collections.Counter`
    """
    return Counter(data_container)


def n_group_colorpallet(n):
    """
    Create a colormap with ``n`` colors taken from seaborn's "hls" palette.

    Meant for the case where there are more than 8 categorical groups of
    nodes or edges and a matching color palette is needed.

    :param n: The number of colors to generate.
    :type n: `int`
    :returns: A colormap built from the ``n`` generated colors.
    :rtype: `matplotlib.colors.ListedColormap`
    """
    return ListedColormap(sns.color_palette("hls", n))


# Lookup table of named palettes / colormaps.
#
# The "Accent_<k>" entries are the qualitative palettes for k groups.
# NOTE(review): "Accent_2" points at the 3-color Accent palette, presumably
# because no 2-color Accent palette is available -- confirm before changing.
cmaps = dict(
    Accent_2=qualitative.Accent_3,
    Accent_3=qualitative.Accent_3,
    Accent_4=qualitative.Accent_4,
    Accent_5=qualitative.Accent_5,
    Accent_6=qualitative.Accent_6,
    Accent_7=qualitative.Accent_7,
    Accent_8=qualitative.Accent_8,
    # Palettes for non-categorical data.
    continuous=sequential.YlGnBu_9,
    diverging=diverging.RdBu_11,
    # Cubehelix colormap (returned as a colormap object via as_cmap=True).
    weights=sns.cubehelix_palette(
        50, hue=0.05, rot=0, light=0.9, dark=0, as_cmap=True
    ),
)


def to_pandas_nodes(G):  # noqa: N803
    """
    Convert nodes in the graph into a pandas DataFrame.

    Each node becomes one row: the ``node`` column holds the node itself and
    every node attribute becomes its own column. A node attribute that is
    itself called ``"node"`` takes precedence over the node identifier.

    :param G: A graph whose ``nodes(data=True)`` yields
        ``(node, attribute_dict)`` pairs.
    :returns: One row per node.
    :rtype: `pandas.DataFrame`
    """
    records = [{"node": n, **meta} for n, meta in G.nodes(data=True)]
    return pd.DataFrame(records)


def to_pandas_edges(G, x_kw, y_kw, **kwargs):  # noqa: N803
    """
    Convert Graph edges to pandas DataFrame that's readable to Altair.

    Every edge contributes two consecutive rows that share the same
    ``edge``, ``source``, ``target``, ``pair`` and edge-attribute values.
    The first row carries the x/y position of the source node and the second
    row the x/y position of the target node, so the two rows together
    describe the edge's line segment.

    :param G: A graph exposing ``edges(data=True)`` and ``nodes[n]``.
    :param x_kw: Name of the node attribute holding the x coordinate.
    :param y_kw: Name of the node attribute holding the y coordinate.
    :param kwargs: Accepted for interface compatibility; currently unused.
    :returns: A DataFrame with two rows per edge. Columns are ``source``,
        ``target``, ``x``, ``y``, ``edge``, ``pair`` followed by every edge
        attribute in first-seen order.
    :rtype: `pandas.DataFrame`
    """
    base_columns = ["source", "target", "x", "y", "edge", "pair"]

    rows = []
    edge_attribute_names = []
    for i, (n1, n2, d) in enumerate(G.edges(data=True)):
        edge_attribute_names.extend(d)
        # One row per endpoint: source position first, then target position.
        for endpoint in (n1, n2):
            # ``G.nodes`` replaces ``G.node``, which newer NetworkX removed.
            position = G.nodes[endpoint]
            rows.append(
                dict(
                    edge=i,
                    source=n1,
                    target=n2,
                    pair=(n1, n2),
                    x=position[x_kw],
                    y=position[y_kw],
                    **d
                )
            )

    # dict.fromkeys de-duplicates while keeping a deterministic column order
    # (a plain set would shuffle the columns between runs).
    columns = list(dict.fromkeys(base_columns + edge_attribute_names))

    # dtype=object keeps values exactly as stored on the graph; edges lacking
    # an attribute get NaN in that column.
    return pd.DataFrame(
        rows, index=range(len(rows)), columns=columns, dtype=object
    )
Source code for nxviz.utils

nxviz

Navigation

Related Topics