Source code for dataprep.eda.distribution.render

"""
This module implements the visualization for the plot(df) function.
"""  # pylint: disable=too-many-lines
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import json
import os
import math

import numpy as np
import pandas as pd
from bokeh.layouts import row
from bokeh.models import (
    BasicTicker,
    ColorBar,
    ColumnDataSource,
    CustomJSHover,
    FactorRange,
    FuncTickFormatter,
    Range1d,
    HoverTool,
    LayoutDOM,
    Legend,
    LegendItem,
    LinearColorMapper,
    Panel,
    PrintfTickFormatter,
)
from bokeh.plotting import Figure, figure
from bokeh.transform import cumsum, linear_cmap, transform
from bokeh.util.hex import hexbin
from scipy.stats import norm
from wordcloud import WordCloud

from ..configs import KDE, Bar, Box, Config, Pie, QQNorm, WordFrequency
from ..dtypes_v2 import Continuous, DateTime, Nominal, GeoGraphy, SmallCardNum, GeoPoint
from ..intermediate import Intermediate
from ..palette import CATEGORY20, PASTEL1, RDBU, VIRIDIS, YlGnBu
from ..utils import tweak_figure, _format_ticks, _format_axis, _format_bin_intervals

# Geographic lookup data shipped in the same directory as this module.
COUNTRY_MAP_FILE = os.path.join(os.path.split(os.path.abspath(__file__))[0], "country.json")
COUNTRY_NAME_FILE = os.path.join(os.path.split(os.path.abspath(__file__))[0], "name_dict.json")
# JSON text is UTF-8 by specification; pin the encoding so that loading does
# not depend on the platform's locale default (e.g. cp1252 on Windows).
# MAPS is used below as a column-oriented data source ("xs", "ys", "name", "fip").
with open(COUNTRY_MAP_FILE, "r", encoding="utf-8") as fp:
    MAPS = json.load(fp)
# NAME_DICT is looked up with lower-cased, stripped names and its values are
# matched against MAPS["fip"] (see geo_viz).
with open(COUNTRY_NAME_FILE, "r", encoding="utf-8") as fp:
    NAME_DICT = json.load(fp)

__all__ = ["render"]


def _make_title(grp_cnt_stats: Dict[str, int], x: str, y: str) -> str:
    """
    Format the title to notify the user of sampled output
    """
    x_ttl, y_ttl = None, None
    if f"{x}_ttl" in grp_cnt_stats:
        x_ttl = grp_cnt_stats[f"{x}_ttl"]
        x_shw = grp_cnt_stats[f"{x}_shw"]
    if f"{y}_ttl" in grp_cnt_stats:
        y_ttl = grp_cnt_stats[f"{y}_ttl"]
        y_shw = grp_cnt_stats[f"{y}_shw"]
    if x_ttl and y_ttl:
        if x_ttl > x_shw and y_ttl > y_shw:
            return f"(top {y_shw} out of {y_ttl}) {y} by (top {x_shw} out of {x_ttl}) {x}"
    elif x_ttl:
        if x_ttl > x_shw:
            return f"{y} by (top {x_shw} out of {x_ttl}) {x}"
    elif y_ttl:
        if y_ttl > y_shw:
            return f"(top {y_shw} out of {y_ttl}) {y} by {x}"
    return f"{y} by {x}"


def _format_values(key: str, value: Any) -> str:

    if not isinstance(value, (int, float)):
        # if value is a time
        return str(value)

    if "Memory" in key:
        # for memory usage
        ind = 0
        unit = dict(enumerate(["B", "KB", "MB", "GB", "TB"], 0))
        while value > 1024:
            value /= 1024
            ind += 1
        return f"{value:.1f} {unit[ind]}"

    if (value * 10) % 10 == 0:
        # if value is int but in a float form with 0 at last digit
        value = int(value)
        if abs(value) >= 1000000:
            return f"{value:.5g}"
    elif abs(value) >= 1000000 or abs(value) < 0.001:
        value = f"{value:.5g}"
    elif abs(value) >= 1:
        # eliminate trailing zeros
        pre_value = float(f"{value:.4f}")
        value = int(pre_value) if (pre_value * 10) % 10 == 0 else pre_value
    elif 0.001 <= abs(value) < 1:
        value = f"{value:.4g}"
    else:
        value = str(value)

    if "%" in key:
        # for percentage, only use digits before notation sign for extreme small number
        value = f"{float(value):.1%}"
    return str(value)


def _empty_figure(title: str, plot_height: int, plot_width: int) -> Figure:
    """
    Build a blank placeholder figure for the case where there is no data to draw

    The figure has empty x and y ranges, no toolbar and a light background.
    """
    options = dict(
        title=title,
        plot_width=plot_width,
        plot_height=plot_height,
        x_range=[],
        y_range=[],
        x_axis_location="below",
        tools="hover",
        toolbar_location=None,
        background_fill_color="#fafafa",
    )
    placeholder = Figure(**options)

    # Bokeh reports error -1000 (MISSING_RENDERERS) for a plot without any
    # renderer, so attach a zero-sized rectangle.
    placeholder.rect(x=0, y=0, width=0, height=0)
    return placeholder


def wordcloud_viz(
    word_cnts: pd.Series,
    plot_width: int,
    plot_height: int,
) -> Panel:
    """
    Visualize the word cloud

    Parameters
    ----------
    word_cnts
        Word frequencies, passed to ``WordCloud.generate_from_frequencies``
    plot_width
        Width of the figure
    plot_height
        Height of the figure

    Returns
    -------
    Panel
        A panel titled "Word Cloud" holding the rendered image
    """  # pylint: disable=unsubscriptable-object
    mask_path = f"{Path(__file__).parent.parent.parent}/assets/ellipse.npz"
    # np.load on an .npz archive keeps the underlying file open until the
    # archive is closed; extract the mask inside a context manager so the
    # handle is released.
    with np.load(mask_path) as archive:
        ellipse_mask = archive.get("image")
    wordcloud = WordCloud(background_color="white", mask=ellipse_mask)
    wordcloud.generate_from_frequencies(word_cnts)
    wcarr = wordcloud.to_array().astype(np.uint8)

    # use image_rgba following this example
    # https://docs.bokeh.org/en/latest/docs/gallery/image_rgba.html
    img = np.empty(wcarr.shape[:2], dtype=np.uint32)
    # byte-level view onto img: writing the four channels fills the uint32 pixels
    view = img.view(dtype=np.uint8).reshape((*wcarr.shape[:2], 4))
    # fully opaque alpha channel
    alpha = np.full((*wcarr.shape[:2], 1), 255, dtype=np.uint8)
    # [::-1] reverses the row order of the image
    view[:] = np.concatenate([wcarr, alpha], axis=2)[::-1]

    fig = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title="Word Cloud",
        toolbar_location=None,
        x_range=(0, 1),
        y_range=(0, 1),
    )
    fig.image_rgba(image=[img], x=0, y=0, dw=1, dh=1)

    fig.axis.visible = False
    fig.grid.visible = False
    return Panel(child=row(fig), title="Word Cloud")


def wordfreq_viz(
    word_cnts: pd.Series,
    nrows: int,
    plot_width: int,
    plot_height: int,
    wordfreq: WordFrequency,
) -> Panel:
    """
    Visualize the word frequency bar chart

    Parameters
    ----------
    word_cnts
        Counts per word; the index holds the words and the series name is used
        as the count column
    nrows
        Denominator used to turn each count into a percentage
    plot_width
        Width of the figure
    plot_height
        Height of the figure
    wordfreq
        Configuration object; its ``color`` is used for the bars

    Returns
    -------
    Panel
        A panel titled "Word Frequency" (the function wraps the figure in a
        Panel, it does not return the bare figure)
    """
    col = word_cnts.name
    df = word_cnts.to_frame()
    df["pct"] = df[col] / nrows * 100

    # "@{...}" lets the tooltip reference a column name containing spaces
    tooltips = [
        ("Word", "@index"),
        ("Count", f"@{{{col}}}"),
        ("Percent", "@pct{0.2f}%"),
    ]
    fig = figure(
        plot_height=plot_height,
        plot_width=plot_width,
        title="Word Frequency",
        toolbar_location=None,
        tools="hover",
        tooltips=tooltips,
        x_range=list(df.index),
    )
    fig.vbar(
        x="index",
        top=col,
        fill_color=wordfreq.color,
        line_color=wordfreq.color,
        width=0.9,
        source=df,
    )
    fig.yaxis.axis_label = "Count"
    tweak_figure(fig, "bar", True)
    _format_axis(fig, 0, df[col].max(), "y")
    return Panel(child=row(fig), title="Word Frequency")


def bar_viz(
    df: pd.DataFrame,
    ttl_grps: int,
    nrows: int,
    col: str,
    plot_width: int,
    plot_height: int,
    show_yticks: bool,
    bar_cfg: Bar,
) -> Figure:
    """
    Render a bar chart of the counts stored in ``df[col]``

    ``df`` is modified in place: a "pct" column is added and the index is
    converted to strings. ``ttl_grps`` is the total number of groups; when it
    exceeds the number of bars, the x axis label says so.
    """
    # pylint: disable=too-many-arguments
    df["pct"] = df[col] / nrows * 100
    df.index = list(map(str, df.index))
    n_bars = len(df)

    # widen the plot when many bars are drawn with visible ticks
    if show_yticks and n_bars > 10:
        plot_width = 28 * n_bars

    hover_rows = [
        (col, "@index"),
        ("Count", f"@{{{col}}}"),
        ("Percent", "@pct{0.2f}%"),
    ]
    fig = Figure(
        title=col,
        plot_width=plot_width,
        plot_height=plot_height,
        tools="hover",
        tooltips=hover_rows,
        toolbar_location=None,
        x_range=list(df.index),
        y_axis_type=bar_cfg.yscale,
    )
    colour = bar_cfg.color
    fig.vbar(
        source=df,
        x="index",
        top=col,
        bottom=0.01,
        width=0.9,
        fill_color=colour,
        line_color=colour,
    )
    tweak_figure(fig, "bar", show_yticks)
    fig.yaxis.axis_label = "Count"

    if ttl_grps > n_bars:
        # only a subset of the groups is displayed
        fig.xaxis.axis_label = f"Top {n_bars} of {ttl_grps} {col}"
        fig.xaxis.axis_label_standoff = 0

    linear_scale = bar_cfg.yscale == "linear"
    if show_yticks and linear_scale:
        _format_axis(fig, 0, df[col].max(), "y")
    return fig


def pie_viz(
    df: pd.DataFrame,
    nrows: int,
    col: str,
    plot_width: int,
    plot_height: int,
    pie: Pie,
) -> Tuple[Panel, List[str]]:
    """
    Render a pie chart

    Parameters
    ----------
    df
        Counts per category in column ``col``; the index holds the categories
    nrows
        Total count. If it exceeds the sum of ``df[col]`` the remainder is
        shown as an extra "Others" slice.
    col
        Name of the count column, also used as the figure title
    plot_width
        Width of the figure
    plot_height
        Height of the figure
    pie
        Configuration object; ``pie.colors`` overrides the default palette
        when it is not None

    Returns
    -------
    Tuple[Panel, List[str]]
        The "Pie Chart" panel and the list of slice colours that were used
    """
    # pylint: disable=too-many-arguments
    npresent = df[col].sum()
    df.index = list(df.index)  # for CategoricalIndex to normal Index
    if nrows > npresent:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
        others = pd.DataFrame({col: [nrows - npresent]}, index=["Others"])
        df = pd.concat([df, others])
    df["pct"] = df[col] / nrows * 100
    df["angle"] = df[col] / nrows * 2 * np.pi

    tooltips = [(col, "@index"), ("Count", f"@{{{col}}}"), ("Percent", "@pct{0.2f}%")]
    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title=col,
        toolbar_location=None,
        tools="hover",
        tooltips=tooltips,
    )
    if pie.colors is None:
        # repeat the default palette often enough to cover every slice
        color_list = list((CATEGORY20 * (len(df) // len(CATEGORY20) + 1))[0 : len(df)])
    else:
        color_list = list(pie.colors[0 : len(df)])
    df["colour"] = color_list
    df.index = df.index.astype(str)
    # shorten long labels so the legend stays narrow
    df.index = df.index.map(lambda x: x[:13] + "..." if len(x) > 13 else x)

    # distinct name so the ``pie`` config parameter is not shadowed by the renderer
    wedges = fig.wedge(
        x=0,
        y=1,
        radius=0.9,
        start_angle=cumsum("angle", include_zero=True),
        end_angle=cumsum("angle"),
        line_color="white",
        fill_color="colour",
        source=df,
    )
    legend = Legend(items=[LegendItem(label=dict(field="index"), renderers=[wedges])])
    legend.label_text_font_size = "8pt"
    fig.add_layout(legend, "left")
    tweak_figure(fig, "pie")
    # hide axis tick labels and ticks
    fig.axis.major_label_text_font_size = "0pt"
    fig.axis.major_tick_line_color = None
    return Panel(child=row(fig), title="Pie Chart"), color_list


def hist_viz(
    hist: Tuple[np.ndarray, np.ndarray],
    nrows: int,
    col: str,
    yscale: str,
    color: str,
    plot_width: int,
    plot_height: int,
    show_yticks: bool,
) -> Figure:
    """
    Render a histogram from precomputed ``(counts, bin_edges)``

    Returns an empty placeholder figure when all counts are zero.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    counts, bins = hist
    if sum(counts) == 0:
        return _empty_figure(col, plot_height, plot_width)

    df = pd.DataFrame(
        {
            "intvl": _format_bin_intervals(bins),
            "left": bins[:-1],
            "right": bins[1:],
            "freq": counts,
            "pct": counts / nrows * 100,
        }
    )

    # pick the baseline of the bars
    linear = yscale == "linear"
    if linear or df.empty:
        bottom = 0.0
    elif yscale == "log" and df["freq"].min() == 0:
        # freq >= 1 so we set 0.1 as lower bound
        bottom = 0.1
    else:
        bottom = df["freq"].min() / 2

    fig_opts: Dict[str, Any] = dict(
        title=col,
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        y_axis_type=yscale,
    )
    if not linear:
        # non-linear axes get an explicit range starting at the baseline
        fig_opts["y_range"] = (bottom, df["freq"].max())
    fig = Figure(**fig_opts)

    fig.quad(
        source=df,
        left="left",
        right="right",
        top="freq",
        bottom=bottom,
        fill_color=color,
        line_color=color,
    )
    hover_rows = [("Bin", "@intvl"), ("Frequency", "@freq"), ("Percent", "@pct{0.2f}%")]
    fig.add_tools(HoverTool(tooltips=hover_rows, mode="vline"))
    tweak_figure(fig, "hist", show_yticks)
    fig.yaxis.axis_label = "Frequency"
    _format_axis(fig, df.iloc[0]["left"], df.iloc[-1]["right"], "x")

    if not show_yticks:
        return fig
    fig.xaxis.axis_label = col
    if linear:
        _format_axis(fig, 0, df["freq"].max(), "y")
    return fig


def kde_viz(
    hist: Tuple[np.ndarray, np.ndarray],
    kde: np.ndarray,
    col: str,
    plot_width: int,
    plot_height: int,
    kde_cfg: KDE,
) -> Panel:
    """
    Render a density histogram with the kde curve drawn on top

    ``hist`` is ``(densities, bin_edges)``. ``kde`` is called with an array of
    x positions and its result is plotted as the curve.
    """
    # pylint: disable=too-many-arguments, too-many-locals
    dens, bins = hist
    df = pd.DataFrame(
        {
            "intvl": _format_bin_intervals(bins),
            "left": bins[:-1],
            "right": bins[1:],
            "dens": dens,
        }
    )
    linear = kde_cfg.yscale == "linear"
    fig = Figure(
        title=col,
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        y_axis_type=kde_cfg.yscale,
    )

    # baseline of the histogram bars
    if linear or df.empty:
        bottom = 0
    else:
        bottom = df["dens"].min() / 2
    bars = fig.quad(
        source=df,
        left="left",
        right="right",
        top="dens",
        bottom=bottom,
        fill_color=kde_cfg.hist_color,
        line_color=kde_cfg.hist_color,
    )
    bars_hover = HoverTool(
        renderers=[bars],
        tooltips=[("Bin", "@intvl"), ("Density", "@dens")],
        mode="vline",
    )

    # evaluate the density estimate on 1000 points spanning all bins
    x_lo = df.loc[0, "left"]
    x_hi = df.loc[len(df) - 1, "right"]
    pts_rng = np.linspace(x_lo, x_hi, 1000)
    pdf = kde(pts_rng)
    curve = fig.line(x=pts_rng, y=pdf, line_color=kde_cfg.line_color, line_width=2, alpha=0.5)
    curve_hover = HoverTool(renderers=[curve], tooltips=[("x", "@x"), ("y", "@y")])

    fig.add_tools(bars_hover)
    fig.add_tools(curve_hover)
    tweak_figure(fig, "kde")
    fig.yaxis.axis_label = "Density"
    fig.xaxis.axis_label = col
    _format_axis(fig, df.iloc[0]["left"], df.iloc[-1]["right"], "x")
    if linear:
        y_top = max(df["dens"].max(), pdf.max())
        _format_axis(fig, 0, y_top, "y")
    return Panel(child=row(fig), title="KDE Plot")


def qqnorm_viz(
    qntls: pd.Series,
    mean: float,
    std: float,
    col: str,
    plot_width: int,
    plot_height: int,
    qqnorm: QQNorm,
) -> Panel:
    """
    Render a normal Q-Q plot

    The given quantiles are plotted against the 1st to 99th percentiles of a
    normal distribution with the given mean and standard deviation, together
    with a reference line through the diagonal.
    """
    # pylint: disable=too-many-arguments
    probs = np.linspace(0.01, 0.99, 99)
    theory_qntls = norm.ppf(probs, mean, std)

    fig = Figure(
        title=col,
        plot_width=plot_width,
        plot_height=plot_height,
        tools="hover",
        tooltips=[("x", "@x"), ("y", "@y")],
        toolbar_location=None,
    )
    fig.circle(x=theory_qntls, y=qntls, size=3, color=qqnorm.point_color)

    # overall extent of both quantile sets, shared by the line and both axes
    vals = np.concatenate((theory_qntls, qntls))
    lo, hi = vals.min(), vals.max()
    fig.line(x=[lo, hi], y=[lo, hi], color=qqnorm.line_color)

    tweak_figure(fig, "qq")
    fig.xaxis.axis_label = "Normal Quantiles"
    fig.yaxis.axis_label = f"Quantiles of {col}"
    for axis in ("x", "y"):
        _format_axis(fig, lo, hi, axis)
    return Panel(child=row(fig), title="Normal Q-Q Plot")


def box_viz(
    df: pd.DataFrame,
    x: str,
    plot_width: int,
    plot_height: int,
    box: Box,
    y: Optional[str] = None,
    ttl_grps: Optional[int] = None,
) -> Panel:
    """
    Render a box plot visualization

    Three layouts are produced depending on the arguments:

    * ``y`` and ``ttl_grps`` given: one box per group of ``x``; the title
      reports how many of the ``ttl_grps`` groups are shown.
    * only ``y`` given: one box per bin; ``df["grp"]`` must hold objects with
      ``left``/``right`` attributes, which are converted to strings here.
    * neither given: a single-variable box plot titled ``x``.

    ``df`` is read for the columns "grp", "lw", "q1", "q2", "q3", "uw" and
    "otlrs", and is modified in place (columns "x0"/"x1" are added, missing
    outlier entries are replaced by empty lists).

    Returns a panel titled "Box Plot".
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-statements

    if y and ttl_grps:
        width = 0.7
        grp_cnt_stats = {f"{x}_ttl": ttl_grps, f"{x}_shw": len(df)}
        title = _make_title(grp_cnt_stats, x, y) if ttl_grps else f"{y} by {x}"
    elif y:
        width, title = 0.93, f"{y} by {x}"
        # bin boundaries: every left edge plus the right edge of the last bin
        endpts = [grp.left for grp in df["grp"]] + [df["grp"][len(df) - 1].right]
        df["grp"] = df["grp"].astype(str)
    else:
        width, title = 0.7, f"{x}"
    # horizontal extent of the whisker caps
    # NOTE(review): presumably relies on df having a default integer index - confirm
    df["x0"], df["x1"] = df.index + 0.2, df.index + 0.8

    fig = figure(
        plot_width=plot_width,
        plot_height=plot_height,
        title=title,
        toolbar_location=None,
        x_range=df["grp"],
    )
    # lower whisker cap and lower tail
    low = fig.segment(x0="x0", y0="lw", x1="x1", y1="lw", line_color="black", source=df)
    ltail = fig.segment(x0="grp", y0="lw", x1="grp", y1="q1", line_color="black", source=df)
    # lower half of the box (q1 to median)
    lbox = fig.vbar(
        x="grp",
        width=width,
        top="q2",
        bottom="q1",
        fill_color=box.color,
        line_color="black",
        source=df,
    )
    # upper half of the box (median to q3)
    ubox = fig.vbar(
        x="grp",
        width=width,
        top="q3",
        bottom="q2",
        fill_color=box.color,
        line_color="black",
        source=df,
    )
    # upper tail and upper whisker cap
    utail = fig.segment(x0="grp", y0="uw", x1="grp", y1="q3", line_color="black", source=df)
    upw = fig.segment(x0="x0", y0="uw", x1="x1", y1="uw", line_color="black", source=df)

    # replace missing outlier entries with empty lists so they can be iterated
    df.loc[df["otlrs"].isna(), "otlrs"] = pd.Series(
        [[]] * df["otlrs"].isna().sum(), dtype=np.float64
    ).values
    # flatten the per-group outlier lists
    otlrs = [otl for otls in df["otlrs"] for otl in otls]
    if otlrs:
        # group label repeated once per outlier, aligned with the flattened values
        gps = [grp for grp, ols in zip(df["grp"], df["otlrs"]) for _ in range(len(ols))]
        circ = fig.circle(
            x=gps,
            y=otlrs,
            size=3,
            line_color="black",
            color="black",
            fill_alpha=0.6,
        )
        fig.add_tools(
            HoverTool(
                renderers=[circ],
                tooltips=[("Outlier", "@y")],
            )
        )
    tooltips = [
        ("Upper Whisker", "@uw"),
        ("Upper Quartile", "@q3"),
        ("Median", "@q2"),
        ("Lower Quartile", "@q1"),
        ("Lower Whisker", "@lw"),
    ]
    if y:
        # first tooltip row names the group (categorical x) or the bin (numeric x)
        lbl = f"{x}" if ttl_grps else "Bin"
        tooltips.insert(0, (lbl, "@grp"))
    fig.add_tools(
        HoverTool(
            renderers=[upw, utail, ubox, lbox, ltail, low],
            tooltips=tooltips,
        )
    )
    tweak_figure(fig, "box")
    if y is None:
        # single box: hide the x axis ticks and labels
        fig.xaxis.major_tick_line_color = None
        fig.xaxis.major_label_text_font_size = "0pt"
    fig.xaxis.axis_label = x if y is not None else None
    fig.yaxis.axis_label = x if y is None else y

    # y extent covers the whiskers and, when present, the outliers; with NaN as
    # the second argument the builtin min/max return the first argument
    minw = min(otlrs) if otlrs else np.nan
    maxw = max(otlrs) if otlrs else np.nan
    _format_axis(fig, min(df["lw"].min(), minw), max(df["uw"].max(), maxw), "y")

    if y and not ttl_grps:  # format categorical axis tick values
        # start by rounding to the length of the largest possible number
        round_to = -len(str(max([abs(int(ept)) for ept in endpts])))
        ticks = np.round(endpts, round_to)
        # show roughly every (len(df) // 5 + 1)-th tick
        nticks = len(df) // 5 + 1
        show_ticks = [ticks[i] for i in range(len(ticks)) if i % nticks == 0]
        while len(set(show_ticks)) != len(show_ticks):  # round until show ticks unique
            round_to += 1
            ticks = np.round(endpts, round_to)
            show_ticks = [ticks[i] for i in range(len(ticks)) if i % nticks == 0]
        # format the ticks
        ticks = [int(tick) if tick.is_integer() else tick for tick in ticks]
        ticks = _format_ticks(ticks)
        fig.xaxis.ticker = list(range(len(df) + 1))
        fig.xaxis.formatter = FuncTickFormatter(  # overide bokeh ticks
            args={"vals": ticks, "mod": nticks},
            code="""
                if (index % mod == 0) return vals[index];
                return "";
            """,
        )
        tweak_figure(fig, "boxnum")
        fig.xaxis.major_label_text_font_size = "10pt"

    return Panel(child=row(fig), title="Box Plot")


def latlong_viz(
    df: pd.DataFrame,
    plot_width: int,
    y: Optional[str] = None,
) -> Panel:
    """
    Render a latlong plot visualization

    Parameters
    ----------
    df
        Data to plot. Each index entry is read as ``(latitude, longitude)`` and
        column ``y`` provides the value that drives the marker size.
    plot_width
        Width of the figure; the height is derived from it (7/10 of the width)
    y
        Name of the value column

    Returns
    -------
    Panel
        A panel titled "Geo Map" with the world map and one marker per row
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-statements
    # pylint: disable=too-many-function-args
    tooltip_1 = [("Name", "@name"), ("(Long, Lat)", "($x, $y)")]
    tooltip_2 = [(y, "@sizes_ori")]

    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_width // 10 * 7,
    )
    fig.grid.grid_line_color = None
    fig.hover.point_policy = "follow_mouse"
    fig.background_fill_color = "white"
    fig.x_range = Range1d(start=-180, end=180)
    fig.y_range = Range1d(start=-90, end=90)
    world_map = fig.patches("xs", "ys", line_color="white", source=MAPS, line_width=0.5)
    fig.add_tools(HoverTool(renderers=[world_map], tooltips=tooltip_1))
    lat = [item[0] for item in df.index]
    lon = [item[1] for item in df.index]
    minimum = min(df[y])
    maximum = max(df[y])
    if maximum == minimum:
        # All values are equal (e.g. a single point): the normalization below
        # would divide zero by zero and produce NaN sizes, so nothing would be
        # drawn. Use the middle of the [5, 25] size range instead.
        num_col = pd.Series(15.0, index=df.index)
    else:
        num_col = (df[y] - minimum) / (maximum - minimum) * 20
        num_col = num_col.apply(lambda a: a + 5 if a > 0 else a)  # Normalization to {0, [5,25]}
    source = ColumnDataSource(data=dict(lat=lat, lon=lon, sizes=num_col, sizes_ori=df[y]))
    my_dots = fig.circle(
        x="lon", y="lat", size="sizes", fill_color="red", fill_alpha=0.8, source=source
    )
    fig.add_tools(HoverTool(renderers=[my_dots], tooltips=tooltip_2))

    return Panel(child=row(fig), title="Geo Map")


def geo_viz(
    df: pd.DataFrame,
    plot_width: int,
    y: Optional[str] = None,
) -> Panel:
    """
    Render a geo plot visualization

    Parameters
    ----------
    df
        Data to plot. The index holds region names (matched case-insensitively
        and ignoring surrounding whitespace against NAME_DICT) and column ``y``
        holds the value used to colour each region.
    plot_width
        Width of the figure; the height is derived from it (7/10 of the width)
    y
        Name of the value column

    Returns
    -------
    Panel
        A panel titled "World Map"
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-statements
    # pylint: disable=too-many-function-args
    minimum = min(df[y])
    maximum = max(df[y])

    # value per region code; names that are not in NAME_DICT are skipped
    value = {}
    for raw_name, val in zip(df.index, df[y]):
        key = raw_name.lower().strip()
        if key in NAME_DICT:
            value[NAME_DICT[key]] = val

    # Work on a shallow copy so that the module-level MAPS is not modified and
    # no "value" column leaks from one call into other users of MAPS.
    map_data = dict(MAPS)
    map_data["value"] = [
        value.get(fip, "unknown") for _, fip in zip(MAPS["name"], MAPS["fip"])
    ]

    mapper = LinearColorMapper(
        palette=YlGnBu[33:233], low=minimum, high=maximum, nan_color="#cccccc"
    )
    tools = "pan,wheel_zoom,box_zoom,reset,hover"

    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_width // 10 * 7,
        tools=tools,
        tooltips=[
            ("Name", "@name"),
            (y, "@value"),
            ("(Long, Lat)", "($x, $y)"),
        ],
    )
    fig.grid.grid_line_color = None
    fig.hover.point_policy = "follow_mouse"
    fig.background_fill_color = "white"
    fig.x_range = Range1d(start=-180, end=180)
    fig.y_range = Range1d(start=-90, end=90)
    fig.patches(
        "xs",
        "ys",
        line_color="white",
        source=map_data,
        fill_color={"field": "value", "transform": mapper},
        line_width=0.5,
    )

    color_bar = ColorBar(
        color_mapper=mapper,
        major_label_text_font_size="7px",
        ticker=BasicTicker(desired_num_ticks=11),
        formatter=PrintfTickFormatter(format="%10.2f"),
        label_standoff=6,
        border_line_color=None,
        location=(0, 0),
    )
    # a colour bar is only added when the values actually span a range
    if minimum < maximum:
        fig.add_layout(color_bar, "right")

    return Panel(child=row(fig), title="World Map")


# this function should be removed when datetime is refactored
def box_viz_dt(
    df: pd.DataFrame,
    outx: List[str],
    outy: List[float],
    x: str,
    plot_width: int,
    plot_height: int,
    y: Optional[str] = None,
    grp_cnt_stats: Optional[Dict[str, int]] = None,
    timeunit: Optional[str] = None,
) -> Panel:
    """
    Render a box plot visualization

    ``df`` is read for the columns "grp", "lw", "q1", "q2", "q3", "uw", "x0"
    and "x1" (and "lb"/"ub" when numeric bin ticks are formatted). ``outx`` and
    ``outy`` are the x and y coordinates of the outlier points. When
    ``grp_cnt_stats`` is given it is used to build the title; ``timeunit``,
    when given, labels the group row of the tooltip.

    Returns a panel titled "Box Plot".
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-statements
    if y:
        width = 0.7 if grp_cnt_stats else 0.93
        title = _make_title(grp_cnt_stats, x, y) if grp_cnt_stats else f"{y} by {x}"
    else:
        width = 0.7
        title = f"{x}"

    # widen the plot when there are many boxes
    if len(df) > 10:
        plot_width = 39 * len(df)
    fig = figure(
        tools="",
        x_range=list(df["grp"]),
        toolbar_location=None,
        title=title,
        plot_width=plot_width,
        plot_height=plot_height,
    )
    # upper and lower tails
    utail = fig.segment(x0="grp", y0="uw", x1="grp", y1="q3", line_color="black", source=df)
    ltail = fig.segment(x0="grp", y0="lw", x1="grp", y1="q1", line_color="black", source=df)
    # upper half of the box (median to q3)
    ubox = fig.vbar(
        x="grp",
        width=width,
        top="q3",
        bottom="q2",
        fill_color=CATEGORY20[0],
        line_color="black",
        source=df,
    )
    # lower half of the box (q1 to median)
    lbox = fig.vbar(
        x="grp",
        width=width,
        top="q2",
        bottom="q1",
        fill_color=CATEGORY20[0],
        line_color="black",
        source=df,
    )
    # whisker caps; the "x0"/"x1" columns must already exist in df
    loww = fig.segment(x0="x0", y0="lw", x1="x1", y1="lw", line_color="black", source=df)
    upw = fig.segment(x0="x0", y0="uw", x1="x1", y1="uw", line_color="black", source=df)
    if outx:
        circ = fig.circle(  # pylint: disable=too-many-function-args
            outx, outy, size=3, line_color="black", color=CATEGORY20[6], fill_alpha=0.6
        )
        fig.add_tools(
            HoverTool(
                renderers=[circ],
                tooltips=[("Outlier", "@y")],
            )
        )
    tooltips = [
        ("Upper Whisker", "@uw"),
        ("Upper Quartile", "@q3"),
        ("Median", "@q2"),
        ("Lower Quartile", "@q1"),
        ("Lower Whisker", "@lw"),
    ]
    if grp_cnt_stats is None and y is not None:
        # first tooltip row names the time unit, or "Bin" when none is given
        lbl = timeunit if timeunit else "Bin"
        tooltips.insert(0, (lbl, "@grp"))
    fig.add_tools(
        HoverTool(
            renderers=[upw, utail, ubox, lbox, ltail, loww],
            tooltips=tooltips,
            point_policy="follow_mouse",
        )
    )
    tweak_figure(fig, "box")
    if y is None:
        # single box: hide the x axis ticks and labels
        fig.xaxis.major_tick_line_color = None
        fig.xaxis.major_label_text_font_size = "0pt"
    fig.xaxis.axis_label = x if y is not None else None
    fig.yaxis.axis_label = x if y is None else y

    # y extent covers the whiskers and, when present, the outliers; with NaN as
    # the second argument the builtin min/max return the first argument
    minw = min(outy) if outy else np.nan
    maxw = max(outy) if outy else np.nan
    _format_axis(fig, min(df["lw"].min(), minw), max(df["uw"].max(), maxw), "y")

    if not grp_cnt_stats and y and not timeunit:  # format categorical axis tick values
        # bin boundaries: every lower bound plus the upper bound of the last bin
        endpts = list(df["lb"]) + [df.iloc[len(df) - 1]["ub"]]
        # start by rounding to the length of the largest possible number
        round_to = -len(str(max([abs(int(ept)) for ept in endpts])))
        ticks = np.round(endpts, round_to)
        # show roughly every (len(df) // 5 + 1)-th tick
        nticks = len(df) // 5 + 1
        show_ticks = [ticks[i] for i in range(len(ticks)) if i % nticks == 0]
        while len(set(show_ticks)) != len(show_ticks):  # round until show ticks unique
            round_to += 1
            ticks = np.round(endpts, round_to)
            show_ticks = [ticks[i] for i in range(len(ticks)) if i % nticks == 0]
        # format the ticks
        ticks = [int(tick) if tick.is_integer() else tick for tick in ticks]
        ticks = _format_ticks(ticks)
        fig.xaxis.ticker = list(range(len(df) + 1))
        fig.xaxis.formatter = FuncTickFormatter(  # overide bokeh ticks
            args={"vals": ticks, "mod": nticks},
            code="""
                if (index % mod == 0) return vals[index];
                return "";
            """,
        )
        tweak_figure(fig, "boxnum")
        fig.xaxis.major_label_text_font_size = "10pt"

    if timeunit == "Week of":
        fig.xaxis.axis_label = x + ", the week of"

    return Panel(child=row(fig), title="Box Plot")


def line_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    yscale: str,
    plot_width: int,
    plot_height: int,
    ttl_grps: int,
) -> Panel:
    """
    Render multi-line chart

    Parameters
    ----------
    df
        One row per group of ``x``; column ``0`` holds a ``(counts, bin_edges)``
        pair per group. The index is converted to strings in place.
    x
        Name of the grouping column
    y
        Name of the binned column
    yscale
        Axis type of the y axis
    plot_width
        Width of the figure
    plot_height
        Height of the figure
    ttl_grps
        Total number of groups of ``x``, used in the title

    Returns
    -------
    Panel
        A panel titled "Line Chart"
    """
    # pylint: disable=too-many-arguments,too-many-locals
    palette = CATEGORY20 * (len(df) // len(CATEGORY20) + 1)
    title = _make_title({f"{x}_ttl": ttl_grps, f"{x}_shw": len(df)}, x, y)
    df.index = df.index.astype(str)

    fig = figure(
        plot_height=plot_height,
        plot_width=plot_width,
        title=title,
        toolbar_location=None,
        tools=[],
        y_axis_type=yscale,
    )

    # bin endpoints for all histograms
    bins = df[0].iloc[0][1]
    # plot the value for a histgram bin at its midpoint
    ticks = [(bins[i] + bins[i + 1]) / 2 for i in range(len(bins) - 1)]
    # format the bin intervals
    intvls = _format_bin_intervals(bins)

    # Legend entries are collected in a list rather than a dict keyed by the
    # shortened name: two groups whose shortened names coincide would otherwise
    # overwrite each other and one line would lose its legend entry.
    legend_items = []
    # add the lines
    for grp, (cnts, _), color in zip(df.index, df[0], palette):
        grp_name = (grp[:14] + "...") if len(grp) > 15 else grp
        source = ColumnDataSource({"x": ticks, "y": cnts, "intvls": intvls})
        line = fig.line(x="x", y="y", color=color, source=source)
        legend_items.append((grp_name, [line]))
        # the tooltip shows the full, unshortened group name
        tooltips = [(f"{x}", f"{grp}"), ("Frequency", "@y"), (f"{y} bin", "@intvls")]
        fig.add_tools(HoverTool(renderers=[line], tooltips=tooltips))

    fig.add_layout(Legend(items=legend_items), "left")
    tweak_figure(fig)
    fig.yaxis.axis_label = "Frequency"
    fig.xaxis.axis_label = y
    _format_axis(fig, bins[0], bins[-1], "x")
    if yscale == "linear":
        yvals = [val for cnts, _ in df[0] for val in cnts]
        _format_axis(fig, min(yvals), max(yvals), "y")

    return Panel(child=row(fig), title="Line Chart")


def scatter_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    sample_sr_and_name: Tuple[Union[int, float], str],
    plot_width: int,
    plot_height: int,
) -> Any:
    """
    Render a scatter plot of column ``y`` against column ``x``

    ``sample_sr_and_name`` is ``(value, name)`` where ``name`` is either
    'sample size' or 'sample rate'; it determines the sampling note in the
    title. Any other name raises RuntimeError.
    """
    # pylint: disable=too-many-arguments
    sampling_kind = sample_sr_and_name[1]
    if sampling_kind not in ("sample size", "sample rate"):
        raise RuntimeError("parameter name should be either 'sample size' or 'sample rate'")

    title = f"{y} by {x}"
    if sampling_kind == "sample rate":
        title = f"{y} by {x} (sample rate {sample_sr_and_name[0]})"
    elif not len(df) < sample_sr_and_name[0]:
        # sample size: only mention it when the data reaches the sample size
        title = f"{y} by {x} (sample size {sample_sr_and_name[0]})"

    fig = figure(
        title=title,
        plot_width=plot_width,
        plot_height=plot_height,
        tools="hover",
        tooltips=[("(x, y)", f"(@{{{x}}}, @{{{y}}})")],
        toolbar_location=None,
    )
    fig.circle(x, y, color=CATEGORY20[0], source=df)  # pylint: disable=too-many-function-args
    tweak_figure(fig)
    fig.xaxis.axis_label = x
    fig.yaxis.axis_label = y
    for axis, column in (("x", x), ("y", y)):
        _format_axis(fig, df[column].min(), df[column].max(), axis)
    return Panel(child=fig, title="Scatter Plot")


def hexbin_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    plot_width: int,
    plot_height: int,
    tile_size: str,
    aspect_scale: float,
) -> Panel:
    """
    Render a hexbin plot of column ``y`` against column ``x``

    ``tile_size`` may be "auto", in which case 1/25 of the x extent is used.
    The passed ``aspect_scale`` is replaced by a value computed from the data
    extents before it is used.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    xmin, xmax = df[x].min(), df[x].max()
    ymin, ymax = df[y].min(), df[y].max()
    x_extent = xmax - xmin
    if tile_size == "auto":
        tile_size = x_extent / 25
    # the small epsilon avoids a division by zero for a degenerate x extent
    aspect_scale = (ymax - ymin) / (x_extent + 1e-9)

    bins = hexbin(
        x=df[x],
        y=df[y],
        size=tile_size,
        orientation="flattop",
        aspect_scale=aspect_scale,
    )
    count_lo = min(bins.counts)
    count_hi = max(bins.counts)

    fig = figure(
        title=f"{y} by {x}",
        plot_width=plot_width,
        plot_height=plot_height,
        tools=[],
        toolbar_location=None,
        match_aspect=False,
        background_fill_color="#f5f5f5",
    )

    palette = list(reversed(VIRIDIS))
    tile_colour = linear_cmap(
        field_name="counts",
        palette=palette,
        low=count_lo,
        high=count_hi,
    )
    rend = fig.hex_tile(
        q="q",
        r="r",
        source=bins,
        size=tile_size,
        orientation="flattop",
        aspect_scale=aspect_scale,
        line_color=None,
        fill_color=tile_colour,
    )
    fig.add_tools(HoverTool(tooltips=[("Count", "@counts")], renderers=[rend]))

    # colour bar spanning the same count range as the tiles
    mapper = LinearColorMapper(palette=palette, low=count_lo, high=count_hi)
    color_bar = ColorBar(color_mapper=mapper, width=8, location=(0, 0))
    color_bar.label_standoff = 8
    fig.add_layout(color_bar, "left")

    tweak_figure(fig, "hex")
    _format_axis(fig, xmin, xmax, "x")
    _format_axis(fig, ymin, ymax, "y")
    fig.xaxis.axis_label = x
    fig.yaxis.axis_label = y

    return Panel(child=fig, title="Hexbin Plot")


def nested_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    grp_cnt_stats: Dict[str, int],
    plot_width: int,
    plot_height: int,
) -> Panel:
    """
    Render a nested bar chart of the "cnt" column for every (x, y) pair

    ``df`` is modified in place: a "grp_names" column holding the ``(x, y)``
    tuples is added and used as the nested categorical axis.
    """
    # pylint: disable=too-many-arguments
    df["grp_names"] = list(zip(df[x], df[y]))
    data_source = ColumnDataSource(data=df)

    # widen the plot when many bars have to fit
    n_bars = len(df)
    if n_bars > 50:
        plot_width = 19 * n_bars

    hover_rows = [(f"{x}", f"@{{{x}}}"), (f"{y}", f"@{{{y}}}"), ("Count", "@cnt")]
    fig = figure(
        title=_make_title(grp_cnt_stats, x, y),
        plot_width=plot_width,
        plot_height=plot_height,
        tools="hover",
        tooltips=hover_rows,
        toolbar_location=None,
        x_range=FactorRange(*df["grp_names"]),
    )
    fig.vbar(
        source=data_source,
        x="grp_names",
        top="cnt",
        width=1,
        line_color="white",
        line_width=3,
    )

    tweak_figure(fig, "nested")
    fig.yaxis.axis_label = "Count"
    # vertical tick labels
    fig.xaxis.major_label_orientation = np.pi / 2
    _format_axis(fig, 0, df["cnt"].max(), "y")
    return Panel(child=fig, title="Nested Bar Chart")


def stacked_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    grp_cnt_stats: Dict[str, int],
    plot_width: int,
    plot_height: int,
    timeunit: Optional[str] = None,
) -> Panel:
    """
    Render a stacked bar chart

    Each row of ``df`` becomes one bar (the row index is the x-range) and each
    column becomes one stacked segment. The row-wise percentages are what gets
    stacked; the original values are kept alongside under ``<column>_cnt``.
    The columns of the frame passed in are renamed in place to carry that
    ``_cnt`` suffix.

    When ``timeunit`` is falsy the tooltip shows the stacked percentage plus the
    count looked up from the ``_cnt`` column, and the y axis is labelled
    "Percent". Otherwise the tooltip percentage is recomputed in JavaScript and
    the y axis is labelled "Count".

    Returns a Panel titled "Stacked Bar Chart".
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # share of each column within its row, in percent
    df2 = df.div(df.sum(axis=1), axis=0) * 100
    # rename the original (count) columns in place so they do not clash with df2
    df.columns = [f"{col}_cnt" for col in df.columns]
    # final dataframe contains percent and count
    df = pd.concat([df2, df], axis=1)

    title = _make_title(grp_cnt_stats, x, y)
    # widen the plot (32px per bar) once there are more than 30 bars
    if not timeunit:
        if grp_cnt_stats[f"{x}_shw"] > 30:
            plot_width = 32 * grp_cnt_stats[f"{x}_shw"]
    else:
        if len(df) > 30:
            plot_width = 32 * len(df)

    fig = figure(
        plot_height=plot_height,
        plot_width=plot_width,
        title=title,
        toolbar_location=None,
        x_range=list(df.index),
    )
    # the percent columns are the ones that get stacked
    grps = list(df2.columns)
    # repeat the palette so that there are at least as many colours as groups
    palette = PASTEL1 * (len(grps) // len(PASTEL1) + 1)
    if "Others" in grps:
        # the last colour is grey; presumably "Others" is the last group -- TODO confirm
        colours = palette[0 : len(grps) - 1] + ("#636363",)
    else:
        colours = palette[0 : len(grps)]
    source = ColumnDataSource(data=df)
    renderers = fig.vbar_stack(
        stackers=grps,
        x="index",
        width=0.9,
        source=source,
        line_width=1,
        color=colours,
    )
    # shorten long group names for the legend (the renderers keep the full names)
    grps = [(grp[:14] + "...") if len(grp) > 15 else grp for grp in grps]

    legend = Legend(items=[(grp, [rend]) for grp, rend in zip(grps, renderers)])
    legend.label_text_font_size = "8pt"
    fig.add_layout(legend, "right")

    if not timeunit:
        # include percent and count in the tooltip
        # the JS reads the count of the hovered segment from its "<name>_cnt" column
        formatter = CustomJSHover(
            args=dict(source=source),
            code="""
            const cur_bar = special_vars.data_x - 0.5
            const name_cnt = special_vars.name + '_cnt'
            return source.data[name_cnt][cur_bar] + '';
        """,
        )
        # one hover tool per segment so that $name resolves to that segment's group
        for rend in renderers:
            hover = HoverTool(
                tooltips=[
                    (x, "@index"),
                    (y, "$name"),
                    ("Percentage", "@$name%"),
                    ("Count", "@{%s}{custom}" % rend.name),
                ],
                formatters={"@{%s}" % rend.name: formatter},
                renderers=[rend],
            )
            fig.add_tools(hover)
        fig.yaxis.axis_label = "Percent"
    else:
        # below is for having percent and count in the tooltip
        # the JS divides the hovered value by the sum of every non-index column of the bar
        formatter = CustomJSHover(
            args=dict(source=source),
            code="""
            const columns = Object.keys(source.data)
            const cur_bar = special_vars.data_x - 0.5
            var ttl_bar = 0
            for (let i = 0; i < columns.length; i++) {
                if (columns[i] != 'index'){
                    ttl_bar = ttl_bar + source.data[columns[i]][cur_bar]
                }
            }
            const cur_val = source.data[special_vars.name][cur_bar]
            return (cur_val/ttl_bar * 100).toFixed(2)+'%';
        """,
        )
        for rend in renderers:
            hover = HoverTool(
                tooltips=[
                    (y, "$name"),
                    (timeunit, "@index"),
                    ("Count", "@$name"),
                    ("Percent", "@{%s}{custom}" % rend.name),
                ],
                formatters={"@{%s}" % rend.name: formatter},
                renderers=[rend],
            )
            fig.add_tools(hover)
        # NOTE(review): the stacked columns are the percentages from df2, and df here holds
        # both the percent and the "_cnt" columns, yet the axis is labelled "Count" -- confirm
        fig.yaxis.axis_label = "Count"
        _format_axis(fig, 0, df.sum(axis=1).max(), "y")
        fig.xaxis.axis_label = x
        if timeunit == "Week of":
            fig.xaxis.axis_label = x + ", the week of"

    tweak_figure(fig, "stacked")

    return Panel(child=fig, title="Stacked Bar Chart")


def heatmap_viz(
    df: pd.DataFrame,
    x: str,
    y: str,
    grp_cnt_stats: Dict[str, int],
    plot_width: int,
    plot_height: int,
) -> Panel:
    """
    Build the "Heat Map" panel.

    One unit-sized cell is drawn per row of ``df`` at its (x, y) category, with
    the fill colour mapped from the "cnt" column. The figure grows when more
    than 60 x categories or more than 10 y categories are shown.
    """
    # pylint: disable=too-many-arguments
    chart_title = _make_title(grp_cnt_stats, x, y)

    cells = ColumnDataSource(data=df)
    # use the palette from just before its midpoint onwards
    first_colour = len(RDBU) // 2 - 1
    shades = RDBU[first_colour:]
    colour_map = LinearColorMapper(
        palette=shades,
        low=df["cnt"].min() - 0.01,
        high=df["cnt"].max(),
    )

    n_x_shown = grp_cnt_stats[f"{x}_shw"]
    if n_x_shown > 60:
        plot_width = 16 * n_x_shown
    n_y_shown = grp_cnt_stats[f"{y}_shw"]
    if n_y_shown > 10:
        plot_height = 70 + 18 * n_y_shown

    fig = figure(
        title=title_or_none(chart_title) if False else chart_title,
        x_range=sorted(set(df[x])),
        y_range=sorted(set(df[y])),
        x_axis_location="below",
        plot_width=plot_width,
        plot_height=plot_height,
        tools=[],
        toolbar_location=None,
    )

    cell_glyphs = fig.rect(
        source=cells,
        x=x,
        y=y,
        width=1,
        height=1,
        fill_color=transform("cnt", colour_map),
        line_color=None,
    )

    hover = HoverTool(
        tooltips=[
            (x, f"@{{{x}}}"),
            (y, f"@{{{y}}}"),
            ("Count", "@cnt"),
        ],
        mode="mouse",
        renderers=[cell_glyphs],
    )
    fig.add_tools(hover)

    scale_bar = ColorBar(
        color_mapper=colour_map,
        location=(0, 0),
        ticker=BasicTicker(desired_num_ticks=7),
        formatter=PrintfTickFormatter(format="%d"),
    )
    fig.add_layout(scale_bar, "right")

    tweak_figure(fig, "heatmap")
    # shorten y tick labels that are longer than 15 characters
    fig.yaxis.formatter = FuncTickFormatter(
        code="""
            if (tick.length > 15) return tick.substring(0, 14) + '...';
            else return tick;
        """
    )
    return Panel(child=fig, title="Heat Map")


def dt_line_viz(
    df: pd.DataFrame,
    x: str,
    timeunit: str,
    yscale: str,
    plot_width: int,
    plot_height: int,
    show_yticks: bool,
    miss_pct: Optional[float] = None,
    y: Optional[str] = None,
) -> Union[Figure, Panel]:
    """
    Render a line chart over a datetime x axis

    Two modes are supported:

    * ``miss_pct`` given: the "freq" column is plotted, the title is the column
      name (with the missing percentage appended when it is above zero) and the
      tooltip shows frequency and percent.
    * ``miss_pct`` omitted: the second column of ``df`` is plotted and its name
      is used as the aggregate name in the title and tooltip.

    Returns a Panel titled "Line Chart" when ``y`` is given, otherwise the bare
    figure with its y axis labelled "Frequency".
    """
    # pylint: disable=too-many-arguments
    if miss_pct is not None:
        title = f"{x} ({miss_pct}% missing)" if miss_pct > 0 else f"{x}"
        tooltips = [(timeunit, "@lbl"), ("Frequency", "@freq"), ("Percent", "@pct%")]
        agg = "freq"
    else:
        agg = f"{df.columns[1]}"
        title = f"{agg} of {y} by {x}"
        # brace the field name so that column names containing spaces still resolve
        tooltips = [(timeunit, "@lbl"), (agg, f"@{{{agg}}}")]
    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        title=title,
        tools=[],
        y_axis_type=yscale,
        x_axis_type="datetime",
    )
    fig.line(
        source=df,
        x=x,
        y=agg,
        line_width=2,
        line_alpha=0.8,
        color="#7e9ac8",
    )
    hover = HoverTool(
        tooltips=tooltips,
        mode="vline",
    )
    fig.add_tools(hover)

    tweak_figure(fig, "line", show_yticks)
    # the axis is only reformatted for a linear scale with visible ticks
    if show_yticks and yscale == "linear":
        _format_axis(fig, 0, df[agg].max(), "y")

    if y:
        fig.yaxis.axis_label = f"{df.columns[1]} of {y}"
        fig.xaxis.axis_label = x
        return Panel(child=fig, title="Line Chart")

    fig.yaxis.axis_label = "Frequency"
    return fig


def dt_multiline_viz(
    data: Dict[str, Tuple[np.ndarray, np.ndarray, List[str]]],
    x: str,
    y: str,
    timeunit: str,
    yscale: str,
    plot_width: int,
    plot_height: int,
    grp_cnt_stats: Dict[str, int],
    max_lbl_len: int = 15,
    z: Optional[str] = None,
    agg: Optional[str] = None,
) -> Panel:
    """
    Render multi-line chart

    One line is drawn per key of ``data``. For each group, element 0 of its
    tuple is used as the y values, element 1 as the x values and element 2 as
    the hover labels. Group names longer than ``max_lbl_len`` are shortened for
    the legend; clicking a legend entry hides its line.

    When ``z`` is given the title describes the aggregate ``agg``; otherwise the
    aggregate name defaults to "Frequency".

    Returns a Panel titled "Line Chart".
    """
    # pylint: disable=too-many-arguments,too-many-locals
    grps = list(data.keys())
    # repeat the palette so that every group gets a colour
    palette = CATEGORY20 * (len(grps) // len(CATEGORY20) + 1)
    if z is None:
        title = _make_title(grp_cnt_stats, x, y)
    else:
        title = f"{agg} of {_make_title(grp_cnt_stats, z, y)} over {x}"
    agg = "Frequency" if agg is None else agg

    fig = figure(
        tools=[],
        title=title,
        toolbar_location=None,
        plot_width=plot_width,
        plot_height=plot_height,
        y_axis_type=yscale,
        x_axis_type="datetime",
    )

    # np.inf rather than the np.Inf alias, which NumPy 2.0 removed
    ymin, ymax = np.inf, -np.inf
    plot_dict = dict()
    for grp, colour in zip(grps, palette):
        grp_name = (grp[: (max_lbl_len - 1)] + "...") if len(grp) > max_lbl_len else grp
        source = ColumnDataSource({"x": data[grp][1], "y": data[grp][0], "lbl": data[grp][2]})
        plot_dict[grp_name] = fig.line(x="x", y="y", source=source, color=colour, line_width=1.3)
        # one hover tool per line so that the tooltip can name the full group
        fig.add_tools(
            HoverTool(
                renderers=[plot_dict[grp_name]],
                tooltips=[
                    (f"{y}", f"{grp}"),
                    (agg, "@y"),
                    (timeunit, "@lbl"),
                ],
                mode="mouse",
            )
        )
        # track the overall value range across all groups for the axis formatting
        ymin, ymax = min(ymin, min(data[grp][0])), max(ymax, max(data[grp][0]))

    legend = Legend(items=[(name, [line]) for name, line in plot_dict.items()])
    tweak_figure(fig, "line", True)
    fig.add_layout(legend, "right")
    fig.legend.click_policy = "hide"
    fig.yaxis.axis_label = f"{agg} of {y}" if z else "Frequency"
    fig.xaxis.axis_label = x
    if yscale == "linear":
        _format_axis(fig, ymin, ymax, "y")

    return Panel(child=fig, title="Line Chart")


def format_ov_stats(stats: Dict[str, Any]) -> Tuple[Dict[str, str], Dict[str, Any]]:
    """
    Format the dataset-level statistics for the distribution grid.

    The six values of ``stats`` are read positionally, in the dict's own order.
    Returns the formatted overview entries together with the last of those six
    values, which is passed through unchanged.
    """
    # pylint: disable=too-many-locals
    (n_rows, n_cols, n_present, n_unique_rows, memory, dtype_counts) = stats.values()
    n_cells = n_rows * n_cols

    entries = [
        ("Number of Variables", n_cols),
        ("Number of Rows", n_rows),
        ("Missing Cells", float(n_cells - n_present)),
        ("Missing Cells (%)", 1 - (n_present / n_cells)),
        ("Duplicate Rows", n_rows - n_unique_rows),
        ("Duplicate Rows (%)", 1 - (n_unique_rows / n_rows)),
        ("Total Size in Memory", float(memory)),
        ("Average Row Size in Memory", memory / n_rows),
    ]
    formatted: Dict[str, str] = {}
    for label, value in entries:
        formatted[label] = _format_values(label, value)
    return formatted, dtype_counts


def format_num_stats(data: Dict[str, Any]) -> Dict[str, Dict[str, str]]:
    """
    Format the statistics of a numerical column into three sections:
    "Overview", "Quantile Statistics" and "Descriptive Statistics".

    The index of ``data["qntls"]`` is rounded to two decimals in place so that
    the quantiles can be looked up by label.
    """

    def _fmt(section: Dict[str, Any]) -> Dict[str, str]:
        return {label: _format_values(label, value) for label, value in section.items()}

    n_rows, n_present = data["nrows"], data["npres"]
    n_infinite = n_present - data["nreals"]
    lowest, highest, average = data["min"], data["max"], data["mean"]

    overview = {
        "Approximate Distinct Count": data["nuniq"],
        "Approximate Unique (%)": data["nuniq"] / n_present,
        "Missing": n_rows - n_present,
        "Missing (%)": 1 - (n_present / n_rows),
        "Infinite": n_infinite,
        "Infinite (%)": n_infinite / n_rows,
        "Memory Size": data["mem_use"],
        "Mean": average,
        "Minimum": lowest,
        "Maximum": highest,
        "Zeros": data["nzero"],
        "Zeros (%)": data["nzero"] / n_rows,
        "Negatives": data["nneg"],
        "Negatives (%)": data["nneg"] / n_rows,
    }

    # round the quantile labels so that e.g. 0.05 matches exactly
    data["qntls"].index = np.round(data["qntls"].index, 2)
    qntls = data["qntls"]
    first_quartile, third_quartile = qntls.loc[0.25], qntls.loc[0.75]
    quantile = {
        "Minimum": lowest,
        "5-th Percentile": qntls.loc[0.05],
        "Q1": first_quartile,
        "Median": qntls.loc[0.50],
        "Q3": third_quartile,
        "95-th Percentile": qntls.loc[0.95],
        "Maximum": highest,
        "Range": highest - lowest,
        "IQR": third_quartile - first_quartile,
    }

    spread = data["std"]
    if average != 0:
        variation = spread / average
    else:
        variation = np.nan
    descriptive = {
        "Mean": average,
        "Standard Deviation": spread,
        "Variance": spread ** 2,
        "Sum": average * n_present,
        "Skewness": float(data["skew"]),
        "Kurtosis": float(data["kurt"]),
        "Coefficient of Variation": variation,
    }

    return {
        "Overview": _fmt(overview),
        "Quantile Statistics": _fmt(quantile),
        "Descriptive Statistics": _fmt(descriptive),
    }


def format_cat_stats(
    stats: Dict[str, Any],
    len_stats: Dict[str, Any],
    letter_stats: Dict[str, Any],
) -> Dict[str, Dict[str, str]]:
    """
    Format the statistics of a categorical column into four sections:
    "Overview", "Length", "Sample" and "Letter".

    The sample section labels up to five entries of ``stats["first_rows"]`` and
    cuts values longer than 18 characters.
    """

    def _fmt(section: Dict[str, Any]) -> Dict[str, str]:
        return {label: _format_values(label, value) for label, value in section.items()}

    n_rows, n_present, n_distinct = stats["nrows"], stats["npres"], stats["nuniq"]
    overview = {
        "Approximate Distinct Count": n_distinct,
        "Approximate Unique (%)": n_distinct / n_present,
        "Missing": n_rows - n_present,
        "Missing (%)": 1 - n_present / n_rows,
        "Memory Size": stats["mem_use"],
    }

    row_labels = ("1st row", "2nd row", "3rd row", "4th row", "5th row")
    sample: Dict[str, str] = {}
    for label, value in zip(row_labels, stats["first_rows"]):
        if len(value) > 18:
            sample[label] = f"{value[:18]}..."
        else:
            sample[label] = value

    return {
        "Overview": _fmt(overview),
        "Length": _fmt(len_stats),
        "Sample": sample,
        "Letter": _fmt(letter_stats),
    }


def stats_viz_dt(stats: Dict[str, Any]) -> Dict[str, Dict[str, str]]:
    """
    Format the statistics of a datetime column into a single "Overview" section
    """
    overview: Dict[str, str] = {}
    for label, value in stats.items():
        overview[label] = _format_values(label, value)
    return {"Overview": overview}


def render_distribution_grid(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Create visualizations for plot(df)

    One figure is produced per entry of ``itmdt["data"]``: a bar chart, a
    histogram or a line chart depending on the detected type of the column. The
    figure titles are moved out of the figures and returned under "meta".
    """
    # pylint: disable=too-many-locals
    plot_width = 324 if cfg.plot.width is None else cfg.plot.width
    plot_height = 300 if cfg.plot.height is None else cfg.plot.height

    nrows = itmdt["stats"]["nrows"]
    figs: List[Figure] = []
    titles: List[str] = []
    htgs: Dict[str, Any] = {}
    for col, dtp, data in itmdt["data"]:
        if isinstance(dtp, (Nominal, GeoGraphy, SmallCardNum, GeoPoint)):
            counts, ttl_grps = data
            fig = bar_viz(counts, ttl_grps, nrows, col, plot_width, plot_height, False, cfg.bar)
            htgs[col] = cfg.bar.grid_how_to_guide()
        elif isinstance(dtp, Continuous):
            fig = hist_viz(
                data,
                nrows,
                col,
                cfg.hist.yscale,
                cfg.hist.color,
                plot_width,
                plot_height,
                False,
            )
            htgs[col] = cfg.hist.grid_how_to_guide()
        elif isinstance(dtp, DateTime):
            series, timeunit, miss_pct = data
            fig = dt_line_viz(
                series,
                col,
                timeunit,
                cfg.line.yscale,
                plot_width,
                plot_height,
                False,
                miss_pct,
            )
        else:
            raise ValueError(f"unprocessed col:{col}, type:{dtp}")

        fig.frame_height = plot_height
        # the title is handed back separately, so blank it on the figure itself
        titles.append(fig.title.text)
        fig.title.text = ""
        figs.append(fig)

    if not cfg.stats.enable:
        toggle_content = "Insights"
    elif cfg.insight.enable:
        toggle_content = "Stats and Insights"
    else:
        toggle_content = "Stats"

    tabledata = format_ov_stats(itmdt["stats"]) if cfg.stats.enable else None
    overview_insights = itmdt["overview_insights"] if cfg.insight.enable else None
    column_insights = itmdt["column_insights"] if cfg.insight.enable else None
    return {
        "layout": figs,
        "meta": titles,
        "tabledata": tabledata,
        "overview_insights": overview_insights,
        "column_insights": column_insights,
        "container_width": plot_width * 3,
        "toggle_content": toggle_content,
        "how_to_guide": htgs,
    }


def render_cat(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Create visualizations for plot(df, Nominal)

    Builds one tab per enabled plot (bar chart, pie chart, word cloud, word
    frequency, word length) and returns the figures together with the formatted
    statistics, value table, insights and how-to guides.

    In report mode the bar chart uses a smaller fixed size than the other
    plots; that size is only applied when the bar chart is actually enabled.
    """
    # pylint: disable=too-many-locals,too-many-branches

    if cfg.plot.report:
        plot_width = 450
        plot_height = 400
        plot_width_bar = 280
        plot_height_bar = 248
    else:
        plot_width = cfg.plot.width if cfg.plot.width is not None else 450
        plot_height = cfg.plot.height if cfg.plot.height is not None else 400
        plot_width_bar = plot_width
        plot_height_bar = plot_height
    tabs: List[Panel] = []
    htgs: Dict[str, List[Tuple[str, str]]] = {}
    col, data = itmdt["col"], itmdt["data"]
    # overview, word length, and character level statistics
    if cfg.stats.enable:
        stats, len_stats, letter_stats = (
            data["stats"],
            data["len_stats"],
            data["letter_stats"],
        )
    if cfg.bar.enable:
        fig = bar_viz(
            data["bar"].to_frame(),
            data["nuniq"],
            data["nrows"],
            col,
            plot_width_bar,
            plot_height_bar,
            True,
            cfg.bar,
        )
        tabs.append(Panel(child=row(fig), title="Bar Chart"))
        htgs["Bar Chart"] = cfg.bar.how_to_guide(plot_height, plot_width)
    if cfg.pie.enable:
        fig, color_list = pie_viz(
            data["pie"].to_frame(), data["nrows"], col, plot_width, plot_height, cfg.pie
        )
        tabs.append(fig)
        htgs["Pie Chart"] = cfg.pie.how_to_guide(color_list, plot_height, plot_width)
    if cfg.wordcloud.enable:
        # the word cloud is skipped when there are no words to show
        if data["nuniq_words_cloud"] > 0:
            tabs.append(wordcloud_viz(data["word_cnts_cloud"], plot_width, plot_height))
            htgs["Word Cloud"] = cfg.wordcloud.how_to_guide(plot_height, plot_width)
    if cfg.wordfreq.enable:
        if data["nwords_freq"] > 0:
            tabs.append(
                wordfreq_viz(
                    data["word_cnts_freq"],
                    data["nwords_freq"],
                    plot_width,
                    plot_height,
                    cfg.wordfreq,
                )
            )
            htgs["Word Frequency"] = cfg.wordfreq.how_to_guide(plot_height, plot_width)
    if cfg.wordlen.enable:
        length_dist = hist_viz(
            data["len_hist"],
            data["nrows"],
            "Word Length",
            cfg.wordlen.yscale,
            cfg.wordlen.color,
            plot_width,
            plot_height,
            True,
        )
        tabs.append(Panel(child=row(length_dist), title="Word Length"))
        htgs["Word Length"] = cfg.wordlen.how_to_guide(plot_height, plot_width)

    if cfg.value_table.enable:
        htgs["Value Table"] = cfg.value_table.how_to_guide()
        # looked up here as well because the stats section above may be disabled
        stats = data["stats"]
        value_table = _value_table(
            data["value_table"], stats["nrows"], stats["npres"], stats["nuniq"]
        )
    else:
        value_table = []

    # panel.child.children[0] is a figure
    for panel in tabs:
        panel.child.children[0].frame_width = int(plot_width * 0.9)

    # the bar chart is always the first tab when enabled; without this guard the
    # bar width would be applied to whichever other plot happens to come first
    if cfg.bar.enable:
        tabs[0].child.children[0].frame_width = int(plot_width_bar * 0.9)

    return {
        "tabledata": format_cat_stats(stats, len_stats, letter_stats) if cfg.stats.enable else [],
        "value_table": value_table,
        "insights": nom_insights(data, col, cfg) if cfg.insight.enable else [],
        "layout": [panel.child.children[0] for panel in tabs],
        "meta": [tab.title for tab in tabs],
        "container_width": plot_width + 110,
        "how_to_guide": htgs,
    }


def render_geo(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Create visualizations for plot(df, GeoGraphy)

    Builds one tab per enabled plot (bar chart, pie chart, word cloud, word
    frequency) followed by the geographic plot, which is always added, and
    returns the figures together with the formatted statistics, value table,
    insights and how-to guides.

    In report mode the bar chart uses a smaller fixed size than the other
    plots; that size is only applied when the bar chart is actually enabled.
    """
    # pylint: disable=too-many-locals,too-many-branches

    if cfg.plot.report:
        plot_width = 450
        plot_height = 400
        plot_width_bar = 280
        plot_height_bar = 248
    else:
        plot_width = cfg.plot.width if cfg.plot.width is not None else 450
        plot_height = cfg.plot.height if cfg.plot.height is not None else 400
        plot_width_bar = plot_width
        plot_height_bar = plot_height
    tabs: List[Panel] = []
    htgs: Dict[str, List[Tuple[str, str]]] = {}
    col, data = itmdt["col"], itmdt["data"]
    # overview, word length, and character level statistics
    if cfg.stats.enable:
        stats, len_stats, letter_stats = (
            data["stats"],
            data["len_stats"],
            data["letter_stats"],
        )
    if cfg.bar.enable:
        fig = bar_viz(
            data["bar"].to_frame(),
            data["nuniq"],
            data["nrows"],
            col,
            plot_width_bar,
            plot_height_bar,
            True,
            cfg.bar,
        )
        tabs.append(Panel(child=row(fig), title="Bar Chart"))
        htgs["Bar Chart"] = cfg.bar.how_to_guide(plot_height, plot_width)
    if cfg.pie.enable:
        fig, color_list = pie_viz(
            data["pie"].to_frame(), data["nrows"], col, plot_width, plot_height, cfg.pie
        )
        tabs.append(fig)
        htgs["Pie Chart"] = cfg.pie.how_to_guide(color_list, plot_height, plot_width)
    if cfg.wordcloud.enable:
        # the word cloud is skipped when there are no words to show
        if data["nuniq_words_cloud"] > 0:
            tabs.append(wordcloud_viz(data["word_cnts_cloud"], plot_width, plot_height))
            htgs["Word Cloud"] = cfg.wordcloud.how_to_guide(plot_height, plot_width)
    if cfg.wordfreq.enable:
        if data["nwords_freq"] > 0:
            tabs.append(
                wordfreq_viz(
                    data["word_cnts_freq"],
                    data["nwords_freq"],
                    plot_width,
                    plot_height,
                    cfg.wordfreq,
                )
            )
            htgs["Word Frequency"] = cfg.wordfreq.how_to_guide(plot_height, plot_width)

    geo_df = geo_viz(data["geo"].to_frame().rename(columns={col: "count"}), plot_width, "count")
    tabs.append(geo_df)

    if cfg.value_table.enable:
        htgs["Value Table"] = cfg.value_table.how_to_guide()
        # ``stats`` is otherwise only bound when the stats section is enabled, so
        # look it up here to keep the value table usable on its own
        stats = data["stats"]
        value_table = _value_table(
            data["value_table"], stats["nrows"], stats["npres"], stats["nuniq"]
        )
    else:
        value_table = []

    # panel.child.children[0] is a figure
    for panel in tabs:
        panel.child.children[0].frame_width = int(plot_width * 0.9)
    # the bar chart is always the first tab when enabled; without this guard the
    # bar width would be applied to whichever other plot happens to come first
    if cfg.bar.enable:
        tabs[0].child.children[0].frame_width = int(plot_width_bar * 0.9)
    return {
        "tabledata": format_cat_stats(stats, len_stats, letter_stats) if cfg.stats.enable else [],
        "value_table": value_table,
        "insights": nom_insights(data, col, cfg) if cfg.insight.enable else [],
        "layout": [panel.child.children[0] for panel in tabs],
        "meta": [tab.title for tab in tabs],
        "container_width": plot_width + 110,
        "how_to_guide": htgs,
    }


def _value_table(srs: pd.Series, nrows: int, npres: int, nuniq: int) -> List[Dict[str, Any]]:
    """
    Render the rows for the frequency table.
    """

    df = srs.to_frame()
    (col,) = df.columns

    val_sum = df[col].sum()  # sum of all value counts that appear in the table
    nothers = npres - val_sum  # count of values that do not appear in the table
    nmissing = nrows - npres  # number of missing values
    df["pct"] = (df[col] / nrows * 100).round(1)

    max_freq = max(df[col].max(), nothers, nmissing)

    rows: List[Dict[str, Any]] = []
    for index, record in df.iterrows():
        rows.append(
            {
                "label": index,
                "width": record[col] / max_freq,
                "width_perc": f"{record[col] / max_freq * 100}%",
                "count": int(record[col]),
                "percentage": f"{record['pct']}%" if record["pct"] >= 0.1 else "< 0.1%",
                "n": nrows,
                "extra_class": "",
            }
        )
    if nothers > 0:
        pct = round(nothers / nrows * 100, 1)
        rows.append(
            {
                "label": f"Other values ({nuniq - len(df)})",
                "width": nothers / max_freq,
                "width_perc": f"{nothers / max_freq * 100}%",
                "count": int(nothers),
                "percentage": f"{pct}%" if pct >= 0.1 else "< 0.1%",
                "n": nrows,
                "extra_class": "other",
            }
        )
    if nmissing > 0:
        pct = round(nmissing / nrows * 100, 1)
        rows.append(
            {
                "label": "(Missing)",
                "width": nmissing / max_freq,
                "width_perc": f"{nmissing / max_freq * 100}%",
                "count": int(nmissing),
                "percentage": f"{pct}%" if pct >= 0.1 else "< 0.1%",
                "n": nrows,
                "extra_class": "missing",
            }
        )

    return rows


def nom_insights(data: Dict[str, Any], col: str, cfg: Config) -> Dict[str, List[str]]:
    """
    Format the insights for plot(df, Nominal)

    Returns a dict with one list of messages per plot name. Only the sections
    whose plot is enabled in ``cfg`` are evaluated; the remaining lists stay
    empty. Every check compares a statistic in ``data`` with its threshold in
    ``cfg.insight``.
    """
    # pylint: disable=too-many-branches
    # insight dictionary, with a list associated with each plot
    ins: Dict[str, List[str]] = {
        "Stats": [],
        "Bar Chart": [],
        "Pie Chart": [],
        "Word Cloud": [],
        "Word Frequency": [],
        "Word Length": [],
    }

    if cfg.stats.enable:
        if data["nuniq"] == cfg.insight.constant__threshold:
            ins["Stats"].append(f"{col} has a constant value")
        if data["nuniq"] > cfg.insight.high_cardinality__threshold:
            nuniq = data["nuniq"]
            ins["Stats"].append(f"{col} has a high cardinality: {nuniq} distinct values")
        pmiss = round((data["nrows"] - data["stats"]["npres"]) / data["nrows"] * 100, 2)
        if pmiss > cfg.insight.missing__threshold:
            nmiss = data["nrows"] - data["stats"]["npres"]
            ins["Stats"].append(f"{col} has {nmiss} ({pmiss}%) missing values")
        if data["stats"]["nuniq"] == data["stats"]["npres"]:
            ins["Stats"].append(f"{col} has all distinct values")

    if cfg.bar.enable:
        if data["chisq"][1] > cfg.insight.uniform__threshold:
            ins["Bar Chart"].append(f"{col} is relatively evenly distributed")
        # ratio of the largest to the second largest count; 0 when there is only one bar
        factor = round(data["bar"].iloc[0] / data["bar"].iloc[1], 2) if len(data["bar"]) > 1 else 0
        if factor > cfg.insight.outstanding_no1__threshold:
            val1, val2 = data["bar"].index[0], data["bar"].index[1]
            ins["Bar Chart"].append(
                f"""The largest value ({val1}) is over {factor} times larger than the second
                largest value ({val2})"""
            )
    if cfg.pie.enable:
        attribution = cfg.insight.attribution__threshold
        if (
            len(data["pie"]) >= 2
            and data["pie"].iloc[:2].sum() / data["nrows"] > attribution
        ):
            vals = ", ".join(str(data["pie"].index[i]) for i in range(2))
            # report the configured threshold rather than a fixed 50%
            share = round(attribution * 100, 2)
            ins["Pie Chart"].append(f"The top 2 categories ({vals}) take over {share}%")
    if cfg.wordcloud.enable:
        if data["nuniq_words_cloud"] > cfg.insight.high_word_cardinality__threshold:
            nwords = data["nuniq_words_cloud"]
            ins["Word Cloud"].append(f"{col} contains many words: {nwords} words")
    if cfg.wordfreq.enable:
        factor = (
            round(data["word_cnts_freq"].iloc[0] / data["word_cnts_freq"].iloc[1], 2)
            if len(data["word_cnts_freq"]) > 1
            else 0
        )
        if factor > cfg.insight.outstanding_no1_word__threshold:
            val1, val2 = (
                data["word_cnts_freq"].index[0],
                data["word_cnts_freq"].index[1],
            )
            ins["Word Frequency"].append(
                f"""The largest value ({val1}) is over {factor} times larger than the second
                largest value ({val2})"""
            )
        # NOTE(review): this length check is filed under "Word Frequency" while the
        # "Word Length" list is never filled -- confirm that this is intended
        if data["len_stats"]["Minimum"] == data["len_stats"]["Maximum"]:
            ins["Word Frequency"].append(f"{col} has words of constant length")

    return ins


def render_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Create visualizations for plot(df, Continuous)

    One tab is built per enabled plot (histogram, KDE plot, normal Q-Q plot,
    box plot). The KDE and Q-Q plots are left out when the minimum and maximum
    of the column are numerically equal.
    """
    # pylint: disable=too-many-locals
    if cfg.plot.report:
        plot_width, plot_height = 450, 400
        plot_width_hist, plot_height_hist = 280, 248
    else:
        plot_width = 450 if cfg.plot.width is None else cfg.plot.width
        plot_height = 400 if cfg.plot.height is None else cfg.plot.height
        plot_width_hist, plot_height_hist = plot_width, plot_height

    col, data = itmdt["col"], itmdt["data"]
    tabs: List[Panel] = []
    htgs: Dict[str, List[Tuple[str, str]]] = {}

    if cfg.hist.enable:
        hist_fig = hist_viz(
            data["hist"],
            data["nrows"],
            col,
            cfg.hist.yscale,
            cfg.hist.color,
            plot_width_hist,
            plot_height_hist,
            True,
        )
        tabs.append(Panel(child=row(hist_fig), title="Histogram"))
        htgs["Histogram"] = cfg.hist.how_to_guide(plot_height, plot_width)

    if cfg.kde.enable and data["kde"] is not None:
        # a constant column gets no kde plot
        if not math.isclose(data["min"], data["max"]):
            tabs.append(kde_viz(data["dens"], data["kde"], col, plot_width, plot_height, cfg.kde))
            htgs["KDE Plot"] = cfg.kde.how_to_guide(plot_height, plot_width)

    if cfg.qqnorm.enable:
        # a constant column gets no qq plot either
        if not math.isclose(data["min"], data["max"]) and data["qntls"].any():
            tabs.append(
                qqnorm_viz(
                    data["qntls"],
                    data["mean"],
                    data["std"],
                    col,
                    plot_width,
                    plot_height,
                    cfg.qqnorm,
                )
            )
            htgs["Normal Q-Q Plot"] = cfg.qqnorm.how_to_guide(plot_height, plot_width)

    if cfg.box.enable:
        # single-row frame; the outliers are wrapped in a list so they fill one cell
        box_frame = pd.DataFrame(
            {
                "grp": col,
                "q1": data["qrtl1"],
                "q2": data["qrtl2"],
                "q3": data["qrtl3"],
                "lw": data["lw"],
                "uw": data["uw"],
                "otlrs": [data["otlrs"]],
            },
            index=[0],
        )
        tabs.append(box_viz(box_frame, col, plot_width, plot_height, cfg.box))
        htgs["Box Plot"] = cfg.box.univar_how_to_guide(plot_height, plot_width)

    value_table: List[Dict[str, Any]] = []
    if cfg.value_table.enable:
        htgs["Value Table"] = cfg.value_table.how_to_guide()
        value_table = _value_table(data["value_table"], data["nrows"], data["npres"], data["nuniq"])

    # panel.child.children[0] is a figure
    frame_width = int(plot_width * 0.9)
    for panel in tabs:
        panel.child.children[0].frame_width = frame_width
    if cfg.hist.enable:
        # the histogram is the first tab and keeps its own width
        tabs[0].child.children[0].frame_width = int(plot_width_hist * 0.9)

    tabledata = format_num_stats(data) if cfg.stats.enable else []
    insights = cont_insights(data, col, cfg) if cfg.insight.enable else []
    return {
        "tabledata": tabledata,
        "value_table": value_table,
        "insights": insights,
        "layout": [panel.child for panel in tabs],
        "meta": [tab.title for tab in tabs],
        "container_width": plot_width + 110,
        "how_to_guide": htgs,
    }


def cont_insights(data: Dict[str, Any], col: str, cfg: Config) -> Dict[str, List[str]]:
    """
    Format the insights for plot(df, Continuous)

    Returns one list of messages per plot name. A section is only evaluated
    when its plot is enabled in ``cfg``; every check compares a statistic in
    ``data`` with the matching threshold in ``cfg.insight``.
    """
    # pylint: disable=too-many-branches
    # insight dictionary with a list associated with each plot
    plot_names = ("Stats", "Histogram", "KDE Plot", "Normal Q-Q Plot", "Box Plot")
    ins: Dict[str, List[str]] = {name: [] for name in plot_names}

    if cfg.stats.enable:
        total = data["nrows"]
        # (count, threshold in percent, wording) for each kind of notable value
        checks = (
            (data["npres"] - data["nreals"], cfg.insight.infinity__threshold, "infinite values"),
            (total - data["npres"], cfg.insight.missing__threshold, "missing values"),
            (data["nneg"], cfg.insight.negatives__threshold, "negatives"),
            (data["nzero"], cfg.insight.zeros__threshold, "zeros"),
        )
        for count, limit, wording in checks:
            share = round(count / total * 100, 2)
            if share > limit:
                ins["Stats"].append(f"{col} has {count} ({share}%) {wording}")

    if cfg.hist.enable:
        hist_notes = ins["Histogram"]
        if data["norm"][1] > cfg.insight.normal__threshold:
            hist_notes.append(f"{col} is normally distributed")
        if data["chisq"][1] > cfg.insight.uniform__threshold:
            hist_notes.append(f"{col} is uniformly distributed")
        skw = np.round(data["skew"], 4)
        cutoff = cfg.insight.skewed__threshold
        if skw >= cutoff:
            hist_notes.append(f"{col} is skewed right (\u03B31 = {skw})")
        if skw <= -cutoff:
            hist_notes.append(f"{col} is skewed left (\u03B31 = {skw})")

    if cfg.qqnorm.enable:
        pval = data["norm"][1]
        if pval <= 1 - cfg.insight.normal__threshold:
            ins["Normal Q-Q Plot"].append(f"{col} is not normally distributed (p-value {pval})")

    if cfg.box.enable:
        notlrs = data["notlrs"]
        if notlrs > cfg.insight.outlier__threshold:
            ins["Box Plot"].append(f"{col} has {notlrs} outliers")

    return ins


def render_dt(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, DateTime).

    The payload carries the stats table data (empty list when stats are
    disabled), the line chart figure (when enabled), the tab titles and the
    container width.
    """
    if cfg.plot.report:
        # fixed compact dimensions used whenever cfg.plot.report is set
        plot_width, plot_height = 280, 248
    else:
        plot_width = 450 if cfg.plot.width is None else cfg.plot.width
        plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []
    if cfg.line.enable:
        line_df, timeunit, miss_pct = itmdt["line"]
        line_fig = dt_line_viz(
            line_df,
            itmdt["col"],
            timeunit,
            cfg.line.yscale,
            plot_width,
            plot_height,
            True,
            miss_pct,
        )
        line_fig.frame_width = int(plot_width * 0.95)
        panels.append(Panel(child=line_fig, title="Line Chart"))

    tabledata = stats_viz_dt(itmdt["data"]) if cfg.stats.enable else []
    return {
        "tabledata": tabledata,
        "insights": None,
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width + 50,
    }


def render_cat_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, Nominal, Continuous).

    One panel and one how-to guide are produced for each enabled chart
    (box plot, multi-line chart).
    """
    plot_width = 450 if cfg.plot.width is None else cfg.plot.width
    plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []
    guides: Dict[str, List[Tuple[str, str]]] = {}
    data, x, y = itmdt["data"], itmdt["x"], itmdt["y"]

    if cfg.box.enable:
        # keep only the first 5 * ngroups rows of the flattened box data
        # (presumably five summary values per group -- verify against compute)
        box_rows = data["box"].to_frame().reset_index()
        box_rows = box_rows[: 5 * cfg.box.ngroups]
        box_df = box_rows.pivot(index=x, columns="level_1", values=[0]).reset_index()
        # flatten the two-level column index, then name the group column "grp"
        box_df.columns = box_df.columns.get_level_values(1)
        box_df.columns = ["grp"] + list(box_df.columns[1:])
        box_panel = box_viz(box_df, x, plot_width, plot_height, cfg.box, y, data["ttl_grps"])
        panels.append(box_panel)
        guides["Box Plot"] = cfg.box.nom_cont_how_to_guide(plot_height, plot_width)

    if cfg.line.enable:
        # multi-line chart restricted to the first cfg.line.ngroups rows
        hist_df = data["hist"].to_frame()[: cfg.line.ngroups]
        line_panel = line_viz(
            hist_df,
            x,
            y,
            cfg.line.yscale,
            plot_width,
            plot_height,
            data["ttl_grps"],
        )
        panels.append(line_panel)
        guides["Line Chart"] = cfg.line.nom_cont_how_to_guide(plot_height, plot_width)

    frame_width = int(plot_width * 0.9)
    for panel in panels:
        panel.child.children[0].frame_width = frame_width

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width + 170,
        "how_to_guide": guides,
    }


def render_geo_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, x, y) where x is a geography
    column and y is a numerical column.

    The box plot and multi-line chart are added when enabled; the geo plot
    built from data["value"] is always added last.
    """
    plot_width = 450 if cfg.plot.width is None else cfg.plot.width
    plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []
    guides: Dict[str, List[Tuple[str, str]]] = {}
    data, x, y = itmdt["data"], itmdt["x"], itmdt["y"]

    if cfg.box.enable:
        # keep only the first 5 * ngroups rows of the flattened box data
        # (presumably five summary values per group -- verify against compute)
        box_rows = data["box"].to_frame().reset_index()
        box_rows = box_rows[: 5 * cfg.box.ngroups]
        box_df = box_rows.pivot(index=x, columns="level_1", values=[0]).reset_index()
        # flatten the two-level column index, then name the group column "grp"
        box_df.columns = box_df.columns.get_level_values(1)
        box_df.columns = ["grp"] + list(box_df.columns[1:])
        box_panel = box_viz(box_df, x, plot_width, plot_height, cfg.box, y, data["ttl_grps"])
        panels.append(box_panel)
        guides["Box Plot"] = cfg.box.nom_cont_how_to_guide(plot_height, plot_width)

    if cfg.line.enable:
        # multi-line chart restricted to the first cfg.line.ngroups rows
        hist_df = data["hist"].to_frame()[: cfg.line.ngroups]
        line_panel = line_viz(
            hist_df,
            x,
            y,
            cfg.line.yscale,
            plot_width,
            plot_height,
            data["ttl_grps"],
        )
        panels.append(line_panel)
        guides["Line Chart"] = cfg.line.nom_cont_how_to_guide(plot_height, plot_width)

    # the geo plot is not gated by any config switch
    value_df = data["value"].to_frame()
    panels.append(geo_viz(value_df, plot_width, y))

    frame_width = int(plot_width * 0.9)
    for panel in panels:
        panel.child.children[0].frame_width = frame_width

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width + 170,
        "how_to_guide": guides,
    }


def render_latlong_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, x, y) where x is a latlong column
    and y is a numerical column.

    A single panel is produced by latlong_viz from data["value"].
    """
    plot_width = 450 if cfg.plot.width is None else cfg.plot.width

    data, y = itmdt["data"], itmdt["y"]
    value_df = data["value"].to_frame()

    # latlong statistic plot
    panels: List[Panel] = [latlong_viz(value_df, plot_width, y)]

    frame_width = int(plot_width * 0.9)
    for panel in panels:
        panel.child.children[0].frame_width = frame_width

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width + 170,
    }


def render_two_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Create visualizations for plot(df, Continuous, Continuous)

    For each enabled chart (scatter plot, hexbin plot, box plot) one panel
    and one how-to guide entry are produced.

    Parameters
    ----------
    itmdt
        The Intermediate providing "data", "x" and "y".
    cfg
        Config instance

    Returns
    -------
    Dict[str, Any]
        Dictionary with the keys "layout", "meta", "container_width" and
        "how_to_guide".

    Raises
    ------
    RuntimeError
        If the scatter plot is enabled while cfg.scatter.sample_size and
        cfg.scatter.sample_rate are both None.
    """
    plot_width = cfg.plot.width if cfg.plot.width is not None else 450
    plot_height = cfg.plot.height if cfg.plot.height is not None else 400

    tabs: List[Panel] = []
    htgs: Dict[str, List[Tuple[str, str]]] = {}
    data, x, y = itmdt["data"], itmdt["x"], itmdt["y"]

    if cfg.scatter.enable:
        # scatter plot; sample_size takes precedence over sample_rate
        if cfg.scatter.sample_size is not None:
            sample_sr_and_name: Tuple[Union[int, float], str] = (
                cfg.scatter.sample_size,
                "sample size",
            )
        elif cfg.scatter.sample_rate is not None:
            sample_sr_and_name = (cfg.scatter.sample_rate, "sample rate")
        else:
            # reached only when neither sampling option is set
            raise RuntimeError("In scatter plot, sample size and sample rate are both None")
        tabs.append(
            scatter_viz(
                data["scat"],
                x,
                y,
                sample_sr_and_name,
                plot_width,
                plot_height,
            )
        )
        htgs["Scatter Plot"] = cfg.scatter.how_to_guide(plot_height, plot_width)

    if cfg.hexbin.enable:
        # hexbin plot
        x_diff = data["hex"][x].max() - data["hex"][x].min()
        # "auto" derives the tile size from the x range
        tile_size = cfg.hexbin.tile_size if cfg.hexbin.tile_size != "auto" else x_diff / 25
        # the small epsilon avoids a division by zero when x has a single value
        aspect_scale = (data["hex"][y].max() - data["hex"][y].min()) / (x_diff + 1e-9)
        tabs.append(
            hexbin_viz(
                data["hex"],
                x,
                y,
                plot_width,
                plot_height,
                tile_size,
                aspect_scale,
            )
        )
        htgs["Hexbin Plot"] = cfg.hexbin.how_to_guide(tile_size, plot_height, plot_width)

    if cfg.box.enable:
        # box plot
        df = data["box"].to_frame().reset_index()
        df = df.pivot(index="grp", columns="level_1", values=[0]).reset_index()
        # flatten the two-level column index, then name the group column "grp"
        df.columns = df.columns.get_level_values(1)
        df.columns = ["grp"] + list(df.columns[1:])
        tabs.append(box_viz(df, x, plot_width, plot_height, cfg.box, y))
        htgs["Box Plot"] = cfg.box.two_cont_how_to_guide(plot_height, plot_width)

    for panel in tabs:
        # set frame_width on the panel child; when that raises AttributeError,
        # set it on the child's first nested element instead
        try:
            panel.child.frame_width = int(plot_width * 0.9)
        except AttributeError:
            panel.child.children[0].frame_width = int(plot_width * 0.9)
    return {
        "layout": [panel.child for panel in tabs],
        "meta": [panel.title for panel in tabs],
        "container_width": plot_width + 80,
        "how_to_guide": htgs,
    }


def render_two_cat(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, Nominal, Nominal).

    Produces a nested bar chart, a stacked bar chart and a heat map, each
    only when enabled in cfg, together with their how-to guides.
    """
    # pylint: disable=too-many-locals
    plot_width = 972 if cfg.plot.width is None else cfg.plot.width
    plot_height = 300 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []
    guides: Dict[str, List[Tuple[str, str]]] = {}
    df = itmdt["data"].to_frame("cnt").reset_index()
    x, y = itmdt["x"], itmdt["y"]
    # summed counts per x group and per y group
    xgrps = df.groupby(x)["cnt"].sum()
    ygrps = df.groupby(y)["cnt"].sum()

    nested, stacked, heatmap = cfg.nested, cfg.stacked, cfg.heatmap
    all_enabled = nested.enable and stacked.enable and heatmap.enable
    share_parse = (
        all_enabled
        and nested.ngroups == stacked.ngroups == heatmap.ngroups
        and nested.nsubgroups == stacked.nsubgroups == heatmap.nsubgroups
    )

    if share_parse:
        # all three charts use the same group limits, so the frame is reduced
        # to the largest x groups / y subgroups once and shared between them
        shared = parse_grps(df, nested.ngroups, nested.nsubgroups, x, y, xgrps, ygrps)
        df_nest, stats_nest = shared
        df_stack, stats_stack = shared
        df_heat, stats_heat = shared
    else:
        if nested.enable:
            df_nest, stats_nest = parse_grps(
                df, nested.ngroups, nested.nsubgroups, x, y, xgrps, ygrps
            )
        if stacked.enable:
            df_stack, stats_stack = parse_grps(
                df, stacked.ngroups, stacked.nsubgroups, x, y, xgrps, ygrps
            )
        if heatmap.enable:
            df_heat, stats_heat = parse_grps(
                df, heatmap.ngroups, heatmap.nsubgroups, x, y, xgrps, ygrps
            )

    if nested.enable:
        # nested bar chart
        panels.append(nested_viz(df_nest, x, y, stats_nest, plot_width, plot_height))
        guides["Nested Bar Chart"] = nested.how_to_guide(x, y, plot_height, plot_width)

    if stacked.enable:
        # stacked bar chart: reshape into a pivot table (x as rows, y as columns)
        pivoted = df_stack.pivot(index=x, columns=y, values="cnt")
        pivoted.index.name = None
        # whatever is not covered by the shown subgroups goes into "Others"
        pivoted["Others"] = xgrps - pivoted.sum(axis=1)
        if pivoted["Others"].sum() < 1e-6:
            pivoted = pivoted.drop(columns="Others")
        panels.append(stacked_viz(pivoted, x, y, stats_stack, plot_width, plot_height))
        guides["Stacked Bar Chart"] = stacked.how_to_guide(x, y, plot_height, plot_width)

    if heatmap.enable:
        # heat map
        panels.append(heatmap_viz(df_heat, x, y, stats_heat, plot_width, plot_height))
        guides["Heat Map"] = heatmap.how_to_guide(x, y, plot_height, plot_width)

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width,
        "how_to_guide": guides,
    }


def parse_grps(
    df: pd.DataFrame,
    ngroups: int,
    nsubgroups: int,
    x: str,
    y: str,
    xgrps: pd.Series,
    ygrps: pd.Series,
) -> Tuple[pd.DataFrame, Dict[str, int]]:
    """
    Parse the data for nested bar chart, stacked bar chart, heat map
    according to the given ngroups and nsubgroups

    Parameters
    ----------
    df
        Frame holding the columns x, y and "cnt".
    ngroups
        Number of largest x groups to keep.
    nsubgroups
        Number of largest y groups to keep.
    x
        Name of the first grouping column.
    y
        Name of the second grouping column.
    xgrps
        Per-group totals indexed by the x values.
    ygrps
        Per-group totals indexed by the y values.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, int]]
        The reduced frame in long format (columns x, y, "cnt", with a row for
        every kept x/y combination, missing combinations filled with 0) and a
        dict with the total and shown group counts under the keys
        "<x>_ttl", "<x>_shw", "<y>_ttl" and "<y>_shw".
    """
    # pylint: disable=too-many-arguments

    x_lrgst = xgrps.nlargest(ngroups)
    y_lrgst = ygrps.nlargest(nsubgroups)
    stats = {
        f"{x}_ttl": len(xgrps),
        f"{x}_shw": len(x_lrgst),
        f"{y}_ttl": len(ygrps),
        f"{y}_shw": len(y_lrgst),
    }

    # keep only rows that belong to both a kept x group and a kept y group
    keep = df[x].isin(x_lrgst.index) & df[y].isin(y_lrgst.index)
    # astype returns a new frame, so nothing is assigned into a filtered
    # slice (which is what raised SettingWithCopyWarning before)
    df = df[keep].astype({x: str, y: str})

    # final format
    df = df.pivot_table(index=y, columns=x, values="cnt", fill_value=0, aggfunc="sum")
    df = df.unstack().to_frame("cnt").reset_index()

    return df, stats


def render_dt_num(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, x, y) when x is dt and y is num.

    A line chart and a box plot are added, each only when enabled in cfg.
    """
    plot_width = 450 if cfg.plot.width is None else cfg.plot.width
    plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []

    if cfg.line.enable:
        line_df, line_unit = itmdt["linedata"]
        line_panel = dt_line_viz(
            line_df,
            itmdt["x"],
            line_unit,
            cfg.line.yscale,
            plot_width,
            plot_height,
            True,
            y=itmdt["y"],
        )
        panels.append(line_panel)

    if cfg.box.enable:
        box_df, outx, outy, box_unit = itmdt["boxdata"]
        box_panel = box_viz_dt(
            box_df,
            outx,
            outy,
            itmdt["x"],
            plot_width,
            plot_height,
            itmdt["y"],
            timeunit=box_unit,
        )
        panels.append(box_panel)

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width + 220,
    }


def render_dt_cat(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for plot(df, x, y) when x is dt and y is cat
    (the "dt_and_cat_cols" visual type).

    A multi-line chart and a stacked bar chart are added, each only when
    enabled in cfg.
    """
    plot_width = 972 if cfg.plot.width is None else cfg.plot.width
    plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    panels: List[Panel] = []

    if cfg.line.enable:
        line_data, line_stats, line_unit = itmdt["linedata"]
        line_panel = dt_multiline_viz(
            line_data,
            itmdt["x"],
            itmdt["y"],
            line_unit,
            cfg.line.yscale,
            plot_width,
            plot_height,
            line_stats,
        )
        panels.append(line_panel)

    if cfg.stacked.enable:
        stack_df, stack_stats, stack_unit = itmdt["stackdata"]
        stack_panel = stacked_viz(
            stack_df,
            itmdt["x"],
            itmdt["y"],
            stack_stats,
            plot_width,
            plot_height,
            stack_unit,
        )
        panels.append(stack_panel)

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width,
    }


def render_dt_num_cat(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
    """
    Build the render payload for the "dt_cat_num_cols" visual type.

    A single multi-line chart is drawn from x, y and a third column z that
    is combined using the aggregate itmdt["agg"].
    NOTE(review): presumably x is dt, y is cat and z is num -- confirm
    against the compute step.
    """
    plot_width = 972 if cfg.plot.width is None else cfg.plot.width
    plot_height = 400 if cfg.plot.height is None else cfg.plot.height

    line_data, grp_cnt_stats, timeunit = itmdt["data"]
    multiline_panel = dt_multiline_viz(
        line_data,
        itmdt["x"],
        itmdt["y"],
        timeunit,
        cfg.line.yscale,
        plot_width,
        plot_height,
        grp_cnt_stats,
        z=itmdt["z"],
        agg=itmdt["agg"],
    )
    panels: List[Panel] = [multiline_panel]

    return {
        "layout": [panel.child for panel in panels],
        "meta": [panel.title for panel in panels],
        "container_width": plot_width,
    }


def render(itmdt: Intermediate, cfg: Config) -> Union[LayoutDOM, Dict[str, Any]]:
    """
    Render a basic plot

    Parameters
    ----------
    itmdt
        The Intermediate containing results from the compute function.
    cfg
        Config instance

    Returns
    -------
    Union[LayoutDOM, Dict[str, Any]]
        Whatever the renderer selected by itmdt.visual_type returns.

    Raises
    ------
    ValueError
        If itmdt.visual_type is not one of the supported visual types.
    """
    # maps each supported visual type to the function that renders it
    renderers = {
        "distribution_grid": render_distribution_grid,
        "categorical_column": render_cat,
        "geography_column": render_geo,
        "numerical_column": render_num,
        "datetime_column": render_dt,
        "cat_and_num_cols": render_cat_num,
        "geo_and_num_cols": render_geo_num,
        "latlong_and_num_cols": render_latlong_num,
        "two_num_cols": render_two_num,
        "two_cat_cols": render_two_cat,
        "dt_and_num_cols": render_dt_num,
        "dt_and_cat_cols": render_dt_cat,
        "dt_cat_num_cols": render_dt_num_cat,
    }
    try:
        renderer = renderers[itmdt.visual_type]
    except KeyError:
        # previously an unknown type fell through every branch and failed on
        # the return statement with an unbound local variable
        raise ValueError(f"Unsupported visual type: {itmdt.visual_type!r}") from None
    return renderer(itmdt, cfg)