Source code for dtale.query

import pandas as pd
from pkg_resources import parse_version

import dtale.global_state as global_state

from dtale.utils import get_bool_arg


[docs]def build_col_key(col): # Use backticks for pandas >= 0.25 to handle column names with spaces or protected words return "`{}`".format(col)
[docs]def build_query(data_id, query=None): curr_settings = global_state.get_settings(data_id) or {} return inner_build_query(curr_settings, query)
[docs]def inner_build_query(settings, query=None): query_segs = [] for p in ["columnFilters", "outlierFilters"]: curr_filters = settings.get(p) or {} for col, filter_cfg in curr_filters.items(): query_segs.append(filter_cfg["query"]) if query not in [None, ""]: query_segs.append(query) joined_query_segs = " and ".join(query_segs) if joined_query_segs and settings.get("invertFilter", False): joined_query_segs = "~({})".format(joined_query_segs) return joined_query_segs
[docs]def run_query( df, query, context_vars=None, ignore_empty=False, pct=100, pct_type="random" ): """ Utility function for running :func:`pandas:pandas.DataFrame.query` . This function contains extra logic to handle when column names contain special characters. Looks like pandas will be handling this in a future version: https://github.com/pandas-dev/pandas/issues/27017 The logic to handle these special characters in the meantime is only available in Python 3+ :param df: input dataframe :type df: :class:`pandas:pandas.DataFrame` :param query: query string :type query: str :param context_vars: dictionary of user-defined variables which can be referenced by name in query strings :type context_vars: dict, optional :param pct: random percentage of dataframe to load :type pct: int, optional :return: filtered dataframe """ def _load_pct(df): if pct is not None and pct < 100: if pct_type == "random": return df.sample(frac=pct / 100.0) records = int(len(df) * (pct / 100.0)) return df.head(records) if pct_type == "head" else df.tail(records) return df if (query or "") == "": return _load_pct(df) is_pandas25 = parse_version(pd.__version__) >= parse_version("0.25.0") curr_app_settings = global_state.get_app_settings() engine = curr_app_settings.get("query_engine", "python") df = df.query( query if is_pandas25 else query.replace("`", ""), local_dict=context_vars or {}, engine=engine, ) if not len(df) and not ignore_empty: raise Exception('query "{}" found no data, please alter'.format(query)) return _load_pct(df)
[docs]def handle_predefined(data_id, df=None): import dtale.predefined_filters as predefined_filters df = global_state.get_data(data_id) if df is None else df filters = predefined_filters.get_filters() if not filters: return df curr_settings = global_state.get_settings(data_id) or {} filter_values = curr_settings.get("predefinedFilters") if not filter_values: return df for f in filters: if f.name in filter_values: filter_value = filter_values[f.name] if ( filter_value.get("active", True) and filter_value.get("value") is not None ): df = f.handler(df, filter_value["value"]) return df
[docs]def load_filterable_data(data_id, req, query=None): filtered = get_bool_arg(req, "filtered") curr_settings = global_state.get_settings(data_id) or {} if filtered: final_query = query or build_query(data_id, curr_settings.get("query")) return run_query( handle_predefined(data_id), final_query, global_state.get_context_variables(data_id), ignore_empty=True, ) return global_state.get_data(data_id)