Source code for dataprep.eda.intermediate

Intermediate class
from typing import Any, Dict, Tuple, Union, Optional

from pathlib import Path
import json
import os
import numpy as np
import pandas as pd

[docs]class Intermediate(Dict[str, Any]): """This class contains intermediate results.""" visual_type: str def __init__(self, *args: Any, **kwargs: Any) -> None: if ( len(args) == 1 and isinstance(args[0], dict) and len(kwargs) == 1 and "visual_type" in kwargs ): super().__init__(args[0]) self.visual_type = kwargs["visual_type"] elif len(args) == 0: visual_type = kwargs.pop("visual_type") super().__init__(**kwargs) self.visual_type = visual_type else: raise ValueError("Unsupported initialization")
[docs] def save(self, path: Optional[str] = None) -> None: """ Save intermediate to current working directory. Parameters ---------- filename: Optional[str], default 'intermediate' The filename used for saving intermediate without the extension name. to: Optional[str], default Path.cwd() The path to where the intermediate will be saved. """ saved_file_path = None if path: extension = os.path.splitext(path)[1] posix_path = Path(path).expanduser() if posix_path.is_dir(): if path.endswith("/"): path += "imdt.json" else: path += "/imdt.json" elif extension: if extension != ".json": raise ValueError( "Format '{extension}' is not supported (supported formats: json)" ) else: path += ".json" saved_file_path = Path(path).expanduser() else: path = str(Path.cwd()) + "/imdt.json" saved_file_path = Path(path).expanduser() # pylint: disable=no-member inter_dict: Dict[str, Any] = {} for key in self.keys(): inter_dict[key] = self[key] self._standardize_type(inter_dict) with open(path, "w") as outfile: json.dump(inter_dict, outfile, indent=4) print(f"Intermediate has been saved to {saved_file_path}!")
def _standardize_type(self, inter_dict: Dict[str, Any]) -> None: """ In order to make intermediate could be saved as json file, check the type of data contained in the intermediate Parameters ---------- inter_dict: Dict[str, Any], default "Intermediate" The intermediate result Returns ------- """ for key in inter_dict: if isinstance(inter_dict[key], dict): self._standardize_type(inter_dict[key]) elif isinstance( inter_dict[key], ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): inter_dict[key] = int(inter_dict[key]) elif isinstance(inter_dict[key], (np.float_, np.float16, np.float32, np.float64)): inter_dict[key] = float(inter_dict[key]) elif isinstance(inter_dict[key], (np.ndarray,)): inter_dict[key] = inter_dict[key].tolist() elif isinstance(inter_dict[key], tuple): inter_dict[key] = list(inter_dict[key]) for index in range(len(inter_dict[key])): if isinstance(inter_dict[key][index], (np.ndarray,)): inter_dict[key][index] = inter_dict[key][index].tolist() inter_dict[key] = tuple(inter_dict[key]) elif isinstance(inter_dict[key], pd.DataFrame): inter_dict[key] = inter_dict[key].to_dict() else: pass
[docs]class ColumnsMetadata: """Container for storing each column's metadata.""" metadata: pd.DataFrame def __init__(self) -> None: self.metadata = pd.DataFrame() = "Column Name" def __setitem__(self, key: Tuple[str, str], val: Any) -> None: col, vtype = key if ( isinstance(val, (tuple, list, dict)) and vtype not in self.metadata.columns # pylint: disable=unsupported-membership-test ): self.metadata[vtype] = pd.Series(dtype="object") self.metadata.loc[col, vtype] = val def __getitem__(self, key: Union[str, Tuple[str, str]]) -> Any: if isinstance(key, tuple): col, vtype = key return self.metadata.loc[col, vtype] else: return ColumnMetadata(self.metadata.loc[key])
[docs]class ColumnMetadata: """Container for storing a single column's metadata. This is immutable. """ metadata: pd.Series def __init__(self, meta: pd.Series) -> None: self.metadata = meta def __getitem__(self, key: str) -> Any: return self.metadata.loc[key]