Source code for visions.functional

from typing import Dict, List, Sequence, Tuple, Type, Union

import pandas as pd

from visions.types.type import VisionsBaseType
from visions.typesets.typeset import VisionsTypeset

T = Type[VisionsBaseType]


[docs]def cast_to_detected(data: Sequence, typeset: VisionsTypeset) -> Sequence: """Casts a DataFrame into a typeset by first performing column wise type detection against a provided typeset Args: data: the DataFrame to cast typeset: the Typeset in which we cast Returns: A tuple of the casted DataFrame and the types to which the columns were cast """ return typeset.cast_to_detected(data)
[docs]def cast_to_inferred(data: Sequence, typeset: VisionsTypeset) -> Sequence: """Casts a DataFrame into a typeset by first performing column wise type inference against a provided typeset Args: data: the DataFrame to cast typeset: the Typeset in which we cast Returns: A tuple of the casted DataFrame and the types to which the columns were cast """ return typeset.cast_to_inferred(data)
[docs]def infer_type(data: Sequence, typeset: VisionsTypeset) -> Union[Dict[str, T], T]: """Infer the current types of each column in the DataFrame given the typeset. Args: data: the DataFrame to infer types on typeset: the Typeset that provides the type context Returns: A dictionary with a mapping from column name to type """ return typeset.infer_type(data)
[docs]def detect_type(data: Sequence, typeset: VisionsTypeset) -> Union[Dict[str, T], T]: """Detect the type in the base graph Args: data: the DataFrame to detect types on typeset: the Typeset that provides the type context Returns: A dictionary with a mapping from column name to type """ return typeset.detect_type(data)
[docs]def compare_detect_inference_frame( data: Sequence, typeset: VisionsTypeset ) -> List[Tuple[str, T, T]]: """Compare the types given by inference on the base graph and the relational graph Args: data: the sequence to detect types on typeset: the Typeset that provides the type context Examples: >>> for column, type_before, type_after in compare_detect_inference_frame(data, typeset): >>> print(f"{column} was {type_before} is {type_after}") See Also: :doc:`type_inference_report_frame <visions.functional.type_inference_report_frame>`: Formatted report of the output of this function """ comparisons = [] detected_types = detect_type(data, typeset) inferred_types = infer_type(data, typeset) assert isinstance(detected_types, dict) and isinstance( inferred_types, dict ) # Placate the MyPy Gods for key in detected_types.keys() & inferred_types.keys(): comparisons.append((key, detected_types[key], inferred_types[key])) return comparisons
# TODO: make independent of pandas
[docs]def type_inference_report_frame(df: pd.DataFrame, typeset: VisionsTypeset) -> str: """Return formatted report of the output of `compare_detect_inference_frame`. Args: df: the DataFrame to detect types on typeset: the Typeset that provides the type context Returns: Text-based comparative type inference report Examples: >>> import pandas as pd >>> from visions.functional import type_inference_report_frame >>> from visions.typesets import StandardSet >>> >>> typeset = StandardSet() >>> df = pd.read_csv('dataset.csv') >>> >>> report = type_inference_report_frame(df, typeset) >>> print(report) """ padding = 5 max_column_length = max(len(column) for column in df.columns) + padding max_type_length = 30 report = "" change_count = 0 for column, type_before, type_after in compare_detect_inference_frame(df, typeset): changed = type_before != type_after if changed: fill = "!=" change_count += 1 else: fill = "==" report += ( f"{column: <{max_column_length}} {type_before: <{max_type_length}} " f"{fill} " f"{type_after: <{max_type_length}} \n" ) report += ( "In total {change_count} out of {type_count} types were changed.\n".format( change_count=change_count, type_count=len(df.columns) ) ) return report