gen_ai_hub.evaluations.models.evaluation_run

 
Modules
       
pandas
re
uuid

 
Classes
       
builtins.object
EvaluationRun
ExecutionStatusDetails
Results

 
class EvaluationRun(builtins.object)
    EvaluationRun(run_id: str, execution_id: str, ai_core_client: ai_core_sdk.ai_core_v2_client.AICoreV2Client, configuration_id: str = None, artifact_id: str = None, resource_group: str = None, object_store_credentials: gen_ai_hub.evaluations._internal._models._AWSObjectStoreData = None, metrics_list: List[str] = None)
 
Represents an individual EvaluationRun object and its associated context.
 
:param run_id: Unique identifier for the evaluation run
:type run_id: str
:param execution_id: ID of the AI Core execution
:type execution_id: str
:param ai_core_client: AI Core client instance
:type ai_core_client: AICoreV2Client
:param configuration_id: ID of the configuration, defaults to None
:type configuration_id: str
:param artifact_id: ID of the artifact, defaults to None
:type artifact_id: str
:param resource_group: Resource group name, defaults to None
:type resource_group: str
:param object_store_credentials: Object store credentials, defaults to None
:type object_store_credentials: _AWSObjectStoreData
:param metrics_list: List of metrics to evaluate, defaults to None
:type metrics_list: List[str]
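
A minimal construction sketch; the client, IDs, and metric names below are illustrative placeholders, not values defined by this module:

    from gen_ai_hub.evaluations.models.evaluation_run import EvaluationRun

    # `client` is assumed to be an already configured AICoreV2Client instance.
    run = EvaluationRun(
        run_id="my-run-id",                      # placeholder
        execution_id="my-execution-id",          # ID of the AI Core execution
        ai_core_client=client,
        resource_group="default",                # optional, defaults to None
        metrics_list=["metric_a", "metric_b"],   # optional, illustrative names
    )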
 
  Methods defined here:
__init__(self, run_id: str, execution_id: str, ai_core_client: ai_core_sdk.ai_core_v2_client.AICoreV2Client, configuration_id: str = None, artifact_id: str = None, resource_group: str = None, object_store_credentials: gen_ai_hub.evaluations._internal._models._AWSObjectStoreData = None, metrics_list: List[str] = None)
Initialize self.  See help(type(self)) for accurate signature.
get_current_status(self)
Get the current status of the evaluation run.
 
:return: Current status of the run
:rtype: Status
:raises ValueError: If the current status cannot be retrieved
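
A short status-check sketch; the printed value is whatever Status object the method returns (its concrete values are not listed in this module):

    status = run.get_current_status()   # may raise ValueError on retrieval failure
    print(status)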
get_debug_info(self) -> gen_ai_hub.evaluations.models.evaluation_run.ExecutionStatusDetails
Provide debug information when the execution status is FAILED or DEAD.
 
:return: Execution status details including failed pod information
:rtype: ExecutionStatusDetails
get_debug_logs(self)
Get the complete trace of execution logs.
 
:return: List of log entries as dictionaries
:rtype: list
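
A debugging sketch for a run whose execution ended in FAILED or DEAD:

    info = run.get_debug_info()      # ExecutionStatusDetails instance
    print(info.status)               # execution status reported by AI Core
    print(info.details)              # detailed information, e.g. failed pod data

    for entry in run.get_debug_logs():   # complete execution log trace
        print(entry)                     # each entry is a dictionary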
load_results_tables(self)
Download results from S3 and load the required table data.
 
:return: Dictionary containing completions and metrics table data
:rtype: dict
:raises RuntimeError: If the results cannot be downloaded
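
A sketch of loading the raw tables directly; the dictionary keys are assumed here to match the COMPLETIONS_TABLE_KEY and METRICS_TABLE_KEY constants from the Data section:

    tables = run.load_results_tables()              # downloads results from S3
    completions_data = tables["submission_result"]  # COMPLETIONS_TABLE_KEY (assumed key)
    metrics_data = tables["evaluation_result"]      # METRICS_TABLE_KEY (assumed key)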
results(self)
Get the results of the evaluation run.
 
:return: Results object for accessing completion and metric results
:rtype: Results
:raises ValueError: If the execution has not completed
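
A typical access pattern, assuming the execution has already completed:

    results = run.results()   # Results handler; raises ValueError if the run has not completed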
set_cached_results_data(self, data)
Set the cached results data from the child results class.
 
:param data: Results data to cache
:type data: Any
wait_for_completion(self, timeout: Optional[int] = None)
Wait for the evaluation run to complete by polling status.
 
:param timeout: Maximum time to wait in seconds; defaults to None, in which case DEFAULT_TIMEOUT (3600 seconds, i.e. 1 hour) is used
:type timeout: Optional[int]
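
An end-to-end waiting sketch; the 1800-second timeout is an arbitrary example, and omitting it falls back to the module's DEFAULT_TIMEOUT:

    run.wait_for_completion(timeout=1800)   # poll until completion or 30 minutes elapse
    results = run.results()                 # safe to call once the run has completed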

Data descriptors defined here:
__dict__
dictionary for instance variables (if defined)
__weakref__
list of weak references to the object (if defined)

 
class ExecutionStatusDetails(builtins.object)
    ExecutionStatusDetails(details: Any, status: Any) -> None
 
Dataclass for execution status details.
 
:param details: Detailed information about the execution status
:type details: Any
:param status: Current status of the execution
:type status: Any
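
A small field-access sketch; ExecutionStatusDetails is a plain dataclass, so both fields are read directly and instances compare by value:

    snapshot = run.get_debug_info()   # returns ExecutionStatusDetails
    print(snapshot.status, snapshot.details)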
 
  Methods defined here:
__eq__(self, other)
Return self==value.
__init__(self, details: Any, status: Any) -> None
Initialize self.  See help(type(self)) for accurate signature.
__repr__(self)
Return repr(self).

Data descriptors defined here:
__dict__
dictionary for instance variables (if defined)
__weakref__
list of weak references to the object (if defined)

Data and other attributes defined here:
__annotations__ = {'details': typing.Any, 'status': typing.Any}
__dataclass_fields__ = {'details': Field(name='details',type=typing.Any,default=<da...appingproxy({}),kw_only=False,_field_type=_FIELD), 'status': Field(name='status',type=typing.Any,default=<dat...appingproxy({}),kw_only=False,_field_type=_FIELD)}
__dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False)
__hash__ = None
__match_args__ = ('details', 'status')

 
class Results(builtins.object)
    Results(run: gen_ai_hub.evaluations.models.evaluation_run.EvaluationRun)
 
Represents the Results handler for an EvaluationRun object.
 
This class provides methods to access completion results, metric results,
and aggregated results for a specific evaluation run.
 
:param run: The parent EvaluationRun object
:type run: EvaluationRun
 
  Methods defined here:
__init__(self, run: gen_ai_hub.evaluations.models.evaluation_run.EvaluationRun)
Initialize self.  See help(type(self)) for accurate signature.
aggregations(self)
Get the aggregated results for the run from the tracking service.
 
:return: JSON response containing aggregated metric results
:rtype: dict
:raises ValueError: If an error occurs while fetching aggregation results
completions(self)
Get the completion results for the run.
 
:return: DataFrame containing completion results for the run
:rtype: pd.DataFrame
:raises ValueError: If an error occurs while fetching completions
metrics(self)
Get the metric-level results for the run.
 
:return: DataFrame containing metric results for the run
:rtype: pd.DataFrame
:raises ValueError: If an error occurs while fetching metric results
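
A combined sketch of the three accessors, assuming the run has completed and `results = run.results()` was obtained from the parent EvaluationRun:

    completions_df = results.completions()   # pandas DataFrame of completion results
    metrics_df = results.metrics()           # pandas DataFrame of metric-level results
    aggregated = results.aggregations()      # dict (JSON) of aggregated metric results

    print(completions_df.head())
    print(metrics_df.head())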

Data descriptors defined here:
__dict__
dictionary for instance variables (if defined)
__weakref__
list of weak references to the object (if defined)

 
Functions
       
configure_pandas_display()

 
Data
ADDITIONAL_INFO_KEY = 'additional_info'
AWS_OSS_BUCKET_URL_KEY = 'storage.ai.sap.com/bucket'
AWS_OSS_PATH_PREFIX_URL_KEY = 'storage.ai.sap.com/pathPrefix'
AWS_OSS_REGION_URL_KEY = 'storage.ai.sap.com/region'
Any = typing.Any
COMPLETIONS_TABLE_KEY = 'submission_result'
DEFAULT_KEY = 'default'
DEFAULT_TIMEOUT = 3600
List = typing.List
METRICS_TABLE_KEY = 'evaluation_result'
Optional = typing.Optional
RESULTS_FILE_KEY = 'results.db'