codeoceansdk.DataAsset
Module imports and the supporting dataclasses that the class sections below build on (DataSource, SearchRange, SearchResults, PermissionUser):

```python
import logging
from enum import Enum
from typing import Optional

from dataclasses import dataclass, field, asdict
from codeoceansdk.CodeOcean import CodeOcean

logger = logging.getLogger(__name__)


@dataclass(slots=True)
class DataSource:
    type: str


@dataclass(frozen=True, slots=True)
class SearchRange:
    min: Optional[int] = None
    """Minimum of the range"""
    max: Optional[int] = None
    """Maximum of the range"""


@dataclass(frozen=True, slots=True)
class SearchResults:
    has_more: bool
    """Indicates whether there are more results than those returned"""
    results: Optional[list] = field(default_factory=list)
    """DataAssets that match the search criteria"""


@dataclass(frozen=True, slots=True)
class PermissionUser:
    email: str
    """User email"""
    role: Enum("role", ["owner", "viewer", "editor"])
    """Role to set for user"""
```
```python
@dataclass(kw_only=True, slots=True)
class GCPCloudStorage(DataSource):
    """Google Cloud Storage data source"""

    bucket: str
    """GCP bucket"""
    prefix: Optional[str] = None
    """GCP prefix"""
    client_secret: Optional[str] = None
    """GCP client secret"""
    client_id: Optional[str] = None
    """GCP client id"""
    type: str = "gcp"
```
Google Cloud Storage data source
```python
@dataclass(kw_only=True, slots=True)
class AWSS3(DataSource):
    """AWS S3 source"""

    bucket: str
    """AWS bucket"""
    keep_on_external_storage: Optional[bool] = None
    """When this property is set to true, the data asset files are not copied to
    Code Ocean; the prefix property is ignored and the entire S3 bucket is used."""
    prefix: Optional[str] = None
    """AWS prefix"""
    index_data: Optional[bool] = None
    """When this property is set to true, Code Ocean indexes the files in the
    remote bucket, allowing the file tree to be viewed on the dataset and capsule
    pages. Only relevant when keep_on_external_storage is true (when it is false,
    Code Ocean always indexes the files)."""
    access_key_id: Optional[str] = None
    """AWS access key id (only needed when the source bucket is private)"""
    secret_access_key: Optional[str] = None
    """AWS secret access key (only needed when the source bucket is private)"""
    type: str = "aws"
```
AWS S3 source
When this property is set to true, the data asset files are not copied to Code Ocean; the prefix property is ignored and the entire S3 bucket is used.
When this property is set to true, Code Ocean indexes the files in the remote bucket, allowing the file tree to be viewed on the dataset and capsule pages. Only relevant when keep_on_external_storage is true (when it is false, Code Ocean always indexes the files).
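For example, a minimal sketch of describing an external S3 bucket that stays on external storage; the bucket name is hypothetical:

```python
from codeoceansdk.DataAsset import AWSS3

# Hypothetical public bucket left on external storage but indexed by Code Ocean.
external_source = AWSS3(
    bucket="my-example-bucket",     # assumed bucket name
    keep_on_external_storage=True,  # files stay in S3; prefix would be ignored
    index_data=True,                # still index the remote file tree
)
```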
```python
@dataclass(kw_only=True, slots=True)
class ComputationSource(DataSource):
    """Result from computation source"""

    id: str
    """Metadata id for computation"""
    type: str = "computation"
```
Result from computation source
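A computation's output can likewise serve as a source when creating a result data asset; a minimal sketch with a hypothetical computation id:

```python
from codeoceansdk.DataAsset import ComputationSource

# Hypothetical id of a finished computation.
result_source = ComputationSource(id="0123abcd-4567-89ef-0123-456789abcdef")
```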
```python
@dataclass(frozen=True, slots=True)
class SourceBucket:
    """Original source bucket of the data asset"""

    bucket: Optional[str] = None
    """Original bucket name"""
    origin: Optional[Enum("origin", ["aws", "local", "gcp"])] = None
    """Which cloud this came from (aws, local, gcp)."""
    prefix: Optional[str] = None
    """Bucket prefix"""
```
Original source bucket of the data asset
```python
@dataclass(frozen=True, slots=True)
class UpdateMetadataParams:
    name: Optional[str] = None
    """Name of data asset to update"""
    description: Optional[str] = None
    """Description of data asset to update"""
    mount: Optional[str] = None
    """Mount point of data asset to update"""
    custom_metadata: Optional[dict] = field(default_factory=dict)
    """Custom metadata to update"""
    tags: Optional[list[str]] = field(default_factory=list)
    """Tags to update"""
```
```python
@dataclass(frozen=True, slots=True)
class SearchFilter:
    key: str
    """Field key; can be one of title, description, tags, or any custom field"""
    exclude: Optional[bool] = None
    """Whether to include or exclude the field value"""
    value: Optional[str] = None
    """Field value to be included/excluded"""
    values: Optional[list[str]] = field(default_factory=list)
    """Field values when there are multiple values"""
    range: Optional[SearchRange] = None
    """Field range to be included/excluded (one of min/max must be set)"""
```
```python
@dataclass(frozen=True, slots=True)
class SearchParams:
    query: Optional[str] = None
    """The search query. Can be free text or of the form
    'name:... tag:... run_script:... commit_id:...'"""
    offset: Optional[int] = None
    """If 30 items are returned and this is set to 10, the results 10-20 will be returned"""
    limit: Optional[int] = None
    """How many items to return"""
    sort_order: Optional[Enum("sort_order", ["asc", "desc"])] = None
    """Sort results in ascending or descending order"""
    sort_field: Optional[Enum("sort_field", ["created", "type", "name", "size"])] = None
    """Sort results by the specified field"""
    type: Optional[Enum("type", ["dataset", "result"])] = None
    """If omitted, results may include both datasets and results."""
    ownership: Optional[Enum("ownership", ["private", "created", "shared"])] = None
    """Search data assets by ownership: created - only data assets created by the
    user; shared - data assets shared with the user; private - data assets the
    user has not shared"""
    favorite: Optional[bool] = None
    """Only search favorited data assets"""
    archived: Optional[bool] = None
    """Only search archived data assets"""
    origin: Optional[Enum("origin", ["internal", "external"])] = None
    """Determines whether to return only external or only internal datasets"""
    filters: Optional[list[SearchFilter]] = field(default_factory=list)
    """List of fields to filter by."""
```
The search query. Can be free text or of the form 'name:... tag:... run_script:... commit_id:...'
If 30 items are returned and this is set to 10, the results 10-20 will be returned
Sort results by ascending or descending order
If omitted results may include both datasets and results.
Search data assets by ownership: created - only data assets created by the user; shared - data assets shared with the user; private - data assets the user has not shared
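A minimal sketch of composing a search; the query, tag, and limit are hypothetical, and plain strings are passed where the annotations name ad-hoc enums (dataclasses do not enforce them):

```python
from codeoceansdk.DataAsset import SearchFilter, SearchParams

# Hypothetical search: the ten most recently created datasets tagged "genomics".
params = SearchParams(
    query="genomics",
    limit=10,
    sort_field="created",
    sort_order="desc",
    type="dataset",
    filters=[SearchFilter(key="tags", value="genomics")],
)
```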
```python
@dataclass(frozen=True, slots=True)
class PermissionGroup:
    """Add/update permissions for a group to give access to a data asset
    (only relevant with certain SSOs)"""

    name: str
    """Group name"""
    role: Enum("role", ["owner", "viewer"])
    """Role to set for group"""
```
Add/update permissions for a group to give access to a data asset (only relevant with certain SSOs)
```python
@dataclass(frozen=True, slots=True)
class PermissionParams:
    """Parameters to use to set permissions"""

    users: Optional[list[PermissionUser]] = field(default_factory=list)
    """Users to add/update permissions for"""
    groups: Optional[list[PermissionGroup]] = field(default_factory=list)
    """Groups to add/update permissions for"""
    everyone: Optional[Enum("role", ["viewer", "none"])] = None
    """Set permissions for everyone. Can only be viewer or none"""
```
Parameters to use to set permissions
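A minimal sketch of composing permissions; the emails are hypothetical:

```python
from codeoceansdk.DataAsset import PermissionParams, PermissionUser

# Grant one owner and one viewer, and make the asset viewable by everyone.
permissions = PermissionParams(
    users=[
        PermissionUser(email="alice@example.com", role="owner"),
        PermissionUser(email="bob@example.com", role="viewer"),
    ],
    everyone="viewer",
)
```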
```python
@dataclass(frozen=True, slots=True)
class Provenance:
    capsule: Optional[str] = None
    """Source capsule"""
    commit: Optional[str] = None
    """Git commit for result"""
    run_script: Optional[str] = None
    """Script used to generate results"""
    docker_image: Optional[str] = None
    """Environment docker image used for result"""
    data_assets: Optional[list[str]] = field(default_factory=list)
    """Data assets used to generate results"""
```
```python
@dataclass(kw_only=True)
class DataAsset(CodeOcean):
    id: str
    """Metadata id"""
    created: int = 0
    """Data asset creation time"""
    description: str = ""
    """Description of the data asset."""
    files: int = 0
    """Number of files in the data asset. Not relevant if the data asset was
    created with keep_on_external_storage = true and index_data = false."""
    last_used: int = 0
    """Time this data asset was last used"""
    size: int = 0
    """Size in bytes of the data asset. Not relevant if the data asset was
    created with keep_on_external_storage = true and index_data = false."""
    sourceBucket: Optional[SourceBucket] = None
    """Info on the bucket from which the dataset was created"""
    tags: Optional[list[str]] = field(default_factory=list)
    """Keywords for searching the data asset by."""
    type: Enum("type", ["dataset", "result"]) = "DATA_ASSET_TYPE_DATASET"
    """Type of the data asset (DATA_ASSET_TYPE_DATASET, DATA_ASSET_TYPE_RESULT)"""
    custom_metadata: dict = field(default_factory=dict)
    """Map of key/value pairs for the custom metadata fields defined by the
    admin, with the values set by the user"""
    app_parameters: Optional[list[dict]] = field(default_factory=list)
    """Parameters used to generate the data asset"""
    provenance: Optional[Provenance] = None
    """Provenance of the data asset (for results)"""
    name: str = ""
    """Name of dataset"""
    state: Enum("state", ["draft", "ready", "failed"]) = "DATA_ASSET_STATE_DRAFT"
    """Data asset creation state. Can be one of the following:
    DATA_ASSET_STATE_DRAFT - the data asset is still being created.
    DATA_ASSET_STATE_READY - the data asset is ready for use.
    DATA_ASSET_STATE_FAILED - the data asset creation failed."""

    def __post_init__(self):
        super().__post_init__()
        self.data_asset_url = f"{self.api_url}/data_assets/{self.id}"
```
Number of files in the data asset. Not relevant if the data asset was created with keep_on_external_storage = true and index_data = false.
Size in bytes of the data asset. Not relevant if the data asset was created with keep_on_external_storage = true and index_data = false.
Type of the data asset. (DATA_ASSET_TYPE_DATASET, DATA_ASSET_TYPE_RESULT)
Map of key/value pairs for the custom metadata fields defined by the admin, with the values set by the user
Data asset creation state. Can be one of the following: DATA_ASSET_STATE_DRAFT - the data asset is still being created. DATA_ASSET_STATE_READY - the data asset is ready for use. DATA_ASSET_STATE_FAILED - the data asset creation failed.
```python
    @staticmethod
    def from_dict(dataset_dict, domain, api_key):
        """
        Build a DataAsset from a dictionary of parameters.

        :param dataset_dict: Dictionary containing DataAsset parameters
        :param domain: Code Ocean Domain
        :param api_key: API key to access data asset
        :return: DataAsset
        """
        # Convert nested dictionaries into their dataclass equivalents.
        if "sourceBucket" in dataset_dict:
            dataset_dict["sourceBucket"] = SourceBucket(**dataset_dict["sourceBucket"])
        if "provenance" in dataset_dict:
            dataset_dict["provenance"] = Provenance(**dataset_dict["provenance"])
        dataset_dict["domain"] = domain
        dataset_dict["api_key"] = api_key
        return DataAsset(**dataset_dict)
```
Parameters
- dataset_dict: Dictionary containing DataAsset parameters
- domain: Code Ocean Domain
- api_key: API key to access data asset
Returns
DataAsset
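A minimal sketch of building a DataAsset from a raw API dictionary; the field values and domain are hypothetical, and it assumes the CodeOcean base class accepts domain and api_key:

```python
from codeoceansdk.DataAsset import DataAsset

raw = {
    "id": "aaaa-bbbb",  # hypothetical data asset id
    "name": "example dataset",
    "sourceBucket": {"bucket": "my-example-bucket", "origin": "aws", "prefix": ""},
}
asset = DataAsset.from_dict(raw, "https://codeocean.example.com", "CO_API_KEY")
```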
```python
    def get_data_asset(self):
        """
        Get data asset parameters for the given data asset id.
        """
        logger.debug(f"Retrieving data asset from {self.data_asset_url}")
        req = self.get(self.data_asset_url)
        # Refresh this instance in place with the retrieved values.
        new_asset = self.from_dict(req.json(), self.domain, self.api_key)
        self.__dict__.update(new_asset.__dict__)
```
Get data asset parameters for the given data asset id.
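A minimal sketch of refreshing an asset from the API; the domain, API key, and id are hypothetical:

```python
from codeoceansdk.DataAsset import DataAsset

asset = DataAsset(
    id="aaaa-bbbb",                          # hypothetical data asset id
    domain="https://codeocean.example.com",  # hypothetical deployment
    api_key="CO_API_KEY",                    # hypothetical API key
)
asset.get_data_asset()  # fills in name, state, size, etc. from the API
```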
```python
    def delete_data_asset(self):
        """Delete data asset"""
        logger.info(f"Deleting data asset {self.id}")
        self.delete(self.data_asset_url)
```
Delete data asset
```python
    @staticmethod
    def create_data_asset(
        name: str,
        tags: list[str],
        data_source: DataSource,
        mount: str,
        environment: CodeOcean,
        custom_metadata: Optional[dict] = None,
        description: Optional[str] = None,
    ):
        """
        Create data asset
        :param name: Data asset name
        :param tags: Data asset tags
        :param data_source: Data source; can be AWSS3, GCPCloudStorage, or ComputationSource
        :param mount: Mount point for the data asset in a capsule
        :param environment: CodeOcean environment to create the data asset in
        :param custom_metadata: Custom metadata values to set
        :param description: Description of data asset
        :return: DataAsset
        """
        input_url = f"{environment.api_url}/data_assets"
        # The API expects the source keyed by its type, e.g. {"aws": {...}}.
        data_source_dict = asdict(data_source)
        data_type = data_source_dict.pop("type")
        payload = {
            "name": name,
            "tags": tags,
            "source": {data_type: data_source_dict},
            "mount": mount,
        }
        if custom_metadata:
            payload["custom_metadata"] = custom_metadata
        if description:
            payload["description"] = description
        logger.info(f"Creating data asset from {data_type}")
        response = environment.post(input_url, payload).json()
        return DataAsset.from_dict(
            response, environment.domain, environment.api_key
        )
```
Create data asset
Parameters
- name: Data asset name
- tags: Data asset tags
- data_source: Data source; can be AWSS3, GCPCloudStorage, or ComputationSource
- mount: Mount point for the data asset in a capsule
- environment: CodeOcean environment to create data asset in.
- custom_metadata: Custom metadata values to set
- description: Description of data asset
Returns
DataAsset
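A minimal sketch of creating a data asset from an S3 source; the environment credentials and names are hypothetical, and it assumes the CodeOcean base class accepts domain and api_key:

```python
from codeoceansdk.CodeOcean import CodeOcean
from codeoceansdk.DataAsset import AWSS3, DataAsset

# Hypothetical environment.
env = CodeOcean(domain="https://codeocean.example.com", api_key="CO_API_KEY")

asset = DataAsset.create_data_asset(
    name="run-42 raw data",
    tags=["genomics", "raw"],
    data_source=AWSS3(bucket="my-example-bucket", prefix="experiments/run-42/"),
    mount="raw_data",
    environment=env,
    description="Raw outputs from run 42 (hypothetical example)",
)
```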
```python
    def update_metadata(self, updated_parameters: UpdateMetadataParams):
        """
        Update metadata for data asset
        :param updated_parameters: UpdateMetadataParams with updated values
        :return: Updated DataAsset object
        """
        logger.info(f"Updating metadata for asset {self.id}")
        response = self.put(self.data_asset_url, asdict(updated_parameters)).json()
        return DataAsset.from_dict(response, self.domain, self.api_key)
```
Update metadata for data asset
Parameters
- updated_parameters: UpdateMetadataParams with updated values
Returns
Updated DataAsset object
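For example, a hedged sketch that renames the asset created above and replaces its tags (hypothetical values):

```python
from codeoceansdk.DataAsset import UpdateMetadataParams

updated = asset.update_metadata(
    UpdateMetadataParams(
        name="run-42 raw data (v2)",
        mount="raw_data",
        tags=["genomics", "raw", "v2"],
    )
)
```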
```python
    def archive_asset(self):
        """
        Archive data asset
        """
        logger.info(f"Archiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": True})
```
Archive data asset
```python
    def unarchive_asset(self):
        """
        Unarchive data asset
        """
        logger.info(f"Unarchiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": False})
```
Unarchive data asset
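Both methods patch the same archive endpoint, so toggling is symmetric:

```python
asset.archive_asset()    # sets archive=true on the archive endpoint
asset.unarchive_asset()  # sets archive=false to restore the asset
```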
```python
    @staticmethod
    def search_data_assets(environment: CodeOcean, search_params: SearchParams):
        """
        Search data assets
        :param environment: CodeOcean environment
        :param search_params: SearchParams object containing the search parameters
        :return: SearchResults containing the found results
        """
        params_dict = asdict(search_params)
        logger.info("Searching data assets")
        logger.debug(params_dict)

        input_url = f"{environment.api_url}/data_assets/search"
        response = environment.post(input_url, params_dict).json()
        return SearchResults(
            has_more=response["has_more"],
            results=[
                DataAsset.from_dict(x, environment.domain, environment.api_key)
                for x in response["results"]
            ],
        )
```
Search data assets
Parameters
- environment: CodeOcean environment
- search_params: SearchParams object containing the search parameters
Returns
SearchResults containing the found results
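Continuing the hypothetical env and SearchParams sketches from above:

```python
found = DataAsset.search_data_assets(env, params)
for result in found.results:
    print(result.id, result.name)
if found.has_more:
    # More matches exist beyond this page; raise limit or offset to fetch them.
    print("more results available")
```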
```python
    def set_permissions(self, permissions: PermissionParams):
        """
        Set permissions on Data Asset
        :param permissions: Permissions to add or update
        """
        logger.info(f"Setting permissions on {self.id}")
        permissions_dict = asdict(permissions)
        logger.debug(permissions_dict)
        input_url = f"{self.data_asset_url}/permissions"
        self.post(input_url, permissions_dict)
```
Set permissions on Data Asset
Parameters
- permissions: Permissions to add or update
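Applying the hypothetical PermissionParams composed earlier:

```python
asset.set_permissions(permissions)
```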