codeoceansdk.DataAsset

  1import logging
  2from enum import Enum
  3from typing import Optional
  4
  5from dataclasses import dataclass, field, asdict
  6from codeoceansdk.CodeOcean import CodeOcean
  7
  8logger = logging.getLogger(__name__)
  9
 10
@dataclass(slots=True)
class DataSource:
    """Base class for data asset sources.

    `type` discriminates the concrete source kind; subclasses fix it
    (e.g. "gcp", "aws", "computation").
    """

    type: str  # source discriminator consumed by the Code Ocean API
 14
 15
 16@dataclass(kw_only=True, slots=True)
 17class GCPCloudStorage(DataSource):
 18    """Google cloud data source"""
 19
 20    bucket: str
 21    """GCP bucket"""
 22    prefix: str = None
 23    """GCP secret"""
 24    client_secret: Optional[str] = None
 25    """GCP secret key"""
 26    client_id: Optional[str] = None
 27    """GCP client id"""
 28    type: str = "gcp"
 29
 30
 31@dataclass(kw_only=True, slots=True)
 32class AWSS3(DataSource):
 33    """AWS S3 source"""
 34
 35    bucket: str
 36    """AWS bucket"""
 37    keep_on_external_storage: bool = None
 38    """When this property is set to true, the data asset files will not be copied over to CO. Also the prefix 
 39    property will be ignored and the entire S3 bucket would be used. """
 40    prefix: str = None
 41    """AWS prefix"""
 42    index_data: bool = None
 43    """When this property is set to true, CO will index the files in the remote bucket, allowing to view 
 44    the file tree in the dataset and capsule pages. This is only relevant when keep_on_external_storage is set to 
 45    true (when keep_on_external_storage is false CO will always index the files) """
 46    access_key_id: Optional[str] = None
 47    """AWS ACCESS KEY ID (only needed when source bucket is private)"""
 48    secret_access_key: Optional[str] = None
 49    """AWS SECRET ACCESS KEY (only needed when source bucket is private)"""
 50    type: str = "aws"
 51
 52
@dataclass(kw_only=True, slots=True)
class ComputationSource(DataSource):
    """Result from computation source"""

    id: str
    """Metadata id for computation"""
    type: str = "computation"  # fixed discriminator for computation-result sources
 60
 61
 62@dataclass(frozen=True, slots=True)
 63class SourceBucket:
 64    """In the data asset, what was the original source"""
 65
 66    bucket: str = None
 67    """original bucket’s name"""
 68    origin: Enum("origin", ["aws", "local", "gcp"]) = None
 69    """Which cloud did this come from (aws, local, gcp)."""
 70    prefix: str = None
 71    """Bucket prefix"""
 72
 73
 74@dataclass(frozen=True, slots=True)
 75class UpdateMetadataParams:
 76    name: str = None
 77    """Name of data asset to update"""
 78    description: str = None
 79    """Description of data asset to update"""
 80    mount: str = None
 81    """Mount point of data asset to update"""
 82    custom_metadata: Optional[dict] = field(default_factory=dict)
 83    """Custom metadata to update"""
 84    tags: Optional[list[str]] = field(default_factory=list)
 85    """Tags to update"""
 86
 87
 88@dataclass(frozen=True, slots=True)
 89class SearchRange:
 90    min: int = None
 91    """Minimum of the range"""
 92    max: int = None
 93    """Maximum of the range"""
 94
 95
 96@dataclass(frozen=True, slots=True)
 97class SearchFilter:
 98    key: str
 99    """Field key, can be each of title, description, tags, any custom field"""
100    exclude: bool = None
101    """Whether to include/exclude the field value"""
102    value: Optional[str] = None
103    """Field value to be included/excluded"""
104    values: Optional[list[str]] = field(default_factory=list)
105    """field values in case of multiple values"""
106    range: Optional[SearchRange] = None
107    """Field range to be included/excluded (one of min/max must be set)"""
108
109
@dataclass(frozen=True, slots=True)
class SearchParams:
    """Query parameters for the data asset search endpoint."""

    query: Optional[str] = None
    """determines the search query. can be a free text or in the form of 'name:... tag:... run_script:...
    commit_id:...' """
    offset: Optional[int] = None
    """If 30 items are returned and this is set to 10, the results 10-20 will be returned"""
    limit: Optional[int] = None
    """How many items to return"""
    sort_order: Optional[Enum("sort_order", ["asc", "desc"])] = None
    """Sort results by ascending or descending order"""
    sort_field: Optional[Enum("sort_field", ["created", "type", "name", "size"])] = None
    """Sort results by specified field"""
    type: Optional[Enum("type", ["dataset", "result"])] = None
    """If omitted results may include both datasets and results."""
    ownership: Optional[Enum("ownership", ["private", "created", "shared"])] = None
    """search data asset by ownership - created - only datasets created by the user, shared - datasets shared with
    the user, private - datasets that the user has not shared"""
    favorite: Optional[bool] = None
    """Only search favorited data assets"""
    archived: Optional[bool] = None
    """Only search archived data assets"""
    origin: Optional[Enum("origin", ["internal", "external"])] = None
    """determines whether to get only external/local datasets"""
    filters: Optional[list[SearchFilter]] = field(default_factory=list)
    """List of fields to filter by."""
136
137
@dataclass(frozen=True, slots=True)
class SearchResults:
    """Page of results returned by DataAsset.search_data_assets."""

    has_more: bool
    """Indicates whether there are more results than those returned"""
    results: Optional[list] = field(default_factory=list)
    """DataAssets that match the search criteria"""
144
145
@dataclass(frozen=True, slots=True)
class PermissionGroup:
    """Add/update permissions for a group to give access to a data asset (only relevant with certain
    SSOs)"""

    name: str
    """Group name"""
    role: Enum("role", ["owner", "viewer"])  # groups cannot be granted "editor" (unlike PermissionUser)
    """Role to set for group"""
155
156
@dataclass(frozen=True, slots=True)
class PermissionUser:
    """Add/update permissions for a single user on a data asset."""

    email: str
    """User email"""
    role: Enum("role", ["owner", "viewer", "editor"])
    """Role to set for user"""
163
164
@dataclass(frozen=True, slots=True)
class PermissionParams:
    """Parameters to use to set permissions"""

    users: Optional[list[PermissionUser]] = field(default_factory=list)
    """Users to add/update permissions for"""
    groups: Optional[list[PermissionGroup]] = field(default_factory=list)
    """Group to add/update permissions for"""
    everyone: Optional[Enum("role", ["viewer", "none"])] = None
    """Set permissions for everyone. Can only be viewer or none"""
175
176
177@dataclass(frozen=True, slots=True)
178class Provenance:
179    capsule: str = None
180    """Source capsule"""
181    commit: str = None
182    """Git commit for result"""
183    run_script: str = None
184    """Script used to generate results"""
185    docker_image: str = None
186    """Environment docker image used for result"""
187    data_assets: Optional[list[str]] = field(default_factory=list)
188    """Data assets used to generate results"""
189
190
@dataclass(kw_only=True)
class DataAsset(CodeOcean):
    """A Code Ocean data asset (dataset or computation result).

    Wraps the ``/data_assets`` REST endpoints; connection settings
    (``domain``, ``api_key``, ``api_url``) come from the CodeOcean base class.
    """

    id: str
    """Metadata id"""
    created: int = 0
    """Data asset creation time"""
    description: str = ""
    """Description of the data asset."""
    files: int = 0
    """Number of files in the data asset. Not relevant if the data asset was created with keep_on_external_storage =
    true and index_data = false. """
    last_used: int = 0
    """Time this data asset was last used"""
    size: int = 0
    """Size in bytes of the data asset. Not relevant if the data asset was created with keep_on_external_storage =
    true and index_data = false. """
    sourceBucket: Optional[SourceBucket] = None
    """Info on bucket from which dataset was created"""
    tags: Optional[list[str]] = field(default_factory=list)
    """Keywords for searching the data asset by."""
    type: Enum("type", ["dataset", "result"]) = "DATA_ASSET_TYPE_DATASET"
    """Type of the data asset. (DATA_ASSET_TYPE_DATASET, DATA_ASSET_TYPE_RESULT)"""
    custom_metadata: dict = field(default_factory=dict)
    """Map of key value pairs, according to custom metadata fields defined by the admin and values that were set by
    the user """
    app_parameters: Optional[list[dict]] = field(default_factory=list)
    """Parameters used to generate the data asset"""
    provenance: Optional[Provenance] = None
    """Info on how a result data asset was generated"""
    name: str = ""
    """Name of dataset"""
    state: Enum("state", ["draft", "ready", "failed"]) = "DATA_ASSET_STATE_DRAFT"
    """data asset creation state. Can be one of the following:
    DATA_ASSET_STATE_DRAFT - the data asset is still being created.
    DATA_ASSET_STATE_READY - the data asset is ready for use.
    DATA_ASSET_STATE_FAILED - the data asset creation failed."""

    def __post_init__(self):
        super().__post_init__()
        # Base endpoint for all per-asset operations below.
        self.data_asset_url = f"{self.api_url}/data_assets/{self.id}"

    @staticmethod
    def from_dict(dataset_dict, domain, api_key):
        """
        Build a DataAsset from an API response dictionary.

        :param dataset_dict: Dictionary containing Dataset parameters
        :param domain: Code Ocean Domain
        :param api_key: API key to access data asset
        :return: DataAsset
        """
        # Promote nested dictionaries to their dataclass equivalents.
        # .get() (rather than `in`) also tolerates explicit nulls in the
        # response, which would otherwise crash on **None.
        if dataset_dict.get("sourceBucket"):
            dataset_dict["sourceBucket"] = SourceBucket(**dataset_dict["sourceBucket"])
        if dataset_dict.get("provenance"):
            dataset_dict["provenance"] = Provenance(**dataset_dict["provenance"])
        dataset_dict["domain"] = domain
        dataset_dict["api_key"] = api_key
        return DataAsset(**dataset_dict)

    def get_data_asset(self):
        """
        Get data asset parameters for given data asset id and refresh this
        instance in place with the server's current values.
        """
        logger.debug(f"Retrieving data asset from {self.data_asset_url}")
        req = self.get(self.data_asset_url)
        new_comp = self.from_dict(req.json(), self.domain, self.api_key)
        # Copy the fresh state onto this object so callers keep their reference.
        self.__dict__.update(new_comp.__dict__)

    def delete_data_asset(self):
        """Delete data asset"""
        logger.info(f"Deleting data asset {self.id}")
        self.delete(self.data_asset_url)

    @staticmethod
    def create_data_asset(
        name: str,
        tags: list[str],
        data_source: DataSource,
        mount: str,
        environment: CodeOcean,
        custom_metadata: Optional[dict] = None,
        description: Optional[str] = None,
    ):
        """
        Create data asset
        :param name: Data asset name
        :param tags: Data asset tags
        :param data_source: Data source, can be AWSS3, GCPCloudStorage, ComputationSource, or DataSource
        :param mount: Mountpoint for data asset in capsule
        :param environment: CodeOcean environment to create data asset in.
        :param custom_metadata: Custom metadata values to set
        :param description: Description of data asset
        :return: DataAsset
        """
        input_url = f"{environment.api_url}/data_assets"
        # The API expects the source payload nested under its type key,
        # e.g. {"aws": {...}} or {"gcp": {...}}, without the "type" field itself.
        data_source_dict = asdict(data_source)
        data_type = data_source_dict.pop("type")
        payload = {
            "name": name,
            "tags": tags,
            "source": {data_type: data_source_dict},
            "mount": mount,
        }
        if custom_metadata:
            payload["custom_metadata"] = custom_metadata
        if description:
            payload["description"] = description
        logger.info(f"Creating data asset from {data_type}")
        response = environment.post(input_url, payload).json()
        new_data_asset = DataAsset.from_dict(
            response, environment.domain, environment.api_key
        )
        return new_data_asset

    def update_metadata(self, updated_parameters: UpdateMetadataParams):
        """
        Update metadata for data asset
        :param updated_parameters: UpdateMetadataParams with updated values
        :return: Updated DataAsset object
        """
        logger.info(f"Updating metadata for asset {self.id}")
        response = self.put(self.data_asset_url, asdict(updated_parameters)).json()
        new_data_asset = DataAsset.from_dict(response, self.domain, self.api_key)
        return new_data_asset

    def archive_asset(self):
        """
        Archive data asset
        """
        logger.info(f"Archiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": True})

    def unarchive_asset(self):
        """
        Unarchive data asset
        """
        logger.info(f"Unarchiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": False})

    @staticmethod
    def search_data_assets(environment: CodeOcean, search_params: SearchParams):
        """
        Search data assets
        :param environment: CodeOcean environment
        :param search_params: SearchParams object containing the search parameters
        :return: SearchResults containing the found results
        """
        search_params = asdict(search_params)
        logger.info("Searching data assets")
        logger.debug(search_params)

        input_url = f"{environment.api_url}/data_assets/search"
        response = environment.post(input_url, search_params).json()
        search_response = SearchResults(
            has_more=response["has_more"],
            results=[
                DataAsset.from_dict(x, environment.domain, environment.api_key)
                for x in response["results"]
            ],
        )
        return search_response

    def set_permissions(self, permissions: PermissionParams):
        """
        Set permissions on Data Asset
        :param permissions: Permissions to add or update
        """
        logger.info(f"Setting permissions on {self.id}")
        permissions = asdict(permissions)
        logger.debug(permissions)
        input_url = f"{self.data_asset_url}/permissions"
        self.post(input_url, permissions)
logger = <Logger codeoceansdk.DataAsset (WARNING)>
@dataclass(slots=True)
class DataSource:
12@dataclass(slots=True)
13class DataSource:
14    type: str
DataSource(type: str)
type: str
@dataclass(kw_only=True, slots=True)
class GCPCloudStorage(DataSource):
17@dataclass(kw_only=True, slots=True)
18class GCPCloudStorage(DataSource):
19    """Google cloud data source"""
20
21    bucket: str
22    """GCP bucket"""
23    prefix: str = None
24    """GCP secret"""
25    client_secret: Optional[str] = None
26    """GCP secret key"""
27    client_id: Optional[str] = None
28    """GCP client id"""
29    type: str = "gcp"

Google cloud data source

GCPCloudStorage( *, type: str = 'gcp', bucket: str, prefix: str = None, client_secret: Optional[str] = None, client_id: Optional[str] = None)
bucket: str

GCP bucket

prefix: str

GCP prefix

client_secret: Optional[str]

GCP secret key

client_id: Optional[str]

GCP client id

type: str
@dataclass(kw_only=True, slots=True)
class AWSS3(DataSource):
32@dataclass(kw_only=True, slots=True)
33class AWSS3(DataSource):
34    """AWS S3 source"""
35
36    bucket: str
37    """AWS bucket"""
38    keep_on_external_storage: bool = None
39    """When this property is set to true, the data asset files will not be copied over to CO. Also the prefix 
40    property will be ignored and the entire S3 bucket would be used. """
41    prefix: str = None
42    """AWS prefix"""
43    index_data: bool = None
44    """When this property is set to true, CO will index the files in the remote bucket, allowing to view 
45    the file tree in the dataset and capsule pages. This is only relevant when keep_on_external_storage is set to 
46    true (when keep_on_external_storage is false CO will always index the files) """
47    access_key_id: Optional[str] = None
48    """AWS ACCESS KEY ID (only needed when source bucket is private)"""
49    secret_access_key: Optional[str] = None
50    """AWS SECRET ACCESS KEY (only needed when source bucket is private)"""
51    type: str = "aws"

AWS S3 source

AWSS3( *, type: str = 'aws', bucket: str, keep_on_external_storage: bool = None, prefix: str = None, index_data: bool = None, access_key_id: Optional[str] = None, secret_access_key: Optional[str] = None)
bucket: str

AWS bucket

keep_on_external_storage: bool

When this property is set to true, the data asset files will not be copied over to CO. Also the prefix property will be ignored and the entire S3 bucket would be used.

prefix: str

AWS prefix

index_data: bool

When this property is set to true, CO will index the files in the remote bucket, allowing to view the file tree in the dataset and capsule pages. This is only relevant when keep_on_external_storage is set to true (when keep_on_external_storage is false CO will always index the files)

access_key_id: Optional[str]

AWS ACCESS KEY ID (only needed when source bucket is private)

secret_access_key: Optional[str]

AWS SECRET ACCESS KEY (only needed when source bucket is private)

type: str
@dataclass(kw_only=True, slots=True)
class ComputationSource(DataSource):
54@dataclass(kw_only=True, slots=True)
55class ComputationSource(DataSource):
56    """Result from computation source"""
57
58    id: str
59    """Metadata id for computation"""
60    type: str = "computation"

Result from computation source

ComputationSource(*, type: str = 'computation', id: str)
id: str

Metadata id for computation

type: str
@dataclass(frozen=True, slots=True)
class SourceBucket:
63@dataclass(frozen=True, slots=True)
64class SourceBucket:
65    """In the data asset, what was the original source"""
66
67    bucket: str = None
68    """original bucket’s name"""
69    origin: Enum("origin", ["aws", "local", "gcp"]) = None
70    """Which cloud did this come from (aws, local, gcp)."""
71    prefix: str = None
72    """Bucket prefix"""

In the data asset, what was the original source

SourceBucket( bucket: str = None, origin: codeoceansdk.DataAsset.origin = None, prefix: str = None)
bucket: str

original bucket’s name

origin: codeoceansdk.DataAsset.origin

Which cloud did this come from (aws, local, gcp).

prefix: str

Bucket prefix

@dataclass(frozen=True, slots=True)
class UpdateMetadataParams:
75@dataclass(frozen=True, slots=True)
76class UpdateMetadataParams:
77    name: str = None
78    """Name of data asset to update"""
79    description: str = None
80    """Description of data asset to update"""
81    mount: str = None
82    """Mount point of data asset to update"""
83    custom_metadata: Optional[dict] = field(default_factory=dict)
84    """Custom metadata to update"""
85    tags: Optional[list[str]] = field(default_factory=list)
86    """Tags to update"""
UpdateMetadataParams( name: str = None, description: str = None, mount: str = None, custom_metadata: Optional[dict] = <factory>, tags: Optional[list[str]] = <factory>)
name: str

Name of data asset to update

description: str

Description of data asset to update

mount: str

Mount point of data asset to update

custom_metadata: Optional[dict]

Custom metadata to update

tags: Optional[list[str]]

Tags to update

@dataclass(frozen=True, slots=True)
class SearchRange:
89@dataclass(frozen=True, slots=True)
90class SearchRange:
91    min: int = None
92    """Minimum of the range"""
93    max: int = None
94    """Maximum of the range"""
SearchRange(min: int = None, max: int = None)
min: int

Minimum of the range

max: int

Maximum of the range

@dataclass(frozen=True, slots=True)
class SearchFilter:
 97@dataclass(frozen=True, slots=True)
 98class SearchFilter:
 99    key: str
100    """Field key, can be each of title, description, tags, any custom field"""
101    exclude: bool = None
102    """Whether to include/exclude the field value"""
103    value: Optional[str] = None
104    """Field value to be included/excluded"""
105    values: Optional[list[str]] = field(default_factory=list)
106    """field values in case of multiple values"""
107    range: Optional[SearchRange] = None
108    """Field range to be included/excluded (one of min/max must be set)"""
SearchFilter( key: str, exclude: bool = None, value: Optional[str] = None, values: Optional[list[str]] = <factory>, range: Optional[SearchRange] = None)
key: str

Field key, can be each of title, description, tags, any custom field

exclude: bool

Whether to include/exclude the field value

value: Optional[str]

Field value to be included/excluded

values: Optional[list[str]]

field values in case of multiple values

range: Optional[SearchRange]

Field range to be included/excluded (one of min/max must be set)

@dataclass(frozen=True, slots=True)
class SearchParams:
111@dataclass(frozen=True, slots=True)
112class SearchParams:
113    query: str = None
114    """determines the search query. can be a free text or in the form of 'name:... tag:... run_script:... 
115    commit_id:...' """
116    offset: Optional[int] = None
117    """If 30 items are returned and this is set to 10, the results 10-20 will be returned"""
118    limit: Optional[int] = None
119    """How many items to return"""
120    sort_order: Optional[Enum("sort_order", ["asc", "desc"])] = None
121    """Sort results by ascending or descending order"""
122    sort_field: Optional[Enum("sort_field", ["created", "type", "name", "size"])] = None
123    """Sort results by specified field"""
124    type: Optional[Enum("type", ["dataset", "result"])] = None
125    """If omitted results may include both datasets and results."""
126    ownership: Optional[Enum("ownership", ["private", "created", "shared"])] = None
127    """search data asset by ownership - created - only datasets created by the user, shared- datasets shared with 
128    the user, private - datasets that the user has not shared"""
129    favorite: Optional[bool] = None
130    """Only search favorited data assets"""
131    archived: Optional[bool] = None
132    """Only search archived data assets"""
133    origin: Optional[Enum("origin", ["internal", "external"])] = None
134    """determines whether to get only external/local datasets"""
135    filters: Optional[list[SearchFilter]] = field(default_factory=list)
136    """List of fields to filter by."""
SearchParams( query: str = None, offset: Optional[int] = None, limit: Optional[int] = None, sort_order: Optional[codeoceansdk.DataAsset.sort_order] = None, sort_field: Optional[codeoceansdk.DataAsset.sort_field] = None, type: Optional[codeoceansdk.DataAsset.type] = None, ownership: Optional[codeoceansdk.DataAsset.ownership] = None, favorite: Optional[bool] = None, archived: Optional[bool] = None, origin: Optional[codeoceansdk.DataAsset.origin] = None, filters: Optional[list[SearchFilter]] = <factory>)
query: str

determines the search query. can be a free text or in the form of 'name:... tag:... run_script:... commit_id:...'

offset: Optional[int]

If 30 items are returned and this is set to 10, the results 10-20 will be returned

limit: Optional[int]

How many items to return

sort_order: Optional[codeoceansdk.DataAsset.sort_order]

Sort results by ascending or descending order

sort_field: Optional[codeoceansdk.DataAsset.sort_field]

Sort results by specified field

type: Optional[codeoceansdk.DataAsset.type]

If omitted results may include both datasets and results.

ownership: Optional[codeoceansdk.DataAsset.ownership]

search data asset by ownership - created - only datasets created by the user, shared- datasets shared with the user, private - datasets that the user has not shared

favorite: Optional[bool]

Only search favorited data assets

archived: Optional[bool]

Only search archived data assets

origin: Optional[codeoceansdk.DataAsset.origin]

determines whether to get only external/local datasets

filters: Optional[list[SearchFilter]]

List of fields to filter by.

@dataclass(frozen=True, slots=True)
class SearchResults:
139@dataclass(frozen=True, slots=True)
140class SearchResults:
141    has_more: bool
142    """Indicates whether there are more results than those returned"""
143    results: Optional[list] = field(default_factory=list)
144    """DataAssets that match the search criteria"""
SearchResults(has_more: bool, results: Optional[list] = <factory>)
has_more: bool

Indicates whether there are more results than those returned

results: Optional[list]

DataAssets that match the search criteria

@dataclass(frozen=True, slots=True)
class PermissionGroup:
147@dataclass(frozen=True, slots=True)
148class PermissionGroup:
149    """Add/update permissions for a group to give access to a data asset (only relevant with certain
150    SSOs)"""
151
152    name: str
153    """Group name"""
154    role: Enum("role", ["owner", "viewer"])
155    """Role to set for group"""

Add/update permissions for a group to give access to a data asset (only relevant with certain SSOs)

PermissionGroup(name: str, role: codeoceansdk.DataAsset.role)
name: str

Group name

role: codeoceansdk.DataAsset.role

Role to set for group

@dataclass(frozen=True, slots=True)
class PermissionUser:
158@dataclass(frozen=True, slots=True)
159class PermissionUser:
160    email: str
161    """User email"""
162    role: Enum("role", ["owner", "viewer", "editor"])
163    """Role to set for user"""
PermissionUser(email: str, role: codeoceansdk.DataAsset.role)
email: str

User email

role: codeoceansdk.DataAsset.role

Role to set for user

@dataclass(frozen=True, slots=True)
class PermissionParams:
166@dataclass(frozen=True, slots=True)
167class PermissionParams:
168    """Parameters to use to set permissions"""
169
170    users: Optional[list[PermissionUser]] = field(default_factory=list)
171    """Users to add/update permissions for"""
172    groups: Optional[list[PermissionGroup]] = field(default_factory=list)
173    """Group to add/update permissions for"""
174    everyone: Optional[Enum("role", ["viewer", "none"])] = None
175    """Set permissions for everyone. Can only be viewer or none"""

Parameters to use to set permissions

PermissionParams( users: Optional[list[PermissionUser]] = <factory>, groups: Optional[list[PermissionGroup]] = <factory>, everyone: Optional[codeoceansdk.DataAsset.role] = None)
users: Optional[list[PermissionUser]]

Users to add/update permissions for

groups: Optional[list[PermissionGroup]]

Group to add/update permissions for

everyone: Optional[codeoceansdk.DataAsset.role]

Set permissions for everyone. Can only be viewer or none

@dataclass(frozen=True, slots=True)
class Provenance:
178@dataclass(frozen=True, slots=True)
179class Provenance:
180    capsule: str = None
181    """Source capsule"""
182    commit: str = None
183    """Git commit for result"""
184    run_script: str = None
185    """Script used to generate results"""
186    docker_image: str = None
187    """Environment docker image used for result"""
188    data_assets: Optional[list[str]] = field(default_factory=list)
189    """Data assets used to generate results"""
Provenance( capsule: str = None, commit: str = None, run_script: str = None, docker_image: str = None, data_assets: Optional[list[str]] = <factory>)
capsule: str

Source capsule

commit: str

Git commit for result

run_script: str

Script used to generate results

docker_image: str

Environment docker image used for result

data_assets: Optional[list[str]]

Data assets used to generate results

@dataclass(kw_only=True)
class DataAsset(codeoceansdk.CodeOcean.CodeOcean):
@dataclass(kw_only=True)
class DataAsset(CodeOcean):
    """A Code Ocean data asset (a dataset or a computation result)."""

    id: str
    """Metadata id"""
    created: int = 0
    """Data asset creation time"""
    description: str = ""
    """Description of the data asset."""
    files: int = 0
    """Number of files in the data asset. Not relevant if the data asset was created with keep_on_external_storage = 
    true and index_data = false. """
    last_used: int = 0
    """Time this data asset was last used"""
    size: int = 0
    """Size in bytes of the data asset. Not relevant if the data asset was created with keep_on_external_storage = 
    true and index_data = false. """
    sourceBucket: Optional[SourceBucket] = None
    """Info on bucket from which dataset was created"""
    tags: Optional[list[str]] = field(default_factory=list)
    """Keywords for searching the data asset by."""
    # The API sends plain strings; an inline anonymous Enum annotation was
    # never enforced and rendered confusingly in the docs.
    type: str = "DATA_ASSET_TYPE_DATASET"
    """Type of the data asset. (DATA_ASSET_TYPE_DATASET, DATA_ASSET_TYPE_RESULT)"""
    custom_metadata: dict = field(default_factory=dict)
    """Map of key value pairs, according to custom metadata fields defined by the admin and values that were set by 
    the user """
    app_parameters: Optional[list[dict]] = field(default_factory=list)
    """Parameters used to generate the data asset"""
    provenance: Optional[Provenance] = None
    """Provenance of the data asset, if it was produced by a computation"""
    name: str = ""
    """Name of dataset"""
    state: str = "DATA_ASSET_STATE_DRAFT"
    """data asset creation state. Can be one of the following:
    DATA_ASSET_STATE_DRAFT - the data asset is still being created.
    DATA_ASSET_STATE_READY - the data asset is ready for use.
    DATA_ASSET_STATE_FAILED - the data asset creation failed."""

    def __post_init__(self):
        """Derive the per-asset API endpoint after the base class initializes."""
        super().__post_init__()
        self.data_asset_url = f"{self.api_url}/data_assets/{self.id}"

    @staticmethod
    def from_dict(dataset_dict, domain, api_key):
        """
        Build a DataAsset from an API response dictionary.

        :param dataset_dict: Dictionary containing Dataset parameters
        :param domain: Code Ocean Domain
        :param api_key: API key to access data asset
        :return: DataAsset
        """
        # Work on a shallow copy so the caller's dictionary is not mutated.
        dataset_dict = dict(dataset_dict)
        if "sourceBucket" in dataset_dict:
            dataset_dict["sourceBucket"] = SourceBucket(**dataset_dict["sourceBucket"])
        if "provenance" in dataset_dict:
            dataset_dict["provenance"] = Provenance(**dataset_dict["provenance"])
        dataset_dict["domain"] = domain
        dataset_dict["api_key"] = api_key
        return DataAsset(**dataset_dict)

    def get_data_asset(self):
        """
        Get data asset parameters for given data asset id.

        Refreshes this instance in place with the values returned by the API.
        """
        logger.debug(f"Retrieving data asset from {self.data_asset_url}")
        req = self.get(self.data_asset_url)
        new_comp = self.from_dict(req.json(), self.domain, self.api_key)
        # Refresh this object in place so existing references see the update.
        self.__dict__.update(new_comp.__dict__)

    def delete_data_asset(self):
        """Delete data asset"""
        logger.info(f"Deleting data asset {self.id}")
        self.delete(self.data_asset_url)

    @staticmethod
    def create_data_asset(
        name: str,
        tags: list[str],
        data_source: DataSource,
        mount: str,
        environment: CodeOcean,
        custom_metadata: dict = None,
        description: str = None,
    ):
        """
        Create data asset
        :param name: Data asset name
        :param tags: Data asset tags
        :param data_source: Data source, can be AWSSource, GCPSource, or DataSource
        :param mount: Mountpoint for data asset in capsule
        :param environment: CodeOcean environment to create data asset in.
        :param custom_metadata: Custom metadata values to set
        :param description: Description of data asset
        :return: DataAsset
        """
        input_url = f"{environment.api_url}/data_assets"
        data_source_dict = asdict(data_source)
        # The API expects the source nested under its type key, e.g.
        # {"source": {"aws": {...}}}, with "type" itself removed.
        data_type = data_source_dict.pop("type")
        payload = {
            "name": name,
            "tags": tags,
            "source": {data_type: data_source_dict},
            "mount": mount,
        }
        if custom_metadata:
            payload["custom_metadata"] = custom_metadata
        if description:
            payload["description"] = description
        logger.info(f"Creating data asset from {data_type}")
        response = environment.post(input_url, payload).json()
        new_data_asset = DataAsset.from_dict(
            response, environment.domain, environment.api_key
        )
        return new_data_asset

    def update_metadata(self, updated_parameters: UpdateMetadataParams):
        """
        Update metadata for data asset
        :param updated_parameters: DataAssetUpdateParams with updated values
        :return: Updated DataAsset object
        """
        logger.info(f"Updating metadata for asset {self.id}")
        response = self.put(self.data_asset_url, asdict(updated_parameters)).json()
        new_data_asset = DataAsset.from_dict(response, self.domain, self.api_key)
        return new_data_asset

    def archive_asset(self):
        """
        Archive data asset
        """
        # Fixed copy-paste bug: this previously logged "Unarchiving".
        logger.info(f"Archiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": True})

    def unarchive_asset(self):
        """
        Unarchive data asset
        """
        logger.info(f"Unarchiving asset {self.id}")
        input_url = f"{self.data_asset_url}/archive"
        self.patch(input_url, params={"archive": False})

    @staticmethod
    def search_data_assets(environment: CodeOcean, search_params: SearchParams):
        """
        Search data assets
        :param environment: CodeOcean environment
        :param search_params: DataAssetSearchParams object containing the search parameters
        :return: DataAssetSearchResults containing the found results
        """
        search_params = asdict(search_params)
        logger.info("Searching data assets")
        logger.debug(search_params)

        input_url = f"{environment.api_url}/data_assets/search"
        response = environment.post(input_url, search_params).json()
        search_response = SearchResults(
            has_more=response["has_more"],
            results=[
                DataAsset.from_dict(x, environment.domain, environment.api_key)
                for x in response["results"]
            ],
        )
        return search_response

    def set_permissions(self, permissions: PermissionParams):
        """
        Set permissions on Data Asset
        :param permissions: Permissions to add or update
        """
        logger.info(f"Setting permissions on {self.id}")
        permissions = asdict(permissions)
        logger.debug(permissions)
        input_url = f"{self.data_asset_url}/permissions"
        self.post(input_url, permissions)
DataAsset( *, domain: str, api_key: str, id: str, created: int = 0, description: str = '', files: int = 0, last_used: int = 0, size: int = 0, sourceBucket: Optional[SourceBucket] = None, tags: Optional[list[str]] = <factory>, type: codeoceansdk.DataAsset.type = 'DATA_ASSET_TYPE_DATASET', custom_metadata: dict = <factory>, app_parameters: Optional[list[dict]] = <factory>, provenance: Optional[Provenance] = None, name: str = '', state: codeoceansdk.DataAsset.state = 'DATA_ASSET_STATE_DRAFT')
id: str

Metadata id

created: int = 0

Data asset creation time

description: str = ''

Description of the data asset.

files: int = 0

Number of files in the data asset. Not relevant if the data asset was created with keep_on_external_storage = true and index_data = false.

last_used: int = 0

Time this data asset was last used

size: int = 0

Size in bytes of the data asset. Not relevant if the data asset was created with keep_on_external_storage = true and index_data = false.

sourceBucket: Optional[SourceBucket] = None

Info on bucket from which dataset was created

tags: Optional[list[str]]

Keywords for searching the data asset by.

type: codeoceansdk.DataAsset.type = 'DATA_ASSET_TYPE_DATASET'

Type of the data asset. (DATA_ASSET_TYPE_DATASET, DATA_ASSET_TYPE_RESULT)

custom_metadata: dict

Map of key value pairs, according to custom metadata fields defined by the admin and values that were set by the user

app_parameters: Optional[list[dict]]

Parameters used to generate the data asset

provenance: Optional[Provenance] = None
name: str = ''

Name of dataset

state: codeoceansdk.DataAsset.state = 'DATA_ASSET_STATE_DRAFT'

data asset creation state. Can be one of the following: DATA_ASSET_STATE_DRAFT - the data asset is still being created. DATA_ASSET_STATE_READY - the data asset is ready for use. DATA_ASSET_STATE_FAILED - the data asset creation failed.

@staticmethod
def from_dict(dataset_dict, domain, api_key):
232    @staticmethod
233    def from_dict(dataset_dict, domain, api_key):
234        """
235
236        :param dataset_dict: Dictionary containing Dataset parameters
237        :param domain: Code Ocean Domain
238        :param api_key: API key to access data asset
239        :return: DataAsset
240        """
241        if "sourceBucket" in dataset_dict:
242            dataset_dict["sourceBucket"] = SourceBucket(**dataset_dict["sourceBucket"])
243        if "provenance" in dataset_dict:
244            dataset_dict["provenance"] = Provenance(**dataset_dict["provenance"])
245        dataset_dict["domain"] = domain
246        dataset_dict["api_key"] = api_key
247        return DataAsset(**dataset_dict)
Parameters
  • dataset_dict: Dictionary containing Dataset parameters
  • domain: Code Ocean Domain
  • api_key: API key to access data asset
Returns

DataAsset

def get_data_asset(self):
249    def get_data_asset(self):
250        """
251        Get data asset parameters for given data asset id.
252        """
253        logger.debug(f"Retrieving data asset from {self.data_asset_url}")
254        req = self.get(self.data_asset_url)
255        new_comp = self.from_dict(req.json(), self.domain, self.api_key)
256        self.__dict__.update(new_comp.__dict__)

Get data asset parameters for given data asset id.

def delete_data_asset(self):
258    def delete_data_asset(self):
259        """Delete data asset"""
260        logger.info(f"Deleting data asset {self.id}")
261        self.delete(self.data_asset_url)

Delete data asset

@staticmethod
def create_data_asset( name: str, tags: list[str], data_source: DataSource, mount: str, environment: codeoceansdk.CodeOcean.CodeOcean, custom_metadata: dict = None, description: str = None):
263    @staticmethod
264    def create_data_asset(
265        name: str,
266        tags: list[str],
267        data_source: DataSource,
268        mount: str,
269        environment: CodeOcean,
270        custom_metadata: dict = None,
271        description: str = None,
272    ):
273        """
274        Create data asset
275        :param name: Data asset name
276        :param tags: Data asset tags
277        :param data_source: Data source, can be AWSSource, GCPSource, or DataSource
278        :param mount: Mountpoint for data asset in capsule
279        :param environment: CodeOcean environment to create data asset in.
280        :param custom_metadata: Custom metadata values to set
281        :param description: Description of data asset
282        :return: DataAsset
283        """
284        input_url = f"{environment.api_url}/data_assets"
285        data_source_dict = asdict(data_source)
286        data_type = data_source_dict["type"]
287        del data_source_dict["type"]
288        payload = {
289            "name": name,
290            "tags": tags,
291            "source": {data_type: data_source_dict},
292            "mount": mount,
293        }
294        if custom_metadata:
295            payload["custom_metadata"] = custom_metadata
296        if description:
297            payload["description"] = description
298        logger.info(f"Creating data asset from {data_type}")
299        response = environment.post(input_url, payload).json()
300        new_data_asset = DataAsset.from_dict(
301            response, environment.domain, environment.api_key
302        )
303        return new_data_asset

Create data asset

Parameters
  • name: Data asset name
  • tags: Data asset tags
  • data_source: Data source, can be AWSSource, GCPSource, or DataSource
  • mount: Mountpoint for data asset in capsule
  • environment: CodeOcean environment to create data asset in.
  • custom_metadata: Custom metadata values to set
  • description: Description of data asset
Returns

DataAsset

def update_metadata( self, updated_parameters: UpdateMetadataParams):
305    def update_metadata(self, updated_parameters: UpdateMetadataParams):
306        """
307        Update metadata for data asset
308        :param updated_parameters: DataAssetUpdateParams with updated values
309        :return: Updated DataAsset object
310        """
311        logger.info(f"Updating metadata for asset {self.id}")
312        response = self.put(self.data_asset_url, asdict(updated_parameters)).json()
313        new_data_asset = DataAsset.from_dict(response, self.domain, self.api_key)
314        return new_data_asset

Update metadata for data asset

Parameters
  • updated_parameters: DataAssetUpdateParams with updated values
Returns

Updated DataAsset object

def archive_asset(self):
316    def archive_asset(self):
317        """
318        Archive data asset
319        """
320        logger.info(f"Unarchiving asset {self.id}")
321        input_url = f"{self.data_asset_url}/archive"
322        self.patch(input_url, params={"archive": True})

Archive data asset

def unarchive_asset(self):
324    def unarchive_asset(self):
325        """
326        Unarchive data asset
327        """
328        logger.info(f"Unarchiving asset {self.id}")
329        input_url = f"{self.data_asset_url}/archive"
330        self.patch(input_url, params={"archive": False})

Unarchive data asset

@staticmethod
def search_data_assets( environment: codeoceansdk.CodeOcean.CodeOcean, search_params: SearchParams):
332    @staticmethod
333    def search_data_assets(environment: CodeOcean, search_params: SearchParams):
334        """
335        Search data assets
336        :param environment: CodeOcean environment
337        :param search_params: DataAssetSearchParams object containing the search parameters
338        :return: DataAssetSearchResults containing the found results
339        """
340        search_params = asdict(search_params)
341        logger.info(f"Searching data assets assets")
342        logger.debug(search_params)
343
344        input_url = f"{environment.api_url}/data_assets/search"
345        response = environment.post(input_url, search_params).json()
346        search_response = SearchResults(
347            has_more=response["has_more"],
348            results=[
349                DataAsset.from_dict(x, environment.domain, environment.api_key)
350                for x in response["results"]
351            ],
352        )
353        return search_response

Search data assets

Parameters
  • environment: CodeOcean environment
  • search_params: DataAssetSearchParams object containing the search parameters
Returns

DataAssetSearchResults containing the found results

def set_permissions(self, permissions: PermissionParams):
355    def set_permissions(self, permissions: PermissionParams):
356        """
357        Set permissions on Data Asset
358        :param permissions: Permissions to add or update
359        """
360        logger.info(f"Setting permissions on {self.id}")
361        permissions = asdict(permissions)
362        logger.debug(permissions)
363        input_url = f"{self.data_asset_url}/permissions"
364        self.post(input_url, permissions)

Set permissions on Data Asset

Parameters
  • permissions: Permissions to add or update