-
Notifications
You must be signed in to change notification settings - Fork 23
#330 - Enable deep copy #331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| TypeCheckError, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| TypeSystem, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| TypeSystemMode, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| load_typesystem, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| _validator_optional_string = validators.optional(validators.instance_of(str)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -832,6 +833,119 @@ def _copy(self) -> "Cas": | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| result._xmi_id_generator = self._xmi_id_generator | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return result | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def deep_copy(self, copy_typesystem: bool = False) -> "Cas": | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Create and return a deep copy of this CAS object. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| All feature structures, views, and sofas are copied. If `copy_typesystem` is True, the typesystem is also deep-copied; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| otherwise, the original typesystem is shared between the original and the copy. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Args: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| copy_typesystem (bool): Whether to copy the original typesystem or not. If True, the typesystem is deep-copied. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Returns: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Cas: A deep copy of this CAS object. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ts = self.typesystem | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if copy_typesystem: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ts = self.typesystem.to_xml() | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ts = load_typesystem(ts) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cas_copy = Cas(ts, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| document_language=self.document_language, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| lenient=self._lenient, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_mime=self.sofa_mime, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cas_copy._views = {} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cas_copy._sofas = {} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| for sofa in self.sofas: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_copy = Sofa( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofaID=sofa.sofaID, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofaNum=sofa.sofaNum, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type=ts.get_type(sofa.type.name), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| xmiID=sofa.xmiID, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_copy.mimeType = sofa.mimeType | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_copy.sofaArray = sofa.sofaArray | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_copy.sofaString = sofa.sofaString | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sofa_copy.sofaURI = sofa.sofaURI | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cas_copy._sofas[sofa_copy.sofaID] = sofa_copy | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cas_copy._views[sofa_copy.sofaID] = View(sofa=sofa_copy) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # removes the _IntialView created with the initialization of the copied CAS | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # removes the _IntialView created with the initialization of the copied CAS | |
| # removes the _InitialView created with the initialization of the copied CAS |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes!
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When copying primitive collections (line 899), the code directly assigns elements from the original collection to the copy. This creates a shallow copy of the elements list. If the elements are mutable (e.g., lists of bytes), modifications to one could affect the other. Consider creating a deep copy of the elements: fs_copy[feature.name].elements = list(fs.get(feature.name).elements) or using copy.deepcopy.
| fs_copy[feature.name].elements = fs.get(feature.name).elements | |
| fs_copy[feature.name].elements = list(fs.get(feature.name).elements) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes!
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When copying arrays (line 904), the code doesn't handle None elements properly. If an element in the array is None, it will be skipped when building the referenced_list, but the copied array won't include None at that position. This could result in arrays with different lengths or element orders than the original. Consider preserving None elements or storing their positions.
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code doesn't handle the case where a primitive collection feature value (line 897-899) or an array feature value (line 900-908) could be None. If fs.get(feature.name) returns None for these feature types, the code will fail with an AttributeError when trying to access .elements. Add a check for None before accessing .elements.
| fs_copy[feature.name] = ts.get_type(feature.rangeType.name)() | |
| fs_copy[feature.name].elements = fs.get(feature.name).elements | |
| elif ts.is_array(feature.rangeType): | |
| fs_copy[feature.name] = ts.get_type(TYPE_NAME_FS_ARRAY)() | |
| # collect referenced xmiIDs for mapping later | |
| referenced_list = [] | |
| for item in fs[feature.name].elements: | |
| if hasattr(item, 'xmiID') and item.xmiID is not None: | |
| referenced_list.append(item.xmiID) | |
| referenced_arrays.setdefault(fs.xmiID, {}) | |
| referenced_arrays[fs.xmiID][feature.name] = referenced_list | |
| original_value = fs.get(feature.name) | |
| if original_value is not None: | |
| fs_copy[feature.name] = ts.get_type(feature.rangeType.name)() | |
| fs_copy[feature.name].elements = original_value.elements | |
| else: | |
| fs_copy[feature.name] = None | |
| elif ts.is_array(feature.rangeType): | |
| original_value = fs.get(feature.name) | |
| if original_value is not None: | |
| fs_copy[feature.name] = ts.get_type(TYPE_NAME_FS_ARRAY)() | |
| # collect referenced xmiIDs for mapping later | |
| referenced_list = [] | |
| for item in original_value.elements: | |
| if hasattr(item, 'xmiID') and item.xmiID is not None: | |
| referenced_list.append(item.xmiID) | |
| referenced_arrays.setdefault(fs.xmiID, {}) | |
| referenced_arrays[fs.xmiID][feature.name] = referenced_list | |
| else: | |
| fs_copy[feature.name] = None |
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code checks if fs[feature.name] has an xmiID attribute (line 913), but doesn't handle the case where fs[feature.name] could be None. This will cause an error. Add a None check before accessing attributes: if fs[feature.name] is not None and hasattr(...)
| if hasattr(fs[feature.name], 'xmiID') and fs[feature.name].xmiID is not None: | |
| references.setdefault(feature.name, []) | |
| references[feature.name].append((fs.xmiID, fs[feature.name].xmiID)) | |
| feature_value = fs[feature.name] | |
| if feature_value is not None and hasattr(feature_value, 'xmiID') and feature_value.xmiID is not None: | |
| references.setdefault(feature.name, []) | |
| references[feature.name].append((fs.xmiID, feature_value.xmiID)) |
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Grammatical error in warning message: "was and not copied" should be "was not copied"
| warnings.warn(f"Original non-primitive feature \"{feature.name}\" was and not copied from feature structure {fs.xmiID}.") | |
| warnings.warn(f"Original non-primitive feature \"{feature.name}\" was not copied from feature structure {fs.xmiID}.") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes!
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The deep_copy method doesn't handle FSList (linked list) types. While it handles primitive collections and FSArrays, FSList types (like NonEmptyFSList) are not explicitly handled in the feature copying logic (lines 894-918). These will fall into the else clause on line 912, where they'll be treated as single references, which is incorrect for list structures. Consider adding explicit handling for FSList types similar to how FSArray is handled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@reckart: I haven't seen a CAS containing an FSList yet. Do you know where I could find one?
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On line 933, the code assumes all reference_IDs in referenced_list exist in all_copied_fs, but doesn't handle the case where a reference might be missing. This will raise a KeyError. Consider adding error handling similar to lines 925-928, or filter out missing references.
| elements = [all_copied_fs[reference_ID] for reference_ID in referenced_list] | |
| all_copied_fs[current_ID][feature].elements = elements | |
| elements = [] | |
| for reference_ID in referenced_list: | |
| try: | |
| elements.append(all_copied_fs[reference_ID]) | |
| except KeyError: | |
| warnings.warn( | |
| f"Reference {reference_ID} not found for feature '{feature}' of feature structure {current_ID}" | |
| ) | |
| try: | |
| all_copied_fs[current_ID][feature].elements = elements | |
| except KeyError: | |
| warnings.warn( | |
| f"Feature '{feature}' or feature structure {current_ID} not found when setting array references" | |
| ) |
Copilot
AI
Jan 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable 'feature_structures' is created on line 937 but never used. It appears this was meant to iterate over the sorted list, but line 938 iterates over all_copied_fs.values() instead, which is unsorted. This could lead to issues if feature structures need to be added in a specific order.
| for item in all_copied_fs.values(): | |
| for item in feature_structures: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but I would remove the unused variable instead.
reckart marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| AnnotationHasNoSofa, | ||
| ) | ||
| from tests.fixtures import * | ||
| from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator | ||
|
|
||
| # Cas | ||
|
|
||
|
|
@@ -540,3 +541,64 @@ def test_covered_text_on_annotation_without_sofa(): | |
|
|
||
| with pytest.raises(AnnotationHasNoSofa): | ||
| ann.get_covered_text() | ||
|
|
||
|
|
||
| def test_deep_copy_without_typesystem(small_xmi, small_typesystem_xml): | ||
| org = load_cas_from_xmi(small_xmi, typesystem=load_typesystem(small_typesystem_xml)) | ||
| copy = org.deep_copy(copy_typesystem=False) | ||
|
|
||
| assert org != copy | ||
| assert len(copy.to_json(pretty_print=True)) == len(org.to_json(pretty_print=True)) | ||
| assert copy.to_json(pretty_print=True) == org.to_json(pretty_print=True) | ||
|
|
||
| assert org.typesystem == copy.typesystem | ||
|
|
||
|
|
||
| def test_deep_copy_with_typesystem(small_xmi, small_typesystem_xml): | ||
| org = load_cas_from_xmi(small_xmi, typesystem=load_typesystem(small_typesystem_xml)) | ||
| copy = org.deep_copy(copy_typesystem=True) | ||
|
|
||
| assert org != copy | ||
| assert len(copy.to_json(pretty_print=True)) == len(org.to_json(pretty_print=True)) | ||
| assert copy.to_json(pretty_print=True) == org.to_json(pretty_print=True) | ||
|
|
||
|
|
||
| assert org.typesystem != copy.typesystem | ||
| assert len(org.typesystem.to_xml()) == len(copy.typesystem.to_xml()) | ||
| assert org.typesystem.to_xml() == copy.typesystem.to_xml() | ||
|
|
||
|
|
||
| def test_random_multi_type_random_deep_copy(): | ||
| generator = MultiTypeRandomCasGenerator() | ||
| for i in range(0, 10): | ||
| generator.size = (i + 1) * 10 | ||
| generator.type_count = i + 1 | ||
| typesystem = generator.generate_type_system() | ||
| org = generator.generate_cas(typesystem) | ||
| print(f"CAS size: {sum(len(view.get_all_annotations()) for view in org.views)}") | ||
| copy = org.deep_copy(copy_typesystem=True) | ||
|
|
||
| org_text = org.to_xmi(pretty_print=True) | ||
| copy_text = copy.to_xmi(pretty_print=True) | ||
|
|
||
| assert org != copy | ||
| assert len(org_text) == len(copy_text) | ||
| assert org_text == copy_text | ||
|
|
||
|
|
||
| def test_random_multi_feature_deep_copy(): | ||
| generator = MultiFeatureRandomCasGenerator() | ||
| for i in range(0, 10): | ||
| generator.size = (i + 1) * 10 | ||
| typesystem = generator.generate_type_system() | ||
| org = generator.generate_cas(typesystem) | ||
| print(f"CAS size: {sum(len(view.get_all_annotations()) for view in org.views)}") | ||
| copy = org.deep_copy(copy_typesystem=True) | ||
|
|
||
| org_text = org.to_xmi(pretty_print=True) | ||
| copy_text = copy.to_xmi(pretty_print=True) | ||
|
|
||
| assert org != copy | ||
| assert len(org_text) == len(copy_text) | ||
| assert org_text == copy_text | ||
|
|
||
|
Comment on lines
+546
to
+604
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On line 852, document_language is read from self.document_language, which accesses the document annotation. If the original CAS doesn't have a document annotation, this will create one, which might not be desired behavior during copying. Consider handling the case where document_language is None or catching exceptions when the document annotation doesn't exist.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see a problem here, given that document_language defaults to None on initiation of CAS if not supplied with an actual value.