wexlergroup · manishrana7 · May 27, 2025
diff --git a/analysis/crystal.py b/analysis/crystal.py
@@ -8,11 +8,13 @@
 import pandas as pd
 from ase.visualize import view
 from pymatgen.analysis.defects.generators import VacancyGenerator
-from pymatgen.analysis.local_env import CrystalNN, VoronoiNN
+from pymatgen.analysis.local_env import CrystalNN, VoronoiNN, CutOffDictNN
 from pymatgen.core import Species, Structure
 from pymatgen.io.ase import AseAtomsAdaptor
 from pymatgen.analysis.bond_valence import BVAnalyzer, calculate_bv_sum
 from pymatgen.io.vasp import Poscar
+from collections import Counter
+
 
 EB_DICT = {"filepath": "../data/features/Eb.csv", "column_name": "Eb", "comparison": "os"}
 VR_DICT = {"filepath": "../data/features/Vr.csv", "column_name": "Vr", "comparison": "n"}
@@ -90,11 +92,12 @@ def __init__(
             poscar_string: Optional[str] = None,
             pymatgen_structure: Optional[Structure] = None,
             nn_finder_type: str = 'crystalnn', # crystalnn or voronoinn
+            cut_off_dict: Optional[dict] = None,
             use_weights: Optional[bool] = False,
             species_symbol: Optional[str] = "O",
             n: Optional[int] = None,  # attempt to allow for pre-emptive indexing
             oxi_states_override = None,
-    ):
+            ):
 
         """
         Initializes the Crystal object.
@@ -113,8 +116,10 @@ def __init__(
         """
         if filepath:
             self.structure = Structure.from_file(filepath)
-            if filepath.endswith(".txt"):  # Assuming your POSCAR files have a .txt extension
-                self.structure.add_oxidation_state_by_guess()
+            ## -------> Commented by Manish
+            # if filepath.endswith(".txt"):  # Assuming your POSCAR files have a .txt extension
+            #     self.structure.add_oxidation_state_by_guess()
+            ## <------- I think the guessed oxidation states get replaced anyway, which makes the guessing here redundant
         elif poscar_string:
             self.structure = Structure.from_str(poscar_string, fmt="poscar") #giving nan for binding energies, fix
         elif pymatgen_structure:
@@ -126,9 +131,10 @@ def __init__(
 
         if nn_finder_type.lower() == 'voronoinn':
             self.nn_finder = VoronoiNN()
+        elif nn_finder_type.lower() == 'cutoffdictnn':  # must call lower(); the bare bound method never equals a str
+            self.nn_finder = CutOffDictNN(cut_off_dict=cut_off_dict)
         else: # default to crystalnn
-            self.nn_finder = CrystalNN()
-
+            self.nn_finder = CrystalNN()   
 
 
         self.use_weights = use_weights
@@ -163,12 +169,11 @@ def guess_oxidation_states(self, oxi_states_override=None):
                 self.structure.add_oxidation_state_by_guess()
 
             if not all(element.oxi_state != 0 for element in self.structure.species):
-                self.structure = self.structure.add_charges_from_oxi_state_guesses(
-                oxi_states_override=oxi_states_override,
-                target_charge=0,
-                all_oxi_states=False,
-                max_sites=None
-            )
+                self.structure = self.structure.add_charges_from_oxi_state_guesses(oxi_states_override=oxi_states_override,
+                                                                                   target_charge=0,
+                                                                                   all_oxi_states=False,
+                                                                                   max_sites=None
+                                                                                   )
             # potential specification to decide which method to use
             if not all(element.oxi_state != 0 for element in self.structure.species): # alternative method 2
                 raise ValueError("Oxidation states could not be guessed for all elements in the structure.")

diff --git a/analysis/test_CCTM/CCTM_species.py b/analysis/test_CCTM/CCTM_species.py
@@ -0,0 +1,81 @@
+from pymatgen.io.cif import CifParser, CifWriter
+from pymatgen.core.structure import Structure
+from pathlib import Path
+import sys
+sys.path.append(str(Path(__file__).parent.parent.parent))
+from analysis.crystal import Crystal
+import numpy as np
+import pandas as pd
+import math
+import os
+from datetime import datetime
+
+# date_str = datetime.now().strftime("%y%m%d")    # 241223
+# date_str = datetime.now().strftime("%y%b%d")    # 24Dec23
+date_str = datetime.now().strftime("%y-%b-%d")    # e.g. 24-Dec-23 (%y is the two-digit year)
+
+M = 0  # interpolated into the CONTCAR path below
+N = 18  # interpolated into the CONTCAR path below
+
+st = Structure.from_file(f"/Users/manish/PycharmProjects/Proj_CCTM4STCH/2_DFT/1_data/"  # NOTE(review): machine-specific absolute path
+                         f"M{M}_N{N}/2_vacancy/0_bulk_structure/CONTCAR_bulk_M{M}_N{N}")
+
+test_cif = f"./junk_{date_str}.cif"  # not referenced elsewhere in the visible part of this script
+
+
+def get_unique_values_from_dicts(dict_list, key):
+    """Return the distinct values stored under ``key``, in first-seen order.
+
+    Dicts in ``dict_list`` that lack ``key`` are skipped; values must be hashable.
+    """
+    distinct, already_seen = [], set()
+    for entry in dict_list:
+        if key in entry and (key, entry[key]) not in already_seen:
+            already_seen.add((key, entry[key]))
+            distinct.append(entry[key])
+    return distinct
+
+def get_species_data():
+    """Tabulate Eb and Vr per cation species over all Ce/Mn oxidation-state pairs.
+
+    If ``species_data_<date_str>.csv`` exists it is read, de-duplicated,
+    printed and returned instead of recomputing. Otherwise the module-level
+    structure ``st`` is re-decorated for each pair and passed to ``Crystal``.
+    """
+    print()
+    cache_csv = f"species_data_{date_str}.csv"
+    if os.path.exists(cache_csv):
+        cached = pd.read_csv(cache_csv).drop_duplicates()
+        print(cached)
+        return cached
+
+    fixed_states = {"Ca": 2, "Ti": 4, "O": -2}
+    label_fmt = {"Ca": "Ca{}+", "Ce": "Ce{}+", "Ti": "Ti{}+", "Mn": "Mn{}+"}
+    rows = []
+    for ce_state in [3, 4]:
+        for mn_state in [2, 3, 4]:
+            print(f"Ce{ce_state}, Mn{mn_state}")
+            states = {**fixed_states, "Ce": ce_state, "Mn": mn_state}
+            st.add_oxidation_state_by_element(states)  # return value unused; st itself is passed on below
+            crystal = Crystal(pymatgen_structure=st, nn_finder_type="voronoinn")
+            eb = crystal.bond_dissociation_enthalpies
+            vr = crystal.reduction_potentials
+            for elem in ("Ca", "Ce", "Ti", "Mn"):
+                label = label_fmt[elem].format(states[elem])
+                try:
+                    # First distinct value wins; an empty result raises StopIteration.
+                    eb_first = next(iter(get_unique_values_from_dicts(eb, label)))
+                    vr_first = next(iter(get_unique_values_from_dicts(vr, label)))
+                    rows.append([label, eb_first, vr_first])
+                    print(label)
+                except (StopIteration, IndexError):
+                    print(f"Warning: Data not found for {label}")
+
+    table = pd.DataFrame(rows, columns=["species", "eb", "vr"]).sort_values(by="species").drop_duplicates()
+    print(table)
+    # Writing the CSV is disabled (table.to_csv(cache_csv, index=False)), so the cache branch needs a pre-existing file.
+    return table
+
+get_species_data()  # runs whenever this module is executed or imported; there is no __main__ guard
+# print(df)
+# get_all()
diff --git a/analysis/test_CCTM/features_sumEb_and_maxVr.csv b/analysis/test_CCTM/features_sumEb_and_maxVr.csv
@@ -0,0 +1,16 @@
+nCa,nCe,nTi,nMn,env,sigma_eb,max_vr,max_vr_ion
+4,0,2,0,0-0,13.958139994832159,-1.9122077365342869,Ti4+
+4,0,1,1,0-1,12.90561958532208,-0.433744681701679,Mn4+
+4,0,0,2,0-2,11.853099175812002,-0.433744681701679,Mn4+
+3,1,2,0,1-0,15.045808865468798,-1.9122077365342869,Ti4+
+3,1,1,1,1-1,13.99328845595872,-0.433744681701679,Mn4+
+3,1,0,2,1-2,12.940768046448643,-0.433744681701679,Mn4+
+2,2,2,0,2-0,16.133477736105437,-1.9122077365342869,Ti4+
+2,2,1,1,2-1,15.080957326595358,-0.433744681701679,Mn4+
+2,2,0,2,2-2,14.02843691708528,-0.433744681701679,Mn4+
+1,3,2,0,3-0,17.221146606742074,-1.9122077365342869,Ti4+
+1,3,1,1,3-1,16.168626197231998,-0.433744681701679,Mn4+
+1,3,0,2,3-2,15.116105787721917,-0.433744681701679,Mn4+
+0,4,2,0,4-0,18.308815477378715,-1.9122077365342869,Ti4+
+0,4,1,1,4-1,17.256295067868635,-0.433744681701679,Mn4+
+0,4,0,2,4-2,16.203774658358558,-0.433744681701679,Mn4+
diff --git a/analysis/test_CCTM/script_CCTM.py b/analysis/test_CCTM/script_CCTM.py
@@ -0,0 +1,165 @@
+from pymatgen.io.cif import CifParser, CifWriter
+from pymatgen.core.structure import Structure
+from pathlib import Path
+import sys
+sys.path.append(str(Path(__file__).parent.parent.parent))
+from analysis.crystal import Crystal
+import numpy as np
+import pandas as pd
+import math
+import os
+from datetime import datetime
+
+# date_str = datetime.now().strftime("%y%m%d")    # 241222
+# date_str = datetime.now().strftime("%y%b%d")    # 24Dec22
+date_str = datetime.now().strftime("%y-%b-%d")    # e.g. 24-Dec-22 (%y is the two-digit year)
+
+M = 0  # interpolated into the CONTCAR path below
+N = 18  # interpolated into the CONTCAR path below
+
+st = Structure.from_file(f"/Users/manish/PycharmProjects/Proj_CCTM4STCH/2_DFT/1_data/"  # NOTE(review): machine-specific absolute path
+                         f"M{M}_N{N}/2_vacancy/0_bulk_structure/CONTCAR_bulk_M{M}_N{N}")
+
+cif_junk = f"./test_{date_str}.cif"  # scratch CIF that the functions below write and then parse
+def get_all():
+    """Build per-site sum(Eb) / max(Vr) features and write them to CSV.
+
+    Decorates the module-level structure ``st`` with fixed oxidation states,
+    round-trips it through a scratch CIF (re-read with ``primitive=True``) and
+    analyses it with ``Crystal`` using the "voronoinn" neighbour finder. The
+    de-duplicated table is saved as ``features_sumEb_and_maxVr.csv`` in the
+    current working directory and printed.
+
+    Raises:
+        FileNotFoundError: if the scratch CIF is missing after being written.
+    """
+    oxi_state_override = {"Ca": 2, "Ce": 3, "Ti": 4, "Mn": 4, "O": -2}
+    st.add_oxidation_state_by_element(oxi_state_override)
+
+    # Write the scratch CIF to the same resolved path it is read back from.
+    # Writing relative to the CWD while looking next to this script breaks
+    # (or reads a stale file) when the script is run from another directory.
+    cif_file_path = Path(__file__).parent / f"{cif_junk}"
+    print(cif_file_path)
+    cif_file_path = cif_file_path.resolve()
+    CifWriter(st).write_file(str(cif_file_path))
+    if not cif_file_path.is_file():
+        raise FileNotFoundError(f"The CIF file does not exist: {cif_file_path}")
+
+    structure = CifParser(cif_file_path).parse_structures(primitive=True)[0]
+    crystal = Crystal(pymatgen_structure=structure, nn_finder_type="voronoinn")
+
+    # Remove the "O2-" entry from every per-site dict so only the other species remain.
+    cn = crystal.cn_dicts
+    for cdict in cn:
+        cdict.pop("O2-", None)
+    eb = crystal.bond_dissociation_enthalpies
+    for edict in eb:
+        edict.pop("O2-", None)
+    vr = crystal.reduction_potentials
+    for vdict in vr:
+        vdict.pop("O2-", None)
+
+    Ce_specie = f"Ce{oxi_state_override['Ce']}+"
+    Mn_specie = f"Mn{oxi_state_override['Mn']}+"
+    print("Type of eb:", type(eb))
+    print("Type of vr:", type(vr), len(vr))
+
+    features = []
+    # ``site_cn`` is used as the loop variable so the builtin ``dict`` is not shadowed.
+    for index, site_cn in enumerate(cn):
+        # A missing count is filled in as 4 minus its partner (Ca/Ce) or 2 minus its partner (Ti/Mn).
+        nCa = site_cn.get("Ca2+", 4 - site_cn.get(Ce_specie, 0))
+        nCe = site_cn.get(Ce_specie, 4 - site_cn.get("Ca2+", 0))
+        nTi = site_cn.get("Ti4+", 2 - site_cn.get(Mn_specie, 0))
+        nMn = site_cn.get(Mn_specie, 2 - site_cn.get("Ti4+", 0))
+        env = f"{nCe}-{nMn}"
+        eb_d = eb[index]
+        vr_d = vr[index]
+        sigma_eb = (nCa * eb_d.get("Ca2+", 0) + nCe * eb_d.get(Ce_specie, 0) +
+                    nTi * eb_d.get("Ti4+", 0) + nMn * eb_d.get(Mn_specie, 0))
+        max_vr = max(vr_d.values())
+        max_vr_species = max(vr_d, key=vr_d.get)
+        features.append([nCa, nCe, nTi, nMn, env, sigma_eb, max_vr, max_vr_species])
+
+    dfff = pd.DataFrame(features, columns=["nCa", "nCe", "nTi", "nMn", "env", "sigma_eb", "max_vr", "max_vr_ion"])
+    ddff = dfff.drop_duplicates().sort_values(by="env", ascending=True)
+    ddff.to_csv("features_sumEb_and_maxVr.csv", index=False)
+    print(ddff)
+
+
+def get_unique_values_from_dicts(dict_list, key):
+    """Return the distinct values stored under ``key``, in first-seen order.
+
+    Dicts in ``dict_list`` that lack ``key`` are skipped; values must be hashable.
+    """
+    distinct, already_seen = [], set()
+    for entry in dict_list:
+        if key in entry and (key, entry[key]) not in already_seen:
+            already_seen.add((key, entry[key]))
+            distinct.append(entry[key])
+    return distinct
+
+def get_species_data():
+    """Collect Eb and Vr for each cation species over Ce/Mn oxidation-state pairs.
+
+    If ``species_data_2024Dec23.csv`` exists in the working directory it is
+    read, de-duplicated, printed and returned. Otherwise every (Ce, Mn) pair
+    from ``Ce_OS`` x ``Mn_OS`` is applied to the module-level structure ``st``,
+    which is round-tripped through a scratch CIF and analysed with ``Crystal``.
+
+    Returns:
+        pandas.DataFrame with columns ``species``, ``eb`` and ``vr``.
+
+    Raises:
+        FileNotFoundError: if the scratch CIF is missing after being written.
+        IndexError: if a species has no Eb or Vr entry in the results.
+    """
+    print()
+    Mn_OS = [2, 3, 4]
+    Ce_OS = [3, 4]
+    save_file = "species_data_2024Dec23.csv"
+
+    # Reuse a previously saved table when one is present.
+    if os.path.exists(save_file):
+        df = pd.read_csv(save_file)
+        dff = df.drop_duplicates()
+        print(dff)
+        return dff
+
+    species_data = []
+    # Iterate the full cross product. zip(Ce_OS, Mn_OS) stops at the shorter
+    # list, so it only visited (3, 2) and (4, 3) and never evaluated Mn4+.
+    for ceos, mnos in [(ce, mn) for ce in Ce_OS for mn in Mn_OS]:
+        print(f"Ce{ceos}, Mn{mnos}")
+        oxi_state_override = {"Ca": 2, "Ce": ceos, "Ti": 4, "Mn": mnos, "O": -2}
+        st.add_oxidation_state_by_element(oxi_state_override)
+
+        # Write the scratch CIF to the same resolved path it is read back from;
+        # a CWD-relative write would not match this script-relative lookup.
+        cif_file_path = (Path(__file__).parent / f"{cif_junk}").resolve()
+        CifWriter(st).write_file(str(cif_file_path))
+        if not cif_file_path.is_file():
+            raise FileNotFoundError(f"The CIF file does not exist: {cif_file_path}")
+
+        structure = CifParser(cif_file_path).parse_structures(primitive=True)[0]
+        crystal = Crystal(pymatgen_structure=structure, nn_finder_type="voronoinn")
+
+        eb = crystal.bond_dissociation_enthalpies
+        vr = crystal.reduction_potentials
+
+        for elem in ("Ca", "Ce", "Ti", "Mn"):
+            species = f"{elem}{oxi_state_override[elem]}+"
+            # The first distinct value found for the species is recorded.
+            eb_value = get_unique_values_from_dicts(eb, species)[0]
+            vr_value = get_unique_values_from_dicts(vr, species)[0]
+            species_data.append([species, eb_value, vr_value])
+
+    print(species_data)
+    df = pd.DataFrame(species_data, columns=["species", "eb", "vr"])
+    dff = df.drop_duplicates()
+    print(dff)
+    # Saving stays disabled: dff.to_csv(save_file, index=False)
+    return dff
+
+df = get_species_data()  # runs whenever this module is executed or imported; there is no __main__ guard