|
import numpy as np |
|
import torch |
|
from ase.units import Bohr, Hartree |
|
from torch_geometric.data import Data, InMemoryDataset |
|
from tqdm import trange |
|
|
|
|
|
class Chignolin(InMemoryDataset):
    """In-memory dataset built from a single ``chignolin.npz`` archive.

    The raw archive is expected to provide four arrays:

    * ``Z`` -- atomic numbers of all frames, concatenated along axis 0,
    * ``R`` -- atomic positions of all frames, concatenated along axis 0,
    * ``E`` -- one total energy per frame,
    * ``F`` -- per-atom force array, concatenated like ``R``.

    Every frame is assumed to contain exactly ``num_atoms`` atoms. Each frame
    becomes one ``Data`` object with the fields

    * ``z``   -- atomic numbers (int64),
    * ``pos`` -- positions (float32, stored as found in the archive),
    * ``y``   -- ``(E - sum of per-atom self energies) * Hartree``, shape
      ``(1, 1)``, float32,
    * ``dy``  -- ``-(F * Hartree / Bohr)``, float32.

    ``Hartree`` and ``Bohr`` are the ``ase.units`` constants, so the scaling
    presumably converts atomic units to eV and eV/Angstrom -- this assumes the
    archive stores energies and forces in atomic units (TODO confirm).
    """

    # Reference energy per isolated atom, keyed by atomic number. They are
    # subtracted from the total energy *before* the Hartree scaling, so they
    # are presumably expressed in Hartree as well.
    self_energies = {
        1: -0.496665677271,
        6: -37.8289474402,
        7: -54.5677547104,
        8: -75.0321126521,
        16: -398.063946327,
    }

    # Number of atoms per frame, used to slice the concatenated arrays.
    # Kept as a class attribute so a subclass can override it.
    num_atoms = 166

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        """Create the dataset, processing the raw archive if necessary.

        Args:
            root: Dataset root directory, forwarded to ``InMemoryDataset``.
            transform: Optional callable forwarded to ``InMemoryDataset``.
            pre_transform: Optional callable applied to every ``Data`` object
                in :meth:`process` before it is stored.
            pre_filter: Optional predicate evaluated on every ``Data`` object
                in :meth:`process`; samples for which it returns a falsy
                value are dropped. Defaults to ``None`` (keep everything).
        """
        super().__init__(root, transform, pre_transform, pre_filter=pre_filter)

        # NOTE(review): recent PyTorch releases default ``torch.load`` to
        # ``weights_only=True``, which may refuse pickled ``Data`` objects.
        # The call is left unchanged to stay compatible with older versions.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Names of the raw files expected in the raw directory."""
        return ['chignolin.npz']

    @property
    def processed_file_names(self):
        """Names of the files written to the processed directory."""
        return ['chignolin.pt']

    def process(self):
        """Convert each raw archive into a collated ``(data, slices)`` file."""
        num_atoms = self.num_atoms

        for path, processed_path in zip(self.raw_paths, self.processed_paths):
            # Use the archive as a context manager so its file handle is
            # closed once all arrays have been materialised.
            with np.load(path) as data_npz:
                concat_z = torch.from_numpy(data_npz["Z"]).long()
                concat_positions = torch.from_numpy(data_npz["R"]).float()
                # Keep energies in float64 until the reference energy has
                # been subtracted: total energies are large in magnitude and
                # float32 would lose digits that matter for the difference.
                energies = torch.from_numpy(data_npz["E"]).double()
                concat_forces = torch.from_numpy(data_npz["F"]).float() * Hartree / Bohr

            samples = []
            for index in trange(energies.shape[0]):
                frame = slice(index * num_atoms, (index + 1) * num_atoms)

                z = concat_z[frame]
                ref_energy = torch.tensor(
                    [self.self_energies[int(atom)] for atom in z],
                    dtype=torch.float64,
                ).sum()
                pos = concat_positions[frame, :]
                # Subtract in float64, then store as float32 like the other
                # floating-point fields.
                y = ((energies[index] - ref_energy) * Hartree).float()

                # NOTE(review): ``dy`` is the negated, rescaled ``F`` array;
                # the sign convention must match whatever consumes ``dy``.
                dy = -concat_forces[frame, :]
                data = Data(z=z, pos=pos, y=y.reshape(1, 1), dy=dy)

                # ``pre_filter`` is a predicate: it decides whether the
                # sample is kept, it does not return a replacement sample.
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                samples.append(data)

            data, slices = self.collate(samples)
            torch.save((data, slices), processed_path)