Here I show how to preprocess time-series data from an MD simulation using scikit-learn's train_test_split function
import pandas as pd
from pandas import read_csv
# Load the input table (this can be, e.g., the COLVAR file).
# The file is parsed as whitespace-delimited. The previous call passed both
# sep=',' and delim_whitespace=True; recent pandas versions reject that
# combination with a ValueError, and delim_whitespace itself is deprecated
# in favour of sep=r'\s+'.
df = pd.read_csv('angles_sincos', header=0, sep=r'\s+')
df  # notebook-style display of the frame; has no effect in a plain script
# Convert to a NumPy array: every column except the last is used as a
# feature, and the last column is used as the target.
data = df.to_numpy()
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)
(669019, 27) (669019,)
# Import Sklearn module
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows as a test set; the fixed random_state makes the
# split reproducible from run to run.
split_arrays = train_test_split(X, y, test_size=0.33, random_state=1)
X_train, X_test, y_train, y_test = split_arrays
# Report the shapes in the order X_train, X_test, y_train, y_test.
print(*(part.shape for part in split_arrays))
(448242, 27) (220777, 27) (448242,) (220777,)