# Split the dataset into train and test sets for apc-od.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import os.path as osp
import shutil

from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_files
def move_files(files, labels, target_names, dest_root):
    """Move each file into ``dest_root/<class name>/<file basename>``.

    Args:
        files: Paths of the files to move.
        labels: Class index for each entry of ``files`` (same order).
        target_names: Class names, indexed by the values in ``labels``.
        dest_root: Directory under which one sub-directory per class is used.
    """
    for file_, label in zip(files, labels):
        to_dir = osp.join(dest_root, target_names[label])
        # shutil.move does not create missing destination directories, so
        # make sure the per-class directory exists before moving into it.
        if not osp.isdir(to_dir):
            os.makedirs(to_dir)
        to_file = osp.join(to_dir, osp.basename(file_))
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s -> %s' % (file_, to_file))
        shutil.move(file_, to_file)


def main():
    """Split the files under ``dataset/`` into ``train/`` and ``test/``.

    20% of the files (``test_size=0.2``) are moved to ``test/``, the rest to
    ``train/``; each file lands in a sub-directory named after its class.
    """
    # NOTE(review): train_test_split is imported from sklearn.cross_validation,
    # which newer scikit-learn releases appear to have replaced with
    # sklearn.model_selection -- confirm against the targeted version.

    # load_content=False: only the file paths and labels are used here.
    dataset = load_files('dataset', load_content=False, shuffle=False)
    file_train, file_test, y_train, y_test = train_test_split(
        dataset.filenames, dataset.target, test_size=0.2)
    move_files(file_train, y_train, dataset.target_names, 'train')
    move_files(file_test, y_test, dataset.target_names, 'test')


if __name__ == '__main__':
    main()