wkentaro
11/14/2015 - 7:04 AM

Split the apc-od dataset into train and test sets

Split the apc-od dataset into train and test sets

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import os.path as osp
import shutil

from sklearn.datasets import load_files
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn without model_selection
    from sklearn.cross_validation import train_test_split


def move_files(files, labels, target_names, split_dir):
    """Move each file into ``<split_dir>/<class name>/<basename>``.

    Args:
        files: iterable of source file paths.
        labels: iterable of integer indices into ``target_names``, paired
            positionally with ``files``.
        target_names: sequence mapping a label index to its class name,
            which is used as the sub-directory name.
        split_dir: root directory of the split (e.g. ``'train'``).

    Raises:
        OSError: if a destination directory cannot be created.
    """
    for file_, label in zip(files, labels):
        dest_dir = osp.join(split_dir, target_names[label])
        # shutil.move does not create missing parent directories, so make
        # sure the per-class directory exists first.  try/except instead of
        # exist_ok keeps this working on Python 2 as well.
        try:
            os.makedirs(dest_dir)
        except OSError:
            if not osp.isdir(dest_dir):
                raise
        to_file = osp.join(dest_dir, osp.basename(file_))
        # Single-argument form prints identically on Python 2 and 3.
        print('%s -> %s' % (file_, to_file))
        shutil.move(file_, to_file)


def main():
    """Split the files under ``dataset/`` 80/20 into ``train/`` and ``test/``.

    File paths and labels are taken from ``load_files`` (contents are not
    loaded), split with ``train_test_split(test_size=0.2)``, and the files
    are then moved into per-class sub-directories of each split.
    """
    dataset = load_files('dataset', load_content=False, shuffle=False)
    file_train, file_test, y_train, y_test = train_test_split(
        dataset.filenames, dataset.target, test_size=0.2)

    move_files(file_train, y_train, dataset.target_names, 'train')
    move_files(file_test, y_test, dataset.target_names, 'test')


if __name__ == '__main__':
    main()