lnicola
6/24/2014 - 9:42 AM

A data generator for regular expressions

A data generator for regular expressions

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Modulus applied to the random draw for the `+` and `*` quantifiers in
// compile_impl: `*` repeats 0..9 times and `+` repeats 1..10 times.
constexpr int repetition_limit{10};

// Invokes the callable `f` exactly `count` times, one call after another.
// A count of zero performs no calls.
template<typename F>
void do_times(size_t count, const F &f)
{
	for (size_t done = 0; done != count; ++done)
	{
		f();
	}
}

// Compiles the pattern starting at `p` into a generator that writes one
// string matching it to std::cout every time it is called.
//
// Supported syntax: literal characters, grouping with `(` ... `)`,
// alternation with `|`, and the postfix quantifiers `+`, `*` and `?`.
// A quantifier that has nothing to repeat (start of the pattern, right after
// `(` or `|`) is emitted as a literal character.
//
// p:   cursor into a NUL-terminated pattern; advanced while parsing. On
//      return it points at the `)` that ended this (sub)pattern or at the
//      terminating NUL.
// rng: source of integers used to pick alternatives and repeat counts. It is
//      copied into each closure that uses it, so a stateful rng advances
//      independently in every copy.
//
// Returns the generator for the parsed (sub)pattern.
std::function<void ()> compile_impl(const char *&p, const std::function<int ()> &rng)
{
	// One sequence of generators per `|`-separated alternative.
	std::vector<std::vector<std::function<void ()>>> alternatives(1);

	for (; *p && *p != ')'; p++)
	{
		// Re-fetched on every iteration because emplace_back below may
		// reallocate the outer vector.
		auto &sequence = alternatives.back();
		const bool is_quantifier = *p == '+' || *p == '*' || *p == '?';

		if (*p == '(')
		{
			sequence.push_back(compile_impl(++p, rng));

			// An unterminated group leaves p on the NUL. Stop here so the
			// loop increment cannot step past the end of the pattern.
			if (!*p)
				break;
		}
		else if (*p == '|')
			alternatives.emplace_back();
		else if (is_quantifier && !sequence.empty())
		{
			const unsigned min = *p == '+' ? 1u : 0u;
			const unsigned max = *p == '?' ? 2u : static_cast<unsigned>(repetition_limit);

			// [=] stores a copy of the generator being repeated, so it is
			// safe to overwrite the vector slot with the wrapping closure.
			const auto &repeated = sequence.back();

			// The remainder is taken on the unsigned value so that a negative
			// rng() result cannot turn into an enormous repeat count.
			sequence.back() = [=] { do_times(min + static_cast<unsigned>(rng()) % max, repeated); };
		}
		else
		{
			// Literal character (including a quantifier with nothing to repeat).
			auto ch = *p;
			sequence.emplace_back([=] { std::cout << ch; });
		}
	}

	return [=] {
		// rng is only consulted when there is an actual choice to make.
		for (auto &gen : alternatives[alternatives.size() == 1 ? 0 : rng() % alternatives.size()])
			gen();
	};
}

// Builds a generator for the whole NUL-terminated pattern `p`, drawing its
// random integers from `rng`. Each call of the returned function writes one
// generated string to std::cout.
std::function<void ()> compile(const char *p, const std::function<int ()> &rng)
{
	// compile_impl advances its cursor argument, so hand it a local copy.
	const char *cursor = p;
	auto generator = compile_impl(cursor, rng);
	return generator;
}

// Returns a deterministic stand-in for a random number generator: the first
// call yields 0 and every later call yields the previous value plus one.
// Each copy of the returned function carries its own counter.
std::function<int ()> make_counter()
{
	unsigned next = 0U;
	return [next]() mutable {
		const unsigned current = next;
		++next;
		return current;
	};
}

// Reads one pattern line from standard input, compiles it, and prints ten
// generated strings, one per line.
int main()
{
	std::string pattern;
	std::getline(std::cin, pattern);

	//const auto generate = compile(pattern.c_str(), rand); // if you prefer
	const auto generate = compile(pattern.c_str(), make_counter());

	int remaining = 10;
	while (remaining-- > 0)
	{
		generate();
		std::cout << std::endl;
	}
}