#!/usr/bin/python2 -B
# -*- coding: utf-8; mode: Python; indent-tabs-mode: t; tab-width: 4; python-indent: 4 -*-

# Copyright (C) 2012  Olga Yakovleva <yakovleva.o.v@gmail.com>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import codecs
import sexpr

def format_foma_script(entries):
	"""Return the text of the generated rule script as one unicode string.

	entries is an iterable of sequences.  The first item of each sequence
	is a part-of-speech name and the remaining items are the words listed
	under it.  Names and words must provide the string methods and
	formatting used below (title(), lower(), joining into unicode text).

	The text consists of, in order:
	  * one "define <Name> <word> | <word> ...;" line per entry, with the
	    name title-cased and the words de-duplicated and sorted;
	  * a "define FunctionWord ..." line that is the union of all names;
	  * a "regex" statement with one "<Name> -> <name>" rule per entry,
	    terminated by a rule mapping "\\FunctionWord" to "content".

	NOTE(review): the syntax looks like a foma/xfst script - confirm
	against the tool that consumes the output file.
	"""
	parts_of_speech=[]
	chunks=[]
	for entry in entries:
		pos=entry[0]
		parts_of_speech.append(pos)
		# A set drops repeated words; sorting keeps the output stable.
		words=sorted(set(entry[1:]))
		chunks.append(u"define {} {};\n".format(pos.title(),u" | ".join(words)))
	all_names=u" | ".join(pos.title() for pos in parts_of_speech)
	chunks.append(u"\ndefine FunctionWord {};\n\n".format(all_names))
	chunks.append(u"regex \n")
	for pos in parts_of_speech:
		chunks.append(u"{} -> {} || _ ,, \n".format(pos.title(),pos.lower()))
	# The backslash has to reach the output literally, so it is doubled
	# here instead of relying on "\F" being an unrecognised escape.
	chunks.append(u"\\FunctionWord -> content || _ ;\n")
	return u"".join(chunks)

def main(argv):
	"""Convert the s-expression file argv[1] into the rule script argv[2].

	Arguments after the second one are ignored.  Returns the process exit
	status: 0 on success, 2 when fewer than two file names were given.
	Errors raised while reading or formatting the input propagate to the
	caller.
	"""
	if len(argv)<3:
		prog=argv[0] if argv else "script"
		sys.stderr.write("usage: {} INPUT OUTPUT\n".format(prog))
		return 2
	entries=sexpr.read(argv[1])
	# The list may be wrapped in a "set!" form; the payload is then the
	# third element of that form.
	if entries[0]=="set!":
		entries=entries[2]
	# Build the whole text before opening the output file, so that a
	# malformed input does not leave a truncated file behind.
	text=format_foma_script(entries)
	with codecs.open(argv[2],"wb","utf-8") as f_out:
		f_out.write(text)
	return 0

if __name__=="__main__":
	sys.exit(main(sys.argv))
