#!/usr/local/bin/perl

# Author: TSUCHIYA Masatoshi <tsuchiya@namazu.org>
# Keywords: dictionary

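# Perl script for converting the GENE95 dictionary
#
# GENE95 is an English-Japanese dictionary released by Kurumi on the
# Nifty-Serve service.  Information on how to obtain it can be found on
# the SDIC page:
#
#     http://pine.kuee.kyoto-u.ac.jp/member/tsuchiya/elisp/gene.html
#
# To convert the dictionary to COMPAT format, run:
#
#     nkf -S -e gene.txt | perl gene.perl --compat >gene.dic
#
# To convert the dictionary to SDIC format, run:
#
#     nkf -S -e gene.txt | perl gene.perl >gene.sdic
#
# See sdic.texi for the details of each format.
#
# To convert a COMPAT-format dictionary to SDIC format, run:
#
#    perl gene.perl --compat-to-sdic gene.dic >gene.sdic
#
# To convert an SDIC-format dictionary to COMPAT format, run:
#
#    perl gene.perl --sdic-to-compat gene.sdic >gene.dic
#
# The SDIC format carries more information than COMPAT, so this last
# conversion generally loses information; use it with care.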


# Put STDOUT into binary mode; any exception raised by binmode is swallowed.
eval { binmode(STDOUT); };

# Choose the converter from the first command-line argument.  A recognised
# option is removed from @ARGV so that the converter's <> only sees the
# remaining arguments (or STDIN); anything else selects SDIC output and
# leaves @ARGV untouched.
{
    my %converter_for = (
        '--compat'         => \&compat,
        '--compat-to-sdic' => \&compat_to_sdic,
        '--sdic-to-compat' => \&sdic_to_compat,
    );

    my $option    = defined $ARGV[0] ? $ARGV[0] : '';
    my $converter = $converter_for{$option};

    if ($converter) {
        shift @ARGV;
        $converter->();
    }
    else {
        sdic();
    }
}

# Generate an SDIC-format dictionary from GENE95 text.
#
# Input is read with <> (files named in @ARGV, or STDIN); output goes to
# the currently selected output handle.  Takes no arguments and returns
# nothing useful.
#
# Input layout: two header lines, followed by alternating headword and
# definition lines.  A trailing headword with no definition line is dropped.
#
# Output: the two header lines as "# " comments, then one line per entry,
# sorted by (key, headword):
#     <K>key</K>definition                      when key equals the headword
#     <H>headword</H><K>key</K>definition       otherwise
# The key is the headword lowercased (A-Z only), with every whitespace run
# collapsed to one space and the first " +<digits>" marker removed.
sub sdic {
    # Re-emit the two header lines as comments, normalizing the line
    # terminator (trailing whitespace, including CR, becomes a single LF).
    for ( 1 .. 2 ) {
        my $header = <>;
        $header = '' unless defined $header;    # short input: emit "# \n"
        $header =~ s/\s*$/\n/;
        print "# ", $header;
    }

    my @records;     # lexical, so repeated calls never see stale entries
    my $headword;    # defined while we are waiting for its definition line

    while ( my $text = <> ) {
        $text =~ s/\s+$//;         # strip trailing whitespace / line ending
        $text =~ s/&/&amp;/g;      # escape markup metacharacters; & must
        $text =~ s/</&lt;/g;       # go first so the entities themselves
        $text =~ s/>/&gt;/g;       # are not escaped again

        if ( !defined $headword ) {
            $headword = $text;
            next;
        }

        my $key = $headword;
        $key =~ tr/A-Z/a-z/;
        $key =~ s/\s+/ /g;         # collapse every whitespace run
        $key =~ s/ \+\d+//;        # drop a " +N" marker

        my $entry =
            $key eq $headword
            ? "<K>$key</K>$text"
            : "<H>$headword</H><K>$key</K>$text";

        # Sort prefix and entry are separated by the first literal "<";
        # the prefix cannot contain one because "<" was escaped above.
        push @records, "$key\x00$headword\x00<$entry\n";
        undef $headword;
    }

    for my $record ( sort @records ) {
        my ( undef, $entry ) = split /</, $record, 2;
        print $entry;
    }
}

# Generate a COMPAT-format dictionary from GENE95 text.
#
# Input is read with <> (files named in @ARGV, or STDIN); output goes to
# the currently selected output handle.  Takes no arguments and returns
# nothing useful.
#
# Input layout: two header lines (discarded), followed by alternating
# headword and definition lines.  A trailing headword with no definition
# line is dropped.
#
# Output: one "headword<TAB>definition" line per entry, sorted by the
# headword lowercased (A-Z only) and then by the original headword.
sub compat {
    # Discard the two header lines.
    for ( 1 .. 2 ) {
        my $skipped = <>;
    }

    my @records;     # lexical, so repeated calls never see stale entries
    my $headword;    # defined while we are waiting for its definition line

    while ( my $text = <> ) {
        $text =~ s/\s+$//;             # strip trailing whitespace / newline
        $text =~ s/\t/        /g;      # TAB is the field separator, so
                                       # expand embedded tabs to 8 spaces

        if ( !defined $headword ) {
            $headword = $text;
            next;
        }

        my $sort_key = $headword;
        $sort_key =~ tr/A-Z/a-z/;

        # Layout: <sort prefix> TAB <headword> TAB <definition>.  The
        # prefix is only there to drive the sort and is cut off on output.
        push @records, "$sort_key\x00$headword\x00\t$headword\t$text\n";
        undef $headword;
    }

    for my $record ( sort @records ) {
        my ( undef, $word, $definition ) = split /\t/, $record, 3;
        print "$word\t$definition";
    }
}

# Convert a COMPAT-format dictionary to SDIC format.
#
# Input is read with <> (files named in @ARGV, or STDIN); output goes to
# the currently selected output handle.  Takes no arguments and returns
# nothing useful.
#
# Each non-blank input line is "headword<TAB>definition"; a line without a
# TAB is treated as a headword with an empty definition.  Blank lines are
# skipped.
#
# Output: one line per entry, sorted by (key, headword):
#     <K>key</K>definition                      when key equals the headword
#     <H>headword</H><K>key</K>definition       otherwise
# The key is the headword lowercased (A-Z only), with every whitespace run
# collapsed to one space and the first " +<digits>" marker removed.
sub compat_to_sdic {
    my @records;    # lexical, so repeated calls never see stale entries

    while ( my $text = <> ) {
        $text =~ s/\s+$//;         # strip trailing whitespace / line ending
        next if $text eq '';       # a blank line is not an entry

        $text =~ s/&/&amp;/g;      # escape markup metacharacters; & must
        $text =~ s/</&lt;/g;       # go first so the entities themselves
        $text =~ s/>/&gt;/g;       # are not escaped again

        my ( $headword, $definition ) = split /\t/, $text, 2;
        $definition = '' unless defined $definition;

        my $key = $headword;
        $key =~ tr/A-Z/a-z/;
        $key =~ s/\s+/ /g;         # collapse every whitespace run
        $key =~ s/ \+\d+//;        # drop a " +N" marker

        my $entry =
            $key eq $headword
            ? "<K>$key</K>$definition"
            : "<H>$headword</H><K>$key</K>$definition";

        # Sort prefix and entry are separated by the first literal "<";
        # the prefix cannot contain one because "<" was escaped above.
        push @records, "$key\x00$headword\x00<$entry\n";
    }

    for my $record ( sort @records ) {
        my ( undef, $entry ) = split /</, $record, 2;
        print $entry;
    }
}

# Convert an SDIC-format dictionary to COMPAT format.
#
# Input is read with <> (files named in @ARGV, or STDIN); output goes to
# the currently selected output handle.  Takes no arguments and returns
# nothing useful.
#
# Only lines starting with "<" are treated as entries.  The first <H> or
# <K> element becomes the headword; any <K> elements that follow it are
# discarded, so the conversion is lossy.  A line that starts with "<" but
# does not begin with a complete <H>...</H> or <K>...</K> element is
# skipped with a warning.
#
# Output: one "headword<TAB>definition" line per entry, sorted by the
# headword lowercased (A-Z only) and then by the original headword.
sub sdic_to_compat {
    my @records;    # lexical, so repeated calls never see stale entries

    while ( my $text = <> ) {
        next unless $text =~ /^</;     # comments and other non-entry lines
        $text =~ s/\s+$//;             # strip trailing whitespace / newline

        # Cut the headword element off the front.  The result must be
        # checked: after a failed match $2 would still hold whatever an
        # earlier successful match left behind.
        unless ( $text =~ s!^<([KH])>(.*?)</\1>!! ) {
            warn "sdic_to_compat: skipping malformed entry at input line "
                . $. . "\n";
            next;
        }
        my $headword = $2;

        # Unescape the headword; &amp; goes last so that "&amp;lt;" ends
        # up as the literal text "&lt;".
        $headword =~ s/&lt;/</g;
        $headword =~ s/&gt;/>/g;
        $headword =~ s/&amp;/&/g;

        # Drop the remaining search-key elements; COMPAT has no place
        # for them.
        1 while $text =~ s!^<K>.*</K>!!;

        # What is left is the definition: unescape it the same way.
        $text =~ s/&lt;/</g;
        $text =~ s/&gt;/>/g;
        $text =~ s/&amp;/&/g;
        $text =~ s/\t/        /g;      # TAB is the COMPAT field separator

        my $sort_key = $headword;
        $sort_key =~ tr/A-Z/a-z/;

        # Layout: <sort prefix> TAB <headword> TAB <definition>.  The
        # prefix is only there to drive the sort and is cut off on output.
        push @records, "$sort_key\x00$headword\x00\t$headword\t$text\n";
    }

    for my $record ( sort @records ) {
        my ( undef, $word, $definition ) = split /\t/, $record, 3;
        print "$word\t$definition";
    }
}
