LL(1) Regular Expression Parser in PerlWhile I am not particularly proud of this LL(1) hack, nor am I completely sure it works 100%, I still want to see how this blog handles code posts. This is available in my cvs repository under the module "regexll". I'll post a LR(1) parser after Thursday. The parser table is hard coded, but I did not go above and beyond in the assignement this time. If I did, I would have computed FIRST and FOLLOW positions from the give left-recursion-free RE grammar, then would have built the parser table from FIRST and FOLLOW. Note: this doesn't handle Perl regex syntactical sugar, just the traditional notation. It also doesn't do anything but parse, but that could be easily changed...enjoy :).
#!/usr/bin/env perl
# RegEx LL(1) Parser
$^W++;
use strict;
use Data::Dumper;
#
# Parse table is hard coded (easy way out, but this could be
# created dynamically - including FIRST and FOLLOW - pretty
# easily given a grammar)
#
my %LL_TABLE = (
R => {id=>'O','('=>'O'},
O => {id=>'C O_prime','('=>'C O_prime'},
O_prime => {'|'=>'| C O_prime',')'=>'| C O_prime','$'=>'@'},
C => {id=>'S C_prime','('=>'S C_prime'},
C_prime => {'|'=>'. S C_prime','.'=>'. S C_prime',')'=>'. S C_prime','$'=>'@'},
S => {id=>'L S_prime','('=>'L S_prime'},
S_prime => {'*'=>'* S_prime',')'=>'* S_prime','$'=>'@'},
L => {id=>'id','('=>'( R )', '$'=>'@'},
);
# Loop over test data after __DATA__
while () {
print "################\n$_################\n";
parse($_);
print "\n";
}
# print out parse table
print "\n##LL(1) Parse Table##\n";
foreach my $A (keys(%LL_TABLE)) {
print "$A\n";
foreach my $alpha (keys(%{$LL_TABLE{$A}})) {
print " $alpha --> $LL_TABLE{$A}{$alpha}\n";
}
}
# Main parser subroutine
sub parse {
my $re = shift;
chomp($re);
my @STRING = insert_cats(split(//,$re));
my @STACK = ('$','R');
while (@STRING) {
my $target = shift(@STRING);
my $done = 0;
my $top = '';
while (!$done && defined($top)) {
print_stack(@STACK);
$top = pop(@STACK);
if ($top eq '$') {
$done++;
} elsif ($top eq 'id' || $top eq '.' || $top eq '*') {
# print "Match!\n";
$done++;
} else { # assuming production @ this point - R, O, etc
my $replace = get_production($top,$target);
# print "[$top,$target]\n";
if (defined($replace)) {
push(@STACK,reverse(split(' ',$replace)));
}
}
}
}
}
# Prings parser stack contents
sub print_stack {
my @stack = @_;
foreach (@stack) {
print "$_ ";
}
print "\n";
}
# Inserts cat symbol in appropriate places
sub insert_cats {
my @string = @_;
my @new = ();
my $prev = undef;
my $curr = undef;
foreach (@string) {
$curr = $_;
if (defined($prev)) {
if ((is_terminal($curr) && is_terminal($prev))) {
push(@new,'.',$curr);
} elsif ($prev eq '*' && ($curr ne '*' && $curr ne '|')) {
push(@new,'.',$curr);
} else {
push(@new,$curr);
}
} else {
push(@new,$curr);
}
$prev = $curr;
}
# print Dumper(@new);
# exit;
return @new;
}
# Queries %LL_PARSER table for production rule to use give the
# current non-terminal and target symbol
# A ->
|
TopicsRecent blog posts
|