ged2wiki
This is an old revision of the document!
Ged2Wiki
Ged2Wiki is a project to convert a GEDCOM into DokuWiki pages so that they can be added to this (or any other DokuWiki-based) site.
I intend to write this program in the Perl programming language. Although I initially thought that the GEDCOM module from CPAN would be a good starting point, I have since decided that starting from the ground up will be easier (for me).
As of 18 Nov 2007, I have written only the skeleton parser. This version will read a GEDCOM file and report the “XREF” labels for each Individual and Family found. However, as of now, it does nothing with the data.
Here's what it looks like so far:
#
# Ged2Wiki.pl - Perl program to convert a GEDCOM file to plain text Wiki pages
#
# The input format is based on "The GEDCOM Standard" Release 5.5, as published
# by the Family History Department of The Church of Jesus Christ of Latter-day
# Saints, 2 January 1996.
#
# The output format is a series of plain-text files based on DokuWiki
#
# For questions, comments, bug reports, etc., please visit this site:
# http://s560.com/ged2wiki
#
use strict;
use warnings;

# Parser state shared by all of the subs in this file.
my $currline;     # Current logical line, level number removed, CONC/CONT folded in
my $nextline;     # One-line lookahead (level number removed)
my $currlevel;    # Level number that belongs to $currline
my $nextlevel;    # Level number that belongs to $nextline

# Prime the parser by reading the first line into the lookahead
fetch_line();

# "Read" the first line (moves the lookahead into $currline)
read_line();

# The first line must be "0 HEAD"
if ($currline !~ /^HEAD/) {
    print "Input does not appear to be a GEDCOM file\n";
    exit 1;    # Non-zero status so callers can detect the failure
}

# First line is good. Parse the rest of the header
parse_header();

# If it's a Submission record, parse it here
if ($currline =~ /^\@(.*?)\@\s+SUBN/) {
    parse_subn($1);
}

# Loop here, reading the "Record" entries until we find the Trailer.
#
# NOTE: $. is the number of the last physical line fetched.  read_line()
# always fetches at least one line beyond the current one, so the line number
# shown in the messages below is that of the lookahead, not of the record
# being complained about.
while (1) {

    # First, check for the Trailer record
    if ($currline =~ /^TRLR/) {
        last;    # We're done with the input - break out of this loop
    }

    # Each Record must be in this format: @XREF@ TAG ...
    if ($currline !~ /^\@(.*?)\@\s+(\w+)/) {
        print "Unexpected Record at line $.:\n>> $currline\n";
        exit 1;
    }

    # Copy the captures immediately; any later match would overwrite them
    my $xref = $1;
    my $tag  = $2;

    # If it's a "FAM" record, parse it here
    if ($tag eq "FAM") {
        parse_family($xref);
        next;
    }

    # If it's a "INDI" record, parse it here
    if ($tag eq "INDI") {
        parse_individual($xref);
        next;
    }

    # If it's a "NOTE" record, parse it here
    if ($tag eq "NOTE") {
        parse_note($xref);
        next;
    }

    # If it's an unknown record, it's an error
    if ($tag !~ /^(OBJE|REPO|SOUR|SUBM)$/) {
        print "Unexpected Record at line $.:\n>> $currline\n";
        exit 1;
    }

    # Ignore this type of record (OBJE, REPO, SOUR or SUBM)
    parse_ignore();
}

print "GEDCOM file parsed successfully\n";

# Generate all the output here ...

exit;
# Read one physical line from the input (<>) into the lookahead variables.
#
# On return:
#   $nextlevel - the level number found at the start of the line
#   $nextline  - the rest of the line, with the level number and the leading
#                and trailing whitespace removed
#
# Prints a diagnostic and exits with a non-zero status when the input ends, or
# when the line does not start with a level number followed by whitespace.
sub fetch_line {
    $nextline = <>;

    # End of input.  read_line() stops fetching once it sees the Trailer, so
    # arriving here means the input is empty or ends before a "TRLR" line.
    if (!defined $nextline) {
        my $lineno = $. || 0;    # $. is not set when nothing was read at all
        print "Unexpected end of GEDCOM data after line $lineno\n";
        exit 1;
    }

    # A byte-order mark in front of the very first line would otherwise make
    # the level-number match below fail.  Both the raw UTF-8 bytes and the
    # already-decoded character are accepted.
    $nextline =~ s/^(?:\xEF\xBB\xBF|\x{FEFF})// if $. == 1;

    $nextline =~ s/^\s+//;           # Strip leading whitespace
    $nextline =~ s/\s+[\r\n]*$//;    # Strip trailing whitespace (and newline)

    # Split "<level> <rest>".  The rest is captured explicitly rather than
    # taken from $' (the post-match variable).
    if ($nextline !~ /^(\d+)\s+(.*)\z/s) {
        print "Invalid GEDCOM data at line $.:\n>> $nextline\n";
        exit 1;
    }
    $nextlevel = $1;
    $nextline  = $2;
}
# Advance the parser by one logical line.
#
# The lookahead ($nextline/$nextlevel) becomes the current line
# ($currline/$currlevel).  Any CONC/CONT lines that follow at exactly one
# level deeper are folded into $currline: CONC text is appended directly,
# CONT text is appended after a newline.  On return the lookahead holds the
# first line that is not part of the current one.
#
# Once the current line is the Trailer (TRLR) nothing more is read and the
# lookahead is cleared.
sub read_line {
    $currline  = $nextline;
    $currlevel = $nextlevel;

    # Special case: Do not read beyond the Trailer
    if ($currline =~ /^TRLR/) {
        $nextline  = "";
        $nextlevel = "";
        return;
    }

    fetch_line();

    # fetch_line() strips trailing whitespace, so a continuation line with no
    # text arrives as the bare tag ("CONT" or "CONC").  The whitespace and the
    # text after the tag are therefore optional in the pattern; requiring them
    # would end the folding at the first empty continuation line.
    while ($nextlevel == $currlevel + 1
        && $nextline =~ /^CON([CT])(?:\s+(.*))?\z/s)
    {
        my $kind = $1;
        my $text = defined $2 ? $2 : "";

        if ($kind eq "C") {
            $currline .= $text;        # CONC: same line, no separator
        }
        else {
            $currline .= "\n$text";    # CONT: starts a new line of text
        }
        fetch_line();
    }
}
# Header handler: announce the header, then skip everything that belongs to it.
sub parse_header {
    my $banner = ">> Header\n";
    print $banner;

    # The header contents are not used yet, so just step over them
    parse_ignore();
}
# Submission record handler: announce the record, then skip over it.
# Any argument passed by the caller is not used.
sub parse_subn {
    print(">> Submission\n");

    # Nothing is extracted from the record yet
    parse_ignore();
}
# Family record handler.
#   $_[0] - the record's XREF label, echoed in the progress message
# The record body is skipped for now.
sub parse_family {
    my ($xref) = @_;

    print ">> Family $xref\n";

    # Nothing is extracted from the record yet
    parse_ignore();
}
# Individual record handler.
#   $_[0] - the record's XREF label, echoed in the progress message
# The record body is skipped for now.
sub parse_individual {
    my ($xref) = @_;

    print ">> Individual $xref\n";

    # Nothing is extracted from the record yet
    parse_ignore();
}
# Note record handler: announce the record, then skip over it.
# Any argument passed by the caller is not used and is not printed.
sub parse_note {
    print(">> Note\n");

    # Nothing is extracted from the record yet
    parse_ignore();
}
# Skip the rest of the current record.
#
# Remembers the level of the current line, then keeps calling read_line()
# until the current line is at that level or a shallower one.  At least one
# line is always consumed.
sub parse_ignore {
    my $base_level = $currlevel;

    do {
        read_line();
    } until ($currlevel <= $base_level);
}
ged2wiki.1195421179.txt.gz · Last modified: 2007/11/18 15:26 by jims
