ged2wiki
This is an old revision of the document!
Ged2Wiki
Ged2Wiki is a project to convert a GEDCOM into DokuWiki pages so that they can be added to this (or any other DokuWiki-based) site.
This program is written in the Perl programming language – and while I had initially thought that the GEDCOM module from CPAN would be a good starting point, I later decided that starting from the ground up was easier (for me).
As of 24 Nov 2007, I have written only the parser. This version reads a GEDCOM file and stores all the relevant data in hashes. It then spits out basic data about each family to stdout. Eventually, each family will be written to a separate file.
Here's what it looks like so far:
# # Ged2Wiki.pl - Perl program to convert a GEDCOM file to plain text Wiki pages # # The input format is based on "The GEDCOM Standard" Release 5.5, as published # by the Family History Department of The Church of Jesus Christ of Latter-day # Saints, 2 January 1996. # # The output format is a series of plain-text files based on DokuWiki # # For questions, comments, bug reports, etc., please visit this site: # http://s560.com/ged2wiki # # The globals for the GEDCOM input lines my $currline; my $nextline; my $currlevel; my $nextlevel; # The globals for the decoded data about individuals # (Index = Individual Xref tag) my %name; # Full Name my %sex; # 'M' or 'F' my %birth; # Birth Date and/or Place my %death; # Death Date and/or Place my %burial; # Burial Date and/or Place my %familydown; # Xref tag for Individual's Family (FAMS) my %familyup; # Xref tag for Parent's Family (FAMC) my %inote; # Note my %private; # Is data private? (If so, do not show details on output) # most of the data above can have an associated note my %nnote; # Note for Name my %bnote; # Note for Birth my %dnote; # Note for Death my %burnote; # Note for Burial my %fdnote; # Note for Family (Up) my %funote; # Note for Family (Down) # The globals for the decoded data about families # (Index = Family Xref tag) my %familyflag; # This entry will exist for ALL families my %husband; # Individual tag for husband my %wife; # Individual tag for wife my %marriage; # Marriage Date and/or Place my %fnote; # Note my %children; # List of individual tags for children my %mnote; # Note for Marriage event # The global for records other than Individuals and Families my %note; # Note # Prime the parser by reading the first line fetch_line(); # "Read" the first line read_line(); # The first line must be "0 HEAD" if ($currline !~ /^HEAD/) { print "Input does not appear to be a GEDCOM file\n"; exit; } # First line is good. Parse the rest of the header parse_header(); # If it's a Submission record, parse it here if ($currline =~ /^\@(.*?)\@\s+SUBN/) { parse_subn($1); } # Loop here, reading the "Record" entries until we find the Trailer while (1) { # First, check for the Trailer record if ($currline =~ /^TRLR/) { last; # We're done with the input - break out of this loop } # Each Record must be in this format if ($currline !~ /^\@(.*?)\@\s+(\w+)\s*/) { print "Unexpected Record at line $.:\n>> $currline\n"; exit; } my $xref = $1; my $tag = $2; my $data = "$'"; # If it's a "FAM" record, parse it here if ($tag eq "FAM") { parse_family($xref); next; } # If it's a "INDI" record, parse it here if ($tag eq "INDI") { parse_individual($xref); next; } # If it's a "NOTE" record, parse it here if ($tag eq "NOTE") { parse_note($xref, $data); next; } # If it's an unknown record, it's an error if ($tag !~ /^(OBJE|REPO|SOUR|SUBM)$/) { print "Unexpected Tag at line $.:\n>> $currline\n"; exit; } # Ignore this type of record parse_ignore(); } print "GEDCOM file parsed successfully\n"; # Generate all the output here ... foreach my $fxref (keys %familyflag) { # Output data for family $fxref output_family($fxref); } exit; # Read one line from the input file (and strip unwanted whitespace) sub fetch_line { $nextline = <>; $nextline =~ s/^\s+//; # Strip leading whitespace $nextline =~ s/[\s\r\n]*$//; # Strip trailing whitespace (and newline) if ($nextline !~ /^(\d{1,2})\s+/) { print "Invalid GEDCOM data at line $.:\n>> $nextline\n"; exit; } $nextlevel = $1; $nextline = "$'"; } # One input line -> $currline (concatenating as needed) sub read_line { $currline = $nextline; $currlevel = $nextlevel; # Special case: Do not read beyond the Trailer if ($currline =~ /^TRLR/) { $nextline = ""; $nextlevel = ""; return; } fetch_line(); # Special case: A tag without any parameters - Add a single space if ($currline =~ /^(\@\w+\@)?\s+\w+$/) { $currline .= " "; } while ($nextlevel == $currlevel + 1 && $nextline =~ /^CON([CT])\s*/) { if ($1 eq "C") { $currline .= "$'"; } else { $currline .= "\n$'"; } fetch_line(); } } # Parse the header sub parse_header { parse_ignore(); # For now, simply ignore it # Format of Header (brackets denote optional entries) # 0 HEAD # 1 SOUR # [ 1 DEST ] # [ 1 DATE ] # 1 SUBM # [ 1 SUBN ] # [ 1 FILE ] # [ 1 COPR ] # 1 GEDC # 1 CHAR # [ 1 LANG ] # [ 1 PLAC ] # [ 1 NOTE ] } # Parse the submission record sub parse_subn { parse_ignore(); # For now, simply ignore it # Format of Submission record (all sub-fields are optional) # 0 @XREF@ SUBN # [ 1 SUBM ] # [ 1 FAMF ] # [ 1 TEMP ] # [ 1 ANCE ] # [ 1 DESC ] # [ 1 ORDI ] # [ 1 RIN ] } # Parse a Family record sub parse_family { my $ftag = $_[0]; # All families must have this marker entry $familyflag{$ftag} = 1; # Format of Family record (brackets denote optional entries) # 0 @XREF@ FAM # { 1 ANUL|CENS|DIV|DIVF|ENGA|MARB|MARC|MARL|MARS|EVEN } # { 1 MARR } -> % # [ 1 HUSB ] -> % # [ 1 WIFE ] -> % # { 1 CHIL } -> % # [ 1 NCHI ] # { 1 SUBM } # [ 1 SLGS ] # { 1 SOUR } # { 1 OBJE } # { 1 NOTE } -> %fnote # { 1 REFN } # [ 1 RIN ] # [ 1 CHAN ] # Read the first sub-line read_line(); while ($currlevel > 0) { my $work; # Record this family's husband # (If there are multiple entries of this type, use only the first) if ($currline =~ /^HUSB\s+\@(.*?)\@/) { if ($husband{$ftag} eq "") { $husband{$ftag} = $1; } } # Record this family's wife # (If there are multiple entries of this type, use only the first) if ($currline =~ /^WIFE\s+\@(.*?)\@/) { if ($wife{$ftag} eq "") { $wife{$ftag} = $1; } } # Record this family's children # (The result is a comma-separated list of Individual tags) if ($currline =~ /^CHIL\s+\@(.*?)\@/) { if ($children{$ftag} eq "") { $children{$ftag} = $1; } else { $children{$ftag} .= ",$1"; } } # Record this family's marriage event if ($currline =~ /^MARR/) { if ($marriage{$ftag} eq "") { $marriage{$ftag} = read_event(); } $work = read_note(); if ($work ne "") { $mnote{$ftag} .= $work; } next; } # Record any notes associated with this family if ($currline =~ /^NOTE\s+/) { $fnote{$ftag} .= "$'"; } # If we get here, we need to ignore the remaining lines of this sub-record parse_ignore(); } } # Parse an Individual record sub parse_individual { my $itag = $_[0]; # Format of Individual record (brackets denote optional entries) # 0 @XREF@ INDI # { 1 NAME } -> %name; # [ 1 SEX ] -> %sex; # { 1 BIRT } -> %birth; # { 1 DEAT } -> %death; # { 1 BURI } -> %burial; # { 1 CHR|CREM|ADOP|BAPM|BARM|BASM|BLES|CHRA|CONF|FCOM| # ORDN|NATU|EMIG|IMMI|CENS|PROB|WILL|GRAD|RETI|EVEN } # { 1 CAST|DSCR|EDUC|IDNO|NATI|NCHI|NMR|OCCU|PROP|RELI|RESI|SSN|TITL } # { 1 BAPL|CONL|ENDL|SLGC } # { 1 FAMC } -> &familyup; # { 1 FAMS } -> %familydown; # { 1 SUBM } # { 1 ASSO } # { 1 ALIA } # { 1 ANCI } # { 1 DESI } # { 1 SOUR } # { 1 OBJE } # { 1 NOTE } -> %inote; # [ 1 RFN ] # [ 1 AFN ] # { 1 REFN } # [ 1 RIN ] # [ 1 CHAN ] # Read the first sub-line read_line(); while ($currlevel > 0) { my $work; # Record this person's name # (If there are multiple entries of this type, use only the first) if ($currline =~ /^NAME\s+/) { if ($name{$itag} eq "") { $name{$itag} = "$'"; # Save the full name } $work = read_note(); if ($work ne "") { $nnote{$itag} = $work; } next; } # Record this person's gender if ($currline =~ /^SEX\s+/) { if ($sex{$itag} eq "") { $sex{$itag} = uc substr("$'", 0, 1); } } # Record this person's birth date/place if ($currline =~ /^BIRT/) { if ($birth{$itag} eq "") { $birth{$itag} = read_event(); } $work = read_note(); if ($work ne "") { $bnote{$itag} .= $work; } next; } # Record this person's death date/place if ($currline =~ /^DEAT/) { if ($death{$itag} eq "") { $death{$itag} = read_event(); } $work = read_note(); if ($work ne "") { $dnote{$itag} .= $work; } next; } # Record this person's burial date/place if ($currline =~ /^BURI/) { if ($burial{$itag} eq "") { $burial{$itag} = read_event(); } $work = read_note(); if ($work ne "") { $burnote{$itag} .= $work; } next; } # Record the family in which this person is a child # (There may be multiple entries of this type) if ($currline =~ /^FAMC\s+\@(.*?)\@/) { if ($familyup{$itag} eq "") { $familyup{$itag} = $1; } else { $familyup{$itag} .= ",$1"; } $work = read_note(); if ($work ne "") { $funote{$itag} = $work; } next; } # Record the family in which this person is a spouse # (There may be multiple entries of this type) if ($currline =~ /^FAMS\s+\@(.*?)\@/) { if ($familydown{$itag} eq "") { $familydown{$itag} = $1; } else { $familydown{$itag} .= ",$1"; } $work = read_note(); if ($work ne "") { $fdnote{$itag} = $work; } next; } # Record any notes associated with this person if ($currline =~ /^NOTE\s+/) { $inote{$itag} .= "$'"; } # If we get here, we need to ignore the remaining lines of this sub-record parse_ignore(); } } # Parse a Note record sub parse_note { # Save the note's data $note{$_[0]} = $_[1]; # Format of Note record # 0 @XREF@ NOTE # { 1 SOUR } # { 1 REFN } # [ 1 RIN ] # [ 1 CHAN ] # And ignore all sub-records parse_ignore(); } # Read lines until we find a level the same or lower than the current line # Examples: # If the current line is "0 HEAD", it will read until it finds another "0" line. # if the current line is "1 NAME", it will read until it finds a "1" or a "0" line. # In either case, the "current" line will be the line AFTER the ignored block sub parse_ignore { my $thislevel = $currlevel; while (1) { read_line(); if ($currlevel <= $thislevel) { last; } } } # Read and parse an "Event Detail" entry sub read_event { my $thislevel = $currlevel; my $edate; my $eplace; while (1) { read_line(); if ($currlevel <= $thislevel) { last; } if ($currline =~ /^DATE\s+/) { $edate = "$'"; } if ($currline =~ /^PLAC\s+/) { $eplace = "$'"; } } if ($eplace ne "" && $edate ne "") { "$edate, $eplace"; } else { if ($eplace ne "") { $eplace; } else { $edate; } } } # Read the sub-records, looking for a NOTE entry sub read_note { my $thislevel = $currlevel; my $thisnote; while (1) { read_line(); if ($currlevel <= $thislevel) { last; } if ($currline =~ /^NOTE\s+/) { $thisnote .= "$'"; } } $thisnote; } # Output a single family sub output_family { my $ftag = $_[0]; print "\nFamily $ftag\n"; print "Husband: $name{$husband{$ftag}}\n"; print " Wife: $name{$wife{$ftag}}\n"; my @children = split(/\,/, $children{$ftag}); foreach my $ctag (@children) { print " Child: $name{$ctag}\n"; } }
ged2wiki.1195945427.txt.gz · Last modified: 2007/11/24 17:03 by jims