ged2wiki
This is an old revision of the document!
Ged2Wiki
Ged2Wiki is a project to convert a GEDCOM into DokuWiki pages so that they can be added to this (or any other DokuWiki-based) site.
This program is written in the Perl programming language – and while I had initially thought that the GEDCOM module from CPAN would be a good starting point, I later decided that starting from the ground up was easier (for me).
As of 24 Nov 2007, I have written only the parser. This version reads a GEDCOM file and stores all the relevant data in hashes. It then spits out basic data about each family to stdout. Eventually, each family will be written to a separate file.
Here's what it looks like so far:
#
# Ged2Wiki.pl - Perl program to convert a GEDCOM file to plain text Wiki pages
#
# The input format is based on "The GEDCOM Standard" Release 5.5, as published
# by the Family History Department of The Church of Jesus Christ of Latter-day
# Saints, 2 January 1996.
#
# The output format is a series of plain-text files based on DokuWiki
#
# For questions, comments, bug reports, etc., please visit this site:
# http://s560.com/ged2wiki
#
# The globals for the GEDCOM input lines
my $currline;
my $nextline;
my $currlevel;
my $nextlevel;
# The globals for the decoded data about individuals
# (Index = Individual Xref tag)
my %name; # Full Name
my %sex; # 'M' or 'F'
my %birth; # Birth Date and/or Place
my %death; # Death Date and/or Place
my %burial; # Burial Date and/or Place
my %familydown; # Xref tag for Individual's Family (FAMS)
my %familyup; # Xref tag for Parent's Family (FAMC)
my %inote; # Note
my %private; # Is data private? (If so, do not show details on output)
# most of the data above can have an associated note
my %nnote; # Note for Name
my %bnote; # Note for Birth
my %dnote; # Note for Death
my %burnote; # Note for Burial
my %fdnote; # Note for Family (Up)
my %funote; # Note for Family (Down)
# The globals for the decoded data about families
# (Index = Family Xref tag)
my %familyflag; # This entry will exist for ALL families
my %husband; # Individual tag for husband
my %wife; # Individual tag for wife
my %marriage; # Marriage Date and/or Place
my %fnote; # Note
my %children; # List of individual tags for children
my %mnote; # Note for Marriage event
# The global for records other than Individuals and Families
my %note; # Note
# Prime the parser by reading the first line
fetch_line();
# "Read" the first line
read_line();
# The first line must be "0 HEAD"
if ($currline !~ /^HEAD/) {
print "Input does not appear to be a GEDCOM file\n";
exit;
}
# First line is good. Parse the rest of the header
parse_header();
# If it's a Submission record, parse it here
if ($currline =~ /^\@(.*?)\@\s+SUBN/) {
parse_subn($1);
}
# Loop here, reading the "Record" entries until we find the Trailer
while (1) {
# First, check for the Trailer record
if ($currline =~ /^TRLR/) {
last; # We're done with the input - break out of this loop
}
# Each Record must be in this format
if ($currline !~ /^\@(.*?)\@\s+(\w+)\s*/) {
print "Unexpected Record at line $.:\n>> $currline\n";
exit;
}
my $xref = $1;
my $tag = $2;
my $data = "$'";
# If it's a "FAM" record, parse it here
if ($tag eq "FAM") {
parse_family($xref);
next;
}
# If it's a "INDI" record, parse it here
if ($tag eq "INDI") {
parse_individual($xref);
next;
}
# If it's a "NOTE" record, parse it here
if ($tag eq "NOTE") {
parse_note($xref, $data);
next;
}
# If it's an unknown record, it's an error
if ($tag !~ /^(OBJE|REPO|SOUR|SUBM)$/) {
print "Unexpected Tag at line $.:\n>> $currline\n";
exit;
}
# Ignore this type of record
parse_ignore();
}
print "GEDCOM file parsed successfully\n";
# Generate all the output here ...
foreach my $fxref (keys %familyflag) {
# Output data for family $fxref
output_family($fxref);
}
exit;
# Read one line from the input file (and strip unwanted whitespace)
sub fetch_line {
$nextline = <>;
$nextline =~ s/^\s+//; # Strip leading whitespace
$nextline =~ s/[\s\r\n]*$//; # Strip trailing whitespace (and newline)
if ($nextline !~ /^(\d{1,2})\s+/) {
print "Invalid GEDCOM data at line $.:\n>> $nextline\n";
exit;
}
$nextlevel = $1;
$nextline = "$'";
}
# One input line -> $currline (concatenating as needed)
sub read_line {
$currline = $nextline;
$currlevel = $nextlevel;
# Special case: Do not read beyond the Trailer
if ($currline =~ /^TRLR/) {
$nextline = "";
$nextlevel = "";
return;
}
fetch_line();
# Special case: A tag without any parameters - Add a single space
if ($currline =~ /^(\@\w+\@)?\s+\w+$/) {
$currline .= " ";
}
while ($nextlevel == $currlevel + 1 && $nextline =~ /^CON([CT])\s*/) {
if ($1 eq "C") {
$currline .= "$'";
} else {
$currline .= "\n$'";
}
fetch_line();
}
}
# Parse the header
sub parse_header {
parse_ignore(); # For now, simply ignore it
# Format of Header (brackets denote optional entries)
# 0 HEAD
# 1 SOUR
# [ 1 DEST ]
# [ 1 DATE ]
# 1 SUBM
# [ 1 SUBN ]
# [ 1 FILE ]
# [ 1 COPR ]
# 1 GEDC
# 1 CHAR
# [ 1 LANG ]
# [ 1 PLAC ]
# [ 1 NOTE ]
}
# Parse the submission record
sub parse_subn {
parse_ignore(); # For now, simply ignore it
# Format of Submission record (all sub-fields are optional)
# 0 @XREF@ SUBN
# [ 1 SUBM ]
# [ 1 FAMF ]
# [ 1 TEMP ]
# [ 1 ANCE ]
# [ 1 DESC ]
# [ 1 ORDI ]
# [ 1 RIN ]
}
# Parse a Family record
sub parse_family {
my $ftag = $_[0];
# All families must have this marker entry
$familyflag{$ftag} = 1;
# Format of Family record (brackets denote optional entries)
# 0 @XREF@ FAM
# { 1 ANUL|CENS|DIV|DIVF|ENGA|MARB|MARC|MARL|MARS|EVEN }
# { 1 MARR } -> %
# [ 1 HUSB ] -> %
# [ 1 WIFE ] -> %
# { 1 CHIL } -> %
# [ 1 NCHI ]
# { 1 SUBM }
# [ 1 SLGS ]
# { 1 SOUR }
# { 1 OBJE }
# { 1 NOTE } -> %fnote
# { 1 REFN }
# [ 1 RIN ]
# [ 1 CHAN ]
# Read the first sub-line
read_line();
while ($currlevel > 0) {
my $work;
# Record this family's husband
# (If there are multiple entries of this type, use only the first)
if ($currline =~ /^HUSB\s+\@(.*?)\@/) {
if ($husband{$ftag} eq "") {
$husband{$ftag} = $1;
}
}
# Record this family's wife
# (If there are multiple entries of this type, use only the first)
if ($currline =~ /^WIFE\s+\@(.*?)\@/) {
if ($wife{$ftag} eq "") {
$wife{$ftag} = $1;
}
}
# Record this family's children
# (The result is a comma-separated list of Individual tags)
if ($currline =~ /^CHIL\s+\@(.*?)\@/) {
if ($children{$ftag} eq "") {
$children{$ftag} = $1;
} else {
$children{$ftag} .= ",$1";
}
}
# Record this family's marriage event
if ($currline =~ /^MARR/) {
if ($marriage{$ftag} eq "") {
$marriage{$ftag} = read_event();
}
$work = read_note();
if ($work ne "") {
$mnote{$ftag} .= $work;
}
next;
}
# Record any notes associated with this family
if ($currline =~ /^NOTE\s+/) {
$fnote{$ftag} .= "$'";
}
# If we get here, we need to ignore the remaining lines of this sub-record
parse_ignore();
}
}
# Parse an Individual record
sub parse_individual {
my $itag = $_[0];
# Format of Individual record (brackets denote optional entries)
# 0 @XREF@ INDI
# { 1 NAME } -> %name;
# [ 1 SEX ] -> %sex;
# { 1 BIRT } -> %birth;
# { 1 DEAT } -> %death;
# { 1 BURI } -> %burial;
# { 1 CHR|CREM|ADOP|BAPM|BARM|BASM|BLES|CHRA|CONF|FCOM|
# ORDN|NATU|EMIG|IMMI|CENS|PROB|WILL|GRAD|RETI|EVEN }
# { 1 CAST|DSCR|EDUC|IDNO|NATI|NCHI|NMR|OCCU|PROP|RELI|RESI|SSN|TITL }
# { 1 BAPL|CONL|ENDL|SLGC }
# { 1 FAMC } -> &familyup;
# { 1 FAMS } -> %familydown;
# { 1 SUBM }
# { 1 ASSO }
# { 1 ALIA }
# { 1 ANCI }
# { 1 DESI }
# { 1 SOUR }
# { 1 OBJE }
# { 1 NOTE } -> %inote;
# [ 1 RFN ]
# [ 1 AFN ]
# { 1 REFN }
# [ 1 RIN ]
# [ 1 CHAN ]
# Read the first sub-line
read_line();
while ($currlevel > 0) {
my $work;
# Record this person's name
# (If there are multiple entries of this type, use only the first)
if ($currline =~ /^NAME\s+/) {
if ($name{$itag} eq "") {
$name{$itag} = "$'"; # Save the full name
}
$work = read_note();
if ($work ne "") {
$nnote{$itag} = $work;
}
next;
}
# Record this person's gender
if ($currline =~ /^SEX\s+/) {
if ($sex{$itag} eq "") {
$sex{$itag} = uc substr("$'", 0, 1);
}
}
# Record this person's birth date/place
if ($currline =~ /^BIRT/) {
if ($birth{$itag} eq "") {
$birth{$itag} = read_event();
}
$work = read_note();
if ($work ne "") {
$bnote{$itag} .= $work;
}
next;
}
# Record this person's death date/place
if ($currline =~ /^DEAT/) {
if ($death{$itag} eq "") {
$death{$itag} = read_event();
}
$work = read_note();
if ($work ne "") {
$dnote{$itag} .= $work;
}
next;
}
# Record this person's burial date/place
if ($currline =~ /^BURI/) {
if ($burial{$itag} eq "") {
$burial{$itag} = read_event();
}
$work = read_note();
if ($work ne "") {
$burnote{$itag} .= $work;
}
next;
}
# Record the family in which this person is a child
# (There may be multiple entries of this type)
if ($currline =~ /^FAMC\s+\@(.*?)\@/) {
if ($familyup{$itag} eq "") {
$familyup{$itag} = $1;
} else {
$familyup{$itag} .= ",$1";
}
$work = read_note();
if ($work ne "") {
$funote{$itag} = $work;
}
next;
}
# Record the family in which this person is a spouse
# (There may be multiple entries of this type)
if ($currline =~ /^FAMS\s+\@(.*?)\@/) {
if ($familydown{$itag} eq "") {
$familydown{$itag} = $1;
} else {
$familydown{$itag} .= ",$1";
}
$work = read_note();
if ($work ne "") {
$fdnote{$itag} = $work;
}
next;
}
# Record any notes associated with this person
if ($currline =~ /^NOTE\s+/) {
$inote{$itag} .= "$'";
}
# If we get here, we need to ignore the remaining lines of this sub-record
parse_ignore();
}
}
# Parse a Note record
sub parse_note {
# Save the note's data
$note{$_[0]} = $_[1];
# Format of Note record
# 0 @XREF@ NOTE
# { 1 SOUR }
# { 1 REFN }
# [ 1 RIN ]
# [ 1 CHAN ]
# And ignore all sub-records
parse_ignore();
}
# Read lines until we find a level the same or lower than the current line
# Examples:
# If the current line is "0 HEAD", it will read until it finds another "0" line.
# if the current line is "1 NAME", it will read until it finds a "1" or a "0" line.
# In either case, the "current" line will be the line AFTER the ignored block
sub parse_ignore {
my $thislevel = $currlevel;
while (1) {
read_line();
if ($currlevel <= $thislevel) {
last;
}
}
}
# Read and parse an "Event Detail" entry
sub read_event {
my $thislevel = $currlevel;
my $edate;
my $eplace;
while (1) {
read_line();
if ($currlevel <= $thislevel) {
last;
}
if ($currline =~ /^DATE\s+/) {
$edate = "$'";
}
if ($currline =~ /^PLAC\s+/) {
$eplace = "$'";
}
}
if ($eplace ne "" && $edate ne "") {
"$edate, $eplace";
} else {
if ($eplace ne "") {
$eplace;
} else {
$edate;
}
}
}
# Read the sub-records, looking for a NOTE entry
sub read_note {
my $thislevel = $currlevel;
my $thisnote;
while (1) {
read_line();
if ($currlevel <= $thislevel) {
last;
}
if ($currline =~ /^NOTE\s+/) {
$thisnote .= "$'";
}
}
$thisnote;
}
# Output a single family
sub output_family {
my $ftag = $_[0];
print "\nFamily $ftag\n";
print "Husband: $name{$husband{$ftag}}\n";
print " Wife: $name{$wife{$ftag}}\n";
my @children = split(/\,/, $children{$ftag});
foreach my $ctag (@children) {
print " Child: $name{$ctag}\n";
}
}
ged2wiki.1195945427.txt.gz · Last modified: 2007/11/24 17:03 by jims
