User Tools

Site Tools


ged2wiki

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
ged2wiki [2007/11/24 17:03] jimsged2wiki [2013/01/06 15:48] (current) jims
Line 3: Line 3:
 ==== Ged2Wiki ==== ==== Ged2Wiki ====
  
-Ged2Wiki is a project to convert a GEDCOM into +Ged2Wiki is a project of my own creation to convert a GEDCOM into 
 [[http://wiki.splitbrain.org/wiki:dokuwiki|DokuWiki]] pages so that they can be  [[http://wiki.splitbrain.org/wiki:dokuwiki|DokuWiki]] pages so that they can be 
 added to this (or any other  added to this (or any other 
Line 13: Line 13:
 from the ground up was easier (for me). from the ground up was easier (for me).
  
-As of 24 Nov 2007, I have written only the parser.  This version reads a GEDCOM  +The program reads a GEDCOM file and stores all the relevant data in hashes.  It  
-file and stores all the relevant data in hashes.  It then spits out basic data  +then spits out a wiki page for each family (and any individuals who do not have  
-about each family to stdout.  Eventually, each family will be written to a +families of their own).
-separate file.+
  
-Here's what it looks like so far:+It makes an effort to mark living people as "Private", so their information will 
 +not show up.  However, be aware that this feature is not 100%!
  
-<code> +Finally, it creates an Index file for all non-Private names it finds.
-+
-# Ged2Wiki.pl - Perl program to convert a GEDCOM file to plain text Wiki pages +
-+
-# The input format is based on "The GEDCOM Standard" Release 5.5, as published +
-# by the Family History Department of The Church of Jesus Christ of Latter-day +
-# Saints, 2 January 1996. +
-+
-# The output format is a series of plain-text files based on DokuWiki +
-+
-# For questions, comments, bug reports, etc., please visit this site: +
-#    http://s560.com/ged2wiki +
-#+
  
-# The globals for the GEDCOM input lines +Modification History: 
-my $currline; +  * 01 Feb 2009 -- Initial upload (very buggy and mostly useless) 
-my $nextline; +  * 30 May 2010 -- Fixed most bugs, but still had major problems marking people as "Private" 
-my $currlevel; +  * 06 Jan 2013 -- Added Index feature and enhanced Privacy feature
-my $nextlevel;+
  
-The globals for the decoded data about individuals +The latest version of the program can be found here: {{:ged2wiki.zip|}}
-# (Index = Individual Xref tag) +
-my %name;         # Full Name +
-my %sex;          # 'M' or 'F' +
-my %birth;        # Birth Date and/or Place +
-my %death;        # Death Date and/or Place +
-my %burial;       # Burial Date and/or Place +
-my %familydown;   # Xref tag for Individual's Family (FAMS) +
-my %familyup;     # Xref tag for Parent's Family (FAMC) +
-my %inote;        # Note +
-my %private;      # Is data private? (If so, do not show details on output)+
  
-# most of the data above can have an associated note +An example of the output can be found here: [[owen:f189|John Thomas SEYMOUR]] (+ [[owen:index|Name Index]])
-my %nnote;        # Note for Name +
-my %bnote;        # Note for Birth +
-my %dnote;        # Note for Death +
-my %burnote;      # Note for Burial +
-my %fdnote;       # Note for Family (Up) +
-my %funote;       # Note for Family (Down) +
- +
-# The globals for the decoded data about families +
-# (Index = Family Xref tag) +
-my %familyflag;   # This entry will exist for ALL families +
-my %husband;      # Individual tag for husband +
-my %wife;         # Individual tag for wife +
-my %marriage;     # Marriage Date and/or Place +
-my %fnote;        # Note +
-my %children;     # List of individual tags for children +
-my %mnote;        # Note for Marriage event +
- +
-# The global for records other than Individuals and Families +
-my %note;         # Note +
- +
-# Prime the parser by reading the first line +
-fetch_line(); +
- +
-# "Read" the first line +
-read_line(); +
- +
-# The first line must be "0 HEAD" +
-if ($currline !~ /^HEAD/) { +
-   print "Input does not appear to be a GEDCOM file\n"; +
-   exit; +
-+
- +
-# First line is good.  Parse the rest of the header +
-parse_header(); +
- +
-# If it's a Submission record, parse it here +
-if ($currline =~ /^\@(.*?)\@\s+SUBN/) { +
-   parse_subn($1); +
-+
- +
-# Loop here, reading the "Record" entries until we find the Trailer +
-while (1) { +
- +
-   # First, check for the Trailer record +
-   if ($currline =~ /^TRLR/) { +
-      last; # We're done with the input - break out of this loop +
-   } +
- +
-   # Each Record must be in this format +
-   if ($currline !~ /^\@(.*?)\@\s+(\w+)\s*/) { +
-      print "Unexpected Record at line $.:\n>> $currline\n"; +
-      exit; +
-   } +
-   my $xref = $1; +
-   my $tag = $2; +
-   my $data = "$'"; +
- +
-   # If it's a "FAM" record, parse it here +
-   if ($tag eq "FAM") { +
-      parse_family($xref); +
-      next; +
-   } +
- +
-   # If it's a "INDI" record, parse it here +
-   if ($tag eq "INDI") { +
-      parse_individual($xref); +
-      next; +
-   } +
- +
-   # If it's a "NOTE" record, parse it here +
-   if ($tag eq "NOTE") { +
-      parse_note($xref, $data); +
-      next; +
-   } +
- +
-   # If it's an unknown record, it's an error +
-   if ($tag !~ /^(OBJE|REPO|SOUR|SUBM)$/) { +
-      print "Unexpected Tag at line $.:\n>> $currline\n"; +
-      exit; +
-   } +
- +
-   # Ignore this type of record +
-   parse_ignore(); +
-+
- +
-print "GEDCOM file parsed successfully\n"; +
- +
-# Generate all the output here ... +
-foreach my $fxref (keys %familyflag) { +
-   # Output data for family $fxref +
-   output_family($fxref); +
-+
-exit; +
- +
-# Read one line from the input file (and strip unwanted whitespace) +
-sub fetch_line { +
-   $nextline = <>; +
-   $nextline =~ s/^\s+//;        # Strip leading whitespace +
-   $nextline =~ s/[\s\r\n]*$//;  # Strip trailing whitespace (and newline) +
-   if ($nextline !~ /^(\d{1,2})\s+/) { +
-      print "Invalid GEDCOM data at line $.:\n>> $nextline\n"; +
-      exit; +
-   } +
-   $nextlevel = $1; +
-   $nextline = "$'"; +
-+
- +
-# One input line -> $currline (concatenating as needed) +
-sub read_line { +
- +
-   $currline = $nextline; +
-   $currlevel = $nextlevel; +
- +
-   # Special case: Do not read beyond the Trailer +
-   if ($currline =~ /^TRLR/) { +
-      $nextline = ""; +
-      $nextlevel = ""; +
-      return; +
-   } +
- +
-   fetch_line(); +
- +
-   # Special case: A tag without any parameters - Add a single space +
-   if ($currline =~ /^(\@\w+\@)?\s+\w+$/) { +
-      $currline .= " "; +
-   } +
- +
-   while ($nextlevel == $currlevel + 1 && $nextline =~ /^CON([CT])\s*/) { +
-      if ($1 eq "C") { +
-         $currline .= "$'"; +
-      } else { +
-         $currline .= "\n$'"; +
-      } +
-      fetch_line(); +
-   } +
-+
- +
-# Parse the header +
-sub parse_header { +
-   parse_ignore();   # For now, simply ignore it +
- +
-   # Format of Header (brackets denote optional entries) +
-   #   0 HEAD +
-   #   1 SOUR +
-   # [ 1 DEST ] +
-   # [ 1 DATE ] +
-   #   1 SUBM +
-   # [ 1 SUBN ] +
-   # [ 1 FILE ] +
-   # [ 1 COPR ] +
-   #   1 GEDC +
-   #   1 CHAR +
-   # [ 1 LANG ] +
-   # [ 1 PLAC ] +
-   # [ 1 NOTE ] +
- +
-+
- +
-# Parse the submission record +
-sub parse_subn { +
-   parse_ignore();   # For now, simply ignore it +
- +
-   # Format of Submission record (all sub-fields are optional) +
-   #   0 @XREF@ SUBN +
-   # [ 1 SUBM ] +
-   # [ 1 FAMF ] +
-   # [ 1 TEMP ] +
-   # [ 1 ANCE ] +
-   # [ 1 DESC ] +
-   # [ 1 ORDI ] +
-   # [ 1 RIN ] +
- +
-+
- +
-# Parse a Family record +
-sub parse_family { +
-   my $ftag = $_[0]; +
- +
-   # All families must have this marker entry +
-   $familyflag{$ftag} = 1; +
- +
-   # Format of Family record (brackets denote optional entries) +
-   #   0 @XREF@ FAM +
-   # { 1 ANUL|CENS|DIV|DIVF|ENGA|MARB|MARC|MARL|MARS|EVEN } +
-   # { 1 MARR }   ->    % +
-   # [ 1 HUSB   ->    % +
-   # [ 1 WIFE   ->    % +
-   # { 1 CHIL }   ->    % +
-   # [ 1 NCHI ] +
-   # { 1 SUBM } +
-   # [ 1 SLGS ] +
-   # { 1 SOUR } +
-   # { 1 OBJE } +
-   # { 1 NOTE }   ->    %fnote +
-   # { 1 REFN } +
-   # [ 1 RIN ] +
-   # [ 1 CHAN ] +
- +
-   # Read the first sub-line +
-   read_line(); +
- +
-   while ($currlevel > 0) { +
- +
-      my $work; +
- +
-      # Record this family's husband +
-      # (If there are multiple entries of this type, use only the first) +
-      if ($currline =~ /^HUSB\s+\@(.*?)\@/) { +
-         if ($husband{$ftag} eq "") { +
-            $husband{$ftag} = $1; +
-         } +
-      } +
- +
-      # Record this family's wife +
-      # (If there are multiple entries of this type, use only the first) +
-      if ($currline =~ /^WIFE\s+\@(.*?)\@/) { +
-         if ($wife{$ftag} eq "") { +
-            $wife{$ftag} = $1; +
-         } +
-      } +
- +
-      # Record this family's children +
-      # (The result is a comma-separated list of Individual tags) +
-      if ($currline =~ /^CHIL\s+\@(.*?)\@/) { +
-         if ($children{$ftag} eq "") { +
-            $children{$ftag} = $1; +
-         } else { +
-            $children{$ftag} .= ",$1"; +
-         } +
-      } +
- +
-      # Record this family's marriage event +
-      if ($currline =~ /^MARR/) { +
-         if ($marriage{$ftag} eq "") { +
-            $marriage{$ftag} = read_event(); +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $mnote{$ftag} .= $work; +
-         } +
-         next; +
-      } +
- +
-      # Record any notes associated with this family +
-      if ($currline =~ /^NOTE\s+/) { +
-         $fnote{$ftag} .= "$'"; +
-      } +
- +
-      # If we get here, we need to ignore the remaining lines of this sub-record +
-      parse_ignore(); +
- +
-   } +
- +
-+
- +
-# Parse an Individual record +
-sub parse_individual { +
-   my $itag = $_[0]; +
- +
-   # Format of Individual record (brackets denote optional entries) +
-   #   0 @XREF@ INDI +
-   # { 1 NAME }   ->    %name; +
-   # 1 SEX ]    ->    %sex; +
-   # { 1 BIRT }   ->    %birth; +
-   # { 1 DEAT }   ->    %death; +
-   # { 1 BURI }   ->    %burial; +
-   # { 1 CHR|CREM|ADOP|BAPM|BARM|BASM|BLES|CHRA|CONF|FCOM| +
-   #     ORDN|NATU|EMIG|IMMI|CENS|PROB|WILL|GRAD|RETI|EVEN } +
-   # { 1 CAST|DSCR|EDUC|IDNO|NATI|NCHI|NMR|OCCU|PROP|RELI|RESI|SSN|TITL } +
-   # { 1 BAPL|CONL|ENDL|SLGC } +
-   # { 1 FAMC }   ->    &familyup; +
-   # { 1 FAMS }   ->    %familydown; +
-   # { 1 SUBM } +
-   # { 1 ASSO } +
-   # { 1 ALIA } +
-   # { 1 ANCI } +
-   # { 1 DESI } +
-   # { 1 SOUR } +
-   # { 1 OBJE } +
-   # { 1 NOTE }   ->    %inote; +
-   # [ 1 RFN ] +
-   # [ 1 AFN ] +
-   # { 1 REFN } +
-   # [ 1 RIN ] +
-   # [ 1 CHAN ] +
- +
-   # Read the first sub-line +
-   read_line()+
- +
-   while ($currlevel > 0) { +
- +
-      my $work; +
- +
-      # Record this person's name +
-      # (If there are multiple entries of this type, use only the first) +
-      if ($currline =~ /^NAME\s+/) { +
-         if ($name{$itag} eq "") { +
-            $name{$itag} = "$'"; # Save the full name +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $nnote{$itag} = $work; +
-         } +
-         next; +
-      } +
- +
-      # Record this person's gender +
-      if ($currline =~ /^SEX\s+/) { +
-         if ($sex{$itag} eq "") { +
-            $sex{$itag} = uc substr("$'", 0, 1); +
-         } +
-      } +
- +
-      # Record this person's birth date/place +
-      if ($currline =~ /^BIRT/) { +
-         if ($birth{$itag} eq "") { +
-            $birth{$itag} = read_event(); +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $bnote{$itag} .= $work; +
-         } +
-         next; +
-      } +
- +
-      # Record this person's death date/place +
-      if ($currline =~ /^DEAT/) { +
-         if ($death{$itag} eq "") { +
-            $death{$itag} = read_event(); +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $dnote{$itag} .= $work; +
-         } +
-         next; +
-      } +
- +
-      # Record this person's burial date/place +
-      if ($currline =~ /^BURI/) { +
-         if ($burial{$itag} eq "") { +
-            $burial{$itag} = read_event(); +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $burnote{$itag} .= $work; +
-         } +
-         next; +
-      } +
- +
-      # Record the family in which this person is a child +
-      # (There may be multiple entries of this type) +
-      if ($currline =~ /^FAMC\s+\@(.*?)\@/) { +
-         if ($familyup{$itag} eq "") { +
-            $familyup{$itag} = $1; +
-         } else { +
-            $familyup{$itag} .= ",$1"; +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $funote{$itag} = $work; +
-         } +
-         next; +
-      } +
- +
-      # Record the family in which this person is a spouse +
-      # (There may be multiple entries of this type) +
-      if ($currline =~ /^FAMS\s+\@(.*?)\@/) { +
-         if ($familydown{$itag} eq "") { +
-            $familydown{$itag} = $1; +
-         } else { +
-            $familydown{$itag} .= ",$1"; +
-         } +
-         $work = read_note(); +
-         if ($work ne "") { +
-            $fdnote{$itag} = $work; +
-         } +
-         next; +
-      } +
- +
-      # Record any notes associated with this person +
-      if ($currline =~ /^NOTE\s+/) { +
-         $inote{$itag} .= "$'"; +
-      } +
- +
-      # If we get here, we need to ignore the remaining lines of this sub-record +
-      parse_ignore(); +
- +
-   } +
- +
-+
- +
-# Parse a Note record +
-sub parse_note { +
-   # Save the note's data +
-   $note{$_[0]} = $_[1]; +
- +
-   # Format of Note record +
-   #   0 @XREF@ NOTE +
-   # { 1 SOUR } +
-   # { 1 REFN } +
-   # [ 1 RIN ] +
-   # [ 1 CHAN ] +
- +
-   # And ignore all sub-records +
-   parse_ignore(); +
-+
- +
-# Read lines until we find a level the same or lower than the current line +
-# Examples: +
-# If the current line is "0 HEAD", it will read until it finds another "0" line. +
-# if the current line is "1 NAME", it will read until it finds a "1" or a "0" line. +
-# In either case, the "current" line will be the line AFTER the ignored block +
-sub parse_ignore { +
-   my $thislevel = $currlevel; +
-   while (1) { +
-      read_line(); +
-      if ($currlevel <= $thislevel) { +
-         last; +
-      } +
-   } +
-+
- +
-# Read and parse an "Event Detail" entry +
-sub read_event { +
-   my $thislevel = $currlevel; +
-   my $edate; +
-   my $eplace; +
-   while (1) { +
-      read_line(); +
-      if ($currlevel <= $thislevel) { +
-         last; +
-      } +
-      if ($currline =~ /^DATE\s+/) { +
-         $edate = "$'"; +
-      } +
-      if ($currline =~ /^PLAC\s+/) { +
-         $eplace = "$'"; +
-      } +
-   } +
-   if ($eplace ne "" && $edate ne "") { +
-      "$edate, $eplace"; +
-   } else { +
-      if ($eplace ne "") { +
-         $eplace; +
-      } else { +
-         $edate; +
-      } +
-   } +
-+
- +
-# Read the sub-records, looking for a NOTE entry +
-sub read_note { +
-   my $thislevel = $currlevel; +
-   my $thisnote; +
-   while (1) { +
-      read_line(); +
-      if ($currlevel <= $thislevel) { +
-         last; +
-      } +
-      if ($currline =~ /^NOTE\s+/) { +
-         $thisnote .= "$'"; +
-      } +
-   } +
-   $thisnote; +
-+
- +
-# Output a single family +
-sub output_family { +
-   my $ftag = $_[0]; +
- +
-   print "\nFamily $ftag\n"; +
-   print "Husband: $name{$husband{$ftag}}\n"; +
-   print "   Wife: $name{$wife{$ftag}}\n"; +
- +
-   my @children = split(/\,/, $children{$ftag}); +
-   foreach my $ctag (@children) { +
-      print "  Child: $name{$ctag}\n"; +
-   } +
-+
-</code>+
  
ged2wiki.1195945427.txt.gz · Last modified: 2007/11/24 17:03 by jims