ged2wiki
Differences
This shows you the differences between two versions of the page.
Both sides previous revision · Previous revision · Next revision | Previous revision | ||
ged2wiki [2007/11/24 17:03] – jims | ged2wiki [2013/01/06 15:48] (current) – jims | ||
---|---|---|---|
Line 3: | Line 3: | ||
==== Ged2Wiki ==== | ==== Ged2Wiki ==== | ||
- | Ged2Wiki is a project to convert a GEDCOM into | + | Ged2Wiki is a project |
[[http:// | [[http:// | ||
added to this (or any other | added to this (or any other | ||
Line 13: | Line 13: | ||
from the ground up was easier (for me). | from the ground up was easier (for me). | ||
- | As of 24 Nov 2007, I have written only the parser. | + | The program |
- | file and stores all the relevant data in hashes. | + | then spits out a wiki page for each family |
- | about each family | + | families of their own). |
- | separate file. | + | |
- | Here's what it looks like so far: | + | It makes an effort to mark living people as " |
+ | not show up. However, be aware that this feature is not 100% reliable! | ||
- | < | + | Finally, it creates an Index file for all non-Private names it finds. |
- | # | + | |
- | # Ged2Wiki.pl - Perl program to convert a GEDCOM | + | |
- | # | + | |
- | # The input format is based on "The GEDCOM Standard" | + | |
- | # by the Family History Department of The Church of Jesus Christ of Latter-day | + | |
- | # Saints, 2 January 1996. | + | |
- | # | + | |
- | # The output format is a series of plain-text files based on DokuWiki | + | |
- | # | + | |
- | # For questions, comments, bug reports, etc., please visit this site: | + | |
- | # http://s560.com/ | + | |
- | # | + | |
- | # The globals for the GEDCOM input lines | + | Modification History: |
- | my $currline; | + | * 01 Feb 2009 -- Initial upload (very buggy and mostly useless) |
- | my $nextline; | + | * 30 May 2010 -- Fixed most bugs, but still had major problems marking people as " |
- | my $currlevel; | + | * 06 Jan 2013 -- Added Index feature and enhanced Privacy feature |
- | my $nextlevel; | + | |
- | # The globals for the decoded data about individuals | + | The latest version of the program can be found here: {{: |
- | # (Index = Individual Xref tag) | + | |
- | my %name; | + | |
- | my %sex; # ' | + | |
- | my %birth; | + | |
- | my %death; | + | |
- | my %burial; | + | |
- | my %familydown; | + | |
- | my %familyup; | + | |
- | my %inote; | + | |
- | my %private; | + | |
- | # most of the data above can have an associated note | + | An example |
- | my %nnote; | + | |
- | my %bnote; | + | |
- | my %dnote; | + | |
- | my %burnote; | + | |
- | my %fdnote; | + | |
- | my %funote; | + | |
- | + | ||
- | # The globals for the decoded data about families | + | |
- | # (Index = Family Xref tag) | + | |
- | my %familyflag; | + | |
- | my %husband; | + | |
- | my %wife; | + | |
- | my %marriage; | + | |
- | my %fnote; | + | |
- | my %children; | + | |
- | my %mnote; | + | |
- | + | ||
- | # The global for records other than Individuals and Families | + | |
- | my %note; | + | |
- | + | ||
- | # Prime the parser by reading the first line | + | |
- | fetch_line(); | + | |
- | + | ||
- | # " | + | |
- | read_line(); | + | |
- | + | ||
- | # The first line must be "0 HEAD" | + | |
- | if ($currline !~ /^HEAD/) { | + | |
- | print "Input does not appear to be a GEDCOM file\n"; | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # First line is good. Parse the rest of the header | + | |
- | parse_header(); | + | |
- | + | ||
- | # If it's a Submission record, parse it here | + | |
- | if ($currline =~ / | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Loop here, reading the " | + | |
- | while (1) { | + | |
- | + | ||
- | # First, check for the Trailer record | + | |
- | if ($currline =~ /^TRLR/) { | + | |
- | last; # We're done with the input - break out of this loop | + | |
- | } | + | |
- | + | ||
- | # Each Record must be in this format | + | |
- | if ($currline !~ / | + | |
- | print " | + | |
- | exit; | + | |
- | } | + | |
- | my $xref = $1; | + | |
- | my $tag = $2; | + | |
- | my $data = " | + | |
- | + | ||
- | # If it's a " | + | |
- | if ($tag eq " | + | |
- | parse_family($xref); | + | |
- | next; | + | |
- | } | + | |
- | + | ||
- | # If it's a " | + | |
- | if ($tag eq " | + | |
- | parse_individual($xref); | + | |
- | next; | + | |
- | } | + | |
- | + | ||
- | # If it's a " | + | |
- | if ($tag eq " | + | |
- | parse_note($xref, | + | |
- | next; | + | |
- | } | + | |
- | + | ||
- | # If it's an unknown record, it's an error | + | |
- | if ($tag !~ / | + | |
- | print " | + | |
- | exit; | + | |
- | } | + | |
- | + | ||
- | # Ignore this type of record | + | |
- | | + | |
- | } | + | |
- | + | ||
- | print " | + | |
- | + | ||
- | # Generate all the output here ... | + | |
- | foreach my $fxref (keys %familyflag) { | + | |
- | # Output data for family $fxref | + | |
- | | + | |
- | } | + | |
- | exit; | + | |
- | + | ||
- | # Read one line from the input file (and strip unwanted whitespace) | + | |
- | sub fetch_line { | + | |
- | | + | |
- | | + | |
- | | + | |
- | if ($nextline !~ / | + | |
- | print " | + | |
- | exit; | + | |
- | } | + | |
- | | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # One input line -> $currline (concatenating as needed) | + | |
- | sub read_line { | + | |
- | + | ||
- | | + | |
- | | + | |
- | + | ||
- | # Special case: Do not read beyond the Trailer | + | |
- | if ($currline =~ /^TRLR/) { | + | |
- | $nextline = ""; | + | |
- | $nextlevel = ""; | + | |
- | return; | + | |
- | } | + | |
- | + | ||
- | | + | |
- | + | ||
- | # Special case: A tag without any parameters - Add a single space | + | |
- | if ($currline =~ / | + | |
- | $currline .= " "; | + | |
- | } | + | |
- | + | ||
- | while ($nextlevel == $currlevel + 1 && $nextline =~ / | + | |
- | if ($1 eq " | + | |
- | | + | |
- | } else { | + | |
- | | + | |
- | } | + | |
- | fetch_line(); | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Parse the header | + | |
- | sub parse_header { | + | |
- | | + | |
- | + | ||
- | # Format of Header (brackets denote optional entries) | + | |
- | # | + | |
- | # | + | |
- | # [ 1 DEST ] | + | |
- | # [ 1 DATE ] | + | |
- | # | + | |
- | # [ 1 SUBN ] | + | |
- | # [ 1 FILE ] | + | |
- | # [ 1 COPR ] | + | |
- | # | + | |
- | # | + | |
- | # [ 1 LANG ] | + | |
- | # [ 1 PLAC ] | + | |
- | # [ 1 NOTE ] | + | |
- | + | ||
- | } | + | |
- | + | ||
- | # Parse the submission record | + | |
- | sub parse_subn { | + | |
- | | + | |
- | + | ||
- | # Format of Submission record (all sub-fields are optional) | + | |
- | # | + | |
- | # [ 1 SUBM ] | + | |
- | # [ 1 FAMF ] | + | |
- | # [ 1 TEMP ] | + | |
- | # [ 1 ANCE ] | + | |
- | # [ 1 DESC ] | + | |
- | # [ 1 ORDI ] | + | |
- | # [ 1 RIN ] | + | |
- | + | ||
- | } | + | |
- | + | ||
- | # Parse a Family record | + | |
- | sub parse_family { | + | |
- | my $ftag = $_[0]; | + | |
- | + | ||
- | # All families must have this marker entry | + | |
- | | + | |
- | + | ||
- | # Format of Family record (brackets denote optional entries) | + | |
- | # | + | |
- | # { 1 ANUL|CENS|DIV|DIVF|ENGA|MARB|MARC|MARL|MARS|EVEN } | + | |
- | # { 1 MARR } | + | |
- | # [ 1 HUSB ] -> % | + | |
- | # [ 1 WIFE ] -> % | + | |
- | # { 1 CHIL } | + | |
- | # [ 1 NCHI ] | + | |
- | # { 1 SUBM } | + | |
- | # [ 1 SLGS ] | + | |
- | # { 1 SOUR } | + | |
- | # { 1 OBJE } | + | |
- | # { 1 NOTE } | + | |
- | # { 1 REFN } | + | |
- | # [ 1 RIN ] | + | |
- | # [ 1 CHAN ] | + | |
- | + | ||
- | # Read the first sub-line | + | |
- | | + | |
- | + | ||
- | while ($currlevel > 0) { | + | |
- | + | ||
- | my $work; | + | |
- | + | ||
- | # Record this family' | + | |
- | # (If there are multiple entries of this type, use only the first) | + | |
- | if ($currline =~ /^HUSB\s+\@(.*?)\@/) { | + | |
- | if ($husband{$ftag} eq "" | + | |
- | $husband{$ftag} = $1; | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Record this family' | + | |
- | # (If there are multiple entries of this type, use only the first) | + | |
- | if ($currline =~ / | + | |
- | if ($wife{$ftag} eq "" | + | |
- | $wife{$ftag} = $1; | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Record this family' | + | |
- | # (The result is a comma-separated list of Individual tags) | + | |
- | if ($currline =~ / | + | |
- | if ($children{$ftag} eq "" | + | |
- | $children{$ftag} = $1; | + | |
- | } else { | + | |
- | $children{$ftag} .= ", | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Record this family' | + | |
- | if ($currline =~ /^MARR/) { | + | |
- | if ($marriage{$ftag} eq "" | + | |
- | $marriage{$ftag} = read_event(); | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $mnote{$ftag} .= $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record any notes associated with this family | + | |
- | if ($currline =~ /^NOTE\s+/) { | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # If we get here, we need to ignore the remaining lines of this sub-record | + | |
- | parse_ignore(); | + | |
- | + | ||
- | } | + | |
- | + | ||
- | } | + | |
- | + | ||
- | # Parse an Individual record | + | |
- | sub parse_individual { | + | |
- | my $itag = $_[0]; | + | |
- | + | ||
- | # Format of Individual record (brackets denote optional entries) | + | |
- | # | + | |
- | # { 1 NAME } | + | |
- | # | + | |
- | # { 1 BIRT } | + | |
- | # { 1 DEAT } | + | |
- | # { 1 BURI } | + | |
- | # { 1 CHR|CREM|ADOP|BAPM|BARM|BASM|BLES|CHRA|CONF|FCOM| | + | |
- | # | + | |
- | # { 1 CAST|DSCR|EDUC|IDNO|NATI|NCHI|NMR|OCCU|PROP|RELI|RESI|SSN|TITL } | + | |
- | # { 1 BAPL|CONL|ENDL|SLGC } | + | |
- | # { 1 FAMC } | + | |
- | # { 1 FAMS } | + | |
- | # { 1 SUBM } | + | |
- | # { 1 ASSO } | + | |
- | # { 1 ALIA } | + | |
- | # { 1 ANCI } | + | |
- | # { 1 DESI } | + | |
- | # { 1 SOUR } | + | |
- | # { 1 OBJE } | + | |
- | # { 1 NOTE } | + | |
- | # [ 1 RFN ] | + | |
- | # [ 1 AFN ] | + | |
- | # { 1 REFN } | + | |
- | # [ 1 RIN ] | + | |
- | # [ 1 CHAN ] | + | |
- | + | ||
- | # Read the first sub-line | + | |
- | | + | |
- | + | ||
- | while ($currlevel > 0) { | + | |
- | + | ||
- | my $work; | + | |
- | + | ||
- | # Record this person' | + | |
- | # (If there are multiple entries of this type, use only the first) | + | |
- | if ($currline =~ /^NAME\s+/) { | + | |
- | if ($name{$itag} eq "" | + | |
- | $name{$itag} = " | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $nnote{$itag} = $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record this person' | + | |
- | if ($currline =~ /^SEX\s+/) { | + | |
- | if ($sex{$itag} eq "" | + | |
- | $sex{$itag} = uc substr(" | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Record this person' | + | |
- | if ($currline =~ /^BIRT/) { | + | |
- | if ($birth{$itag} eq "" | + | |
- | $birth{$itag} = read_event(); | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $bnote{$itag} .= $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record this person' | + | |
- | if ($currline =~ /^DEAT/) { | + | |
- | if ($death{$itag} eq "" | + | |
- | $death{$itag} = read_event(); | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $dnote{$itag} .= $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record this person' | + | |
- | if ($currline =~ /^BURI/) { | + | |
- | if ($burial{$itag} eq "" | + | |
- | $burial{$itag} = read_event(); | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $burnote{$itag} .= $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record the family in which this person is a child | + | |
- | # (There may be multiple entries of this type) | + | |
- | if ($currline =~ / | + | |
- | if ($familyup{$itag} eq "" | + | |
- | $familyup{$itag} = $1; | + | |
- | } else { | + | |
- | $familyup{$itag} .= ", | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $funote{$itag} = $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record the family in which this person is a spouse | + | |
- | # (There may be multiple entries of this type) | + | |
- | if ($currline =~ / | + | |
- | if ($familydown{$itag} eq "" | + | |
- | $familydown{$itag} = $1; | + | |
- | } else { | + | |
- | $familydown{$itag} .= ", | + | |
- | } | + | |
- | $work = read_note(); | + | |
- | if ($work ne "" | + | |
- | $fdnote{$itag} = $work; | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Record any notes associated with this person | + | |
- | if ($currline =~ /^NOTE\s+/) { | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # If we get here, we need to ignore the remaining lines of this sub-record | + | |
- | parse_ignore(); | + | |
- | + | ||
- | } | + | |
- | + | ||
- | } | + | |
- | + | ||
- | # Parse a Note record | + | |
- | sub parse_note { | + | |
- | # Save the note's data | + | |
- | | + | |
- | + | ||
- | # Format of Note record | + | |
- | # | + | |
- | # { 1 SOUR } | + | |
- | # { 1 REFN } | + | |
- | # [ 1 RIN ] | + | |
- | # [ 1 CHAN ] | + | |
- | + | ||
- | # And ignore all sub-records | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Read lines until we find a level the same or lower than the current line | + | |
- | # Examples: | + | |
- | # If the current line is "0 HEAD", it will read until it finds another " | + | |
- | # if the current line is "1 NAME", it will read until it finds a " | + | |
- | # In either case, the " | + | |
- | sub parse_ignore { | + | |
- | my $thislevel = $currlevel; | + | |
- | while (1) { | + | |
- | read_line(); | + | |
- | if ($currlevel <= $thislevel) { | + | |
- | | + | |
- | } | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Read and parse an "Event Detail" | + | |
- | sub read_event { | + | |
- | my $thislevel = $currlevel; | + | |
- | my $edate; | + | |
- | my $eplace; | + | |
- | while (1) { | + | |
- | read_line(); | + | |
- | if ($currlevel <= $thislevel) { | + | |
- | | + | |
- | } | + | |
- | if ($currline =~ /^DATE\s+/) { | + | |
- | | + | |
- | } | + | |
- | if ($currline =~ /^PLAC\s+/) { | + | |
- | | + | |
- | } | + | |
- | } | + | |
- | if ($eplace ne "" | + | |
- | " | + | |
- | } else { | + | |
- | if ($eplace ne "" | + | |
- | | + | |
- | } else { | + | |
- | | + | |
- | } | + | |
- | } | + | |
- | } | + | |
- | + | ||
- | # Read the sub-records, | + | |
- | sub read_note { | + | |
- | my $thislevel = $currlevel; | + | |
- | my $thisnote; | + | |
- | while (1) { | + | |
- | read_line(); | + | |
- | if ($currlevel <= $thislevel) { | + | |
- | | + | |
- | } | + | |
- | if ($currline =~ /^NOTE\s+/) { | + | |
- | | + | |
- | } | + | |
- | } | + | |
- | | + | |
- | } | + | |
- | + | ||
- | # Output a single family | + | |
- | sub output_family { | + | |
- | my $ftag = $_[0]; | + | |
- | + | ||
- | print " | + | |
- | print " | + | |
- | print " | + | |
- | + | ||
- | my @children = split(/\,/, $children{$ftag}); | + | |
- | | + | |
- | print " | + | |
- | } | + | |
- | } | + | |
- | </ | + | |
ged2wiki.1195945427.txt.gz · Last modified: 2007/11/24 17:03 by jims