Summarising tidyged objects

Introduction

The tidyged package comes with several functions for summarising and querying tidyged objects. The following object is used to illustrate the functionality:

library(tidyged)

# Build the example object used throughout this article: a GEDCOM object with
# a submitter, a description and a copyright statement, followed by four
# individuals. Each add_indi_fact("bir", ...) step is placed directly after the
# add_indi() step for the person whose birth it records; the described records
# further down show each birth attached to the individual added just before it,
# so the order of the steps in this pipe should be kept as is.
simpsons <- gedcom(subm("Me"), gedcom_description = "Some of the Simpsons",
                   gedcom_copyright = "No copyright") |> 
  # Homer (reported as @I1@ in the output below)
  add_indi(qn = "Homer Simpson", sex = "M") |> 
  add_indi_fact("bir", date = date_calendar(1960, 5, 14),
                fact_place = place(name = "Springfield, USA")) |> 
  # Marge (@I2@)
  add_indi(qn = "Marge Simpson", sex = "F") |> 
  add_indi_fact("bir", date = date_calendar(1960, 8, 24),
                fact_place = place(name = "Springfield, USA")) |> 
  # Lisa (@I3@)
  add_indi(qn = "Lisa Simpson", sex = "F") |> 
  add_indi_fact("bir", date = date_calendar(1984, 12, 3),
                fact_place = place(name = "Springfield, USA")) |> 
  # Bart (@I4@)
  add_indi(qn = "Bart Simpson", sex = "M") |> 
  add_indi_fact("bir", date = date_calendar(1982, 3, 16),
                fact_place = place(name = "Springfield, USA"))
#> Added Male Individual: @I1@
#> Added Female Individual: @I2@
#> Added Female Individual: @I3@
#> Added Male Individual: @I4@

# Look up the xrefs of the individuals created above by matching on their
# names. The pattern "Bart|Lisa" picks up both children (both are listed in the
# family group described further down), hence find_indi_name_all().
homer_xref <- find_indi_name(simpsons, "Homer")
marge_xref <- find_indi_name(simpsons, "Marge")
bart_lisa_xref <- find_indi_name_all(simpsons, "Bart|Lisa")

# Link the individuals into a family group, then add two notes, two multimedia
# records, a repository and two sources, so that every record type counted by
# str() below has at least one record.
simpsons <- simpsons |>
  add_famg(husband = homer_xref, wife = marge_xref, 
           children = bart_lisa_xref, 
           # NOTE(review): only two children are linked above, but 3 is given
           # here and df_famg() below reports num_children = 3, while
           # df_indi() reports 2 for each parent. Presumably deliberate (a
           # child without a record) -- confirm.
           number_of_children = 3) |> 
  add_note("This is a note") |> 
  add_note("This is another note") |> 
  add_media("123", "PDF") |> 
  add_media("456", "PDF") |>
  add_repo("A library") |> 
  add_sour(originator = "J.R. Hartley", title = "A History of the Simpsons") |>
  add_sour(originator = "Prof. J Frink", title = "The screaming and the pain and the hoyvenglaven")
#> Added Family Group: @F1@
#> Added Note: @N1@
#> Added Note: @N2@
#> Added Multimedia: @M1@
#> Added Multimedia: @M2@
#> Added Repository: @R1@
#> Added Source: @S1@
#> Added Source: @S2@

Global summaries

The summary() and str() functions give a high-level overview of tidyged objects:

summary(simpsons)
#> GEDCOM file summary: 
#>  
#>  Submitter:               Me 
#>  Description:             Some of the Simpsons 
#>  Language:                English 
#>  Character set:           UTF-8 
#>  
#>  Copyright:               No copyright 
#>  
#>  Source system:           gedcompendium 
#>  Source system version:    
#>  Product name:            The 'gedcompendium' ecosystem of packages for the R language 
#>  Product source:          Jamie Lendrum
str(simpsons)
#> GEDCOM version 5.5.5 (LINEAGE-LINKED)
#> 
#> Individuals:         4
#> Families:            1
#> Submitters:          1
#> Multimedia objects:  2
#> Notes:               2
#> Sources:             2
#> Repositories:        1

Record type summaries

The num_*() family of functions count the number of records of a particular type:

num_indi(simpsons)
#> [1] 4
num_famg(simpsons)
#> [1] 1
num_media(simpsons)
#> [1] 2

The xrefs_*() family of functions return the xrefs of records of a particular type:

xrefs_indi(simpsons)
#> [1] "@I1@" "@I2@" "@I3@" "@I4@"
xrefs_note(simpsons)
#> [1] "@N1@" "@N2@"
xrefs_sour(simpsons)
#> [1] "@S1@" "@S2@"

These functions are the workhorses behind the str() function.

The df_*() family of functions provide a tabular summary of all records of a particular type:

df_indi(simpsons) |> knitr::kable()
xref name sex date_of_birth place_of_birth date_of_death place_of_death mother father num_siblings num_children last_modified
@I1@ Homer Simpson M 14 MAY 1960 Springfield, USA 2 22 NOV 2024
@I2@ Marge Simpson F 24 AUG 1960 Springfield, USA 2 22 NOV 2024
@I3@ Lisa Simpson F 3 DEC 1984 Springfield, USA Marge Simpson Homer Simpson 2 0 22 NOV 2024
@I4@ Bart Simpson M 16 MAR 1982 Springfield, USA Marge Simpson Homer Simpson 2 0 22 NOV 2024
df_famg(simpsons) |> knitr::kable()
xref husband wife relationship_type relationship_date relationship_place num_children last_modified
@F1@ Homer Simpson Marge Simpson 3 22 NOV 2024
df_media(simpsons) |> knitr::kable()
xref file_ref file_title file_format source_media last_modified
@M1@ 123 PDF 22 NOV 2024
@M2@ 456 PDF 22 NOV 2024

Individual record summaries

The is_*() family of functions determine whether an xref is a record of a particular type:

is_indi(simpsons, "@I1@")
#> [1] TRUE
is_famg(simpsons, "@F1@")
#> [1] TRUE
is_repo(simpsons, "@N1@")
#> [1] FALSE

The describe_*() family of functions give a description of certain records:

describe_indi(simpsons, find_indi_name(simpsons, "Bart"))
#> [1] "Individual @I4@, Bart Simpson, child of Homer Simpson and Marge Simpson, born 16 MAR 1982 in Springfield, USA"
describe_indi(simpsons, find_indi_name(simpsons, "Lisa"), name_only = TRUE)
#> [1] "Lisa Simpson"
describe_indi(simpsons, find_indi_name(simpsons, "Homer"), short_desc = TRUE)
#> [1] "Individual @I1@, Homer Simpson"
describe_famg(simpsons, "@F1@")
#> [1] "Family @F1@, headed by Homer Simpson and Marge Simpson, and children: Bart Simpson, Lisa Simpson"

Whilst this family of functions can only operate on one record at a time, the describe_records() function can take a whole vector of xrefs, potentially of different types (header, trailer, and duplicate records are ignored by this function):

describe_records(simpsons, simpsons$record)
#>  [1] "Submitter @U1@, Me"                                                                                           
#>  [2] "Individual @I1@, Homer Simpson, born 14 MAY 1960 in Springfield, USA"                                         
#>  [3] "Individual @I2@, Marge Simpson, born 24 AUG 1960 in Springfield, USA"                                         
#>  [4] "Individual @I3@, Lisa Simpson, child of Homer Simpson and Marge Simpson, born 3 DEC 1984 in Springfield, USA" 
#>  [5] "Individual @I4@, Bart Simpson, child of Homer Simpson and Marge Simpson, born 16 MAR 1982 in Springfield, USA"
#>  [6] "Family @F1@, headed by Homer Simpson and Marge Simpson, and children: Bart Simpson, Lisa Simpson"             
#>  [7] "Note @N1@, with the following text: This is a note"                                                           
#>  [8] "Note @N2@, with the following text: This is another note"                                                     
#>  [9] "Multimedia @M1@, format PDF, with file reference 123"                                                         
#> [10] "Multimedia @M2@, format PDF, with file reference 456"                                                         
#> [11] "Repository @R1@, A library"                                                                                   
#> [12] "Source @S1@, titled A History of the Simpsons, by J.R. Hartley"                                               
#> [13] "Source @S2@, titled The screaming and the pain and the hoyvenglaven, by Prof. J Frink"

As with the record-specific functions, setting short_desc = TRUE gives a shorter description:

describe_records(simpsons, simpsons$record, short_desc = TRUE)
#>  [1] "Submitter @U1@, Me"                                                 
#>  [2] "Individual @I1@, Homer Simpson"                                     
#>  [3] "Individual @I2@, Marge Simpson"                                     
#>  [4] "Individual @I3@, Lisa Simpson"                                      
#>  [5] "Individual @I4@, Bart Simpson"                                      
#>  [6] "Family @F1@, headed by Homer Simpson and Marge Simpson"             
#>  [7] "Note @N1@, with the following text: This is a note..."              
#>  [8] "Note @N2@, with the following text: This is another note..."        
#>  [9] "Multimedia @M1@, with file reference 123"                           
#> [10] "Multimedia @M2@, with file reference 456"                           
#> [11] "Repository @R1@, A library"                                         
#> [12] "Source @S1@, titled A History of the Simpsons"                      
#> [13] "Source @S2@, titled The screaming and the pain and the hoyvenglaven"

Next article: Identifying relations >