Much of this can be done in any programming language
- This is just fairly simple XML manipulation
- Let’s say we rename a tier:
library(tidyverse)
library(xml2)
#' Rename a tier in an ELAN (.eaf) file
#'
#' Reads `filename`, changes the `TIER_ID` of the tier called `current_name`
#' to `new_name`, updates the `PARENT_REF` of every tier that pointed at the
#' old name, and writes the result to a new file whose name has `suffix`
#' inserted before the `.eaf` extension.
#'
#' @param filename Path to the input `.eaf` file.
#' @param current_name Existing `TIER_ID` to rename.
#' @param new_name Replacement `TIER_ID`.
#' @param suffix Text inserted before `.eaf` to form the output file name.
#'   With an empty suffix the output path equals the input path.
#' @return The output path, invisibly.
rename_tier <- function(filename, current_name, new_name, suffix) {
  doc <- read_xml(filename)

  # Compare attribute values in R instead of interpolating the name into an
  # XPath string, so names containing quotes cannot break the query.
  tiers <- xml_find_all(doc, "//TIER")
  renamed <- tiers[xml_attr(tiers, "TIER_ID") %in% current_name]
  if (length(renamed) == 0) {
    stop(
      "No tier with TIER_ID '", current_name, "' found in ", filename,
      call. = FALSE
    )
  }
  # Dependent tiers refer to their parent by TIER_ID; select them before the
  # rename so they can be pointed at the new name as well.
  children <- tiers[xml_attr(tiers, "PARENT_REF") %in% current_name]

  # xml2 nodes are references into `doc`, so these calls modify it in place.
  xml_set_attr(renamed, "TIER_ID", new_name)
  if (length(children) > 0) {
    xml_set_attr(children, "PARENT_REF", new_name)
  }

  # Strip only a literal trailing ".eaf"; an unanchored '.eaf' pattern would
  # also match e.g. "leaf" earlier in the path.
  output_path <- paste0(sub("\\.eaf$", "", filename), suffix, ".eaf")
  write_xml(doc, output_path)
  invisible(output_path)
}
#' Rename a linguistic type in an ELAN (.eaf) file
#'
#' Reads `filename`, changes the `LINGUISTIC_TYPE_ID` of the type definition
#' called `current_name` to `new_name`, updates the `LINGUISTIC_TYPE_REF` of
#' every tier that uses that type, and writes the result to a new file whose
#' name has `suffix` inserted before the `.eaf` extension.
#'
#' @param filename Path to the input `.eaf` file.
#' @param current_name Existing `LINGUISTIC_TYPE_ID` to rename.
#' @param new_name Replacement `LINGUISTIC_TYPE_ID`.
#' @param suffix Text inserted before `.eaf` to form the output file name.
#'   With an empty suffix the output path equals the input path.
#' @return The output path, invisibly.
rename_type <- function(filename, current_name, new_name, suffix) {
  doc <- read_xml(filename)

  # Look the definition up from the document itself (not relative to the
  # tiers), so a type that no tier currently uses is still found. Attribute
  # values are compared in R to avoid building XPath from user input.
  types <- xml_find_all(doc, "//LINGUISTIC_TYPE")
  definition <- types[xml_attr(types, "LINGUISTIC_TYPE_ID") %in% current_name]
  if (length(definition) == 0) {
    stop(
      "No linguistic type with LINGUISTIC_TYPE_ID '", current_name,
      "' found in ", filename,
      call. = FALSE
    )
  }

  tiers <- xml_find_all(doc, "//TIER")
  users <- tiers[xml_attr(tiers, "LINGUISTIC_TYPE_REF") %in% current_name]

  # The ID lives in two places: the definition and every tier referencing it.
  # xml2 nodes are references into `doc`, so these calls modify it in place.
  xml_set_attr(definition, "LINGUISTIC_TYPE_ID", new_name)
  if (length(users) > 0) {
    xml_set_attr(users, "LINGUISTIC_TYPE_REF", new_name)
  }

  # Strip only a literal trailing ".eaf"; an unanchored '.eaf' pattern would
  # also match e.g. "leaf" earlier in the path.
  output_path <- paste0(sub("\\.eaf$", "", filename), suffix, ".eaf")
  write_xml(doc, output_path)
  invisible(output_path)
}
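- Already one extra thing to remember: the type ID has to change both in the LINGUISTIC_TYPE definition and in every tier that refers to it!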
- However, things can be way more complicated!
reticulate
# Use reticulate to call the Python package pympi from R.
library(reticulate)
pympi <- import("pympi")

# Open one ELAN file through pympi's Eaf class.
eaf_path <- "/Users/niko/github/testcorpus/kpv_udo20120330SazinaJS-encounter.eaf"
elan_file <- pympi$Eaf(file_path = eaf_path)

# List the tier names, then fetch the annotations of one tier.
elan_file$get_tier_names()
## dict_keys(['ref@NTP-M-1986', 'ref@JSS-F-1988', 'orth@NTP-M-1986', 'word@NTP-M-1986', 'lemma@NTP-M-1986', 'pos@NTP-M-1986', 'orth@JSS-F-1988', 'word@JSS-F-1988', 'lemma@JSS-F-1988', 'pos@JSS-F-1988'])
elan_file$get_annotation_data_for_tier(id_tier = "orth@NTP-M-1986")
## [[1]]
## [[1]][[1]]
## [1] 170
##
## [[1]][[2]]
## [1] 3730
##
## [[1]][[3]]
## [1] "И эшшӧ ӧтик тор, мый тэнад, тэныд мам висьталіс интереснӧй юӧр,"
##
## [[1]][[4]]
## [1] "kpv_udo20120330SazinaJS-encounter-01"
##
##
## [[2]]
## [[2]][[1]]
## [1] 4380
##
## [[2]][[2]]
## [1] 9040
##
## [[2]][[3]]
## [1] "тэнад, шуам, тайӧ роч нывка и, коми нывка и роч морт йылысь…"
##
## [[2]][[4]]
## [1] "kpv_udo20120330SazinaJS-encounter-03"
##
##
## [[3]]
## [[3]][[1]]
## [1] 9080
##
## [[3]][[2]]
## [1] 9970
##
## [[3]][[3]]
## [1] "кытчӧ сійӧ вӧлі?"
##
## [[3]][[4]]
## [1] "kpv_udo20120330SazinaJS-encounter-05"
##
##
## [[4]]
## [[4]][[1]]
## [1] 83189
##
## [[4]][[2]]
## [1] 84237
##
## [[4]][[3]]
## [1] "Сідз и висьталі?"
##
## [[4]][[4]]
## [1] "kpv_udo20120330SazinaJS-encounter-30"
##
##
## [[5]]
## [[5]][[1]]
## [1] 85947
##
## [[5]][[2]]
## [1] 86212
##
## [[5]][[3]]
## [1] "a"
##
## [[5]][[4]]
## [1] "kpv_udo20120330SazinaJS-encounter-11111"