library(mestrado)
library(dplyr)

Annotations dataset

The function tidy_annotations() is just a wrapper around purrr::map_dfr() that stacks up all the rds files generated by wavesurfer::annotator_app() (see vignette("04-data-annotation")).

# Resolve the "annotations" folder inside the installed mestrado package.
# system.file() yields "" when the folder or the package cannot be found.
annotations_dir <- system.file(
  "annotations",
  package = "mestrado",
  mustWork = FALSE
)
annotations_dir
#> [1] "/tmp/Rtmp5PtfF0/temp_libpath2fc46d094fd/mestrado/annotations"

# Combine the annotation files found in `annotations_dir` into one data frame,
# then print a compact column-wise overview of the result.
annotations <- tidy_annotations(annotations_dir)
dplyr::glimpse(annotations)
#> Rows: 25
#> Columns: 5
#> $ audio_id  <chr> "Glaucidium-minutissimum-24426.wav", "Glaucidium-minutissim…
#> $ region_id <chr> "wavesurfer_75b415rrqd8", "wavesurfer_4nt181nsvl", "wavesur…
#> $ start     <dbl> 0.185169817, 4.258905794, 8.554845551, 13.924770247, 19.590…
#> $ end       <dbl> 1.407291, 5.555095, 10.184340, 15.369095, 21.257495, 26.294…
#> $ label     <chr> "Glaucidium-minutissimum", "Glaucidium-minutissimum", "Glau…
# Render the first rows of the annotations as a table.
knitr::kable(head(annotations))
audio_id region_id start end label
Glaucidium-minutissimum-24426.wav wavesurfer_75b415rrqd8 0.1851698 1.407291 Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav wavesurfer_4nt181nsvl 4.2589058 5.555095 Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav wavesurfer_ootdh1hriqg 8.5548456 10.184340 Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav wavesurfer_ar4jis448p 13.9247702 15.369095 Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav wavesurfer_7i7a77cqqsg 19.5909667 21.257495 Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav wavesurfer_oubnm1mie9 24.5535178 26.294114 Glaucidium-minutissimum

Retrieving the labels for the slices

By merging the slices of the wave files with the labels annotated in the previous step, we construct the final dataset with the responses/targets ready for modelling. The function label_slices() takes the directory of slices and the annotations dataset as inputs. The output is a mapping between each slice and its label.

# Folder of 1000 ms audio slices shipped with the mestrado package.
slices_dir <- system.file("wav_sample_slices_1000ms", package = "mestrado")

# Match every slice against the annotated regions to obtain one label per
# slice; `pattern` is passed through to label_slices() unchanged.
slices_1000ms_labels <- label_slices(
  slices_dir, annotations,
  pattern = "Glaucidium|Megascops-atricapilla"
)
dplyr::glimpse(slices_1000ms_labels)
#> Rows: 42
#> Columns: 3
#> $ audio_id <chr> "Glaucidium-minutissimum-24426.wav", "Glaucidium-minutissimu…
#> $ slice_id <chr> "Glaucidium-minutissimum-24426@0@1@.wav", "Glaucidium-minuti…
#> $ label    <chr> "Glaucidium-minutissimum", "Glaucidium-minutissimum", "Glauc…
# Render the first rows of the slice/label map as a table.
knitr::kable(head(slices_1000ms_labels))
audio_id slice_id label
Glaucidium-minutissimum-24426.wav @.wav Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav @.wav Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav @.wav Glaucidium-minutissimum
Glaucidium-minutissimum-24426.wav @.wav unknown
Glaucidium-minutissimum-24426.wav @.wav unknown
Glaucidium-minutissimum-24426.wav @.wav Glaucidium-minutissimum
# Persist the slice-to-label map on disk so later steps can reload it.
saveRDS(
  object = slices_1000ms_labels,
  file = "../data_/slices_1000ms_labels_by_humans.rds"
)