library(move2)
library(dplyr)
library(units)
library(sf)
Download example data and select columns to reduce printing.
# Download Movebank study 2911040, requesting only the listed event
# attributes, and keep three track-level columns so the printed track data
# stays compact.
galapagos_albatrosses <- movebank_download_study(2911040,
  attributes = c(
    "ground_speed",
    "heading",
    "height_above_ellipsoid",
    "eobs_temperature",
    "individual_local_identifier"
  )
) %>%
  select_track_data(study_site, weight, animal_life_stage)
# Print only the records that carry a location (drop empty geometries).
galapagos_albatrosses %>%
  filter(!st_is_empty(.))
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 28 tracks lasting on average 3201735 secs in a
#> Simple feature collection with 16028 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 16,028 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 16,023 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
First location each 6 hour window
# Drop records without a location, then thin each track to one record per
# 6 hour window (the default criterion, described above as the first
# location in each window).
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mt_filter_per_interval(unit = "6 hours")
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 28 tracks lasting on average 3193638 secs in a
#> Simple feature collection with 4109 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3296 ymin: -12.79464 xmax: -77.52837 ymax: 0.1814998
#> Geodetic CRS: WGS 84
#> # A tibble: 4,109 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0.2 9.83 24.8 18
#> 3 0.32 334. 14.8 15
#> 4 0.08 330. 10.4 11
#> 5 0.1 10.5 8.6 12
#> # ℹ 4,104 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
Random location each day
# Drop records without a location, then keep one randomly chosen record per
# day; the result differs between runs unless a seed is set.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mt_filter_per_interval(criterion = "random", unit = "days")
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 28 tracks lasting on average 3171189 secs in a
#> Simple feature collection with 1057 features and 6 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.29471 ymin: -12.35231 xmax: -77.51874 ymax: -0.1058634
#> Geodetic CRS: WGS 84
#> # A tibble: 1,057 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.24 37.4 19 22
#> 2 0 34.7 26.9 33
#> 3 10.1 94.4 1.1 17
#> 4 9.59 112. 7.9 11
#> 5 0.25 37.4 -16.9 16
#> # ℹ 1,052 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
When dealing with trajectories, duplicated records occur frequently. There are many reasons these can appear, ranging from the way in which data is recorded to duplicated data transmissions and uploads. These data are often stored, but for analysis they need to be removed. A simple definition of a duplicate record would be an observation of the same individual at exactly the same time. However, many tracking devices record additional information such as acceleration. These records frequently have the same time as location records, meaning that not all records with duplicated timestamps can directly be deleted.
Duplicated records can be found in the following way:
# Group by timestamp and track, keep the groups that contain more than one
# record, and sort by time so duplicates are listed next to each other.
galapagos_albatrosses %>%
  group_by(mt_time(), mt_track_id()) %>%
  filter(n() != 1) %>%
  arrange(mt_time())
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 28 tracks lasting on average 3172363 secs in a
#> Simple feature collection with 8092 features and 8 fields (with 4066 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.24518 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 8,092 × 9
#> # Groups: mt_time(), mt_track_id() [4,046]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.3 14.4 7.9 27
#> 2 NA NA NA NA
#> 3 0.55 330. 1.6 24
#> 4 NA NA NA NA
#> 5 0.15 53.1 11.4 27
#> # ℹ 8,087 more rows
#> # ℹ 5 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_time()` <dttm>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
If you are only interested in finding duplicated records where there is a location, this can be done as follows (in this case there are none):
# Same duplicate search as before, but restricted to records that have a
# location: empty geometries are removed before grouping.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  group_by(mt_time(), mt_track_id()) %>%
  filter(n() != 1) %>%
  arrange(mt_time())
#> Warning in mean.default(do.call(c, lapply(lapply(split(mt_time(x),
#> mt_track_id(x), : argument is not numeric or logical: returning NA
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 0 tracks lasting on average NA in a
#> Simple feature collection with 0 features and 8 fields
#> Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
#> Geodetic CRS: WGS 84
#> # A tibble: 0 × 9
#> # Groups: mt_time(), mt_track_id() [0]
#> # ℹ 9 variables: ground_speed [m/s], heading [°], height_above_ellipsoid [m],
#> # eobs_temperature [°C], individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <GEOMETRY [°]>, mt_time() <dttm>, mt_track_id() <fct>
#> Track features:
#> # A tibble: 0 × 4
#> # ℹ 4 variables: study_site <chr>, weight [g], animal_life_stage <fct>,
#> # individual_local_identifier <fct>
The package also has some built-in functions for filtering unique records. Several strategies for omitting duplicated records are built in.
First it is possible to omit all records that are a subset of other records, i.e. records that got added later with more information are retained. This happens with some tracking devices if data gets directly downloaded from the tag. As no information is lost this is the default strategy.
# Simulate two short tracks and repeat the first four rows twice, so every
# record has a duplicate in time and track.
simulated_data <- mt_sim_brownian_motion(1:2)[rep(1:4, 2), ]
# Add a column that differs between some duplicates: rows 1-2 repeat
# identically, while rows 3-4 (3, NA) reappear with other values (7, 8).
simulated_data$temperature <- c(1:3, NA, 1:2, 7:8)
simulated_data
#> A <move2> with `track_id_column` "track" and `time_column` "time"
#> Containing 2 tracks lasting on average 1 in a
#> Simple feature collection with 8 features and 3 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 0 ymin: -1.781979 xmax: 1.132471 ymax: 1.087483
#> CRS: NA
#> First 5 features:
#> time track geometry temperature
#> 1 1 1 POINT (0 0) 1
#> 2 2 1 POINT (1.132471 1.087483) 2
#> 3 1 2 POINT (0 0) 3
#> 4 2 2 POINT (0.1095682 -1.781979) NA
#> 1.1 1 1 POINT (0 0) 1
#> Track features:
#> track
#> 1 1
#> 2 2
# Default strategy: drop records that are a subset of another record.
simulated_data %>% mt_filter_unique()
#> Warning: After removing all records that are subsets of other records there are
#> still remaining duplicates.
#> A <move2> with `track_id_column` "track" and `time_column` "time"
#> Containing 2 tracks lasting on average 1 in a
#> Simple feature collection with 5 features and 3 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 0 ymin: -1.781979 xmax: 1.132471 ymax: 1.087483
#> CRS: NA
#> time track geometry temperature
#> 1 1 1 POINT (0 0) 1
#> 2 2 1 POINT (1.132471 1.087483) 2
#> 3 1 2 POINT (0 0) 3
#> 3.1 1 2 POINT (0 0) 7
#> 4.1 2 2 POINT (0.1095682 -1.781979) 8
#> Track features:
#> track
#> 1 1
#> 2 2
This strategy, however, does not guarantee that no duplicates are left, as two records might not be subsets of each other.
An alternative is to take a random record from each set of duplicates; this is not advised for formal analysis but might help for a quick inspection of the data. This is also a lot quicker than inspecting subsets. However, care needs to be taken, as the example below, for example, results in empty points being retained at the cost of informative locations.
# "sample" strategy: keep one randomly chosen record from each set of
# duplicates (may keep an empty location instead of an informative one).
galapagos_albatrosses %>% mt_filter_unique("sample")
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 28 tracks lasting on average 3206656 secs in a
#> Simple feature collection with 110883 features and 6 fields (with 96907 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.52837 ymax: 0.1814998
#> Geodetic CRS: WGS 84
#> # A tibble: 110,883 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 110,878 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 28 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 23 more rows
# Minimal number (n) of locations: keep only tracks with more than 500
# records. n() counts all rows of a track, including those with an empty
# location.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(n() > 500)
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 20 tracks lasting on average 50.9 days in a
#> Simple feature collection with 112639 features and 7 fields (with 96941 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 112,639 × 8
#> # Groups: mt_track_id() [20]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 112,634 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 20 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 15 more rows
# Keep only tracks whose time span (last minus first timestamp) exceeds one
# week; as_units() converts the difftime so it can be compared to a units
# value.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(as_units(diff(range(mt_time()))) > set_units(1, "week"))
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 19 tracks lasting on average 53.3 days in a
#> Simple feature collection with 111971 features and 7 fields (with 96369 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 111,971 × 8
#> # Groups: mt_track_id() [19]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 111,966 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 19 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 14 more rows
# Rectangular foraging area, defined as a bounding box in
# longitude/latitude (EPSG:4326) and converted to a polygon geometry.
foraging_area <- st_as_sfc(st_bbox(c(
  xmin = -82, xmax = -77,
  ymax = -0.5, ymin = -13
), crs = 4326))
library(ggplot2, quietly = TRUE)
# Map the coastline, the foraging area (red) and all non-empty locations
# coloured by individual, in an azimuthal equidistant projection with
# kilometre units centred on 83W, 6S.
ggplot() +
  geom_sf(data = rnaturalearth::ne_coastline(scale = 50, returnclass = "sf")) +
  theme_linedraw() +
  geom_sf(data = foraging_area, fill = "red", alpha = .3, color = "red") +
  geom_sf(
    data = galapagos_albatrosses %>% filter(!st_is_empty(.)),
    aes(color = `individual_local_identifier`)
  ) +
  coord_sf(
    crs = sf::st_crs("+proj=aeqd +lon_0=-83 +lat_0=-6 +units=km"),
    xlim = c(-1000, 600), ylim = c(-800, 700)
  )
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, will retire in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.
#> The sp package is now running under evolution status 2
#> (status 2 uses the sf package in place of rgdal)
# Filter to tracks making it at least once to the foraging area:
# a track is kept in full when any of its locations intersects the polygon.
galapagos_albatrosses %>%
  group_by(mt_track_id()) %>%
  filter(any(st_intersects(geometry, foraging_area, sparse = FALSE)))
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 15 tracks lasting on average 63.9 days in a
#> Simple feature collection with 106151 features and 7 fields (with 91303 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 106,151 × 8
#> # Groups: mt_track_id() [15]
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 106,146 more rows
#> # ℹ 4 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, `mt_track_id()` <fct>
#> First 5 track features:
#> # A tibble: 15 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Isla de la Plata 22 adult unbanded-151
#> 2 Isla de la Plata 22 adult unbanded-153
#> 3 Isla de la Plata 22 adult unbanded-154
#> 4 Isla de la Plata 22 adult unbanded-156
#> 5 Isla de la Plata 22 adult unbanded-159
#> # ℹ 10 more rows
# Select tracks by a track-level attribute: keep those whose study_site is
# "Punta Suarez".
galapagos_albatrosses %>%
  filter_track_data(study_site == "Punta Suarez")
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 12 tracks lasting on average 2455668 secs in a
#> Simple feature collection with 38072 features and 6 fields (with 32699 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -9.087225 xmax: -78.65155 ymax: -0.6481274
#> Geodetic CRS: WGS 84
#> # A tibble: 38,072 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> * [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 38,067 more rows
#> # ℹ 3 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 12 × 4
#> study_site weight animal_life_stage individual_local_identifier
#> <chr> [g] <fct> <fct>
#> 1 Punta Suarez 22 adult 4262-84830876
#> 2 Punta Suarez 22 adult 4270-84831217
#> 3 Punta Suarez 22 adult 4261-2228
#> 4 Punta Suarez 22 adult 4264-84830852
#> 5 Punta Suarez 22 adult 4266-84831108
#> # ℹ 7 more rows
# Split tracks wherever consecutive locations are more than 4 hours apart.
# next_new_track flags a record when the lag to the next record exceeds
# 4 hours or is NA (presumably the last record of a track).
# Lagging that flag by one row and taking the cumulative sum gives an index
# that increases at the first record after each flagged one; that index
# becomes the new track id.
galapagos_albatrosses %>%
  filter(!st_is_empty(.)) %>%
  mutate(
    next_new_track = mt_time_lags(.) > set_units(4, "h") |
      is.na(mt_time_lags(.)),
    track_index = cumsum(lag(next_new_track, default = FALSE))
  ) %>%
  mt_set_track_id("track_index")
#> A <move2> with `track_id_column` "track_index" and `time_column` "timestamp"
#> Containing 81 tracks lasting on average 1073502 secs in a
#> Simple feature collection with 16028 features and 8 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 16,028 × 9
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 16,023 more rows
#> # ℹ 5 more variables: individual_local_identifier <fct>, timestamp <dttm>,
#> # geometry <POINT [°]>, next_new_track <lgl>, track_index <int>
#> First 5 track features:
#> # A tibble: 81 × 4
#> track_index study_site weight animal_life_stage
#> <int> <chr> [g] <fct>
#> 1 51 Punta Cevallos 22 adult
#> 2 3 Punta Cevallos 22 adult
#> 3 4 Punta Cevallos 22 adult
#> 4 5 Punta Cevallos 22 adult
#> 5 6 Punta Cevallos 22 adult
#> # ℹ 76 more rows
library(lubridate, quietly = TRUE)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Split tracks per calendar month: the new track id is the current track id
# and the month name of each timestamp, joined with "_"
# (e.g. "1094-1094_June").
galapagos_albatrosses %>%
  mt_set_track_id(paste(
    mt_track_id(.),
    month.name[month(mt_time(.))],
    sep = "_"
  ))
#> A <move2> with `track_id_column` "individual_local_identifier" and
#> `time_column` "timestamp"
#> Containing 71 tracks lasting on average 1264021 secs in a
#> Simple feature collection with 114929 features and 6 fields (with 98901 geometries empty)
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -91.3732 ymin: -12.79464 xmax: -77.51874 ymax: 0.1821983
#> Geodetic CRS: WGS 84
#> # A tibble: 114,929 × 7
#> ground_speed heading height_above_ellipsoid eobs_temperature
#> [m/s] [°] [m] [°C]
#> 1 0.01 21.6 16.5 12
#> 2 0 95.7 12.6 19
#> 3 0.11 13.8 17.4 24
#> 4 0.2 9.83 24.8 18
#> 5 0.24 37.4 19 22
#> # ℹ 114,924 more rows
#> # ℹ 3 more variables: individual_local_identifier <chr>, timestamp <dttm>,
#> # geometry <POINT [°]>
#> First 5 track features:
#> # A tibble: 71 × 4
#> individual_local_identifier study_site weight animal_life_stage
#> <chr> <chr> [g] <fct>
#> 1 1094-1094_June Punta Cevallos 22 adult
#> 2 1103-1103_June Punta Cevallos 22 adult
#> 3 1103-1103_July Punta Cevallos 22 adult
#> 4 1163-1163_June Punta Cevallos 22 adult
#> 5 1163-1163_July Punta Cevallos 22 adult
#> # ℹ 66 more rows