General Usage

Steffi LaZerte

2018-10-08

library(dplyr)
library(ggplot2)
library(weathercan)

Stations

weathercan includes a data frame called stations which lists available stations and their details (including station_id).

head(stations)
## # A tibble: 6 x 13
##   prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##   <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
## 1 AB    DAYSLAND     1795       301AR54    <NA>   <NA>   52.9 -112.  689. Etc/… day     
## 2 AB    DAYSLAND     1795       301AR54    <NA>   <NA>   52.9 -112.  689. Etc/… hour    
## 3 AB    DAYSLAND     1795       301AR54    <NA>   <NA>   52.9 -112.  689. Etc/… month   
## 4 AB    EDMONTON CO… 1796       301BK03    <NA>   <NA>   53.6 -114.  671. Etc/… day     
## 5 AB    EDMONTON CO… 1796       301BK03    <NA>   <NA>   53.6 -114.  671. Etc/… hour    
## 6 AB    EDMONTON CO… 1796       301BK03    <NA>   <NA>   53.6 -114.  671. Etc/… month   
## # ... with 2 more variables

You can look through this data frame directly, or you can use the stations_search function:

stations_search("Kamloops")
## # A tibble: 40 x 13
##    prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##    <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
##  1 BC    KAMLOOPS     1274       1163779    <NA>   <NA>   50.7 -120.  379. Etc/… day     
##  2 BC    KAMLOOPS     1274       1163779    <NA>   <NA>   50.7 -120.  379. Etc/… month   
##  3 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… day     
##  4 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… hour    
##  5 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… month   
##  6 BC    KAMLOOPS A   51423      1163781    71887  YKA    50.7 -120.  345. Etc/… day     
##  7 BC    KAMLOOPS A   51423      1163781    71887  YKA    50.7 -120.  345. Etc/… hour    
##  8 BC    KAMLOOPS AF… 1276       1163790    <NA>   <NA>   50.7 -120.  701  Etc/… day     
##  9 BC    KAMLOOPS AF… 1276       1163790    <NA>   <NA>   50.7 -120.  701  Etc/… month   
## 10 BC    KAMLOOPS AUT 42203      1163842    71741  ZKA    50.7 -120.  345  Etc/… day     
## # ... with 30 more rows, and 2 more variables

You can narrow down your search by specifying time intervals (options are “hour”, “day”, or “month”):

stations_search("Kamloops", interval = "hour")
## # A tibble: 3 x 13
##   prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##   <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
## 1 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… hour    
## 2 BC    KAMLOOPS A   51423      1163781    71887  YKA    50.7 -120.  345. Etc/… hour    
## 3 BC    KAMLOOPS AUT 42203      1163842    71741  ZKA    50.7 -120.  345  Etc/… hour    
## # ... with 2 more variables

You can specify more than one interval:

stations_search("Kamloops", interval = c("hour", "month"))
## # A tibble: 21 x 13
##    prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##    <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
##  1 BC    KAMLOOPS     1274       1163779    <NA>   <NA>   50.7 -120.  379. Etc/… month   
##  2 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… hour    
##  3 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… month   
##  4 BC    KAMLOOPS A   51423      1163781    71887  YKA    50.7 -120.  345. Etc/… hour    
##  5 BC    KAMLOOPS AF… 1276       1163790    <NA>   <NA>   50.7 -120.  701  Etc/… month   
##  6 BC    KAMLOOPS AUT 42203      1163842    71741  ZKA    50.7 -120.  345  Etc/… hour    
##  7 BC    KAMLOOPS AUT 42203      1163842    71741  ZKA    50.7 -120.  345  Etc/… month   
##  8 BC    KAMLOOPS CDA 1277       1163810    <NA>   <NA>   50.7 -120.  345  Etc/… month   
##  9 BC    KAMLOOPS CH… 1278       1163814    <NA>   <NA>   50.7 -121.  556. Etc/… month   
## 10 BC    KAMLOOPS CH… 1279       1163815    <NA>   <NA>   50.6 -121.  701  Etc/… month   
## # ... with 11 more rows, and 2 more variables

You can also search by proximity. These results include a new column distance specifying the distance in km from the coordinates:

stations_search(coords = c(50.667492, -120.329049), dist = 20, interval = "hour")
## # A tibble: 3 x 14
##   prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##   <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
## 1 BC    KAMLOOPS A   1275       1163780    71887  YKA    50.7 -120.  345. Etc/… hour    
## 2 BC    KAMLOOPS AUT 42203      1163842    71741  ZKA    50.7 -120.  345  Etc/… hour    
## 3 BC    KAMLOOPS A   51423      1163781    71887  YKA    50.7 -120.  345. Etc/… hour    
## # ... with 3 more variables

We can also perform more complex searches using the filter() function from the dplyr package:

BCstations <- stations %>%
  filter(prov %in% c("BC")) %>%
  filter(interval == "hour") %>%
  filter(lat > 49 & lat < 49.5) %>%
  filter(lon > -119 & lon < -116) %>%
  filter(start <= 2002) %>%
  filter(end >= 2016)
BCstations
## # A tibble: 3 x 13
##   prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz    interval
##   <fct> <chr>        <fct>      <fct>      <fct>  <fct> <dbl> <dbl> <dbl> <chr> <chr>   
## 1 BC    CRESTON CAM… 6838       114B1F0    71770  WJR    49.1 -116.  641. Etc/… hour    
## 2 BC    NELSON CS    6839       1145M29    71776  WNM    49.5 -117.  535. Etc/… hour    
## 3 BC    WARFIELD RCS 31067      1148705    71401  XWF    49.1 -118.  567. Etc/… hour    
## # ... with 2 more variables
## weather_dl() accepts numbers, so we can create a vector of station IDs to pass to weather_dl():
stn_vector <- BCstations$station_id 
stn_vector
## [1] 6838  6839  31067
## 8739 Levels: 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 ... 54698

To search the most up-to-date list of stations, download it first and pass it to stations_search():

s <- stations_dl() # Download complete stations list
stations_search("Saskatoon", stn = s) # Specify the new stations list to search

Weather

Once you have your station_id(s) you can download weather data:

kam <- weather_dl(station_ids = 51423, start = "2016-01-01", end = "2016-02-15")
                    
kam
## # A tibble: 1,104 x 35
##    station_name station_id station_operator prov    lat   lon  elev climate_id WMO_id
##  * <chr>             <dbl> <lgl>            <fct> <dbl> <dbl> <dbl> <chr>      <chr> 
##  1 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  2 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  3 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  4 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  5 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  6 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  7 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  8 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
##  9 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
## 10 KAMLOOPS A        51423 NA               BC     50.7 -120.  345. 1163781    71887 
## # ... with 1,094 more rows, and 26 more variables

You can also download data from multiple stations at once:

kam.pg <- weather_dl(station_ids = c(48248, 51423), start = "2016-01-01", end = "2016-02-15")
                    
kam.pg
## # A tibble: 2,208 x 35
##    station_name station_id station_operator prov    lat   lon  elev climate_id WMO_id
##  * <chr>             <dbl> <lgl>            <fct> <dbl> <dbl> <dbl> <chr>      <chr> 
##  1 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  2 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  3 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  4 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  5 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  6 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  7 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  8 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
##  9 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
## 10 PRINCE GEOR…      48248 NA               BC     53.9 -123.   680 1096453    71302 
## # ... with 2,198 more rows, and 26 more variables

And plot it:

ggplot(data = kam.pg, aes(x = time, y = temp, group = station_name, colour = station_name)) +
  theme(legend.position = "top") +
  geom_line() +
  theme_minimal()

Or you can use the vector created above:

stn_vec_df <- weather_dl(station_ids = stn_vector, start = "2016-01-01", end = "2016-02-15")

stn_vec_df
## # A tibble: 3,312 x 35
##    station_name station_id station_operator prov    lat   lon  elev climate_id WMO_id
##  * <chr>        <chr>      <lgl>            <fct> <dbl> <dbl> <dbl> <chr>      <chr> 
##  1 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  2 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  3 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  4 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  5 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  6 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  7 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  8 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
##  9 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
## 10 CRESTON CAM… 6838       NA               BC     49.1 -116.  641. 114B1F0    71770 
## # ... with 3,302 more rows, and 26 more variables

For more information on the data flags, see the Flags vignette. For more information on units and terms, see the Terms and Units vignette.