woody

Global process

Prepare data

Get `W` data about wood occurrences

wpath="../data-raw/mem_data/woody_data_train/"
mystation="V2942010"
fs::dir_tree(wpath)

## ../data-raw/mem_data/woody_data_train/
## ├── event_1
## │   ├── Wood Log 20071123 09h56m.txt
## │   ├── Wood Log 20071123 10h11m.txt
## │   ├── Wood Log 20071123 10h56m.txt
## │   ├── Wood Log 20071123 11h56m.txt
## │   └── Wood Log 20071123 12h56m.txt
## └── event_2
##     ├── Wood Log 20071123 13h56m.txt
##     ├── Wood Log 20071123 14h26m.txt
##     ├── Wood Log 20071123 14h56m.txt
##     ├── Wood Log 20071123 15h56m.txt
##     └── Wood Log 20071123 16h50m.txt

library(woody)

Import all wood log files inside a file (1 directory per event)

W=import_Wdata(wpath,site="training")

Get `Q` data about discharge

Import Q:

qpath="../data-raw/Qdata/Qdatc_Ain.csv"
Q=import_Qdata(path=qpath, site="Ain")

## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.

## Rows: 86528 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (2): site, station
## dbl  (4): Q, T_Q, S, rT_Q
## dttm (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

The first lines of Q look like this

station	Time	Q	site
V2942010	2001-01-01 00:00:00	0.1345238	Ain
V2942010	2001-01-01 03:00:00	0.1369048	Ain
V2942010	2001-01-01 04:48:00	0.1333333	Ain
V2942010	2001-01-01 05:20:00	0.1273810	Ain
V2942010	2001-01-01 06:06:00	0.1188095	Ain
V2942010	2001-01-01 07:18:00	0.1095238	Ain

Complete `Q` into `Qc`

Now complete Q with variables T_Q and S

result_file="../data-raw/results/Q.RDS"
if(!file.exists(result_file)){
  Qc=complete_Qdata(qtvar=Q,site="Ain")
  saveRDS(Qc,result_file)
}
Qc=readRDS(result_file)

The first lines of Qc look like this:

site	station	Time	Q	T_Q	S	rT_Q
Ain	V2942010	2001-01-01 00:00:00	0.1345238	0.000	NA	0.0000000
Ain	V2942010	2001-01-01 03:00:00	0.1369048	0.125	0.0000661	0.3535534
Ain	V2942010	2001-01-01 04:48:00	0.1333333	0.000	-0.0001653	0.0000000
Ain	V2942010	2001-01-01 05:20:00	0.1273810	0.000	-0.0009301	0.0000000
Ain	V2942010	2001-01-01 06:06:00	0.1188095	0.000	-0.0009317	0.0000000
Ain	V2942010	2001-01-01 07:18:00	0.1095238	0.000	-0.0006448	0.0000000

Complete `W` with `Q` and get `Wc`

Complete W with discharge data (interpolating Q)

Wc=complete_Wdata_with_Qdata(Wdata=W,
                             Qdata=Qc)

The first lines of Wc look like this:

site	event	sitevent	Time	Length	Date	Q	T_Q	S	rT_Q
training	event_1	training_event_1	2007-11-23 08:57:01	1.41	2007-11-23	0.4970903	135.0343	0.0025741	11.62042
training	event_1	training_event_1	2007-11-23 08:57:06	0.86	2007-11-23	0.4971250	135.0347	0.0025726	11.62044
training	event_1	training_event_1	2007-11-23 08:57:14	0.55	2007-11-23	0.4971806	135.0354	0.0025702	11.62047
training	event_1	training_event_1	2007-11-23 08:57:15	0.58	2007-11-23	0.4971875	135.0355	0.0025699	11.62048
training	event_1	training_event_1	2007-11-23 08:57:25	2.47	2007-11-23	0.4972569	135.0364	0.0025668	11.62051
training	event_1	training_event_1	2007-11-23 08:57:30	3.00	2007-11-23	0.4972917	135.0368	0.0025653	11.62053

Transform `Wc` into `Wwt`

Then calculate Wwt, where 1 row= 1 waiting time between two wood occurrences.

Wwt=Wdata_as_waiting_times(Wc)

The first lines of Wwt look like this:

site	event	sitevent	Time	Length	Date	Q	T_Q	S	rT_Q	TimeLagged	W	npieces	num	Y
training	event_1	training_event_1	2007-11-23 08:57:06	0.86	2007-11-23	0.4971250	135.0347	0.0025726	11.62044	2007-11-23 08:57:01	5	1	1	6.579251
training	event_1	training_event_1	2007-11-23 08:57:14	0.55	2007-11-23	0.4971806	135.0354	0.0025702	11.62047	2007-11-23 08:57:06	8	1	1	6.109248
training	event_1	training_event_1	2007-11-23 08:57:15	0.58	2007-11-23	0.4971875	135.0355	0.0025699	11.62048	2007-11-23 08:57:14	1	1	1	8.188689
training	event_1	training_event_1	2007-11-23 08:57:25	2.47	2007-11-23	0.4972569	135.0364	0.0025668	11.62051	2007-11-23 08:57:15	10	1	1	5.886104
training	event_1	training_event_1	2007-11-23 08:57:30	3.00	2007-11-23	0.4972917	135.0368	0.0025653	11.62053	2007-11-23 08:57:25	5	1	1	6.579251
training	event_1	training_event_1	2007-11-23 08:57:32	0.82	2007-11-23	0.4973056	135.0370	0.0025647	11.62054	2007-11-23 08:57:30	2	1	1	7.495542

Run random forest

myrf <- run_rf(Wwt)
myrf

## 
## Call:
##  randomForest(formula = Y ~ ., data = Wdata_rf, ntree = 1000) 
##                Type of random forest: regression
##                      Number of trees: 1000
## No. of variables tried at each split: 1
## 
##           Mean of squared residuals: 1.265999
##                     % Var explained: -15.74

Get predictions and assess random forest performance

On the training dataset itself

Predict on training data itself

Wp=predict_rf(obj_rf=myrf,
              newdata=Wwt)

R2:

calc_rf_R2(Wp)

## # A tibble: 1 × 3
##     SCR   SCT    R2
##   <dbl> <dbl> <dbl>
## 1  798. 2273. 0.649

On another dataset

wpathpred="../data-raw/mem_data/woody_data_predict/"

Wwt_test=import_Wdata(wpathpred) %>%
  complete_Wdata_with_Qdata(Qc) %>% 
  Wdata_as_waiting_times() 

Wp_test=predict_rf(Wwt_test,myrf)
calc_rf_R2(Wp_test)

## # A tibble: 1 × 3
##     SCR   SCT    R2
##   <dbl> <dbl> <dbl>
## 1  500.  154. -2.24

14 février 2022

Global process

Prepare data

Get W data about wood occurrences

Get Q data about discharge

Complete Q into Qc

Complete W with Q and get Wc

Transform Wc into Wwt