<a href="https://colab.research.google.com/github/zecojls/SoilSpec4GG_GoogleColab/blob/main/SoilSpec4GG_002_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial setup and packages

## Installing packages
NOTE: the packages must be reinstalled every session

In [6]:
# Install prospectr only when it is not already available in this session's
# library. A fresh session still gets the install, but re-running this cell
# no longer triggers a redundant download and build.
if (!requireNamespace("prospectr", quietly = TRUE)) {
  install.packages("prospectr")
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



## Loading packages

In [7]:
# Attach the packages used throughout the notebook. The order is kept as-is
# because the attach order determines which package masks which function.
library("tidyverse")  # provides %>%, filter(), distinct(), summarise(), ...
library("prospectr")

“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

prospectr version 0.2.2 -- 'flawil'

check the github repository at: http://github.com/l-ramirez-lopez/prospectr



# Loading the full OSSL dataset

Loading the full OSSL data from S3 bucket. It takes about 25 secs.

In [8]:
# Download the full OSSL table (an .rds file) from the S3 bucket.
# The connection is opened explicitly in binary read mode so that it can be
# closed afterwards: readRDS() does not close a connection it did not open.
ossl_con <- url(
  "http://s3.us-east-1.wasabisys.com/soilspectroscopy/ossl_import/rm.ossl_v1.rds",
  "rb"
)
# `finally` guarantees the connection is released even if the download or the
# deserialisation fails part-way through.
rm.ossl <- tryCatch(readRDS(ossl_con), finally = close(ossl_con))

# Rows x columns of the loaded table.
dim(rm.ossl)

## Checking object class and internal data

In [9]:
# Print the class, dimensions and a per-column type/value preview of the table.
utils::str(object = rm.ossl)

'data.frame':	152146 obs. of  2962 variables:
 $ id.layer_uuid_c                                             : chr  "bd934b2680a2478c8a8ce1913ae79172" "ff03863683a192953930bcb2a874b527" "1fe42f85d1cb818e99d190c80686063a" "8ea0ca98a9a8002ad7e713616d53ed86" ...
 $ id.layer_local_c                                            : chr  "icr006475" "icr006586" "icr007929" "icr008008" ...
 $ sample.doi_idf_c                                            : chr  "10.1016/j.geodrs.2015.06.002" "10.1016/j.geodrs.2015.06.002" "10.1016/j.geodrs.2015.06.002" "10.1016/j.geodrs.2015.06.002" ...
 $ sample.contact.name_utf8_txt                                : chr  "Keith Shepherd" "Keith Shepherd" "Keith Shepherd" "Keith Shepherd" ...
 $ sample.contact.email_ietf_email                             : chr  "afsis.info@africasoils.net" "afsis.info@africasoils.net" "afsis.info@africasoils.net" "afsis.info@africasoils.net" ...
 $ acid.tea_usda4b2_cmolkg                                     : num  NA NA NA NA NA NA 

## Checking available SSLs

In [10]:
# List every dataset code present in the OSSL table exactly once.
distinct(rm.ossl, dataset.code_ascii_c)

dataset.code_ascii_c
<chr>
AFSIS1.SSL
AFSIS2.SSL
CAF.SSL
KSSL.SSL
ICRAF.ISRIC
LUCAS.SSL
NEON.SSL


## Checking the number of observations
SOC

In [17]:
# Per dataset, count the rows that have both an oc_usda.calc_wpct value and a
# scan_mir.600_abs value, largest count first. Datasets with no such rows do
# not appear in the result.
rm.ossl %>%
  filter(!is.na(oc_usda.calc_wpct), !is.na(scan_mir.600_abs)) %>%
  group_by(dataset.code_ascii_c) %>%
  summarise(n_observations = n()) %>%
  arrange(desc(n_observations))


dataset.code_ascii_c,n_observations
<chr>,<int>
KSSL.SSL,73028
ICRAF.ISRIC,3916
AFSIS2.SSL,781
LUCAS.SSL,605
NEON.SSL,251
CAF.SSL,34


pH H2O

In [19]:
# Per dataset, count the rows that have both a ph.h2o_usda.4c1_index value and
# a scan_mir.600_abs value, largest count first. Datasets with no such rows do
# not appear in the result.
rm.ossl %>%
  filter(!is.na(ph.h2o_usda.4c1_index), !is.na(scan_mir.600_abs)) %>%
  group_by(dataset.code_ascii_c) %>%
  summarise(n_observations = n()) %>%
  arrange(desc(n_observations))

dataset.code_ascii_c,n_observations
<chr>,<int>
KSSL.SSL,54790
ICRAF.ISRIC,3992
AFSIS1.SSL,1904
AFSIS2.SSL,773
LUCAS.SSL,605
CAF.SSL,534
NEON.SSL,303


Clay

In [20]:
# Per dataset, count the rows that have both a clay.tot_usda.3a1_wpct value and
# a scan_mir.600_abs value, largest count first. Datasets with no such rows do
# not appear in the result.
rm.ossl %>%
  filter(!is.na(clay.tot_usda.3a1_wpct), !is.na(scan_mir.600_abs)) %>%
  group_by(dataset.code_ascii_c) %>%
  summarise(n_observations = n()) %>%
  arrange(desc(n_observations))

dataset.code_ascii_c,n_observations
<chr>,<int>
KSSL.SSL,51349
ICRAF.ISRIC,3942
LUCAS.SSL,605
CAF.SSL,562
NEON.SSL,293


# Exploring the CAF.SSL

In [11]:
# Keep only the rows whose dataset code is "CAF.SSL", then report the
# dimensions (rows, columns) of the subset.
caf.ssl <- filter(rm.ossl, dataset.code_ascii_c == "CAF.SSL")
dim(caf.ssl)

The `grep` command is used to find the columns whose names contain **mir**, and the first column is also selected as the ID.

In [14]:
# Column names of the CAF.SSL subset.
caf.ssl.info <- names(caf.ssl)

# Number of columns whose name contains "mir" (matched anywhere in the name).
sum(grepl("mir", caf.ssl.info))

# Keep the first column (used as the ID) plus every "mir" column, then display
# the resulting table.
caf.ssl.mir <- caf.ssl[, c(1, which(grepl("mir", caf.ssl.info)))]
caf.ssl.mir


id.layer_local_c,mirmodel.code_any_c,scan_mir.600_abs,scan_mir.602_abs,scan_mir.604_abs,scan_mir.606_abs,scan_mir.608_abs,scan_mir.610_abs,scan_mir.612_abs,scan_mir.614_abs,⋯,scan_mir.3982_abs,scan_mir.3984_abs,scan_mir.3986_abs,scan_mir.3988_abs,scan_mir.3990_abs,scan_mir.3992_abs,scan_mir.3994_abs,scan_mir.3996_abs,scan_mir.3998_abs,scan_mir.4000_abs
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
bol_1x_mcrz_50,Bruker_Vertex_70.HTS.XT,1683,1666,1643,1632,1628,1626,1625,1622,⋯,254,254,253,253,252,251,251,252,253,253
bol_2x_mcrz_20,Bruker_Vertex_70.HTS.XT,1612,1597,1591,1589,1583,1574,1563,1550,⋯,237,237,237,236,236,236,235,235,235,235
bon_1x_mcrz_20,Bruker_Vertex_70.HTS.XT,1661,1662,1660,1647,1634,1620,1600,1580,⋯,257,257,256,256,256,255,255,255,254,254
bon_pp_mcar_50,Bruker_Vertex_70.HTS.XT,1755,1759,1754,1744,1729,1709,1689,1670,⋯,287,287,286,286,285,285,284,284,283,283
bon_2x_mcrz_20,Bruker_Vertex_70.HTS.XT,1755,1745,1745,1744,1737,1728,1723,1727,⋯,302,302,302,301,301,301,300,300,299,299
ctr_in_arar_50,Bruker_Vertex_70.HTS.XT,1773,1770,1767,1772,1781,1786,1790,1798,⋯,342,341,340,339,338,338,337,336,336,335
ctr_in_msms_20,Bruker_Vertex_70.HTS.XT,1761,1753,1757,1757,1749,1733,1713,1696,⋯,289,289,288,288,287,287,286,286,286,285
ctr_in_nbnb_20,Bruker_Vertex_70.HTS.XT,1690,1687,1692,1689,1680,1680,1689,1691,⋯,265,265,264,264,263,263,262,262,262,261
ctr_in_rzrz_50,Bruker_Vertex_70.HTS.XT,1802,1813,1815,1810,1799,1788,1791,1802,⋯,365,364,363,362,361,361,360,359,359,358
lif_s1_msms_20,Bruker_Vertex_70.HTS.XT,1735,1731,1717,1709,1694,1669,1655,1662,⋯,273,273,272,272,272,271,271,270,270,270
