Merge pull request #19 from MGousseff/ForReview4

For review4: this new version of the package and of the article addresses the remarks of the reviewers.
orbisgis · Jul 10, 2023 · 88951d1 · 88951d1
2 parents b3ab391 + 5ab715b
commit 88951d1
Show file tree

Hide file tree

Showing 67 changed files with 117,419 additions and 5,631 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,8 @@
 ^\.Rproj\.user$
 ^LICENSE\.md$
 ^docs$
+^\.github$
+.github$
+^CITATION\.cff$
+^\.gitignore$
+.gitignore$
diff --git a/.Rprofile b/.Rprofile
@@ -0,0 +1 @@
+source("renv/activate.R")
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,9 +3,9 @@ Title: lczexplore
 Version: 0.0.1.0002
 Authors@R: c(
     person("Matthieu", "Gousseff", , "matthieu.gousseff@univ-ubs.fr", role = c("aut", "cre")),
-    person(, "Centre National de la Recherche Scientifique, Lab-Sticc", role = "cph")
+    person(, "Centre National de la Recherche Scientifique, Lab-Sticc", role = "cph", comment = c(ORCID = "0000-0002-7106-2677"))
   )
-Description: This lczexplore package automatize the comparison of sets of local climate zone classifications.It was developped thanks to the project PAENDORA2 (Pour la gestion du confort estival : Données, Outils et Recherche-Action) (2022 -2025), funded by ADEME.
+Description: This lczexplore package automates the comparison of sets of local climate zone classifications. It was developed thanks to the project PAENDORA2 (Pour la gestion du confort estival : Données, Outils et Recherche-Action) (2022-2025), funded by ADEME.
 License: LGPL (>= 3)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
@@ -22,6 +22,7 @@ Imports: RColorBrewer,
           units,
           rlang,
           grDevices,
+          DescTools,
           methods
 Suggests: 
     tinytest,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,14 +1,14 @@
 # Generated by roxygen2: do not edit by hand
 
 export(LCZareas)
-export(LCZgroup2)
 export(areColors)
 export(compareLCZ)
 export(confidSensib)
 export(fetchLCZ)
+export(groupLCZ)
 export(importLCZgc)
-export(importLCZgen)
 export(importLCZraster)
+export(importLCZvect)
 export(importQualVar)
 export(levCol)
 export(matConfLCZ)
@@ -28,6 +28,7 @@ import(sf)
 import(tidyr)
 import(units)
 import(utils)
+importFrom(DescTools,CohenKappa)
 importFrom(forcats,fct_recode)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,geom_sf)

diff --git a/R/areColors.R b/R/areColors.R
@@ -1,4 +1,4 @@
-#' Check if a string or a vector of string define colors in R
+#' Checks if a string or a vector of strings define colors in R
 #'
 #' @param x is the input string
 #' @return a vector of booleans indicting if the elements of x define a color in R (TRUE) or don't (FALSE)

diff --git a/R/compareLCZ.R b/R/compareLCZ.R
@@ -1,60 +1,57 @@
-#' Compares two LCZ classification on the same zone,
-#' produces a map for each classification, a map of their agreement and a representation of a confusion matrix between them
+#' Compares two sets of geographical classifications, especially Local Climate Zones classifications. It
+#' produces a map for each classification, a map of their agreement (and a pseudo Kappa coefficient),
+#' and a confusion matrix between them. All are stored in a list, easily reusable.
 #'
-#' @param sf1 is the sf object that contains the first LCZ classification
-#' @param geomID1 is the name of the column storing the ID of the geoms in sf1
+#' @param sf1 is the sf object that contains the first (LCZ) classification
 #' @param column1 is the column of sf1 that contains the LCZ classification for each geom of sf1.
+#' @param geomID1 is the name of the optional column storing the ID of the geoms in sf1
 #' By defautl it is set to an empty string and no ID is loaded.
-#' @param confid1 is a column that contains an indicator of confidence
-#' of the level of the LCZ in column 1, e.g. a uniqueness value, or a probability of belonging to the class...
+#' @param confid1 is an optional column that contains an indicator of confidence
+#' of the values in column1, e.g. a uniqueness value, or a probability of belonging to the class...
 #' By defautl it is set to an empty string and no confidence indicator is loaded.
-#' @param wf1 is the workflow used to produce the first LCZ classification.
+#' @param wf1 is the workflow used to produce the first LCZ classification. 
+#' It is used to create titles or legends.
 #' When GeoClimate was used with BD_TOPO V2 data as input,
 #' use "bdtopo_2_2". When GeoClimate was used with Open Street Map data as input, use "osm".
 #' When the LCZ come from the wudapt Europe tiff, use "wudapt".
-#' @param sf2 is the sf object that contains the second LCZ classification
+#' @param sf2 is the sf object that contains the second (LCZ) classification
 #' @param geomID2 is the name of the column storing the ID of the geoms in sf2
 #' @param column2 is the column of sf2 that contains the LCZ classification for each geom of sf2
 #' @param confid2 is a column that contains an indicator of confidence
-#' of the level of the LCZ in column 2, e.g. a uniqueness value, or a probability of belonging to the class...
-#' By defautl it is set to an empty string and no confidence indicator is loaded.
 #' @param wf2 is the workflow used to produce the second LCZ classification.
-#' When GeoClimate was used with BD_TOPO V2 data as input,
-#' use "bdtopo_2_2". When GeoClimate was used with Open Street Map data as input, use "osm".
-#' When the LCZ come from the wudapt Europe tiff, use "wudapt".
-#' @param ref : If the coordinate reference system (CRS) of sf1 and sf2 differ, ref indicates which CRS to choose for both files (1 or 2)
+#' @param ref : If the coordinate reference systems (CRS) of sf1 and sf2 differ, ref indicates which CRS to choose for both files (1 or 2)
 #' @param repr "standard" means that standard values of LCZ are expected,
 #'  "alter" means other values are expected, like grouped values of LCZ or other qualitative variable.
-#'  In the latter case, the ... arguments must contain the groups and a color vector.
-#' @param plot : when FALSE non of the graphics are plotted or saved
+#'  In the latter case, the ... arguments can contain the expected levels and a color vector.
+#' @param plot : when FALSE none of the graphics are plotted or saved
 #' @param saveG : when an empty character string, "", the plots are not saved. Else, the saveG string is used to produce the name of the saved png file.
 #' @param location : the name of the study area, as chosen as the name of the directory on the GeoClimate team cloud.
 #' If the area you wish to analyse is not uploaded yet, please contact the GeoClimate Team.
 #' @param exwrite : when TRUE, the values of the LCZ on the intersected geoms are written down in a csv file
 #' @param outDir : when exwrite equals TRUE, outDir is the path to the folder where one wants to write
 #' the csv file containing the values of the LCZ on the intersected geoms
-#' @param tryGroup : when TRUE, if the specified level names don't match the data, but the specified levels do,
-#' a call to the LCZgroup2 function will be tried, and if it works, the resulting grouping column will be named
-#' "grouped" and the comparison will be done using it.
-#' @param ... allow to pass arguments if repr is set to alter.
+#' @param tryGroup : when TRUE, one can group and compare on-the-fly : if the specified level names
+#' don't match the data, but the specified levels do, a call to the groupLCZ function will be tried,
+#' and if it works, the resulting grouping columns will be compared
+#' @param ... allows passing optional arguments if repr is set to alter.
 #' The expected arguments are the name of each level of the variables contained 
-#' in column1 and column2, and last a vector cols of the colors to use to plot them.
+#' in column1 and column2, and also a vector called colors.
 #' @importFrom ggplot2 geom_sf guides ggtitle aes
+#' @importFrom DescTools CohenKappa
 #' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
-#' @return returns an object called matConfOut which contains
-#' matConfLong, a confusion matrix in a longer form, which can be written in a file by the compareLCZ function
-#' and is used by the geom_tile function of the ggplot2 package.
-#' matConfPlot is a ggplot2 object showing the confusion matrix. If plot=T, it is also directly plotted
-#' areas contains the sums of each LCZ area
+#' @return returns graphics of comparison and an object called matConfOut which contains :
+#' matConfLong, a confusion matrix in a longer form, 
+#' matConfPlot is a ggplot2 object showing the confusion matrix.
 #' percAgg is the general agreement between the two sets of LCZ, expressed as a percentage of the total area of the study zone
+#' pseudoK is a heuristic estimate of a Cohen's kappa coefficient of agreement between classifications
 #' If saveG is not an empty string, graphics are saved under "saveG.png"
 #' @export
 #' @examples
 #' comparisonBDT_OSM<-compareLCZ(sf1=redonBDT, column1="LCZ_PRIMARY", geomID1 = "ID_RSU",
 #' confid1="LCZ_UNIQUENESS_VALUE", wf1="bdtopo_2_2",
 #' sf2=redonOSM, column2="LCZ_PRIMARY", geomID2 = "ID_RSU",
 #' confid2="LCZ_UNIQUENESS_VALUE", wf2="osm",
-#' repr="standard", saveG="", exwrite=TRUE, location="Redon", plot=TRUE)
+#' repr="standard", saveG="", exwrite=FALSE, location="Redon", plot=TRUE)
 #' # To get the summed area of each LCZ levels for both dataset : 
 #' comparisonBDT_OSM$areas
 #' # The plots of each dataset can be produced with the /`showLCZ/` function.
@@ -158,13 +155,13 @@ compareLCZ<-function(sf1,geomID1="",column1,confid1="",wf1="bdtopo_2_2",
     LCZlevels<-as.character(c(1:10,101:107))
     if (prod(uniqueData1%in%LCZlevels)==0){
       line1<-"The column chosen for the first data set dosen't seem to be a standard LCZ encoding. \n"
-      line2<-"Did you import the data with importLCZgen ? \n"
+      line2<-"Did you import the data with importLCZvect ? \n"
       line3<-" If the LCZ types are not standard, you can try to set repr to alter and specify the levels. \n"
       errorMessage<-paste(line1,line2,line3)
       stop(errorMessage) }
     if (prod(uniqueData2%in%LCZlevels)==0){
       line1<-"The column chosen for the second data set dosen't seem to be a standard LCZ encoding. \n"
-      line2<-"Did you import the data with importLCZgen ? \n"
+      line2<-"Did you import the data with importLCZvect ? \n"
       line3<-" If the LCZ types are not standard, you can try to set repr to alter and specify the levels. \n"
       errorMessage<-paste(line1,line2,line3)
       stop(errorMessage) }
@@ -262,13 +259,13 @@ compareLCZ<-function(sf1,geomID1="",column1,confid1="",wf1="bdtopo_2_2",
 
     # if there are several parameters to specify grouping levels
     # and their names don't cover the values in column, and if tryGroup is TRUE
-    # then we try to call LCZgroup2 And procede to grouping accordingly
+    # then we try to call groupLCZ And procede to grouping accordingly
 
     if (tryGroup==TRUE && (length(grep("14: ",levColCase1))!=0 ||length(grep("15: ",levColCase1))!=0 )){
       message("Level names in your 1st dataset didn't match original data.
-      As tryGroup=TRUE, the function LCZgroup2 will try to create a \"grouped\" column with level names and levels specified in (...).
+      As tryGroup=TRUE, the function groupLCZ will try to create a \"grouped\" column with level names and levels specified in (...).
       If this doesn't work, compareLCZ function may fail.")
-      sfNew1<-LCZgroup2(sf1,column = column1,...)
+      sfNew1<-groupLCZ(sf1,column = column1,...)
       #sf1[column1]<-sfNew1["grouped"]
       sf1<-sfNew1 %>% mutate(!!column1:=subset(sfNew1,select="grouped",drop=TRUE))
       # print(summary(sf1))
@@ -278,9 +275,9 @@ compareLCZ<-function(sf1,geomID1="",column1,confid1="",wf1="bdtopo_2_2",
     }
 
     if (tryGroup==TRUE && (length(grep("14: ",levColCase2))!=0 ||length(grep("15: ",levColCase2))!=0 )){
-      message("As tryGroup=TRUE, the function LCZgroup2 will try to create a \"grouped\" column with level names and levels specified in (...).
+      message("As tryGroup=TRUE, the function groupLCZ will try to create a \"grouped\" column with level names and levels specified in (...).
       If this doesn't work, compareLCZ function may fail.")
-      sfNew2<-LCZgroup2(sf2,column = column2,...)
+      sfNew2<-groupLCZ(sf2,column = column2,...)
       #sf2[column2]<-sfNew2["grouped"]
       sf2<-sfNew2 %>% mutate(!!column2:=subset(sfNew2,select="grouped",drop=TRUE))
       # print(summary(sf2))
@@ -381,16 +378,32 @@ matConfOut<-matConfLCZ(sf1=sf1, column1=column1, sf2=sf2, column2=column2,
 matConfOut$data<-echIntExpo
 matConfLong<-matConfOut$matConf
 matConfLarge<-pivot_wider(matConfLong,names_from = column2,values_from = agree)
-matConfOut$matConfLarge<-matConfLarge  
+matConfLarge<-matConfLarge %>% as.data.frame()
+row.names(matConfLarge)<-matConfLarge[,1] %>% as.character
+matConfLarge<-matConfLarge[,-1]
+matConfLarge<-as.matrix(matConfLarge)  
+
+
+# Add the pseudo Kappa statistic to the output
+PseudoWeightedCross<-matConfLarge*100
+pseudoK<-DescTools::CohenKappa(x=PseudoWeightedCross)  
+matConfOut$pseudoK<-pseudoK
 
 areas<-matConfOut$areas
 percAgg<-matConfOut$percAgg
 
+
+
+
+
+
+
+
 ################################################
 #  GRAPHICS
 ################################################
 if (plot == TRUE){
-  if (repr=='standard'){titrou<-"LCZ"} else {titrou<-"Grouped LCZs"}
+  if (repr=='standard'){titrou<-"LCZ"} else {titrou<-"Levels"}
 
   if (wf1=="bdtopo_2_2"){adtitre1<-" BDTOPO V2.2"} else
     if(wf1=="osm"){adtitre1<-" OSM "} else
@@ -405,7 +418,7 @@ if (plot == TRUE){
   titre1<-paste(titrou,"from ", adtitre1)
   titre2<-paste(titrou,"from", adtitre2)
   titre3<-"Agreement between classifications"
-  titre4<-paste(" Repartition of", adtitre1, " LCZs into LCZs of", adtitre2)
+  titre4<-paste(" Distribution of", adtitre1, " levels \n into levels of", adtitre2)
 
 
   # ypos<-if (repr=="standard"){ypos=5} else {ypos=2}

diff --git a/R/confidSensib.R b/R/confidSensib.R
@@ -1,12 +1,14 @@
-#' Computes the agreement between LCZ classification on a range of values of
-#' an indicator of confidence granted to each LCZ classification. The input file or dataset must have been produced by compareLCZ function, or at least the columns must be in the same order.
+#' Computes the agreement between geographical (LCZ) classifications on a range of values of
+#' an indicator of confidence granted to each LCZ classification. 
+#' The input file or dataset must have been produced by compareLCZ function, 
+#' or at least the columns must be in the same order.
 #'
-#' @param inputDf is an R file with geom IDs, LCZ classifications and
-#' a confidence value granted for the LCZ value of each geom. Ignored if filePath is not empty.
+#' @param inputDf is an R file with geom IDs, (LCZ) classifications and
+#' a confidence value granted for the (LCZ) classifications values of each geom. Ignored if filePath is not empty.
 #' @param filePath is the path to a csv file containing geom IDs, LCZ classifications and
 #' a confidence value granted for the LCZ value of each geom.
 #' @param nPoints is the number of points (quantiles) of confidence for which
-#' the average agreement between classifs will be computed
+#' the average agreement between classifications will be computed
 #' @param wf1 is the name of the workflow used to produce the first LCZ
 #' @param wf2 is the name of the workflow used to produce the second LCZ
 #' @param geomID1 is the name of the column that contains the geom ID associated to the first workflow
@@ -20,7 +22,7 @@
 #' @param plot if TRUE the graph is plotted
 #' @param saveG if not an empty string, specifies where to save graphs
 #' @import dplyr ggplot2
-#' @return returns an object called sortie, which contains the values of the thresholds
+#' @return returns an object called output, which contains the values of the thresholds
 #' for the confidence value and the agreement between classifications for the LCZ levels presents in the dataset
 #' @export
 #'
@@ -173,13 +175,13 @@ typeLevels<-unique(echIntConf[,column1]) %>% as.vector
  # sortieParLCZ<-aggregate(echIntConf,by=echIntConf[[column1]],internFunction2,nPoints=nPoints)
  sortieParLCZ<-lapply(echIntConfSplit,internFunction2,nPoints=nPoints)
  nivList<-names(sortieParLCZ)
- sortie<-data.frame(Confidence=numeric(0), Agreement=numeric(0), Kept=character(0),
+ output<-data.frame(Confidence=numeric(0), Agreement=numeric(0), Kept=character(0),
                     nbGeom=numeric(0), LCZ=character(0))
  for (i in names(sortieParLCZ)){
-    sortie<-rbind(sortie,cbind(sortieParLCZ[[i]],LCZ=rep(i,nrow(sortieParLCZ[[i]]))))
+    output<-rbind(output,cbind(sortieParLCZ[[i]],LCZ=rep(i,nrow(sortieParLCZ[[i]]))))
  }
 
- byLCZPLot<-ggplot(data=sortie, aes(x=Confidence, y=Agreement, color=Kept, shape=Kept))+
+ byLCZPLot<-ggplot(data=output, aes(x=Confidence, y=Agreement, color=Kept, shape=Kept))+
    labs(x="Confidence threshold", color = "Geom set", shape="Geom set")+
    scale_fill_discrete(breaks=c("confidence >= threshold","confidence < threshold"),)+
    scale_color_manual(values =
@@ -203,6 +205,6 @@ typeLevels<-unique(echIntConf[,column1]) %>% as.vector
  }
 
 
- return(sortie)
+ return(output)
 
 }
diff --git a/R/fetchLCZ.R b/R/fetchLCZ.R
@@ -1,12 +1,12 @@
-#' Fetches the data from the Geomanum cloud, for locations already uploaded there by the Geoclimate team
+#' Fetches LCZ classification data, by default from the Geomanum cloud, for locations already uploaded there by the Geoclimate team
 #'
 #' @param location is the name of the place for which you want to get the GeoClimate data
 #' @param outDir is the path to the directory in which fetchLCZ will unpack the data
 #' @param wf workflow can be bdtopo_2_2, when GeoClimate used the BDTOPO data base, or osm when it used the open street map data.
 #' @param refYear this parameters is integrated in the name given to the data, in order to allow to re-run the function according to the reference year. At the moment only 2022 data have been tried
 #' @param baseURL is the adress of the server where the function fetches the data. The data must be organised in a bdtopo_2_2 and an osm/year directories.
 #' This function is intended to be used by Paendora project members, so the default is the Geomanum Foundation cloud.
-#' @param ... allows to pass arguments from the produce analysis function to the LCZgroup2 and compareLCZ functions
+#' @param ... allows to pass arguments from the produce analysis function to the groupLCZ and compareLCZ functions
 #' @import utils methods
 
 

diff --git a/R/LCZgroup2.R → R/groupLCZ.R b/R/LCZgroup2.R → R/groupLCZ.R
@@ -1,4 +1,5 @@
-#' Allows to group local climate zones to improve the analysis.
+#' Allows to group (Local Climate Zone) geographical classification levels into broader categories 
+#' to explore classification agreements
 #'
 #' @param sf is the input files. It must be an sf file and contain an LCZ column (levels 1 to 10 and 101 to 107). It must contain the geom column.
 #' @param column is the name of the column containing the LCZ to be grouped
@@ -10,12 +11,17 @@
 #' @export
 #'
 #' @examples
-#' redonBDTgrouped<-LCZgroup2(redonBDT,column="LCZ_PRIMARY",
+#' redonBDTgrouped<-groupLCZ(redonBDT,column="LCZ_PRIMARY",
 #' urban=c("1","2","3","4","5","6","7","8","9"),
 #' industry="10", vegetation=c("101","102","103","104"),
 #' impervious="105",pervious="106",water="107",
-#' cols=c("red","black","green","grey","burlywood","blue"))
-LCZgroup2<-function(sf,column,outCol='grouped',...)
+#' colors=c("red","black","green","grey","burlywood","blue"))
+#' 
+#' showLCZ(redonBDT,column="LCZ_PRIMARY", repr="standard")
+#' showLCZ(redonBDTgrouped,column="grouped",repr="alter",
+#' LCZlevels=c("urban","industry","vegetation","impervious","pervious","water"),
+#' colors=c("red","black","green","grey","burlywood","blue"),wf="BD TOPO")
+groupLCZ<-function(sf,column,outCol='grouped',...)
 {
   #require(forcats)
   #require(dplyr)

diff --git a/R/importLCZgc.R b/R/importLCZgc.R
@@ -1,4 +1,5 @@
-#' Imports the rsu_lcz geojson file produced by GeoClimate.
+#' Imports the rsu_lcz geojson file produced by GeoClimate. 
+#' Use the more generic `importLCZvect` function instead.
 #'
 #' @param dirPath : the path where the rsu_lcz.geojson file is.
 #' @param output : if sfFile the rsu_lcz.geojson is imported as an sf file, if bBox, a bounding box of the area is returned
@@ -29,11 +30,11 @@ importLCZgc<-function(dirPath,output="sfFile"){
 
 
   if(output=="sfFile"){return(sfFile)} else {
-    if(output=="bBox"){bBox=st_bbox(sfFile,crs=st_crs(sfFile)) %>% st_as_sfc
+    if(output=="bBox"){bBox<-st_bbox(sfFile,crs=st_crs(sfFile)) %>% st_as_sfc
 
     return(bBox)} else {
       if(output=="contour"){
-        fileName2<-paste0(dirPath,"zones.geojson")
+        # fileName2<-paste0(dirPath,"zones.geojson")
         sfContour<-st_read(dsn=fileName) %>% st_geometry %>% st_union()
         return(sfContour)
       }