An R package for building Google's Data Sets Publication Language (DSPL) metadata files used in Public Data Explorer.
Features:
- Reads tab, csv, xls and xlsx files from a folder.
- Identifies data types and distinguishes between dimensional and metric concepts.
- Identifies dimensional data tabs.
- Auto-generates concept ids.
- Automatically sorts data on dimensional (non-time) concepts.
- Prints XML and csv files to upload to Public Data Explorer.
- Runs some bug checks before printing the final XML.
- Builds a ZIP file containing the CSV and XML files.
So you don't need to mess with the XML coding at all!
library(googlePublicData)
# This path has some csv files that we will use
data.path <-try(paste(.libPaths()[1],'/googlePublicData/data',sep=''), silent=T)
data.path
## [1] "/home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data"
# The dspl function looks for csv files in that path and analyzes them
mydspl <- dspl(path=data.path, sep=";")
## 6 files found...
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/countries.csv analyzed correctly
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/country_slice.csv analyzed correctly
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/gender_country_slice.csv analyzed correctly
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/genders.csv analyzed correctly
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/states.csv analyzed correctly
## /home/george/R/x86_64-pc-linux-gnu-library/3.4/googlePublicData/data/state_slice.csv analyzed correctly
# If we wanted to write the zip file... ready to be uploaded to
# http://publicdata.google.com
# dspl(path=data.path, sep=";", output= "mydspl.zip")
# Printing the data
mydspl
## <?xml version="1.0" encoding="UTF-8"?>
## <dspl xmlns="http://schemas.google.com/dspl/2010" xmlns:quantity="http://www.google.com/publicdata/dataset/google/quantity" xmlns:entity="http://www.google.com/publicdata/dataset/google/entity" xmlns:geo="http://www.google.com/publicdata/dataset/google/geo" xmlns:time="http://www.google.com/publicdata/dataset/google/time" xmlns:unit="http://www.google.com/publicdata/dataset/google/unit" targetNamespace="">
## <!--Concepts imports-->
## <import namespace="http://www.google.com/publicdata/dataset/google/quantity"/>
## <import namespace="http://www.google.com/publicdata/dataset/google/entity"/>
## <import namespace="http://www.google.com/publicdata/dataset/google/geo"/>
## <import namespace="http://www.google.com/publicdata/dataset/google/time"/>
## <import namespace="http://www.google.com/publicdata/dataset/google/unit"/>
## <!--Info lines-->
## <info>
## <name>
## <value xml:lang="es">No name</value>
## </name>
## <description>
## <value xml:lang="es">No description</value>
## </description>
## </info>
## <!--Data Provider-->
## <provider>
## <name>
## <value xml:lang="es">No provider</value>
## </name>
## </provider>
## <!--Concepts Definitions-->
## <concepts>
## <concept id="country" extends="geo:location">
## <info>
## <name>
## <value xml:lang="es">Country</value>
## </name>
## </info>
## <type ref="string"/>
## <table ref="countries_table"/>
## </concept>
## <concept id="population">
## <info>
## <name>
## <value xml:lang="es">Population</value>
## </name>
## </info>
## <type ref="integer"/>
## </concept>
## <concept id="gender" extends="entity:entity">
## <info>
## <name>
## <value xml:lang="es">Gender</value>
## </name>
## </info>
## <type ref="string"/>
## <table ref="genders_table"/>
## </concept>
## <concept id="state" extends="geo:location">
## <info>
## <name>
## <value xml:lang="es">State</value>
## </name>
## </info>
## <type ref="string"/>
## <table ref="states_table"/>
## </concept>
## <concept id="unemployment_rate">
## <info>
## <name>
## <value xml:lang="es">Unemployment Rate</value>
## </name>
## </info>
## <type ref="float"/>
## </concept>
## </concepts>
## <!--Slices Definitions-->
## <slices>
## <slice id="country_slice_slice">
## <dimension concept="country"/>
## <dimension concept="time:year"/>
## <metric concept="population"/>
## <table ref="country_slice_table"/>
## </slice>
## <slice id="gender_country_slice_slice">
## <dimension concept="country"/>
## <dimension concept="gender"/>
## <dimension concept="time:year"/>
## <metric concept="population"/>
## <table ref="gender_country_slice_table"/>
## </slice>
## <slice id="state_slice_slice">
## <dimension concept="state"/>
## <dimension concept="time:year"/>
## <metric concept="population"/>
## <metric concept="unemployment_rate"/>
## <table ref="state_slice_table"/>
## </slice>
## </slices>
## <!--Tables Definitios-->
## <tables>
## <table id="countries_table">
## <column id="country" type="string"/>
## <column id="name" type="string"/>
## <column id="latitude" type="float"/>
## <column id="longitude" type="float"/>
## <data>
## <file format="csv" encoding="utf8">countries.csv</file>
## </data>
## </table>
## <table id="country_slice_table">
## <column id="country" type="string"/>
## <column id="year" type="date" format="yyyy"/>
## <column id="population" type="integer"/>
## <data>
## <file format="csv" encoding="utf8">country_slice.csv</file>
## </data>
## </table>
## <table id="gender_country_slice_table">
## <column id="country" type="string"/>
## <column id="gender" type="string"/>
## <column id="year" type="date" format="yyyy"/>
## <column id="population" type="integer"/>
## <data>
## <file format="csv" encoding="utf8">gender_country_slice.csv</file>
## </data>
## </table>
## <table id="genders_table">
## <column id="gender" type="string"/>
## <column id="name" type="string"/>
## <data>
## <file format="csv" encoding="utf8">genders.csv</file>
## </data>
## </table>
## <table id="states_table">
## <column id="state" type="string"/>
## <column id="name" type="string"/>
## <column id="latitude" type="float"/>
## <column id="longitude" type="float"/>
## <data>
## <file format="csv" encoding="utf8">states.csv</file>
## </data>
## </table>
## <table id="state_slice_table">
## <column id="state" type="string"/>
## <column id="year" type="date" format="yyyy"/>
## <column id="population" type="integer"/>
## <column id="unemployment_rate" type="float"/>
## <data>
## <file format="csv" encoding="utf8">state_slice.csv</file>
## </data>
## </table>
## </tables>
## </dspl>
# Summary of the dspl class object
summary(mydspl)
## Attributes
## $names
## [1] "dspl" "concepts.by.table" "dimtabs"
## [4] "slices" "concepts" "dimentions"
## [7] "statistics"
##
## $class
## [1] "dspl"
##
## Dataset contents
## $dimtabs
## [1] "countries" "genders" "states"
##
## $slices
## [1] "countries" "country_slice" "gender_country_slice"
## [4] "genders" "states" "state_slice"
##
## $concepts
## [1] "Country" "name" "latitude"
## [4] "longitude" "Year" "Population"
## [7] "Gender" "State" "Unemployment Rate"
##
## $dimentions
## label
## 1 Country
## 12 Gender
## 14 State
##
## $statistics
## slices concepts dimentions
## [1,] 6 9 3