diff --git a/output/easy/analysis.md b/output/easy/README.md similarity index 100% rename from output/easy/analysis.md rename to output/easy/README.md diff --git a/output/easy/analysis.html b/output/easy/analysis.html deleted file mode 100644 index ecf0bc7..0000000 --- a/output/easy/analysis.html +++ /dev/null @@ -1,699 +0,0 @@ - - - - - - - - - - - - - - - - - - - -

Easy Test Analysis

-

Introduction

-

This document demonstrates the analysis of XML data using R, focusing -on extracting information about movies from an XML string. The analysis -leverages the xml2 and stringr libraries in R -to parse and manipulate XML data.

-

Setting Up the Environment

-

XML Data

-

The XML string contains information about two movies, including their -titles, directors, release years, and genres. The structure of the XML -string is hierarchical, with each movie enclosed within -<movie> tags.

-
library(xml2) 
-library(stringr)
-
-xml_string <- c(
-  '<?xml version="1.0" encoding="UTF-8"?>',
-  '<movies>',
-  '<movie mins="126" lang="eng">',
-  '<title>Good Will Hunting</title>',
-  '<director>',
-  '<first_name>Gus</first_name>',
-  '<last_name>Van Sant</last_name>',
-  '</director>',
-  '<year>1998</year>',
-  '<genre>drama</genre>',
-  '</movie>',
-  '<movie mins="106" lang="spa">',
-  '<title>Y tu mama tambien</title>',
-  '<director>',
-  '<first_name>Alfonso</first_name>',
-  '<last_name>Cuaron</last_name>',
-  '</director>',
-  '<year>2001</year>',
-  '<genre>drama</genre>',
-  '</movie>',
-  '</movies>')
-

Parsing XML Data

-

To analyze the XML data, we first need to parse it into an R object. -The read_xml function from the xml2 library is -used for this purpose. This function converts the XML string into an XML -document object, which can then be manipulated using R.

-
doc <- read_xml(paste(xml_string, collapse = ''))
-doc
-
## {xml_document}
-## <movies>
-## [1] <movie mins="126" lang="eng">\n  <title>Good Will Hunting</title>\n  <dir ...
-## [2] <movie mins="106" lang="spa">\n  <title>Y tu mama tambien</title>\n  <dir ...
-

Extracting Movie Information

-

To extract information about a specific movie, we use the -xml_child function to select the movie by its position in -the XML document. We then use the xml_children function to -access the child nodes of the movie, such as the title, director, year, -and genre.

-
mama_tambien <- xml_child(doc, search = 2) 
-mama_tambien
-
## {xml_node}
-## <movie mins="106" lang="spa">
-## [1] <title>Y tu mama tambien</title>
-## [2] <director>\n  <first_name>Alfonso</first_name>\n  <last_name>Cuaron</last ...
-## [3] <year>2001</year>
-## [4] <genre>drama</genre>
-
xml_children(mama_tambien)
-
## {xml_nodeset (4)}
-## [1] <title>Y tu mama tambien</title>
-## [2] <director>\n  <first_name>Alfonso</first_name>\n  <last_name>Cuaron</last ...
-## [3] <year>2001</year>
-## [4] <genre>drama</genre>
-

Displaying Results

-

The xml_name function is used to display the name of the -XML node, while the xml_attrs function shows the attributes -of the node. This provides a clear overview of the movie’s -information.

-
xml_name(mama_tambien) 
-
## [1] "movie"
-
xml_attrs(mama_tambien)
-
##  mins  lang 
-## "106" "spa"
-

Extracting Director -Information

-

To extract the director of the selected movie, we pass the name of the -child node, "director", to the xml_child function instead of a -position. We then use the xml_text function to retrieve the text -content of the node. Because xml_text concatenates the text of all -descendant nodes without a separator, the first and last names appear -joined together as "AlfonsoCuaron".

-
director <- xml_child(mama_tambien,"director")
-director
-
## {xml_node}
-## <director>
-## [1] <first_name>Alfonso</first_name>
-## [2] <last_name>Cuaron</last_name>
-
xml_text(director)
-
## [1] "AlfonsoCuaron"
- - - diff --git a/output/medium/analysis.md b/output/medium/README.md similarity index 100% rename from output/medium/analysis.md rename to output/medium/README.md diff --git a/output/medium/analysis.html b/output/medium/analysis.html deleted file mode 100644 index 14a2198..0000000 --- a/output/medium/analysis.html +++ /dev/null @@ -1,758 +0,0 @@ - - - - - - - - - - - - - - - - - - - -

Medium Test Analysis

-

Introduction

-

This document demonstrates the analysis of XML data using R, focusing -on extracting information about movies from an XML string. The analysis -leverages the XML library in R to parse and manipulate XML -data.

-

Setting Up the Environment

-

XML Data

-

The XML string contains information about two movies, including their -titles, directors, release years, and genres. The structure of the XML -string is hierarchical, with each movie enclosed within -<movie> tags.

-
library(XML)
-xml_content <- c(
-    '<?xml version="1.0" encoding="UTF-8"?>',
-    "<movies>",
-    '<movie mins="126" lang="eng">',
-    "<title>Good Will Hunting</title>",
-    "<director>",
-    "<first_name>Gus</first_name>",
-    "<last_name>Van Sant</last_name>",
-    "</director>",
-    "<year>1998</year>",
-    "<genre>drama</genre>",
-    "</movie>",
-    '<movie mins="106" lang="spa">',
-    "<title>Y tu mama tambien</title>",
-    "<director>",
-    "<first_name>Alfonso</first_name>",
-    "<last_name>Cuaron</last_name>",
-    "</director>",
-    "<year>2001</year>",
-    "<genre>drama</genre>",
-    "</movie>",
-    "</movies>"
-)
-

Parsing XML Data

-

To analyze the XML data, we first need to parse it into an R object. -The xmlTreeParse function from the XML library -is used for this purpose. This function converts the XML string into an -XML document object, which can then be manipulated using R.

-
xml_doc <- xmlTreeParse(paste(xml_content, collapse = ""), useInternalNodes = TRUE)
-print(xml_doc)
-
## <?xml version="1.0" encoding="UTF-8"?>
-## <movies>
-##   <movie mins="126" lang="eng">
-##     <title>Good Will Hunting</title>
-##     <director>
-##       <first_name>Gus</first_name>
-##       <last_name>Van Sant</last_name>
-##     </director>
-##     <year>1998</year>
-##     <genre>drama</genre>
-##   </movie>
-##   <movie mins="106" lang="spa">
-##     <title>Y tu mama tambien</title>
-##     <director>
-##       <first_name>Alfonso</first_name>
-##       <last_name>Cuaron</last_name>
-##     </director>
-##     <year>2001</year>
-##     <genre>drama</genre>
-##   </movie>
-## </movies>
-## 
-

Extracting Movie Information

-

To extract information about the movies, we use the -xmlRoot function to access the root node of the XML -document. We then iterate over the child nodes of the root node, which -represent the movies, and extract their information.

-
movies_node <- xmlRoot(xml_doc)
-
-cat("Root Node Name:", xmlName(movies_node), "\n")
-
## Root Node Name: movies
-
root_attrs <- xmlAttrs(movies_node)
-
-cat("Root Node Attributes:", "\n")
-
## Root Node Attributes:
-
print(root_attrs)
-
## NULL
-
movie_nodes <- xmlChildren(movies_node)
-

Iterating -Through Each Movie Node and Displaying Its Information

-
for (i in seq_along(movie_nodes)) {
-    movie_node <- movie_nodes[[i]]
-
-    cat("Movie Node", i, "Name:", xmlName(movie_node), "\n")
-
-    movie_attrs <- xmlAttrs(movie_node)
-
-    cat("Movie Node", i, "Attributes:", "\n")
-
-    print(movie_attrs)
-
-    movie_children <- xmlChildren(movie_node)
-
-    for (j in seq_along(movie_children)) {
-        child_node <- movie_children[[j]]
-
-        cat("Child Node", j, "Name:", xmlName(child_node), "\n")
-
-        cat("Child Node", j, "Content:", xmlValue(child_node), "\n")
-
-        child_attrs <- xmlAttrs(child_node)
-
-        cat("Child Node", j, "Attributes:", "\n")
-        print(child_attrs)
-    }
-
-    cat("\n")
-}
-
## Movie Node 1 Name: movie 
-## Movie Node 1 Attributes: 
-##  mins  lang 
-## "126" "eng" 
-## Child Node 1 Name: title 
-## Child Node 1 Content: Good Will Hunting 
-## Child Node 1 Attributes: 
-## NULL
-## Child Node 2 Name: director 
-## Child Node 2 Content: GusVan Sant 
-## Child Node 2 Attributes: 
-## NULL
-## Child Node 3 Name: year 
-## Child Node 3 Content: 1998 
-## Child Node 3 Attributes: 
-## NULL
-## Child Node 4 Name: genre 
-## Child Node 4 Content: drama 
-## Child Node 4 Attributes: 
-## NULL
-## 
-## Movie Node 2 Name: movie 
-## Movie Node 2 Attributes: 
-##  mins  lang 
-## "106" "spa" 
-## Child Node 1 Name: title 
-## Child Node 1 Content: Y tu mama tambien 
-## Child Node 1 Attributes: 
-## NULL
-## Child Node 2 Name: director 
-## Child Node 2 Content: AlfonsoCuaron 
-## Child Node 2 Attributes: 
-## NULL
-## Child Node 3 Name: year 
-## Child Node 3 Content: 2001 
-## Child Node 3 Attributes: 
-## NULL
-## Child Node 4 Name: genre 
-## Child Node 4 Content: drama 
-## Child Node 4 Attributes: 
-## NULL
- - -