Word Clouds in RapidMiner and R
Tutorials RapidMiner R StatisticsThere was a question from the RapidMiner Community on how to make a word clouds in RapidMiner using R. It’s really easy.
First, you’ll need to make sure you have the Execute R extension installed and configured, then you need to download the “wordcloud” and “RColorBrewer” packages from R Cran Repository.
Finally, grab this sample XML and pop it in your RapidMiner Studio. Note: I saved the image to my desktop, you’ll have to repath it to where you want to save the PNG.
<?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter" width="90" x="45" y="34">
<parameter key="connection" value="Twitter"/>
<parameter key="query" value="rapidminer"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Text"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="8.2.001" expanded="true" height="82" name="Nominal to Text" width="90" x="313" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Text"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="34">
<parameter key="prune_method" value="percentual"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="WordList to Data" width="90" x="581" y="85"/>
<operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="715" y="85">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="word|total"/>
</operator>
<operator activated="true" class="r_scripting:execute_r" compatibility="8.1.000" expanded="true" height="82" name="Execute R" width="90" x="849" y="85">
<parameter key="script" value="# rm_main is a mandatory function, # the number of arguments has to be the number of input ports (can be none) rm_main = function(data) { 	library("wordcloud") 	library("RColorBrewer") 	 	png("C:\Users\TomOtt\Desktop\wordcloud.png", width=1280,height=800) 	wordcloud(words = data$word, freq = data$total, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35, colors=brewer.pal(8, "Dark2")) 	dev.off() 	 	return (data) } "/>
</operator>
<connect from_op="Search Twitter" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
<connect from_op="WordList to Data" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Execute R" to_port="input 1"/>
<connect from_op="Execute R" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>