#AI · R RapidMiner Word Clouds Data Visualization Tutorial · 2018-07-05 · Thomas Ott

~2 min read

There was a question from the RapidMiner Community on how to make a word cloud using R and RapidMiner. It's really easy.

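First, make sure you have the Execute R extension installed and configured. Then install the "wordcloud" and "RColorBrewer" packages from the CRAN repository. The sample process below also uses operators from the text processing and social media extensions (Tokenize, Search Twitter), and Search Twitter expects a configured Twitter connection named "Twitter".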

Finally, grab this sample XML and pop it into your RapidMiner Studio. Note: I saved the image to my desktop, so you'll have to change the path in the R script to wherever you want to save the PNG.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner Studio process that renders a word cloud from tweets.

  Steps, in connection order:
    1. Search Twitter for the query "rapidminer" using the connection named "Twitter".
    2. Keep only the "Text" attribute and convert it from nominal to text.
    3. Tokenize the text inside Process Documents from Data and take its word list.
    4. Convert the word list to an example set and keep the "word" and "total" attributes.
    5. Pass that example set to Execute R, which draws the cloud into a PNG file.

  Before running: the PNG path inside the Execute R script is hard-coded to the original
  author's Windows desktop. Change it to a folder that exists on your machine.
-->
<process version="8.2.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Source of the text: tweets matching the query below. -->
      <operator activated="true" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter" width="90" x="45" y="34">
        <parameter key="connection" value="Twitter"/>
        <parameter key="query" value="rapidminer"/>
      </operator>
      <!-- Drop everything except the tweet text. -->
      <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <!-- Convert the "Text" attribute to the text type before it is fed to the document operator. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.2.001" expanded="true" height="82" name="Nominal to Text" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <!-- Tokenize each document. Pruning is set to "percentual"; no thresholds are given in this XML. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="34">
        <parameter key="prune_method" value="percentual"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Turn the word list into an example set, then keep only the columns the R script reads. -->
      <operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="WordList to Data" width="90" x="581" y="85"/>
      <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="715" y="85">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="word|total"/>
      </operator>
      <!--
        Draws the word cloud from data$word (labels) and data$total (frequencies) into a PNG
        and returns the input data unchanged. dev.off() is registered with on.exit() right
        after png() so the graphics device is closed even when wordcloud() raises an error;
        otherwise the PNG file would be left open. Edit the path in png(...) for your machine.
      -->
      <operator activated="true" class="r_scripting:execute_r" compatibility="8.1.000" expanded="true" height="82" name="Execute R" width="90" x="849" y="85">
        <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;rm_main = function(data)&#10;{&#10;&#9;library(&quot;wordcloud&quot;)&#10;&#9;library(&quot;RColorBrewer&quot;)&#10;&#9;&#10;&#9;png(&quot;C:\\Users\\TomOtt\\Desktop\\wordcloud.png&quot;, width=1280,height=800)&#10;&#9;# close the PNG device on exit, even if wordcloud() fails&#10;&#9;on.exit(dev.off())&#10;&#9;wordcloud(words = data$word, freq = data$total, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35, colors=brewer.pal(8, &quot;Dark2&quot;))&#10;&#9;&#10;&#9;return (data)&#10;}&#10;"/>
      </operator>
      <connect from_op="Search Twitter" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
      <connect from_op="WordList to Data" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Execute R" to_port="input 1"/>
      <connect from_op="Execute R" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>