Word2Vec Example Process in RapidMiner


This is an example process of how to use Word2Vec in RapidMiner with the Search Twitter operator. For more information check out this post on the community.

I'll be going over this in a bit more detail at my next live stream here.

Word2VecRapidMiner

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter" width="90" x="45" y="34">
            <parameter key="connection" value="Twitter - Studio Connection"/>
            <parameter key="query" value="rapidminer"/>
            <parameter key="locale" value="en"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="136">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Text"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="8.1.001" expanded="true" height="82" name="Nominal to Text" width="90" x="45" y="238"/>
          <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="246" y="34">
            <list key="specify_weights"/>
          </operator>
          <operator activated="true" class="loop_collection" compatibility="8.1.001" expanded="true" height="82" name="Loop Collection" width="90" x="246" y="136">
            <process expanded="true">
              <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34"/>
              <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="581" y="34"/>
              <connect from_port="single" to_op="Transform Cases" to_port="document"/>
              <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
              <connect from_op="Tokenize" from_port="document" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="word2vec:Word2Vec_Learner" compatibility="1.0.000" expanded="true" height="68" name="Word2Vec " width="90" x="447" y="34"/>
          <operator activated="true" class="word2vec:Get_Vocabulary" compatibility="1.0.000" expanded="true" height="82" name="Extract Vocabulary" width="90" x="581" y="34">
            <parameter key="Get Full Vocabulary" value="true"/>
            <parameter key="Take Random Words" value="false"/>
            <parameter key="Number of Words to Pull" value="10"/>
          </operator>
          <connect from_op="Search Twitter" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
          <connect from_op="Data to Documents" from_port="documents" to_op="Loop Collection" to_port="collection"/>
          <connect from_op="Loop Collection" from_port="output 1" to_op="Word2Vec " to_port="doc"/>
          <connect from_op="Word2Vec " from_port="mod" to_op="Extract Vocabulary" to_port="mod"/>
          <connect from_op="Extract Vocabulary" from_port="exa" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>