Neural Market Trends |||

Extract Ernest Hemingway Quotes from Goodreads

Here’s a fast and simple process to extract Ernest Hemingway quotes from Goodreads. The process is not finished yet: I still need to loop over each quote and add 1 day to the %{now} macro. The goal is to then write the quotes in markdown, each dated %{now}+1 day, and auto-schedule them on my other website (thomasott.io).

Right now the Goodreads.com web structure is easy to extract but I suspect they’ll make it harder one day.

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process: downloads the Goodreads quotes page for "A Moveable Feast",
      cuts the page into segments delimited by the quoteText div markers, and builds
      one markdown-style document per segment, stamped with today's date.

      Known limitation: the "now" macro is set once in the outer process, so every
      generated document carries the same date. Incrementing it per quote is not
      implemented here.
    -->
    <process version="8.1.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Steps 1-3: build a one-attribute example set holding date_now(), format it
               as yyyy-MM-dd, and store the value of example 1 in the macro "now". -->
          <operator activated="true" class="generate_data_user_specification" compatibility="8.1.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="get_date" value="date_now()"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="date_to_nominal" compatibility="8.1.000" expanded="true" height="82" name="Date to Nominal" width="90" x="179" y="34">
            <parameter key="attribute_name" value="get_date"/>
            <parameter key="date_format" value="yyyy-MM-dd"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="8.1.000" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
            <parameter key="macro" value="now"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="get_date"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Step 4: fetch the quotes page. The URL is hard-coded to a single book. -->
          <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="447" y="34">
            <parameter key="url" value="https://www.goodreads.com/work/quotes/2459084-a-moveable-feast"/>
            <list key="query_parameters"/>
            <list key="request_properties"/>
            <description align="center" color="transparent" colored="false" width="126">Read Moveable Feast Quotes</description>
          </operator>
          <!-- Step 5: split the page into segments and run the nested process on each one.
               The HTML markers below are escaped (&lt; / &gt;) because a literal
               less-than sign is not allowed inside an XML attribute value; the parsed
               parameter values are unchanged.
               NOTE(review): the key spelling "string_machting_queries" is kept exactly as
               exported; presumably it is the parameter name the operator expects. -->
          <operator activated="true" class="text:cut_document" compatibility="8.1.000" expanded="true" height="68" name="Cut Document" width="90" x="581" y="34">
            <list key="string_machting_queries">
              <!-- NOTE(review): the value looks like a start marker and an end marker
                   joined by "."; confirm against the Cut Document documentation. -->
              <parameter key="MoveableFeastQuotes" value="&lt;div class=&quot;quoteText&quot;&gt;.&lt;/div&gt;"/>
            </list>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <list key="xpath_queries">
              <!-- NOTE(review): this entry is not an XPath expression and looks like a
                   leftover. No query_type is set on this operator, so presumably the
                   default query type applies and this list is ignored; confirm before
                   removing it. -->
              <parameter key="&lt;div class =&quot;quoteText&quot;&gt;" value="&lt;/div&gt;"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries"/>
            <process expanded="true">
              <!-- Nested process, run per segment: segment to Extract Content to Extract
                   Information to Documents to Data to Extract Quote (sets the "Quote" macro). -->
              <operator activated="true" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content" width="90" x="45" y="34"/>
              <operator activated="true" class="text:extract_information" compatibility="8.1.000" expanded="true" height="68" name="Extract Information" width="90" x="179" y="34">
                <!-- query_type selects the regular expression list; the string matching
                     entry below is presumably unused while this setting is in effect. -->
                <parameter key="query_type" value="Regular Expression"/>
                <list key="string_machting_queries">
                  <parameter key="QuoteText" value="&quot;.&quot;"/>
                </list>
                <list key="regular_expression_queries">
                  <!-- Matches from a straight double quote to the last straight double
                       quote in the text (".*" is greedy). -->
                  <parameter key="QuoteText" value="\&quot;.*\&quot;"/>
                </list>
                <list key="regular_region_queries"/>
                <list key="xpath_queries"/>
                <list key="namespaces"/>
                <list key="index_queries"/>
                <list key="jsonpath_queries"/>
              </operator>
              <operator activated="true" class="text:documents_to_data" compatibility="8.1.000" expanded="true" height="82" name="Documents to Data" width="90" x="313" y="34">
                <parameter key="text_attribute" value="ExtractedText"/>
              </operator>
              <operator activated="true" class="extract_macro" compatibility="8.1.000" expanded="true" height="68" name="Extract Quote" width="90" x="447" y="34">
                <parameter key="macro" value="Quote"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="ExtractedText"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <!-- Create Document has no incoming connection: it builds its text purely
                   from the "now" and "Quote" macros. &#10; is a line feed. -->
              <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Create Document" width="90" x="648" y="34">
                <parameter key="text" value="Title: Hemingway Quote for %{now}&#10;Date: %{now}&#10;&#10;#Hemingway Quote for %{now}&#10;&#10;%{Quote}"/>
              </operator>
              <connect from_port="segment" to_op="Extract Content" to_port="document"/>
              <connect from_op="Extract Content" from_port="document" to_op="Extract Information" to_port="document"/>
              <connect from_op="Extract Information" from_port="document" to_op="Documents to Data" to_port="documents 1"/>
              <connect from_op="Documents to Data" from_port="example set" to_op="Extract Quote" to_port="example set"/>
              <connect from_op="Create Document" from_port="output" to_port="document 1"/>
              <portSpacing port="source_segment" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <!-- Outer wiring: the date chain and the page chain are independent; only the
               Cut Document output is delivered as a process result. -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
          <connect from_op="Date to Nominal" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Get Page" from_port="output" to_op="Cut Document" to_port="document"/>
          <connect from_op="Cut Document" from_port="documents" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
Up next Predicting Historical Volatility for the S&P500 Predicting Historical Volatility is easy with RapidMiner. The attached process uses RapidMiner to recreate a research paper Options trading driven How StockTwits Uses Machine Learning Fascinating behind the scenes interview of StockTwit’s Senior Data Scientist Garrett Hoffman. He shares great tidbits on how StockTwits uses machine
Latest posts Phone Addiction Version 12 Launches Today! Machine Learning Making Pesto Tastier 5 Dangerous Things You Should Let Your Kids Do The Pyschology of Writing TensorFlow and High Level APIs Driving Marketing Performance with H2O Driverless AI Machine Learning and Data Munging in H2O Driverless AI with datatable Making AI Happen Without Getting Fired Latest Musings from a Traveling Sales Engineer The Night before H2O World 2019 Why Forex Trading is Frustrating Functional Programming in Python Automatic Feature Engineering with Driverless AI Ray Dalio's Pure Alpha Fund What's new in Driverless AI? Latest Writings Elsewhere - December 2018 House Buying Guide for Millennials Changing Pinboard Tags with Python Automate Feed Extraction and Posting it to Twitter Flux: A Machine Learning Framework for Julia Getting Started in Data Science Part 2 Makers vs Takers How Passive Investing Saved My Life Startups and Open Source The Process of Writing H2O AI World 2018 in London Ray Dalio's Pure Alpha Fund Isolation Forests in H2O.ai Living the Dream? Humility and Equanimity in Sales