#!/bin/bash
# tw2txt: download a Twitter user timeline with twurl and convert it to
# twtxt ("<timestamp><TAB><text>" per line).
# Requires: twurl, plus GNU date and GNU sed (used by the functions below).
_name="tw2txt"
_author="mio"
_desc="download twitter user_timeline.json using twurl and convert to twtxt."
_version="0.1 (2017-03-06)"
_license="BSD-3"

# API path handed to twurl.
twurl_src="/1.1/statuses/user_timeline.json"
# Scratch file for the downloaded JSON. Use the real home directory; the
# previous hard-coded /home/<user> is wrong for root, macOS and custom homes.
# It is kept only as a fallback when HOME is unset or empty.
input="${HOME:-/home/$(whoami)}/twitter.json"
# Default twtxt output file, placed next to the scratch file.
output="$(dirname "$input")/tw2txt.txt"

# Download the timeline with twurl and append it to a twtxt file.
# Globals:   twurl_src (read), input (read; scratch JSON path, removed after
#            parsing), output (read; overwritten when $1 is given),
#            _name (read, for the error message)
# Arguments: $1 - optional output file; when empty the current $output is used
# Outputs:   appends "<ISO-8601 timestamp><TAB><tweet text>" lines to $output
# Returns:   1 if the twurl request fails, otherwise the status of the final sed
convert() {
    if [ -n "$1" ]; then output="$1"; fi

    local tdata line idt stamp body
    local tab=$'\t'

    mkdir -p "$(dirname "$input")"
    if ! twurl "$twurl_src" > "$input"; then
        rm -f "$input"
        echo "$_name: twurl request failed" >&2
        return 1
    fi

    # Concat json, remove [] wrapper
    # Split at date start and remove date label, split at date end
    # Replace text label with placeholder
    # Remove unneeded lines, remove extra newlines
    # Replace placeholder
    # Each sed stays a separate process on purpose: later stages must see the
    # newlines inserted by earlier ones as real line boundaries.
    tdata=$(tr -d "[]" < "$input" | \
        sed 's/{"created_at":"/\n/g' | sed 's/","/\n/g' | \
        sed 's/text":"/_TWT_/g' | \
        sed 's/.*":".*//g' | sed '/^$/d' | \
        sed 'N;s/\n_TWT_/\t/g')
    rm -f "$input"

    # Convert timestamp
    # Read line by line instead of word-splitting $tdata: IFS is left alone
    # (the old code reset it from a never-assigned variable, leaving it empty)
    # and no glob expansion is applied to the tweet text.
    while IFS= read -r line; do
        [ -n "$line" ] || continue
        # Everything before the first tab is the timestamp, the rest the tweet.
        # A line without a tab yields the whole line for both parts.
        idt=${line%%"$tab"*}
        body=${line#*"$tab"}
        stamp=$(date -d "$idt" "+%FT%T%:z")
        # Expand backslash escapes exactly once and remove any extra newlines
        # from the tweet body.
        # [timestamp][tab][tweet]
        printf '%s\t%s\n' "$stamp" "$(printf '%b' "$body" | tr -d '\n')"
    done <<< "$tdata" >> "$output"

    # Remove escape backslashes from double quotes and urls
    sed -i -e 's/\\"/"/g' -e 's|\\/|/|g' "$output"
}

# Print the twtxt file at $output in a reader-friendly layout.
# Globals:   output (read)
# Arguments: none
# Outputs:   for each non-empty line, the timestamp reformatted as
#            "Month DD, YYYY HH:MM TZ" on one line, the tweet on the next,
#            then a blank line. Lines whose first tab-separated field is not
#            a date that `date -d` accepts are printed unchanged, followed by
#            a blank line.
layout() {
    local line idt odt body
    local tab=$'\t'

    # Read line by line rather than word-splitting $(cat ...): IFS stays
    # untouched and tweet text is never glob-expanded.
    while IFS= read -r line || [ -n "$line" ]; do
        [ -n "$line" ] || continue

        idt=""
        body=""
        case "$line" in
            *"$tab"*)
                idt=${line%%"$tab"*}
                body=${line#*"$tab"}
                ;;
        esac

        # Check if valid date or newline in tweet
        # This step should be unneeded after removing \n in convert()
        # and is an extra check to avoid date conversion error.
        # Errors go to /dev/null; the old "2>: 1>:" wrote a file named ":"
        # into the current directory.
        if [ -n "$idt" ] \
            && odt=$(date -d "$idt" "+%B %d, %Y %H:%M %Z" 2> /dev/null); then
            # %b expands backslash escapes in the tweet the way echo -e did.
            printf '%s\n%b\n\n' "$odt" "$body"
        else
            printf '%b\n\n' "$line"
        fi
    done < "$output"
}

# Command-line entry point: the first argument selects the action.
case "$1" in
    output|-o)
        # Convert into the file given as second argument (convert keeps the
        # current $output when that argument is empty).
        convert "$2"
        ;;
    parse|-p)
        # Convert, print the raw twtxt lines, then discard the output file.
        convert "$output"
        cat "$output"
        rm -rf "$output"
        ;;
    help|--help)
        # One argument per output line; %b expands the \n and \t escapes.
        printf '%b\n' \
            "$_name — $_desc\n" \
            "Options:" \
            "  output [file]\t\tOutput twtxt to file" \
            "  parse\t\t\tView timeline in a parse-friendly format" \
            "  --version\t\tShow the version"
        ;;
    version|--version)
        printf '%b\n' "$_name $_version"
        ;;
    *)
        # Default: convert, show the reader-friendly layout, then clean up.
        convert "$output"
        layout
        rm -rf "$output"
        ;;
esac