scripts/twtxt/tw2txt

79 lines
2.3 KiB
Bash
Executable File

#!/bin/bash
# tw2txt configuration: script metadata plus the paths shared by the
# functions below.

# Script metadata, used by the help and version output.
_name="tw2txt"
_author="mio"
_desc="download twitter user_timeline.json using twurl and convert to twtxt."
_version="0.1 (2017-03-06)"
_license="BSD-3"

# API path handed to twurl.
twurl_src="/1.1/statuses/user_timeline.json"

# File that receives the raw twurl response. Prefer $HOME, because home
# directories are not always /home/<user> (root, macOS, custom layouts);
# the old /home/$(whoami) location remains the fallback when HOME is unset.
input="${HOME:-/home/$(whoami)}/twitter.json"

# Default twtxt destination, placed next to the raw JSON file.
output="$(dirname -- "$input")/tw2txt.txt"
#######################################
# Download the user timeline with twurl and append it to a file as twtxt
# lines of the form "<timestamp><TAB><tweet>".
# Globals:
#   twurl_src (read)         - API path handed to twurl
#   input     (read)         - file for the raw JSON; removed afterwards
#   output    (read/written) - destination file; replaced by $1 when given
#   _name     (read)         - used as prefix of the error message
# Arguments:
#   $1 - optional destination file
# Outputs:
#   Appends the converted entries to $output; diagnostics go to stderr.
# Returns:
#   Non-zero when the directory for $input cannot be created, when twurl
#   fails, or when the final sed clean-up of $output fails.
#######################################
convert() {
  local tdata raw_line stamp_in stamp_out body

  if [[ -n "$1" ]]; then output="$1"; fi

  mkdir -p -- "$(dirname -- "$input")" || return 1
  if ! twurl "$twurl_src" > "$input"; then
    rm -f -- "$input"
    printf '%s: twurl request failed\n' "${_name:-tw2txt}" >&2
    return 1
  fi

  # Flatten the JSON into "date<TAB>text" lines:
  #   1. drop the [] characters
  #   2. start a new line at every {"created_at":" (the label is dropped)
  #   3. break at every "," so the date ends its line
  #   4. tag the tweet text with the _TWT_ placeholder
  #   5. blank every remaining key/value line, then delete empty lines
  #   6. join each date line with the _TWT_ line that follows it
  tdata=$(tr -d '[]' < "$input" \
    | sed 's/{"created_at":"/\n/g' \
    | sed 's/","/\n/g' \
    | sed 's/text":"/_TWT_/g' \
    | sed 's/.*":".*//g' \
    | sed '/^$/d' \
    | sed 'N;s/\n_TWT_/\t/g')
  rm -f -- "$input"

  # Read line by line instead of word-splitting $tdata: IFS is left alone
  # and tweet text is never subject to glob expansion.
  while IFS= read -r raw_line; do
    [[ -n "$raw_line" ]] || continue
    stamp_in=${raw_line%%$'\t'*}
    stamp_out=$(date -d "$stamp_in" '+%FT%T%:z')
    # Decode the backslash escapes exactly once; newlines produced by the
    # decoding are removed so every tweet stays on a single line.
    body=$(echo -e "$raw_line" | cut -f 2 | tr -d '\n')
    printf '%s\t%s\n' "$stamp_out" "$body"
  done <<< "$tdata" >> "$output"

  # Remove the escape backslashes left in front of double quotes and slashes.
  sed -i -e 's/\\"/"/g' -e 's|\\/|/|g' -- "$output"
}
#######################################
# Print the twtxt file in a human-readable layout: for each entry the
# formatted date on one line, the tweet on the next, then a blank line.
# Globals:
#   output (read) - twtxt file to display
# Arguments:
#   None
# Outputs:
#   Writes the formatted timeline to stdout.
#######################################
layout() {
  local entry idt odt

  # Read line by line; IFS is left alone and the lines are never subject
  # to glob expansion.
  while IFS= read -r entry || [[ -n "$entry" ]]; do
    [[ -n "$entry" ]] || continue
    idt=${entry%%$'\t'*}
    # Reformat only entries that have a non-empty, parseable timestamp in
    # front of a tab; anything else (for example the remainder of a tweet
    # that contained a newline) is printed unchanged. The output of the
    # probe is discarded via /dev/null rather than written to a file.
    if [[ -n "$idt" && "$entry" == *$'\t'* ]] \
      && odt=$(date -d "$idt" '+%B %d, %Y %H:%M %Z' 2>/dev/null); then
      printf '%s\n%s\n\n' "$odt" "${entry#*$'\t'}"
    else
      printf '%s\n\n' "$entry"
    fi
  done < "$output"
}
# Command-line dispatch on the first argument:
#   output|-o [file]   append the timeline to a twtxt file
#   parse|-p           print the raw twtxt lines
#   help|--help        show usage
#   version|--version  show the version
#   anything else      print the timeline in the human-readable layout
# The parse and default modes work on a private mktemp file, so they never
# append to or delete a twtxt file kept at the default output path.
case "$1" in
  output|-o)
    convert "$2"
    ;;
  parse|-p)
    tmp_out=$(mktemp) || exit 1
    if convert "$tmp_out"; then
      cat -- "$tmp_out"
    fi
    rm -f -- "$tmp_out"
    ;;
  help|--help)
    printf '%s: %s\n\n' "$_name" "$_desc"
    printf 'Options:\n'
    printf 'output [file]\t\tOutput twtxt to file\n'
    printf 'parse\t\t\tView timeline in a parse-friendly format\n'
    printf -- '--version\t\tShow the version\n'
    ;;
  version|--version)
    printf '%s %s\n' "$_name" "$_version"
    ;;
  *)
    tmp_out=$(mktemp) || exit 1
    # convert stores its argument in $output, which layout then reads.
    if convert "$tmp_out"; then
      layout
    fi
    rm -f -- "$tmp_out"
    ;;
esac