forked from mio/scripts
79 lines
2.3 KiB
Plaintext
79 lines
2.3 KiB
Plaintext
|
#!/bin/bash
# Script metadata, printed by the help and version commands.
_name="tw2txt"
_author="mio"
_desc="download twitter user_timeline.json using twurl and convert to twtxt."
_version="0.1 (2017-03-06)"
_license="BSD-3"

# Twitter API endpoint requested through twurl.
twurl_src="/1.1/statuses/user_timeline.json"
# Scratch file that receives the raw JSON. Use the real home directory when
# it is known (root and non-/home accounts do not live under /home/<user>);
# fall back to the historical /home/<user> guess only if HOME is unset.
input="${HOME:-/home/$(whoami)}/twitter.json"
# Default twtxt output file, placed next to the scratch file.
output="$(dirname -- "$input")/tw2txt.txt"
|
||
|
|
||
|
#######################################
# Download the timeline JSON with twurl and append it to a twtxt file.
# Globals:
#   twurl_src (read)         - API path handed to twurl
#   input     (read)         - scratch file for the raw JSON, removed afterwards
#   output    (read/written) - twtxt file; replaced by $1 when $1 is non-empty
# Arguments:
#   $1 - optional path of the twtxt file to append to
# Outputs:
#   Appends one "<timestamp><TAB><tweet>" line per tweet to $output
# Returns:
#   0 on success, 1 when the twurl request fails
#######################################
convert() {
  if [ -n "$1" ]; then output="$1"; fi
  mkdir -p -- "$(dirname -- "$input")"
  if ! twurl "$twurl_src" > "$input"; then
    echo "${_name:-tw2txt}: twurl request failed" >&2
    rm -f -- "$input"
    return 1
  fi

  # Concat json, remove [] wrapper
  # Split at date start and remove date label, split at date end
  # Replace text label with placeholder
  # Remove unneeded lines, remove extra newlines
  # Replace placeholder (joins each date line with the tweet line after it)
  local tdata
  tdata=$(tr -d "[]" < "$input" \
    | sed "s/{\"created_at\":\"/\n/g" \
    | sed "s/\",\"/\n/g" \
    | sed "s/text\":\"/_TWT_/g" \
    | sed "s/.*\":\".*//g" \
    | sed "/^$/d" \
    | sed "N;s/\n_TWT_/\t/g")
  rm -f -- "$input"

  # Make sure the output exists even when no tweet was extracted, so the
  # in-place sed below (and callers that cat the file) do not fail.
  : >> "$output"

  # Convert timestamp.
  # Lines are read with `read -r` rather than by word-splitting $tdata, so
  # the global IFS is never modified and tweet text is not glob-expanded.
  local line idt body stamp
  while IFS= read -r line; do
    [ -n "$line" ] || continue
    idt=${line%%$'\t'*}
    # Split before expanding escapes so an escaped tab inside the tweet
    # cannot be mistaken for the field separator.
    body=${line#*$'\t'}
    stamp=$(date -d "$idt" "+%FT%T%:z")
    # Expand the JSON backslash escapes exactly once, then remove any
    # newlines from the tweet body so each tweet stays on one line.
    body=$(printf '%b' "$body" | tr -d "\n")
    # [timestamp][tab][tweet]
    printf '%s\t%s\n' "$stamp" "$body" >> "$output"
  done <<< "$tdata"

  # Remove escape backslashes from double quotes and urls
  sed -i -e 's/\\"/\"/g' -e "s|\\\/|/|g" "$output"
}
|
||
|
|
||
|
#######################################
# Print the twtxt file in a human-readable layout.
# Globals:
#   output (read) - twtxt file made of "<timestamp><TAB><tweet>" lines
# Arguments:
#   None
# Outputs:
#   For each non-empty line: the formatted date, the tweet on the next line
#   and a blank separator line. Lines whose first field is not a valid date
#   are printed unchanged, followed by a blank line.
#######################################
layout() {
  local line idt odt
  # Read line by line instead of word-splitting $(cat ...): the global IFS
  # is left alone and tweet text is never glob-expanded.
  while IFS= read -r line || [ -n "$line" ]; do
    [ -n "$line" ] || continue
    if [[ "$line" == *$'\t'* ]]; then
      idt=${line%%$'\t'*}
      # Check if valid date or newline in tweet.
      # This should be unneeded once newlines are stripped from the tweet
      # body when the file is written; it is an extra check to avoid a date
      # conversion error. An empty field is rejected explicitly because
      # GNU date accepts an empty string as a date.
      # date's diagnostics go to /dev/null (not to a file named ":").
      if [ -n "$idt" ] \
        && odt=$(date -d "$idt" "+%B %d, %Y %H:%M %Z" 2>/dev/null); then
        # Date on its own line, tweet below it, then a blank line.
        printf '%s\n%s\n\n' "$odt" "${line#*$'\t'}"
        continue
      fi
    fi
    # Not a timestamped entry: pass it through untouched. printf '%s' keeps
    # backslashes in the tweet literal instead of re-interpreting them.
    printf '%s\n\n' "$line"
  done < "$output"
}
|
||
|
|
||
|
# Command dispatch: the first argument selects the mode.
#   output | -o [file]    run convert with the given file argument
#   parse  | -p           convert, print the twtxt lines, delete the file
#   help   | --help       print the usage text
#   version| --version    print the script name and version
#   anything else         convert, print via layout, delete the file
case "$1" in
  output | -o)
    convert "$2"
    ;;
  parse | -p)
    convert "$output"
    cat "$output"
    rm -rf "$output"
    ;;
  help | --help)
    echo -e "$_name — $_desc\n\n\
Options:\n\
output [file]\t\tOutput twtxt to file\n\
parse\t\t\tView timeline in a parse-friendly format\n\
--version\t\tShow the version"
    ;;
  version | --version)
    echo -e "$_name $_version"
    ;;
  *)
    convert "$output"
    layout
    rm -rf "$output"
    ;;
esac
|