# Find all json data files with errors in them, and return their path, contents and activity_id
# Just the paths:
find data -name '*.json' | grep -v "data/output" | xargs -L1 grep -l '"error":'
# For OSX with gnu-grep from homebrew:
find data -name '*.json' | grep -v "data/output" | xargs -L1 ggrep -l '"error":' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | ggrep -oP "data/gplus/activities/\K([^/]+)")"'
# GNU systems:
find data -name '*.json' | grep -v "data/output" | xargs -L1 grep -l '"error":' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | grep -oP "data/gplus/activities/\K([^/]+)")"'
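# A quick variant (only needs grep -l, so it should work on both OSX and GNU): count how many data files contain errors:
find data -name '*.json' | grep -v "data/output" | xargs -L1 grep -l '"error":' | wc -l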
# Find all json data files with empty item lists in them, and return their path, contents and activity_id
# OSX:
find data -name '*.json' | grep -v "data/output" | xargs -L1 ggrep -l '"items": \[\]' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | ggrep -oP "data/gplus/activities/\K([^/]+)")"'
# GNU systems:
find data -name '*.json' | grep -v "data/output" | xargs -L1 grep -l '"items": \[\]' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | grep -oP "data/gplus/activities/\K([^/]+)")"'
# Combined:
# OSX:
find data -name '*.json' | grep -v "data/output" | xargs -L1 ggrep -Pl '("items": \[\]|"error":)' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | ggrep -oP "data/gplus/activities/\K([^/]+)")"'
# GNU:
find data -name '*.json' | grep -v "data/output" | xargs -L1 grep -Pl '("items": \[\]|"error":)' | xargs -L1 bash -c 'echo $0 && cat $0 && echo "$(echo $0 | grep -oP "data/gplus/activities/\K([^/]+)")"'
# Retry failed/empty comments files (deletes and re-fetches them):
# OSX:
find data -name 'comments_for_*.json' | grep -v "data/output" | xargs -L1 ggrep -Pl '("items": \[\]|"error":)'| xargs -L1 bash -c 'echo $0 && cat $0 && rm "$0" && DEBUG=1 ./bin/get_gplus_api_comments_by_gplus_activity_id.sh "$(echo $0 | ggrep -oP "data/gplus/activities/\K([^/]+)")"'
# With `hilite`:
hilite find data -name 'comments_for_*.json' | hilite grep -v "data/output" | hilite xargs -L1 ggrep -Pl '("items": \[\]|"error":)'| xargs -L1 bash -c 'echo $0 && cat $0 && rm "$0" && DEBUG=1 hilite ./bin/get_gplus_api_comments_by_gplus_activity_id.sh "$(echo $0 | ggrep -oP "data/gplus/activities/\K([^/]+)")"'
# GNU:
find data -name 'comments_for_*.json' | grep -v "data/output" | xargs -L1 grep -Pl '("items": \[\]|"error":)'| xargs -L1 bash -c 'echo $0 && cat $0 && rm "$0" && DEBUG=1 ./bin/get_gplus_api_comments_by_gplus_activity_id.sh "$(echo $0 | grep -oP "data/gplus/activities/\K([^/]+)")"'
# Find all zero-byte data files:
find data -size 0
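# To clean them up after reviewing the list (assumes your find supports -delete, as GNU and modern BSD find do):
find data -size 0 -delete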
# Export the first 20 comments from a combined Blogger G+ Comments JSON export file to HTML
cat data/output/$domain.json | jq -cr '[[.blog .posts[] |select(.|keys|any(. == "activities")) | .activities[] | .object .replies | select(.|keys|any(. == "comments"))|.comments[]|.items]]|flatten[range(20)]|@base64'|gxargs -I@@ bin/generate-comment-template-from-api-comment-json-in-base64.sh templates/h-entry-p-comment-microformat.template.html templates/h-entry-author.template.html @@ > tmp-comments-body.html && bin/generate-html-template-layout.sh "default" "tmp-comments-body.html" > tmp-comments.html
# Export all comments from a combined Blogger G+ Comments JSON export file to HTML
cat data/output/$domain.json | jq -cr '[[.blog .posts[] |select(.|keys|any(. == "activities")) | .activities[] | .object .replies | select(.|keys|any(. == "comments"))|.comments[]|.items]]|flatten[]|@base64'|gxargs -I@@ bin/generate-comment-template-from-api-comment-json-in-base64.sh templates/h-entry-p-comment-microformat.template.html templates/h-entry-author.template.html @@ > tmp-comments-body.html && bin/generate-html-template-layout.sh "default" "tmp-comments-body.html" > tmp-comments.html
# Back up data dumps.
mkdir -p archived-output && find data/output -name '*.json' | gxargs -I@@ bash -c 'cp "$0" "archived-output/$(basename "$0"|ggrep -Po '\''.*(?=\.)'\'')-$(gdate +"%Y-%m-%d-%H%M").json"' "@@"
# Get permalinks from posts I've commented on:
find ./extracted/Takeout -path '*ActivityLog/Comments.json' -exec cat "{}" \; | jq -r '.items[] .commentCreatedItem .postPermalink'
# Get permalinks from PUBLIC posts I've commented on:
find ./extracted/Takeout -path '*ActivityLog/Comments.json' -exec cat "{}" \; | jq -r '.items[]| select(.visibility == "PUBLIC") .commentCreatedItem .postPermalink'|grep -v '^null$' | sort -u > tmp-public-posts-ive-commented-on.txt
# Get uids of people whose posts I've commented on, from ActivityLog/Comments.json
find ./extracted/Takeout -path '*ActivityLog/Comments.json' -exec cat "{}" \; | jq '.items[] .commentCreatedItem .postPermalink'|ggrep -oP 'https://plus\.google\.com/\K([^/]{1,})'| sort -u > uids-people_i_commented_on.txt
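# Untested idea: those uids could be turned back into profile URLs and fed to the People API caching script used further down:
cat uids-people_i_commented_on.txt | gxargs -I @@ ./bin/people_api_data_for_gplus_profile.sh 'https://plus.google.com/@@'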
# From your extracted Takeout archive, look into the Comments.json ActivityLog file and return all Activity IDs for all PUBLIC posts you've commented on.
find ./extracted/Takeout -path '*ActivityLog/Comments.json' -exec cat "{}" \; | jq -r '.items[]| select(.visibility == "PUBLIC") .commentCreatedItem .commentActivityId' | grep -v '^null$' | ggrep -oP '^([^#]+)' | sort -u > tmp-activity-ids-from-json-for-public-posts-ive-commented-on.txt
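# Those activity IDs can presumably be fed to the same comments retrieval script used further down for the Stream activityIds:
cat tmp-activity-ids-from-json-for-public-posts-ive-commented-on.txt | gxargs -L1 bin/get_gplus_api_comments_by_gplus_activity_id.sh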
# Permalinks to posts with comments I've +1'ed
jq -r '[.items[] |.commentPlusOneAddedItem .postPermalink] | unique | join("\n")' extracted/Takeout/Google+\ Stream/ActivityLog/+1s\ on\ comments.json > combined_activities-FiXato-page-plusones_on_comments-permalinks.txt
# Permalinks to posts I've +1'ed
jq -r '[.items[] |.postPlusOneAddedItem .postPermalink] | unique | join("\n")' extracted/Takeout/Google+\ Stream/ActivityLog/+1s\ on\ posts.json > combined_activities-FiXato-page-plusones_on_posts-permalinks.txt
# Permalinks to polls I've voted on
jq -r '[.items[] |.pollVoteAddedItem .postPermalink] | unique | join("\n")' extracted/Takeout/Google+\ Stream/ActivityLog/Poll\ Votes.json > combined_activities-FiXato-page-votes_on_polls-permalinks.txt
# Archive all the public posts you've commented on through The Internet Archive's Wayback Machine, and extract the activity ID from each archived page. (That extraction is not strictly necessary though, as those IDs are apparently already in the ActivityLog JSON!)
hilite cat tmp-public-posts-ive-commented-on.txt | DEBUG=1 hilite xargs -L1 ./bin/archive_url.sh | xargs -L1 cat | DEBUG=1 hilite ./bin/get_gplus_api_activity_id_from_gplus_waybackmachine_dump.sh | tee -a tmp-activity-ids-from-posts-ive-commented-on.txt
# Archive all the public posts you've commented on through The Internet Archive's Wayback Machine, and extract the comment/metadata JSON from each archived page
hilite cat tmp-public-posts-ive-commented-on.txt | DEBUG=1 hilite xargs -L1 ./bin/archive_url.sh | xargs -L1 cat | DEBUG=1 hilite ./bin/get_gplus_api_activity_and_comments_data_from_gplus_waybackmachine_dump.sh | tee -a tmp-activity_and_comment_data-from-posts-ive-commented-on.txt
# Get all participants in a community from an F+Me export file:
cat ./f-plus-me-community-export.json | jq -r '[.accounts[0]|.communities[] .categories[] .posts[] as $post | [$post .author .id, $post .comments[] .author .id]|flatten]|flatten|unique|join("\n")'
# Extract all profile URLs from a G+ Circles Takeout file
cat Takeout/Google+\ Circles/circle-name.json | jq -r '[.person[] .profileUrl] | unique|join("\n")' | sort -u > my-circle-profile-urls.txt
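# A variant of the uid extraction above, assuming the profile URLs use the numeric https://plus.google.com/<uid> form (the output filename is just an example):
ggrep -oP 'https://plus\.google\.com/\K([^/]+)' my-circle-profile-urls.txt | sort -u > my-circle-uids.txt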
# Extract all profile URLs from all G+ Circles Takeout files
cat Takeout/Google+\ Circles/*.json | jq -r '[select(.person != null) .person[] .profileUrl] | unique |join("\n")' | sed '/^$/d' | sort -u > all-my-circles-profile-urls.txt
# Get unique user urls (protocols and www. subdomains stripped) from queried People API JSON.
cat ./data/gplus/users/*.json | jq -r 'select(.urls != null) | .urls[] .value' | ggrep -oP 'https?://(www\.)?\K(.+)' | sort -u
# Combine posts JSON files
gfind 'Takeout/Google+ Stream/Posts' -iname '*.json' -exec cat {} + | jq -s '.' > combined_activities-FiXato-page.json
# Get all public posts and sort them by creation time, from the Combined posts JSON file
jq 'include "plexodus-tools"; . | is_public | sort_by_creation_time' combined_activities-FiXato-page.json
# Extract all activityIds from the public posts Stream JSON file.
jq -r '[.[] .activityId] | join("\n")' combined_activities-FiXato-page-public_by_creation_time.json | sort -u > combined_activities-FiXato-page-public_by_creation_time-activityIds.txt
# Request the Activity JSON files from the API for the activityIds extracted from the posts files:
cat combined_activities-FiXato-page-public_by_creation_time-activityIds.txt | gxargs -L1 bin/get_gplus_api_comments_by_gplus_activity_id.sh
cat combined_activities-FiXato-page-public_by_creation_time-activityIds.txt | gxargs -L1 bin/get_gplus_api_activity_by_gplus_activity_id.sh | gxargs -L1 bin/get_gplus_api_comments_by_gplus_activity_file.sh
# Extract all commenters' profilePageUrls from the posts Stream JSON file.
jq -r '.[] | select(.comments | length > 0) | .comments[] | .author .profilePageUrl' combined_activities-FiXato-page.json | sort -u
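# Presumably written to the file the caching step below reads from, with a redirect like this:
jq -r '.[] | select(.comments | length > 0) | .comments[] | .author .profilePageUrl' combined_activities-FiXato-page.json | sort -u > combined_activities-FiXato-page-commenters-profilePageUrls.json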
# And cache them:
cat combined_activities-FiXato-page-commenters-profilePageUrls.json | gxargs -L1 bin/people_api_data_for_gplus_profile.sh
# Excluding +CustomUrlHandles, since the new People API doesn't support them anymore...
cat combined_activities-FiXato-page-commenters-profilePageUrls.json | ggrep -v 'https://plus\.google\.com/+' | gxargs -L1 bin/people_api_data_for_gplus_profile.sh
# Alternatively:
find ./extracted/Takeout/Google+\ Stream/Posts -iname '*.json' | gxargs -I @@ jq '.comments//[] | .[] .author .profilePageUrl' '@@'| sort -u | gxargs -I @@ DEBUG=1 hilite ./bin/people_api_data_for_gplus_profile.sh '@@' | tee ./data/output/result-lists/FiXato-Filip-commenters-people-json-files.txt
# +CustomURLHandles no longer need to be excluded, as I've patched my script to try to retrieve the numeric ID instead.
# And the same for commenters on your posts:
find ./extracted/Takeout/Google+\ Stream/Posts -iname '*.json' | gxargs -I @@ jq '.comments//[] | .[] .author .profilePageUrl' '@@' | DEBUG=1 gxargs -I @@ hilite ./bin/people_api_data_for_gplus_profile.sh '@@' | tee ./data/output/result-lists/commenters-people-json-files.txt
# Collect all the JSON files from the extracted Google Data Takeout Google Stream Posts archive, reverse order them to process the newest first, and generate HTML from it, storing it as "$year-$month-$day-$activityID-$titleSummary.html" into the following folder structure:
# data/output/html/exported_activities/limited/communities/$communityID-$communityName/$userID
# data/output/html/exported_activities/limited/events/$eventID-$eventName/$userID
# data/output/html/exported_activities/limited/posts
# data/output/html/exported_activities/limited/posts/$userID/circles/$circleName
# data/output/html/exported_activities/limited/posts/$userID/collections/$collectionID-$collectionName
# data/output/html/exported_activities/limited/posts/$userID/users/$userId-$userName
# data/output/html/exported_activities/public/posts/$userID/public
find "extracted/Takeout/Google+ Stream/Posts" -type f -iname '*.json' | gtac | DEBUG=1 gxargs -I @@ bin/generate_html_for_takeout_post.sh "@@"