Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions instagram_scrape.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

/**
 * Fetch an Instagram profile page and decode the JSON embedded in it.
 *
 * The page source is searched for the text between 'window._sharedData = '
 * and the following ';</script>'; that text is passed to json_decode().
 * Only non-private accounts expose this data.
 *
 * @param string $username Account name appended to the instagram.com URL.
 * @return mixed The decoded JSON (objects become associative arrays), or
 *               null when the page cannot be fetched, the marker is not
 *               present, or the payload is not valid JSON.
 */
function scrape_insta($username) {
    $insta_source = file_get_contents('http://instagram.com/'.$username);
    if ($insta_source === false) {
        // Network/HTTP failure: there is nothing to parse.
        return null;
    }

    // Limit of 2: everything after the first marker stays in one piece.
    $shards = explode('window._sharedData = ', $insta_source, 2);
    if (count($shards) < 2) {
        // Marker missing (private account, changed page layout, error page).
        return null;
    }

    // The JSON payload ends where the enclosing <script> element closes.
    $insta_json = explode(';</script>', $shards[1], 2);

    // TRUE => decode JSON objects as associative arrays; yields null on bad JSON.
    return json_decode($insta_json[0], TRUE);
}

//Supply a username
$my_account = 'taylorswift';

//Do the deed
$results_array = scrape_insta($my_account);

// Locate the list of media nodes; fall back to an empty list when the scrape
// failed or the expected structure is absent, so no placeholder rows are written.
$nodes = [];
if (isset($results_array['entry_data']['ProfilePage'][0]['user']['media']['nodes'])
    && is_array($results_array['entry_data']['ProfilePage'][0]['user']['media']['nodes'])) {
    $nodes = $results_array['entry_data']['ProfilePage'][0]['user']['media']['nodes'];
}

//An example of where to go from there
// Each exported row is [caption, date, display, thumbnail, is_video, likes].
$strs = [];
// Take at most the first five posts; profiles with fewer posts yield fewer rows.
foreach (array_slice($nodes, 0, 5) as $tmp) {
    // Not every node carries a caption key; keep null in that case.
    $caption = isset($tmp['caption']) ? $tmp['caption'] : null;
    //date is in unix time
    $date = isset($tmp['date']) ? (string) $tmp['date'] : '';
    // this is the pic you want to display
    $display = isset($tmp['display_src']) ? $tmp['display_src'] : null;
    // might be useful?
    $thumbnail = isset($tmp['thumbnail_src']) ? $tmp['thumbnail_src'] : null;
    // Normalised to the strings "1" / "0".
    $is_video = (isset($tmp['is_video']) && $tmp['is_video'] == 1) ? '1' : '0';
    $likes = isset($tmp['likes']['count']) ? (string) $tmp['likes']['count'] : '';
    $strs[] = [ $caption, $date, $display, $thumbnail, $is_video, $likes ];
}

$str = json_encode( $strs );
if ($str === false) {
    // json_encode() returns false when the data cannot be encoded.
    error_log('instagram_scrape: could not encode posts as JSON');
    $str = '[]';
}

$output = fopen("insta.out","w");
if ($output === false) {
    error_log('instagram_scrape: could not open insta.out for writing');
} else {
    fwrite( $output, $str );
    // Close explicitly so the data is flushed and the handle released.
    fclose( $output );
}

// shamelessly stolen from https://gist.github.com/cosmocatalano/4544576
?>
24 changes: 24 additions & 0 deletions parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from json import loads,dumps
from subprocess import call
import datetime

# Maximum number of posts to export.
NUM_POSTS = 5

# insta.out holds a JSON list of six-element rows:
# [caption, unix_time, media_url, thumbnail, is_video, likes]
# (the row layout written by instagram_scrape.php).
with open("insta.out") as f:
    insta = loads(f.read())

insta_url = 'https://instagram.com/taylorswift'
insta_image_url = 'static/website/images/icon/insta.png'

posts = []
for (caption, unix_time, media_url, thumbnail, is_video, likes) in insta:
    # unix_time arrives as a string; convert it to a YYYY-MM-DD date (local time).
    time = datetime.datetime.fromtimestamp(int(unix_time)).strftime('%Y-%m-%d')
    posts.append([caption, insta_url, time, media_url, insta_image_url, likes])

posts = posts[:NUM_POSTS]
# Sort in place by the formatted date (ISO dates order chronologically as text).
# The previous sorted(...) call discarded its result, so nothing was sorted.
posts.sort(key=lambda x: x[2])

# Iterate over the posts actually present: there may be fewer than NUM_POSTS.
for i, post in enumerate(posts):
    # saves output file to relevant place
    call(["wget", post[3], "-O",
          "../src/website/static/website/images/icons/post" + str(i) + ".jpg"])

# The context manager guarantees the JSON is flushed and the file closed.
with open("../src/website/posts.out", "w") as f:
    f.write(dumps(posts))