Skip to content

Commit

Permalink
adds new crawling mode for Radio FM4. This mode is highly specific fo…
Browse files Browse the repository at this point in the history
…r their API
  • Loading branch information
crohrer committed May 1, 2017
1 parent 51539fb commit 43f112f
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 59 deletions.
3 changes: 2 additions & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ start();
function start(){
spotifyPlaylist.getAllTracks()
.then(radioCrawler.getTracks)
.then(radioTracks => spotifySearch.searchTracks(radioTracks))
.then(radioTracks => radioCrawler.cleanTracks(radioTracks))
.then(cleanedTracks => spotifySearch.searchTracks(cleanedTracks))
.then(newTracks => spotifyPlaylist.addTracks(newTracks))
.then(process.exit);
}
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
{
"name": "spotifyRadioPlaylist",
"version": "2.3.0",
"version": "2.3.1",
"dependencies": {
"bluebird": "^3.5.0",
"cheerio": "^0.19.0"
},
"scripts": {
"start": "node ./main.js"
}
}
143 changes: 120 additions & 23 deletions radioCrawler.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
/**
* Created by chris on 06.01.16.
*/

"use strict";
var http = require('http');
var https = require('https');
var Promise = require('bluebird');
var fs = require('fs');
var cheerio = require('cheerio');
var logger = require('./logger');
var config = JSON.parse(fs.readFileSync('config.json', 'utf8'));

// Normalises a string for comparison: trims surrounding whitespace, drops one
// leading "- " separator (optionally preceded by a whitespace character) and upper-cases the rest.
String.prototype.trimEx = function() {
    var trimmed = this.trim();
    var withoutLeadingDash = trimmed.replace(/^\s?-\s/, '');
    return withoutLeadingDash.toUpperCase(); // we compare our strings later in uppercase
};

// Reports whether the string is falsy or has zero length.
String.prototype.isEmpty = function() {
    if (!this) {
        return true;
    }
    return !this.length;
};

/**
* getTracks
* @param {string} [trackserviceUrl]
* @returns {Promise}
*/
function getTracks(trackserviceUrl){
var url = trackserviceUrl || config.radioTrackserviceUrl;
if(config.fm4Api){
return getFm4Broadcasts(url)
.then(broadcasts => broadcasts.map(broadcast => getFm4BroadcastTracks(broadcast)))
.then(AllBroadcastsWithTracks => Promise.all(AllBroadcastsWithTracks))
.then(broadcasts => {
var tracks = [];
broadcasts.forEach(broadcast => {
broadcast.forEach(track => tracks.push(track));
});
return tracks;
});
}

return new Promise((resolve, reject) => {
var url = trackserviceUrl || config.radioTrackserviceUrl;
console.log('getting tracks from radio trackservice');
var trackserviceReq = http.request(url, function(res) {
var html = '';
Expand Down Expand Up @@ -60,28 +77,13 @@ function getTracks(trackserviceUrl){
$artist = $entry.find(config.radioArtistSelector);
}

String.prototype.trimEx = function() { return this.trim().replace(/^\s?-\s/, '').toUpperCase(); } // we compare our strings later in uppercase
title = $title.text().trimEx();
artist = $artist.text().trimEx();

String.prototype.isEmpty = function() { return (!this || !this.length); }
if (title.isEmpty() || artist.isEmpty()){
return;
}
title = $title.text();
artist = $artist.text();

// check for duplicates
tracks.forEach(function(track){
if(track.artist + '-' + track.title === artist + '-' + title){
isUnique = false;
}
tracks.push({
title: title,
artist: artist
});

if(isUnique){
tracks.push({
title: title,
artist: artist
});
}
});

if(tracks.length === 0){
Expand All @@ -103,6 +105,101 @@ function getTracks(trackserviceUrl){
});
}

/**
 * Requests the broadcasts overview and flattens it into one list of broadcasts.
 *
 * The response body is parsed as a JSON array of "day" objects; the entries of
 * every day's `broadcasts` array are collected in order into a single array.
 *
 * @param {string} broadcastsUrl - https URL of the broadcasts overview
 * @returns {Promise<Array>} resolves with all broadcasts of all days; rejects on
 *   a non-200 status, a request error, or a body that cannot be processed
 */
function getFm4Broadcasts(broadcastsUrl){
    return new Promise((resolve, reject) => {
        https.get(broadcastsUrl, (res) => {
            if(res.statusCode !== 200){
                console.log(res.statusCode);
                res.resume(); // consume the unused body so the response does not linger
                reject(new Error('Request for broadcasts failed with status ' + res.statusCode));
                return;
            }

            let rawData = '';
            // decode while streaming so a multi-byte character split across two chunks is not corrupted
            res.setEncoding('utf8');
            res.on('data', (chunk) => { rawData += chunk; });
            res.on('end', () => {
                try {
                    const allBroadcasts = [];
                    JSON.parse(rawData).forEach(day => {
                        day.broadcasts.forEach(broadcast => allBroadcasts.push(broadcast));
                    });
                    resolve(allBroadcasts);
                } catch (e) {
                    console.error(e.message);
                    reject(e); // settle the promise; otherwise callers would wait forever
                }
            });
        }).on('error', function(err) {
            console.error(err);
            reject(err);
        });
    });
}

/**
 * Requests the detail document of one broadcast and extracts its tracks.
 *
 * The response body is parsed as JSON; every entry of its `items` array is
 * mapped to `{title, artist}`, where `artist` is taken from the item's
 * `interpreter` field. Items are not filtered here, so either field may be
 * missing on an entry.
 *
 * @param {Object} broadcast - broadcast entry; only `broadcast.href` is read
 * @returns {Promise<Array>} resolves with the mapped tracks; rejects on a
 *   non-200 status, a request error, or a body that cannot be processed
 */
function getFm4BroadcastTracks(broadcast){
    return new Promise((resolve, reject) => {
        https.get(broadcast.href, (res) => {
            if(res.statusCode !== 200){
                console.log(res.statusCode);
                res.resume(); // consume the unused body so the response does not linger
                reject(new Error('Request for broadcast tracks failed with status ' + res.statusCode));
                return;
            }

            let rawData = '';
            // decode while streaming so a multi-byte character split across two chunks is not corrupted
            res.setEncoding('utf8');
            res.on('data', (chunk) => { rawData += chunk; });
            res.on('end', () => {
                try {
                    const tracks = JSON.parse(rawData).items.map(broadcastItem => ({
                        title: broadcastItem.title,
                        artist: broadcastItem.interpreter
                    }));
                    resolve(tracks);
                } catch (e) {
                    console.error(e.message);
                    reject(e); // settle the promise; otherwise callers would wait forever
                }
            });
        }).on('error', function(err) {
            console.error(err);
            reject(err);
        });
    });
}

/**
* Deduplicates Tracks and removes empty entries
* @param {Array} tracks
* @return {Array}
*/
function cleanTracks(tracks){
return new Promise((resolve) => {
var cleanedTracks = [];

tracks
.filter(track => track.artist && track.title)
.forEach((track) => {
var isUnique = true;
var artist = track.artist.trimEx();
var title = track.title.trimEx();

// check for duplicates
cleanedTracks.forEach(function(cleanTrack){
if(cleanTrack.artist + '-' + cleanTrack.title === artist + '-' + title){
isUnique = false;
}
});

if(isUnique){
cleanedTracks.push({
artist: artist,
title: title
});
}
});

resolve(cleanedTracks);
});
}

module.exports = {
getTracks: getTracks
getTracks: getTracks,
cleanTracks: cleanTracks
};
2 changes: 1 addition & 1 deletion spotifyOAuth.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ var spotifyOAuth = {
};

function handleRequest(request, response){
data = '';
var data = '';
request.on('data', function (chunk) {
data += chunk;
});
Expand Down
64 changes: 31 additions & 33 deletions spotifyPlaylist.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ function getTracks(offset){
*/
function addTracks(results){
var accessToken = spotifyOAuth.getAccessToken(),
addRequest,
LIMIT = 40; // limit how many tracks will be added in one request

if(accessToken === false){
Expand All @@ -109,42 +108,41 @@ function addTracks(results){
var requests = results
// we can only add max 40 tracks at once. So we split all results in chunks of 40 tracks
.map((item, i) => (i % LIMIT === 0) ? results.slice(i, i + LIMIT) : null)
.filter(item => item)
// join all 40 track ids for the query string
.map(group => group.join())
.map(uris => makeAddRequest(uris));
.filter(item => item && item.length)
.map((items, i) => makeAddRequest(items, i * 100));

return Promise.all(requests);

function makeAddRequest(uris){
function makeAddRequest(items, timeout){
return new Promise(resolve => {
addRequest = https.request({
hostname: 'api.spotify.com',
path: '/v1/users/'+config.userId+'/playlists/'+config.playlistId+'/tracks?position=0&uris='+uris,
method: 'POST',
headers: {
'Authorization': 'Bearer '+ accessToken,
'Accept': 'application/json'
}
}, function(res){
if(res.statusCode === 201){
logger.log('Success! Added '+ results.length + ' tracks.');
resolve();
return;
} else {
spotifyHelper.checkForRateLimit(res, 'adding to playlist', () => resolve(spotifyPlaylist.addTracks(results)))
.then(() => {
if(res.statusCode === 401){
spotifyOAuth.refresh();
} else {
logger.log("Error adding to playlist. Status "+res.statusCode);
process.exit(1);
}
});
}
});

addRequest.end();
setTimeout(() => {
var addRequest = https.request({
hostname: 'api.spotify.com',
path: '/v1/users/'+config.userId+'/playlists/'+config.playlistId+'/tracks?position=0&uris='+items.join(), // join all 40 track ids for the query string
method: 'POST',
headers: {
'Authorization': 'Bearer '+ accessToken,
'Accept': 'application/json'
}
}, function(res){
if(res.statusCode === 201){
logger.log('Success! Added '+ items.length + ' tracks.');
resolve();
return;
} else {
spotifyHelper.checkForRateLimit(res, 'adding to playlist', () => resolve(spotifyPlaylist.addTracks(results)))
.then(() => {
if(res.statusCode === 401){
spotifyOAuth.refresh();
} else {
logger.log("Error adding to playlist. Status "+res.statusCode);
process.exit(1);
}
});
}
});
addRequest.end();
}, timeout);
});
}
}
Expand Down

0 comments on commit 43f112f

Please sign in to comment.