From 43f112f4f9b786c530a5d9766cae011d91c8f9a5 Mon Sep 17 00:00:00 2001 From: Christoph Rohrer Date: Mon, 1 May 2017 22:02:34 +0200 Subject: [PATCH] adds new crawling mode for Radio FM4. This mode is highly specific for their API --- main.js | 3 +- package.json | 5 +- radioCrawler.js | 143 +++++++++++++++++++++++++++++++++++++-------- spotifyOAuth.js | 2 +- spotifyPlaylist.js | 64 ++++++++++---------- 5 files changed, 158 insertions(+), 59 deletions(-) diff --git a/main.js b/main.js index f007af8..4fa94cd 100644 --- a/main.js +++ b/main.js @@ -13,7 +13,8 @@ start(); function start(){ spotifyPlaylist.getAllTracks() .then(radioCrawler.getTracks) - .then(radioTracks => spotifySearch.searchTracks(radioTracks)) + .then(radioTracks => radioCrawler.cleanTracks(radioTracks)) + .then(cleanedTracks => spotifySearch.searchTracks(cleanedTracks)) .then(newTracks => spotifyPlaylist.addTracks(newTracks)) .then(process.exit); } diff --git a/package.json b/package.json index d405c32..a37b71b 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,11 @@ { "name": "spotifyRadioPlaylist", - "version": "2.3.0", + "version": "2.3.1", "dependencies": { "bluebird": "^3.5.0", "cheerio": "^0.19.0" + }, + "scripts": { + "start": "node ./main.js" } } diff --git a/radioCrawler.js b/radioCrawler.js index 87f83b9..9a5184d 100644 --- a/radioCrawler.js +++ b/radioCrawler.js @@ -1,22 +1,39 @@ /** * Created by chris on 06.01.16. */ - +"use strict"; var http = require('http'); +var https = require('https'); var Promise = require('bluebird'); var fs = require('fs'); var cheerio = require('cheerio'); var logger = require('./logger'); var config = JSON.parse(fs.readFileSync('config.json', 'utf8')); +String.prototype.trimEx = function() {return this.trim().replace(/^\s?-\s/, '').toUpperCase()}; // we compare our strings later in uppercase +String.prototype.isEmpty = function() {return (!this || !this.length)}; + /** * getTracks * @param {string} [trackserviceUrl] * @returns {Promise} */ function getTracks(trackserviceUrl){ + var url = trackserviceUrl || config.radioTrackserviceUrl; + if(config.fm4Api){ + return getFm4Broadcasts(url) + .then(broadcasts => broadcasts.map(broadcast => getFm4BroadcastTracks(broadcast))) + .then(AllBroadcastsWithTracks => Promise.all(AllBroadcastsWithTracks)) + .then(broadcasts => { + var tracks = []; + broadcasts.forEach(broadcast => { + broadcast.forEach(track => tracks.push(track)); + }); + return tracks; + }); + } + return new Promise((resolve, reject) => { - var url = trackserviceUrl || config.radioTrackserviceUrl; console.log('getting tracks from radio trackservice'); var trackserviceReq = http.request(url, function(res) { var html = ''; @@ -60,28 +77,13 @@ function getTracks(trackserviceUrl){ $artist = $entry.find(config.radioArtistSelector); } - String.prototype.trimEx = function() { return this.trim().replace(/^\s?-\s/, '').toUpperCase(); } // we compare our strings later in uppercase - title = $title.text().trimEx(); - artist = $artist.text().trimEx(); - - String.prototype.isEmpty = function() { return (!this || !this.length); } - if (title.isEmpty() || artist.isEmpty()){ - return; - } + title = $title.text(); + artist = $artist.text(); - // check for duplicates - tracks.forEach(function(track){ - if(track.artist + '-' + track.title === artist + '-' + title){ - isUnique = false; - } + tracks.push({ + title: title, + artist: artist }); - - if(isUnique){ - tracks.push({ - title: title, - artist: artist - }); - } }); if(tracks.length === 0){ @@ -103,6 +105,101 @@ function getTracks(trackserviceUrl){ }); } +function getFm4Broadcasts(broadcastsUrl){ + return new Promise((resolve, reject) => { + https.get(broadcastsUrl, (res) => { + if(res.statusCode !== 200){ + console.log(res.statusCode); + reject(); + } + + let rawData = ''; + res.on('data', (chunk) => { rawData += chunk; }); + res.on('end', () => { + try { + var days = JSON.parse(rawData); + var allBroadcasts = []; + days.map(day => day.broadcasts).map(broadcasts => { + broadcasts.map(broadcast => allBroadcasts.push(broadcast)) + }); + resolve(allBroadcasts); + } catch (e) { + console.error(e.message); + } + }); + }).on('error', function(err) { + console.error(err); + }); + }); +} + +function getFm4BroadcastTracks(broadcast){ + return new Promise((resolve, reject) => { + https.get(broadcast.href, (res) => { + if(res.statusCode !== 200){ + console.log(res.statusCode); + reject(); + } + + let rawData = ''; + res.on('data', (chunk) => { rawData += chunk; }); + res.on('end', () => { + try { + var data = JSON.parse(rawData); + var tracks = data.items + .map(broadcastItem => { + return { + title: broadcastItem.title, + artist: broadcastItem.interpreter + } + }); + resolve(tracks); + } catch (e) { + console.error(e.message); + } + }); + }).on('error', function(err) { + console.error(err); + }); + }); +} + +/** + * Deduplicates Tracks and removes empty entries + * @param {Array} tracks + * @return {Array} + */ +function cleanTracks(tracks){ + return new Promise((resolve) => { + var cleanedTracks = []; + + tracks + .filter(track => track.artist && track.title) + .forEach((track) => { + var isUnique = true; + var artist = track.artist.trimEx(); + var title = track.title.trimEx(); + + // check for duplicates + cleanedTracks.forEach(function(cleanTrack){ + if(cleanTrack.artist + '-' + cleanTrack.title === artist + '-' + title){ + isUnique = false; + } + }); + + if(isUnique){ + cleanedTracks.push({ + artist: artist, + title: title + }); + } + }); + + resolve(cleanedTracks); + }); +} + module.exports = { - getTracks: getTracks + getTracks: getTracks, + cleanTracks: cleanTracks }; diff --git a/spotifyOAuth.js b/spotifyOAuth.js index 0669545..de7dd31 100644 --- a/spotifyOAuth.js +++ b/spotifyOAuth.js @@ -54,7 +54,7 @@ var spotifyOAuth = { }; function handleRequest(request, response){ - data = ''; + var data = ''; request.on('data', function (chunk) { data += chunk; }); diff --git a/spotifyPlaylist.js b/spotifyPlaylist.js index b6dd790..fa3e8ba 100644 --- a/spotifyPlaylist.js +++ b/spotifyPlaylist.js @@ -94,7 +94,6 @@ function getTracks(offset){ */ function addTracks(results){ var accessToken = spotifyOAuth.getAccessToken(), - addRequest, LIMIT = 40; // limit how many tracks will be added in one request if(accessToken === false){ @@ -109,42 +108,41 @@ function addTracks(results){ var requests = results // we can only add max 40 tracks at once. So we split all results in chunks of 40 tracks .map((item, i) => (i % LIMIT === 0) ? results.slice(i, i + LIMIT) : null) - .filter(item => item) - // join all 40 track ids for the query string - .map(group => group.join()) - .map(uris => makeAddRequest(uris)); + .filter(item => item && item.length) + .map((items, i) => makeAddRequest(items, i * 100)); return Promise.all(requests); - function makeAddRequest(uris){ + function makeAddRequest(items, timeout){ return new Promise(resolve => { - addRequest = https.request({ - hostname: 'api.spotify.com', - path: '/v1/users/'+config.userId+'/playlists/'+config.playlistId+'/tracks?position=0&uris='+uris, - method: 'POST', - headers: { - 'Authorization': 'Bearer '+ accessToken, - 'Accept': 'application/json' - } - }, function(res){ - if(res.statusCode === 201){ - logger.log('Success! Added '+ results.length + ' tracks.'); - resolve(); - return; - } else { - spotifyHelper.checkForRateLimit(res, 'adding to playlist', () => resolve(spotifyPlaylist.addTracks(results))) - .then(() => { - if(res.statusCode === 401){ - spotifyOAuth.refresh(); - } else { - logger.log("Error adding to playlist. Status "+res.statusCode); - process.exit(1); - } - }); - } - }); - - addRequest.end(); + setTimeout(() => { + var addRequest = https.request({ + hostname: 'api.spotify.com', + path: '/v1/users/'+config.userId+'/playlists/'+config.playlistId+'/tracks?position=0&uris='+items.join(), // join all 40 track ids for the query string + method: 'POST', + headers: { + 'Authorization': 'Bearer '+ accessToken, + 'Accept': 'application/json' + } + }, function(res){ + if(res.statusCode === 201){ + logger.log('Success! Added '+ items.length + ' tracks.'); + resolve(); + return; + } else { + spotifyHelper.checkForRateLimit(res, 'adding to playlist', () => resolve(spotifyPlaylist.addTracks(results))) + .then(() => { + if(res.statusCode === 401){ + spotifyOAuth.refresh(); + } else { + logger.log("Error adding to playlist. Status "+res.statusCode); + process.exit(1); + } + }); + } + }); + addRequest.end(); + }, timeout); }); } }