Skip to content

Commit

Permalink
Fixes #11, now pages with ajax are supported. Also adds option to rem…
Browse files Browse the repository at this point in the history
…ove unwanted parts from artist and title strings, the option "fm4Api" is changed to "orfApi". Better readme
  • Loading branch information
crohrer committed May 6, 2017
1 parent 853b832 commit 5445a34
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 55 deletions.
11 changes: 10 additions & 1 deletion config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,16 @@
"fm4": {
"playlistId": "",
"radioTrackserviceUrl": "https://audioapi.orf.at/fm4/api/json/current/broadcasts",
"fm4Api": true
"orfApi": true
},
"fip": {
"playlistId": "",
"radioTrackserviceUrl": "http://www.fipradio.fr/archives-antenne",
"radioEntrySelector": ".list-song > .son",
"radioTitleSelector": ".titre_title",
"removeFromTitleString": "",
"radioArtistSelector": ".titre_artiste",
"removeFromArtistString": "par :"
}
}
}
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
{
"name": "spotifyRadioPlaylist",
"version": "2.4.0",
"version": "2.5.0",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/crohrer/spotifyRadioPlaylist.git"
},
"dependencies": {
"bluebird": "^3.5.0",
"cheerio": "^0.19.0"
"cheerio": "^0.19.0",
"node-horseman": "^3.3.0"
},
"scripts": {
"start": "node ./main.js"
Expand Down
71 changes: 25 additions & 46 deletions radioCrawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
* Created by chris on 06.01.16.
*/
"use strict";
var http = require('http');
var https = require('https');
var Promise = require('bluebird');
var fs = require('fs');
var cheerio = require('cheerio');
var logger = require('./logger');
var Horseman = require('node-horseman');
var horseman = new Horseman();
var config = JSON.parse(fs.readFileSync('config.json', 'utf8'));

String.prototype.trimEx = function() {return this.trim().replace(/^\s?-\s/, '').toUpperCase()}; // we compare our strings later in uppercase
String.prototype.isEmpty = function() {return (!this || !this.length)};

/**
* getTracks
Expand All @@ -22,11 +22,11 @@ String.prototype.isEmpty = function() {return (!this || !this.length)};
function getTracks(playlistName, trackserviceUrl){
let playlistConfig = config.playlists[playlistName];
let url = trackserviceUrl || playlistConfig.radioTrackserviceUrl;
if(playlistConfig.fm4Api){
return getFm4Broadcasts(url)
if(playlistConfig.orfApi){
return getOrfBroadcasts(url)
.then(broadcasts => {
console.log('getting tracks from API for '+broadcasts.length+' broadcasts');
return broadcasts.map(broadcast => getFm4BroadcastTracks(broadcast));
return broadcasts.map(broadcast => getOrfBroadcastTracks(broadcast));
})
.then(AllBroadcastsWithTracks => Promise.all(AllBroadcastsWithTracks))
.then(broadcasts => {
Expand All @@ -40,28 +40,15 @@ function getTracks(playlistName, trackserviceUrl){

return new Promise((resolve, reject) => {
console.log('getting tracks from radio trackservice');
let trackserviceReq = http.request(url, function(res) {
let html = '';

if(res.statusCode === 302){
console.log('following redirect to ' + res.headers.location);
resolve(getTracks(playlistName, res.headers.location));
return;
}
if(res.statusCode !== 200){
let error = 'Trackservice Error: Status '+res.statusCode;
logger.log(error, playlistName);
reject(error);
process.exit(1);
return;
}

res.setEncoding('utf8');
res.on('data', function (chunk) {
html += chunk;
});
res.on('end', function() {
horseman
.userAgent('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:27.0) Gecko/20100101 Firefox/27.0')
.open(url)
.waitForSelector(playlistConfig.radioEntrySelector)
.html()
.then((html) => {
let $ = cheerio.load(html),
searchInArtist = playlistConfig.removeFromArtistString || '',
searchInTitle = playlistConfig.removeFromTitleString || '',
tracks = [];

$(playlistConfig.radioEntrySelector).each(function(i, elem){
Expand All @@ -81,8 +68,8 @@ function getTracks(playlistName, trackserviceUrl){
$artist = $entry.find(playlistConfig.radioArtistSelector);
}

title = $title.text();
artist = $artist.text();
title = $title.text().replace(searchInTitle, '');
artist = $artist.text().replace(searchInArtist, '');

tracks.push({
title: title,
Expand All @@ -92,24 +79,16 @@ function getTracks(playlistName, trackserviceUrl){

if(tracks.length === 0){
logger.log('no tracks found on radio trackservice.', playlistName);
return;
process.exit(1);
return reject();
}

resolve(tracks);
});
});

trackserviceReq.on('error', function(e) {
logger.log('problem with trackservice request: ' + e.message, playlistName);
process.exit(1);
});

trackserviceReq.end();
})
.close();
});
}

function getFm4Broadcasts(broadcastsUrl){
function getOrfBroadcasts(broadcastsUrl){
return new Promise((resolve, reject) => {
https.get(broadcastsUrl, (res) => {
if(res.statusCode !== 200){
Expand Down Expand Up @@ -137,7 +116,7 @@ function getFm4Broadcasts(broadcastsUrl){
});
}

function getFm4BroadcastTracks(broadcast){
function getOrfBroadcastTracks(broadcast){
return new Promise((resolve, reject) => {
https.get(broadcast.href, (res) => {
if(res.statusCode !== 200){
Expand All @@ -149,8 +128,8 @@ function getFm4BroadcastTracks(broadcast){
res.on('data', (chunk) => { rawData += chunk; });
res.on('end', () => {
try {
var data = JSON.parse(rawData);
var tracks = data.items
let data = JSON.parse(rawData);
let tracks = data.items
.map(broadcastItem => {
return {
title: broadcastItem.title,
Expand Down Expand Up @@ -180,9 +159,9 @@ function cleanTracks(tracks){
tracks
.filter(track => track.artist && track.title)
.forEach((track) => {
var isUnique = true;
var artist = track.artist.trimEx();
var title = track.title.trimEx();
let isUnique = true;
let artist = track.artist.trimEx();
let title = track.title.trimEx();

// check for duplicates
cleanedTracks.forEach(function(cleanTrack){
Expand Down
22 changes: 22 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,28 @@ You may want to run this on a server via cronjob every X minutes or so (dependin
4. Run `npm install`
5. Configure your cronjob to run `node main.js <stationIdentifier>` every X minutes (don't forget to change to the correct directory first! - this can be done with a bash script)

## Adding new Radio Stations to the config

Currently there are two different types of crawling available:

1. Special API crawling for ORF APIs like `https://audioapi.orf.at/fm4/api/json/current/broadcasts`.
2. Classic crawling for HTML tracklistings. This covers most radio stations.

For the first type (ORF API), only three parameters are required:
* `playlistId`: Spotify ID of the playlist to add new tracks to
* `orfApi: true`: flag to activate this mode
* `radioTrackserviceUrl`: API URL

For the second type, some optional parameters are also available. This is the default crawling mode.
* `playlistId`: Spotify ID of the playlist to add new tracks to
* `radioTrackserviceUrl`: URL of the page with the track listings
* `radioEntrySelector`: jQuery-style selector for the enclosing element that contains the info for exactly one track (e.g. `.list-item`). This is not used when the `searchLinear` option is enabled.
* `searchLinear: true`: optional and experimental. Set this to true when tracks are not encapsulated individually but are listed one after another. The website this mode was implemented for no longer exists, so it is not well tested at the moment.
* `radioTitleSelector`: jQuery-style selector for the title text.
* `removeFromTitleString`: optional. String to remove from the title (e.g. `Title:`). This is necessary when the title and other text are not cleanly separated by the website owner.
* `radioArtistSelector`: jQuery-style selector for the artist text.
* `removeFromArtistString`: optional. String to remove from the artist (e.g. `Artist:`). This is necessary when the artist and other text are not cleanly separated by the website owner.

## Updates

To update run `git pull` in your installation and you will get the latest changes.
Expand Down
54 changes: 48 additions & 6 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# yarn lockfile v1


bluebird@^3.5.0:
bluebird@^3.0.1, bluebird@^3.5.0:
version "3.5.0"
resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.5.0.tgz#791420d7f551eea2897453a8a77653f96606d67c"

Expand All @@ -20,6 +20,14 @@ cheerio@^0.19.0:
htmlparser2 "~3.8.1"
lodash "^3.2.0"

clone@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.2.tgz#260b7a99ebb1edfe247538175f783243cb19d149"

cookies.txt@^0.1.1:
version "0.1.2"
resolved "https://registry.yarnpkg.com/cookies.txt/-/cookies.txt-0.1.2.tgz#a8b249d9ee9699305362d604b7e0ac0f20e7519e"

core-util-is@~1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
Expand All @@ -37,18 +45,30 @@ [email protected]:
version "1.0.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-1.0.0.tgz#d7cc2df45180666f99d2b14462639469e00f736c"

[email protected]:
version "0.0.4"
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-0.0.4.tgz#46e13ab9da8e309745c8d01ce547213ebdb2fe3f"

debug@^2.1.1, debug@^2.2.0:
version "2.6.6"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.6.tgz#a9fa6fbe9ca43cf1e79f73b75c0189cbb7d6db5a"
dependencies:
ms "0.7.3"

defaults@~1.0.0:
version "1.0.3"
resolved "https://registry.yarnpkg.com/defaults/-/defaults-1.0.3.tgz#c656051e9817d9ff08ed881477f3fe4019f3ef7d"
dependencies:
clone "^1.0.2"

dom-serializer@0, dom-serializer@~0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.1.0.tgz#073c697546ce0780ce23be4a28e293e40bc30c82"
dependencies:
domelementtype "~1.1.1"
entities "~1.1.1"

domelementtype@1:
version "1.3.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.0.tgz#b17aed82e8ab59e52dd9c19b1756e0fc187204c2"

domelementtype@~1.1.1:
domelementtype@1, domelementtype@~1.1.1:
version "1.1.3"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.1.3.tgz#bd28773e2642881aec51544924299c5cd822185b"

Expand Down Expand Up @@ -101,6 +121,28 @@ lodash@^3.2.0:
version "3.10.1"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-3.10.1.tgz#5bf45e8e49ba4189e17d482789dfd15bd140b7b6"

[email protected]:
version "0.7.3"
resolved "https://registry.yarnpkg.com/ms/-/ms-0.7.3.tgz#708155a5e44e33f5fd0fc53e81d0d40a91be1fff"

node-horseman@^3.3.0:
version "3.3.0"
resolved "https://registry.yarnpkg.com/node-horseman/-/node-horseman-3.3.0.tgz#86100369437de794f669e0f52fd94cf945981bb2"
dependencies:
bluebird "^3.0.1"
clone "^1.0.2"
cookies.txt "^0.1.1"
data-uri-to-buffer "0.0.4"
debug "^2.1.1"
defaults "~1.0.0"
node-phantom-simple "^2.2.4"

node-phantom-simple@^2.2.4:
version "2.2.4"
resolved "https://registry.yarnpkg.com/node-phantom-simple/-/node-phantom-simple-2.2.4.tgz#4fc4effbb02f241fb5082bd4fbab398e4aecb64d"
dependencies:
debug "^2.2.0"

nth-check@~1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-1.0.1.tgz#9929acdf628fc2c41098deab82ac580cf149aae4"
Expand Down

0 comments on commit 5445a34

Please sign in to comment.