88 */
99namespace Piwik ;
1010
11- use Piwik \ Container \ StaticContainer ;
11+ use Matomo \ Cache \ Lazy ;
1212
1313/**
1414 * This class provides detection functions for specific content on a site. It can be used to easily detect the
1919 *
2020 * Usage:
2121 *
22- * $contentDetector = SiteContentDetector::getInstance ();
22+ * $contentDetector = new SiteContentDetector();
2323 * $contentDetector->detectContent([SiteContentDetector::GA3]);
2424 * if ($contentDetector->ga3) {
2525 * // site is using GA3
@@ -46,20 +46,25 @@ class SiteContentDetector
4646 public $ gtm ; // True if GTM was detected on the site
4747
4848 private $ siteData ;
49- private $ siteId ;
5049
51- /**
52- * @return SiteContentDetector
53- */
54- public static function getInstance (): SiteContentDetector
50+ /** @var Lazy */
51+ private $ cache ;
52+
53+ public function __construct (? Lazy $ cache = null )
5554 {
56- return StaticContainer::get ('Piwik\SiteContentDetector ' );
55+ if ($ cache === null ) {
56+ $ this ->cache = Cache::getLazyCache ();
57+ } else {
58+ $ this ->cache = $ cache ;
59+ }
5760 }
5861
5962 /**
6063 * Reset the detection properties
64+ *
65+ * @return void
6166 */
62- private function resetDetectionProperties () : void
67+ private function resetDetectionProperties (): void
6368 {
6469 $ this ->consentManagerId = null ;
6570 $ this ->consentManagerUrl = null ;
@@ -78,55 +83,161 @@ private function resetDetectionProperties() : void
7883 * will speed up the detection check
7984 * @param ?int $idSite Override the site ID, will use the site from the current request if null
8085 * @param string|null $siteData String containing the site data to search, if blank then data will be retrieved
81- * from the current request site via cURL
82- * @param int $timeOut How long to wait for the site to response, defaults to 60 seconds
86+ * from the current request site via an http request
87+ * @param int $timeOut How long to wait for the site to response, defaults to 5 seconds
88+ * @return void
8389 */
8490 public function detectContent (array $ detectContent = [SiteContentDetector::ALL_CONTENT ],
85- ?int $ idSite = null , ?string $ siteData = null , int $ timeOut = 60 )
91+ ?int $ idSite = null , ?string $ siteData = null , int $ timeOut = 5 ): void
8692 {
8793
88- // If the site data was already retrieved and stored in this object and it is for the same site id and we're
89- // not being passed a specific sitedata parameter then avoid making another request and just return
90- if ($ siteData === null && $ this ->siteData != null && $ idSite == $ this ->siteId ) {
91- return ;
92- }
93-
9494 $ this ->resetDetectionProperties ();
9595
96- // No site data was passed or previously retrieved, so grab the current site main page as a string
97- if ($ siteData === null ) {
98-
96+ // If site data was passed in, then just run the detection checks against it and return.
97+ if ($ siteData ) {
98+ $ this ->siteData = $ siteData ;
99+ $ this ->detectionChecks ($ detectContent );
100+ return ;
101+ }
99102
100- if ($ idSite === null ) {
101- if (!isset ($ _REQUEST ['idSite ' ])) {
102- return ;
103- }
104- $ idSite = Common::getRequestVar ('idSite ' , null , 'int ' );
105- if (!$ idSite ) {
106- return ;
107- }
103+ // Get the site id from the request object if not explicitly passed
104+ if ($ idSite === null ) {
105+ if (!isset ($ _REQUEST ['idSite ' ])) {
106+ return ;
108107 }
109-
110- $ url = Site::getMainUrlFor ($ idSite );
111- if (!$ url ) {
108+ $ idSite = Common::getRequestVar ('idSite ' , null , 'int ' );
109+ if (!$ idSite ) {
112110 return ;
113111 }
112+ }
114113
115- try {
116- $ siteData = Http::sendHttpRequestBy ('curl ' , $ url , $ timeOut , null , null ,
117- null , 0 , false , true );
118- } catch (\Exception $ e ) {
114+ // Check and load previously cached site content detection data if it exists
115+ $ cacheKey = 'SiteContentDetector_ ' .$ idSite ;
116+ $ requiredProperties = $ this ->getRequiredProperties ($ detectContent );
117+ $ siteContentDetectionCache = $ this ->cache ->fetch ($ cacheKey );
118+ if ($ siteContentDetectionCache !== false ) {
119+ if ($ this ->checkCacheHasRequiredProperties ($ requiredProperties , $ siteContentDetectionCache )) {
120+ $ this ->loadRequiredPropertiesFromCache ($ requiredProperties , $ siteContentDetectionCache );
121+ return ;
119122 }
120-
121123 }
122124
125+ // No cache hit, no passed data, so make a request for the site content
126+ $ siteData = $ this ->requestSiteData ($ idSite , $ timeOut );
127+
123128 // Abort if still no site data
124129 if ($ siteData === null || $ siteData === '' ) {
125130 return ;
126131 }
127132
128133 $ this ->siteData = $ siteData ;
129- $ this ->siteId = $ idSite ;
134+
135+ // We now have site data to analyze, so run the detection checks
136+ $ this ->detectionChecks ($ detectContent );
137+
138+ // A request was made to get this data and it isn't currently cached, so write it to the cache now
139+ $ cacheLife = (60 * 60 * 24 * 7 );
140+ $ this ->savePropertiesToCache ($ cacheKey , $ requiredProperties , $ cacheLife );
141+ }
142+
143+ /**
144+ * Returns an array of properties required by the detect content array
145+ *
146+ * @param array $detectContent
147+ *
148+ * @return array
149+ */
150+ private function getRequiredProperties (array $ detectContent ): array
151+ {
152+ $ requiredProperties = [];
153+ if (in_array (SiteContentDetector::CONSENT_MANAGER , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
154+ $ requiredProperties = array_merge ($ requiredProperties , ['consentManagerId ' , 'consentManagerName ' , 'consentManagerUrl ' , 'isConnected ' ]);
155+ }
156+ if (in_array (SiteContentDetector::GA3 , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
157+ $ requiredProperties [] = 'ga3 ' ;
158+ }
159+ if (in_array (SiteContentDetector::GA4 , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
160+ $ requiredProperties [] = 'ga4 ' ;
161+ }
162+ if (in_array (SiteContentDetector::GTM , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
163+ $ requiredProperties [] = 'gtm ' ;
164+ }
165+
166+ return $ requiredProperties ;
167+ }
168+
169+ /**
170+ * Checks that all required properties are in the cache array
171+ *
172+ * @param array $properties
173+ * @param array $cache
174+ *
175+ * @return bool
176+ */
177+ private function checkCacheHasRequiredProperties (array $ properties , array $ cache ): bool
178+ {
179+ foreach ($ properties as $ prop ) {
180+ if (!array_key_exists ($ prop , $ cache )) {
181+ return false ;
182+ }
183+ }
184+ return true ;
185+ }
186+
187+ /**
188+ * Load object properties from the cache array
189+ *
190+ * @param array $properties
191+ * @param array $cache
192+ *
193+ * @return void
194+ */
195+ private function loadRequiredPropertiesFromCache (array $ properties , array $ cache ): void
196+ {
197+ foreach ($ properties as $ prop ) {
198+ if (!array_key_exists ($ prop , $ cache )) {
199+ continue ;
200+ }
201+ $ this ->{$ prop } = $ cache [$ prop ];
202+ }
203+ }
204+
205+ /**
206+ * Save properties to the cache
207+ *
208+ * @param string $cacheKey
209+ * @param array $properties
210+ * @param int $cacheLife
211+ *
212+ * @return void
213+ */
214+ private function savePropertiesToCache (string $ cacheKey , array $ properties , int $ cacheLife ): void
215+ {
216+
217+ $ cacheData = [];
218+
219+ // Load any existing cached values
220+ $ siteContentDetectionCache = $ this ->cache ->fetch ($ cacheKey );
221+ if (is_array ($ siteContentDetectionCache )) {
222+ $ cacheData = $ siteContentDetectionCache ;
223+ }
224+
225+ foreach ($ properties as $ prop ) {
226+ $ cacheData [$ prop ] = $ this ->{$ prop };
227+ }
228+
229+ $ this ->cache ->save ($ cacheKey , $ cacheData , $ cacheLife );
230+ }
231+
232+ /**
233+ * Run various detection checks for site content
234+ *
235+ * @param array $detectContent Array of detection types used to filter the checks that are run
236+ *
237+ * @return void
238+ */
239+ private function detectionChecks ($ detectContent ): void
240+ {
130241
131242 if (in_array (SiteContentDetector::CONSENT_MANAGER , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
132243 $ this ->detectConsentManager ();
@@ -143,7 +254,30 @@ public function detectContent(array $detectContent = [SiteContentDetector::ALL_C
143254 if (in_array (SiteContentDetector::GTM , $ detectContent ) || in_array (SiteContentDetector::ALL_CONTENT , $ detectContent )) {
144255 $ this ->detectGTM ();
145256 }
257+ }
146258
259+ /**
260+ * Retrieve data from the specified site using an HTTP request
261+ *
262+ * @param int $idSite
263+ * @param int $timeOut
264+ *
265+ * @return string|null
266+ */
267+ private function requestSiteData (int $ idSite , int $ timeOut ): ?string
268+ {
269+ $ siteData = null ;
270+ $ url = Site::getMainUrlFor ($ idSite );
271+ if (!$ url ) {
272+ return null ;
273+ }
274+
275+ try {
276+ $ siteData = Http::sendHttpRequestBy (Http::getTransportMethod (), $ url , $ timeOut , null , null ,
277+ null , 0 , false , true );
278+ } catch (\Exception $ e ) {
279+ }
280+ return $ siteData ;
147281 }
148282
149283 /**
@@ -153,7 +287,7 @@ public function detectContent(array $detectContent = [SiteContentDetector::ALL_C
153287 *
154288 * @return void
155289 */
156- private function detectConsentManager () : void
290+ private function detectConsentManager (): void
157291 {
158292
159293 $ defs = SiteContentDetector::getConsentManagerDefinitions ();
@@ -191,7 +325,7 @@ private function detectConsentManager() : void
191325 *
192326 * @return void
193327 */
194- private function detectGA3 () : void
328+ private function detectGA3 (): void
195329 {
196330 if (strpos ($ this ->siteData , '(i,s,o,g,r,a,m) ' ) !== false ) {
197331 $ this ->ga3 = true ;
@@ -203,7 +337,7 @@ private function detectGA3() : void
203337 *
204338 * @return void
205339 */
206- private function detectGA4 () : void
340+ private function detectGA4 (): void
207341 {
208342 if (strpos ($ this ->siteData , 'gtag.js ' ) !== false ) {
209343 $ this ->ga4 = true ;
@@ -215,7 +349,7 @@ private function detectGA4() : void
215349 *
216350 * @return void
217351 */
218- private function detectGTM () : void
352+ private function detectGTM (): void
219353 {
220354 if (strpos ($ this ->siteData , 'gtm.js ' ) !== false ) {
221355 $ this ->gtm = true ;
@@ -228,7 +362,7 @@ private function detectGTM() : void
228362 *
229363 * @return array[]
230364 */
231- public static function getConsentManagerDefinitions () : array
365+ public static function getConsentManagerDefinitions (): array
232366 {
233367 return [
234368
0 commit comments