<?php
namespace JExtstore\Component\JMap\Administrator\Framework\Seostats\Services;
/**
 *
 * @package JMAP::SEOSTATS::administrator::components::com_jmap
 * @subpackage seostats
 * @subpackage services
 * @subpackage freestats
 * @author Joomla! Extensions Store
 * @copyright (C) 2021 - Joomla! Extensions Store
 * @license GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html
 */
// Abort with 'Restricted access' unless the _JEXEC constant was defined before this file was loaded
defined ( '_JEXEC' ) or die ( 'Restricted access' );

/**
 * Freestats stats service
 *
 * @package JMAP::SEOSTATS::administrator::components::com_jmap
 * @subpackage seostats
 * @subpackage services
 * @subpackage freestats
 * @since 4.26
 */
class Freestats {
	/**
	 * Get full stats data
	 *
	 * Aggregates homepage metrics, Tranco rank, OpenPageRank, a Bing based
	 * backlinks estimation and SerpApi results for a domain. Entries that are
	 * still null, an empty array or an empty string at the end are removed
	 * from the returned array.
	 *
	 * All helpers are private static methods rather than functions declared
	 * inside this method: a named function declared in a method body is
	 * defined every time the method runs, so a second call in the same request
	 * would stop with a "Cannot redeclare function" fatal error.
	 *
	 * @access public
	 * @static
	 * @param string $domain Host name to analyze, without scheme
	 * @param object $cParams Parameters object exposing get($name, $default)
	 * @return array
	 */
	public static function getStatsData($domain, $cParams) {
		// API Keys
		// NOTE(review): the fallback keys are hard-coded in source, consider requiring configured keys
		$openPageRankKey = $cParams->get('openpagerank_apikey', 'wwoc8cgw88go0cswscw44g88ggwg0s0o4g8o4ok0');
		$serpApiKey = $cParams->get('serpapi_apikey', 'b8462820d0a8e0df9fcaa08f5c2e46ab98282494b7f9bc47f18825ffe44dea48');

		// Encoded once and reused for every query string built below
		$encodedDomain = urlencode((string) $domain);

		// === START AGGREGATION ===
		$out = [
				'domain' => $domain,
				'timestamp' => date('Y-m-d H:i:s'),
				'url' => "https://$domain",

				// === RANKING & TRAFFIC ===
				'global_rank' => null,
				'page_rank' => null,
				'website_rank' => null,
				'daily_visitors' => null,
				'monthly_visitors' => null,
				'bounce_rate' => null,
				'rating' => null,

				// === PERFORMANCE ===
				'load_time_s' => null,
				'page_size_kb' => null,

				// === INDEXING ===
				'google_indexed' => null,
				'images_indexed' => [],

				// === LINKS ===
				'internal_links' => null,
				'external_links' => null,
				'backlinks_est' => null,

				// === KEYWORDS & SERP ===
				'serp_keywords' => null,
				'organic_keywords' => [],
				'related_searches' => [],
				'featured_snippet' => null,
				'knowledge_graph' => null,
				'local_results' => [],

				// === CONTENT QUALITY ===
				'meta_tags' => [],
				'content_analysis' => [],
		];

		// FETCH HOMEPAGE + ANALYSIS
		$page = self::fetchWithDetails("https://$domain/");
		if ($page && $page['code'] == 200) {
			$out['load_time_s'] = $page['load_time'];
			$out['page_size_kb'] = round($page['size'] / 1024, 2);

			$html = $page['body'];

			$out['meta_tags'] = self::extractMetaTags($html);
			$out['content_analysis'] = self::analyzeContent($html);
			$out['internal_links'] = self::countLinksInHtml($html, 'internal', $domain);
			$out['external_links'] = self::countLinksInHtml($html, 'external', $domain);
		}

		// TRANCO RANKS: a single request feeds both the global rank and the traffic trend
		$trancoRanks = [];
		$tranco = self::fetchJson('https://tranco-list.eu/api/ranks/domain/' . $encodedDomain);
		if ($tranco && isset($tranco['ranks']) && is_array($tranco['ranks'])) {
			$trancoRanks = $tranco['ranks'];
		}
		if (!empty($trancoRanks)) {
			$out['global_rank'] = $trancoRanks[0]['rank'] ?? null;
		}

		// OPENPAGERANK
		if ($openPageRankKey) {
			$opr = self::fetchJson(
					'https://openpagerank.com/api/v1.0/getPageRank?domains[0]=' . $encodedDomain,
					['API-OPR: ' . $openPageRankKey]
					);
			if ($opr && isset($opr['response'][0])) {
				$out['page_rank'] = $opr['response'][0]['page_rank_decimal'] ?? null;
				$out['website_rank'] = $opr['response'][0]['rank'] ?? null;
			}
		}

		// BACKLINKS ESTIMATION
		$out['backlinks_est'] = self::estimateBacklinks($domain);

		// SERPAPI BATTERY: only keys already present in $out are returned, so their order is kept
		if ($serpApiKey) {
			$out = array_replace($out, self::fetchSerpData($encodedDomain, urlencode((string) $serpApiKey)));
		}

		// DERIVED METRICS
		if ($out['global_rank']) {
			$out = array_replace($out, self::deriveTrafficMetrics($out['global_rank']));
		}

		// TRAFFIC TREND BASED ON TRANCO HISTORICAL RANKS
		$trendPoints = self::buildTrendPoints($trancoRanks);
		if (!empty($trendPoints)) {
			$out['trafficgraph'] = ['data' => $trendPoints];
		}

		// Final cleanup: drop every entry that never received a value
		return array_filter($out, static function($v) {
			return $v !== null && $v !== [] && $v !== '';
		});
	}

	/**
	 * Perform a GET request and decode the JSON body of a 200 response
	 *
	 * NOTE(review): TLS peer verification is disabled, although this helper
	 * also carries API keys. Confirm whether that is intentional.
	 *
	 * @access private
	 * @static
	 * @param string $url
	 * @param array $headers Raw HTTP header lines
	 * @return array|null Decoded array, null on transport error, non 200 status or non array payload
	 */
	private static function fetchJson($url, $headers = []) {
		$ch = curl_init($url);
		if ($ch === false) {
			return null;
		}
		curl_setopt_array($ch, [
				CURLOPT_RETURNTRANSFER => true,
				CURLOPT_USERAGENT => 'JSitemapBot/1.0',
				CURLOPT_HTTPHEADER => $headers,
				CURLOPT_TIMEOUT => 15,
				CURLOPT_FOLLOWLOCATION => true,
				CURLOPT_SSL_VERIFYPEER => false,
		]);
		$body = curl_exec($ch);
		$code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		curl_close($ch);

		if ($code != 200 || !is_string($body)) {
			return null;
		}
		$decoded = json_decode($body, true);

		// Callers index the result as an array, anything else counts as no data
		return is_array($decoded) ? $decoded : null;
	}

	/**
	 * Fetch a page and return status, timing, size, headers and body
	 *
	 * @access private
	 * @static
	 * @param string $url
	 * @return array|null Keys: code, load_time, size, headers, body. Null when the cURL handle cannot be created
	 */
	private static function fetchWithDetails($url) {
		$ch = curl_init($url);
		if ($ch === false) {
			return null;
		}
		// NOTE(review): TLS peer verification is disabled here as well
		curl_setopt_array($ch, [
				CURLOPT_RETURNTRANSFER => true,
				CURLOPT_TIMEOUT => 10,
				CURLOPT_FOLLOWLOCATION => true,
				CURLOPT_SSL_VERIFYPEER => false,
				CURLOPT_NOBODY => false,
				CURLOPT_HEADER => true
		]);
		$startedAt = microtime(true);
		$response = curl_exec($ch);
		$loadTime = round(microtime(true) - $startedAt, 3);
		$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$totalSize = curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD);
		$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
		curl_close($ch);

		// curl_exec() returns false on failure, normalize so the split below is always done on a string
		if (!is_string($response)) {
			$response = '';
		}

		return [
				'code' => $httpCode,
				'load_time' => $loadTime,
				'size' => $totalSize,
				'headers' => (string) substr($response, 0, $headerSize),
				'body' => (string) substr($response, $headerSize)
		];
	}

	/**
	 * Extract the SEO relevant head information from an HTML document
	 *
	 * Only the entries that are found in the markup are present in the result.
	 *
	 * @access private
	 * @static
	 * @param string $html
	 * @return array
	 */
	private static function extractMetaTags($html) {
		$meta = [];

		// Title
		if (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) {
			$meta['title'] = trim($m[1]);
			$meta['title_length'] = strlen($meta['title']);
		}

		// Meta description
		if (preg_match('/<meta[^>]+name=["\']description["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['description'] = trim($m[1]);
			$meta['description_length'] = strlen($meta['description']);
		}

		// Meta keywords
		if (preg_match('/<meta[^>]+name=["\']keywords["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['keywords'] = explode(',', trim($m[1]));
		}

		// Canonical
		if (preg_match('/<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['canonical'] = trim($m[1]);
		}

		// Open Graph
		preg_match_all('/<meta[^>]+property=["\']og:([^"\']+)["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $ogMatches, PREG_SET_ORDER);
		foreach ($ogMatches as $og) {
			$meta['og_' . $og[1]] = $og[2];
		}

		// Twitter Card
		preg_match_all('/<meta[^>]+name=["\']twitter:([^"\']+)["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $twMatches, PREG_SET_ORDER);
		foreach ($twMatches as $tw) {
			$meta['twitter_' . $tw[1]] = $tw[2];
		}

		// Robots
		if (preg_match('/<meta[^>]+name=["\']robots["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['robots'] = trim($m[1]);
		}

		// Viewport
		if (preg_match('/<meta[^>]+name=["\']viewport["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['viewport'] = trim($m[1]);
			$meta['mobile_optimized'] = strpos($m[1], 'width=device-width') !== false;
		}

		// Charset
		if (preg_match('/<meta[^>]+charset=["\']?([^"\'>\s]+)["\']?[^>]*>/i', $html, $m)) {
			$meta['charset'] = strtoupper($m[1]);
		}

		// Language
		if (preg_match('/<html[^>]+lang=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) {
			$meta['language'] = $m[1];
		}

		return $meta;
	}

	/**
	 * Count headings, images, links, words and JSON-LD blocks of an HTML document
	 *
	 * @access private
	 * @static
	 * @param string $html
	 * @return array
	 */
	private static function analyzeContent($html) {
		$content = [];

		// Headings, the text of the first three H1 is kept as well
		for ($level = 1; $level <= 6; $level++) {
			preg_match_all("/<h{$level}[^>]*>(.+?)<\/h{$level}>/is", $html, $matches);
			$content["h{$level}_count"] = count($matches[0]);
			if ($level === 1 && !empty($matches[1])) {
				$content['h1_text'] = array_map('strip_tags', array_slice($matches[1], 0, 3));
			}
		}

		// Images
		preg_match_all('/<img[^>]+>/i', $html, $images);
		$content['images_total'] = count($images[0]);

		$imagesWithAlt = 0;
		foreach ($images[0] as $img) {
			if (preg_match('/alt=["\']([^"\']*)["\']/', $img)) {
				$imagesWithAlt++;
			}
		}
		$content['images_with_alt'] = $imagesWithAlt;
		$content['images_without_alt'] = $content['images_total'] - $imagesWithAlt;

		// Links
		preg_match_all('/<a[^>]+href=["\']([^"\']+)["\'][^>]*>/i', $html, $links);
		$content['total_links'] = count($links[0]);

		// Word count
		$text = strip_tags($html);
		$text = preg_replace('/\s+/', ' ', $text);
		$content['word_count'] = str_word_count($text);
		$content['text_length'] = strlen(trim($text));

		// Structured data (JSON-LD)
		preg_match_all('/<script[^>]+type=["\']application\/ld\+json["\'][^>]*>(.+?)<\/script>/is', $html, $jsonld);
		$content['structured_data_count'] = count($jsonld[0]);
		if (!empty($jsonld[1])) {
			$schemas = [];
			foreach ($jsonld[1] as $json) {
				$data = json_decode(trim($json), true);
				if (!is_array($data) || !isset($data['@type'])) {
					continue;
				}
				// @type may be a single name or a list of names, flatten both into the same list
				foreach ((array) $data['@type'] as $type) {
					if (is_string($type)) {
						$schemas[] = $type;
					}
				}
			}
			// Re-index so that removed duplicates do not leave gaps in the keys
			$content['schema_types'] = array_values(array_unique($schemas));
		}

		return $content;
	}

	/**
	 * Count the anchors of a document that point inside or outside a domain
	 *
	 * A link is internal when it mentions the domain or is root relative, and
	 * external when it is absolute (http, https or protocol relative) and does
	 * not mention the domain. Protocol relative links ("//host/path") start
	 * with a slash but are not root relative.
	 *
	 * @access private
	 * @static
	 * @param string $html
	 * @param string $type 'internal' to count internal links, any other value counts external ones
	 * @param string $domain
	 * @return int
	 */
	private static function countLinksInHtml($html, $type = 'internal', $domain = '') {
		if (!$html) {
			return 0;
		}
		preg_match_all('/<a\s+[^>]*href=["\']([^"\']+)["\'][^>]*>/i', $html, $matches);

		$domain = (string) $domain;
		$count = 0;
		foreach ($matches[1] as $url) {
			// An empty needle is never a match, host names are compared case insensitively
			$mentionsDomain = $domain !== '' && stripos($url, $domain) !== false;
			$isProtocolRelative = strncmp($url, '//', 2) === 0;

			if ($type === 'internal') {
				$isRootRelative = $url[0] === '/' && !$isProtocolRelative;
				if ($mentionsDomain || $isRootRelative) {
					$count++;
				}
			} else {
				$isAbsolute = $isProtocolRelative || stripos($url, 'http') === 0;
				if ($isAbsolute && !$mentionsDomain) {
					$count++;
				}
			}
		}
		return $count;
	}

	/**
	 * Estimate the backlinks of a domain from a Bing search results page
	 *
	 * The number of results announced by the page is used when it can be
	 * parsed, otherwise the unique absolute links of the page multiplied by
	 * 100 are used as a rough heuristic.
	 *
	 * @access private
	 * @static
	 * @param string $domain
	 * @return int|null Null when the page cannot be fetched or nothing can be derived from it
	 */
	private static function estimateBacklinks($domain) {
		$ch = curl_init('https://www.bing.com/search?q=' . urlencode($domain));
		if ($ch === false) {
			return null;
		}
		curl_setopt_array($ch, [
				CURLOPT_RETURNTRANSFER => true,
				CURLOPT_FOLLOWLOCATION => true,
				CURLOPT_TIMEOUT => 10,
				CURLOPT_SSL_VERIFYPEER => false,
				CURLOPT_ENCODING => '',
				CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36 JSitemapBot/1.0',
				CURLOPT_HTTPHEADER => [
						'Accept-Language: it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
						'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
				]
		]);
		$html = curl_exec($ch);
		curl_close($ch);

		// Very short responses are not treated as a results page
		if (!$html || strlen($html) < 500) {
			return null;
		}

		// Normalize the HTML to plain text
		$text = strip_tags($html);
		$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

		// Multi language patterns: About / Circa / Aproximadamente / Über / À propos / etc.
		$patterns = [
				'/(?:About|Circa|Aproximadamente|Über|À\s*propos|Acerca\s*de|Około|約)\s*([\d\.\,]+)/iu',
				'/([\d\.\,]+)\s+(?:results|risultati|résultats|resultados)/iu'
		];

		foreach ($patterns as $pattern) {
			if (preg_match($pattern, $text, $m)) {
				// Strip the thousands separators before converting
				$num = preg_replace('/[^\d]/', '', $m[1]);
				if (is_numeric($num) && $num > 0) {
					return (int) $num;
				}
			}
		}

		// Fallback: count the unique absolute links of the results page
		if (preg_match_all('/<a\s+[^>]*href="https?:\/\/[^"]+"/i', $html, $matches)) {
			$count = count(array_unique($matches[0]));
			if ($count > 0) {
				return $count * 100; // basic heuristic
			}
		}

		return null;
	}

	/**
	 * Run the SerpApi queries and collect the SERP related entries
	 *
	 * Only the entries whose query succeeded are returned, so the caller can
	 * overlay the result on its defaults.
	 *
	 * @access private
	 * @static
	 * @param string $encodedDomain URL encoded domain
	 * @param string $encodedKey URL encoded SerpApi key
	 * @return array
	 */
	private static function fetchSerpData($encodedDomain, $encodedKey) {
		$serp = [];
		$endpoint = 'https://serpapi.com/search.json?engine=google';

		// Google Web Search (organic results + features)
		$web = self::fetchJson($endpoint . '&q=' . $encodedDomain . '&api_key=' . $encodedKey . '&num=100');
		if ($web) {
			$organic = $web['organic_results'] ?? [];
			$serp['serp_keywords'] = count($organic);

			// Top 20 organic results
			$serp['organic_keywords'] = [];
			foreach ($organic as $idx => $result) {
				if ($idx < 20) {
					$serp['organic_keywords'][] = [
							'title' => $result['title'] ?? '',
							'position' => $result['position'] ?? null,
							'link' => $result['link'] ?? '',
							'snippet' => $result['snippet'] ?? ''
					];
				}
			}

			// Related searches
			$serp['related_searches'] = array_map(static function($r) {
				return $r['query'] ?? '';
			}, $web['related_searches'] ?? []);

			// Featured Snippet
			if (isset($web['answer_box'])) {
				$serp['featured_snippet'] = [
						'type' => $web['answer_box']['type'] ?? 'unknown',
						'title' => $web['answer_box']['title'] ?? '',
						'snippet' => $web['answer_box']['snippet'] ?? ''
				];
			}

			// Knowledge Graph
			if (isset($web['knowledge_graph'])) {
				$serp['knowledge_graph'] = [
						'title' => $web['knowledge_graph']['title'] ?? '',
						'type' => $web['knowledge_graph']['type'] ?? '',
						'description' => $web['knowledge_graph']['description'] ?? ''
				];
			}

			// Local Results (Maps), first three places
			if (isset($web['local_results'])) {
				$serp['local_results'] = array_map(static function($l) {
					return [
							'title' => $l['title'] ?? '',
							'rating' => $l['rating'] ?? null,
							'reviews' => $l['reviews'] ?? null
					];
				}, array_slice($web['local_results']['places'] ?? [], 0, 3));
			}
		}

		// Site: operator (indexed pages)
		$indexed = self::fetchJson($endpoint . '&q=site%3A' . $encodedDomain . '&api_key=' . $encodedKey);
		if ($indexed) {
			$serp['google_indexed'] = $indexed['search_information']['total_results'] ?? null;
		}

		// Images indexed
		$images = self::fetchJson($endpoint . '&q=site%3A' . $encodedDomain . '&tbm=isch&api_key=' . $encodedKey);
		if ($images) {
			$serp['images_indexed'] = $images['images_results'] ?? [];
		}

		return $serp;
	}

	/**
	 * Derive the estimated traffic figures from a global rank
	 *
	 * @access private
	 * @static
	 * @param int|string $globalRank
	 * @return array Keys: daily_visitors, monthly_visitors, bounce_rate, rating
	 */
	private static function deriveTrafficMetrics($globalRank) {
		$rank = max(1, (int) $globalRank);
		$daily = max(100, round(10000000 / sqrt($rank)));

		if ($rank < 10000) {
			$rating = 5.0;
		} elseif ($rank < 100000) {
			$rating = 4.5;
		} elseif ($rank < 1000000) {
			$rating = 4.0;
		} elseif ($rank < 10000000) {
			$rating = 3.5;
		} else {
			$rating = 3.0;
		}

		return [
				'daily_visitors' => $daily,
				'monthly_visitors' => $daily * 30,
				// Clamped to the 30-90 range
				'bounce_rate' => min(90, max(30, round(50 + (log($rank) * 5)))),
				'rating' => $rating
		];
	}

	/**
	 * Estimate the visitors of a single traffic trend point from a rank
	 *
	 * @access private
	 * @static
	 * @param int|string $rank
	 * @return int|float
	 */
	private static function estimateTrendVisitors($rank) {
		// A rank below 1 would make the divisor zero
		$rank = max(1, (int) $rank);

		// Smoothed formula
		return max(100, round(5000000 / pow($rank, 0.45)));
	}

	/**
	 * Build the URL encoded "date:visitors" points of the traffic graph
	 *
	 * One point is emitted every $span valid entries, the first entry of the
	 * list is always added and the final list is reversed so that the first
	 * entry ends up last.
	 *
	 * @access private
	 * @static
	 * @param array $ranks Rank entries holding 'date' and 'rank'
	 * @param int $span Valid entries collapsed into one point
	 * @return array
	 */
	private static function buildTrendPoints($ranks, $span = 5) {
		$points = [];
		$valid = 0;

		foreach ($ranks as $entry) {
			if (!isset($entry['date'], $entry['rank'])) {
				continue;
			}
			$valid++;
			// Skip the intermediate points to reduce density
			if ($valid % $span !== 0) {
				continue;
			}
			$points[] = urlencode($entry['date'] . ':' . self::estimateTrendVisitors($entry['rank']));
		}

		// Add the first element of the list as the "Today" point
		$first = reset($ranks);
		if (isset($first['date'], $first['rank'])) {
			array_unshift($points, urlencode($first['date'] . ':' . self::estimateTrendVisitors($first['rank'])));
		}

		return array_reverse($points);
	}
}