import os
import django
import sys
import feedparser
import requests
import time
import random
import string
import re
import hashlib # ✅ Short ID banane ke liye
from urllib.parse import quote
from django.utils.text import slugify
from datetime import datetime

# --- 1. DJANGO SETUP ---
# Make the directory that holds this script importable.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)

# Known project layouts in priority order:
# (settings file relative to BASE_DIR, dotted settings module).
_SETTINGS_CANDIDATES = (
    ('videshchalo/settings.py', 'videshchalo.settings'),
    ('job_portal/settings.py', 'job_portal.settings'),
)

for _settings_file, _settings_module in _SETTINGS_CANDIDATES:
    if os.path.exists(os.path.join(BASE_DIR, _settings_file)):
        # A settings file found on disk overrides any value already set
        # in the environment.
        os.environ['DJANGO_SETTINGS_MODULE'] = _settings_module
        break
else:
    # Neither layout was found: keep a pre-set value if there is one,
    # otherwise fall back to the job_portal settings module.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'job_portal.settings')

# Django must be configured before any model module is imported.
django.setup()

from jobs.models import NewsPost

# --- 2. CONFIG ---
# Both feeds use the same Bing News RSS endpoint; only the query differs.
_BING_NEWS_RSS_TEMPLATE = "https://www.bing.com/news/search?q={}&format=rss"

# Primary search, tried first.
SEARCH_QUERY = 'Dubai Hiring OR Gulf Vacancy OR Europe Recruitment OR NRI News'
RSS_URL = _BING_NEWS_RSS_TEMPLATE.format(quote(SEARCH_QUERY))

# Fallback search, used when the primary feed has no entries.
FALLBACK_QUERY = 'Indian Worker'
FALLBACK_RSS_URL = _BING_NEWS_RSS_TEMPLATE.format(quote(FALLBACK_QUERY))

# Browser-like User-Agent strings; one is picked at random per run.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
]

def generate_short_id(url):
    """Return a fixed-length identifier derived from *url*.

    The URL text is encoded and hashed with MD5; the result is always a
    32-character hexadecimal string, however long the URL is (even 1000+
    characters). The short form is intended to keep the stored ID from
    overflowing its database column.
    """
    hasher = hashlib.md5()
    hasher.update(url.encode())
    return hasher.hexdigest()

# Pre-compiled patterns shared by the helpers below.
_HTML_TAG_RE = re.compile(r'<[^<]+?>')
# Keywords are matched as whole words so that e.g. "sunrise" (contains "nri")
# or "original" (contains "origin") are not misclassified.
_DIASPORA_RE = re.compile(r'\b(?:nris?|diaspora|origin)\b')
_SARKARI_RE = re.compile(r'\b(?:sarkari|governments?)\b')


def _fetch_entries(url, headers):
    """Download the feed at *url* and return its entries ([] on HTTP/network failure)."""
    try:
        response = requests.get(url, headers=headers, timeout=15)
        # Without this an error page would be parsed as if it were a feed.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠️ Feed request failed: {exc}")
        return []
    return feedparser.parse(response.content).entries


def _detect_category(title):
    """Map a headline to 'diaspora', 'sarkari' or the default 'overseas'."""
    lowered = title.lower()
    if _DIASPORA_RE.search(lowered):
        return 'diaspora'
    if _SARKARI_RE.search(lowered):
        return 'sarkari'
    return 'overseas'


def _source_name(entry):
    """Return the publisher name of *entry* as a string, or "Global Media" if absent."""
    source = getattr(entry, 'source', None)
    # feedparser can expose `source` as a dict-like object carrying a
    # `title` key; use that title instead of storing the dict's repr.
    if isinstance(source, dict):
        source = source.get('title')
    return str(source) if source else "Global Media"


def _build_slug(title, fallback_token):
    """Return a slug for *title* that is not yet used by any NewsPost."""
    # slugify() yields '' for titles with no ASCII letters/digits (e.g. Hindi
    # headlines); fall back to a token-based slug rather than saving ''.
    base_slug = slugify(title)[:240] or f"news-{fallback_token[:12]}"
    candidate = base_slug
    # Re-check every random suffix; give up after a few tries and let the
    # database report the conflict if the slug column is unique.
    for _ in range(10):
        if not NewsPost.objects.filter(slug=candidate).exists():
            break
        candidate = f"{base_slug}-" + ''.join(random.choices(string.digits, k=4))
    return candidate


def fetch_msn_news():
    """Fetch news from the Bing RSS feed and store new items as draft NewsPost rows.

    The primary feed (RSS_URL) is tried first; if it fails or has no entries
    the fallback feed (FALLBACK_RSS_URL) is used. At most the first 20 entries
    are processed. An entry is skipped when it lacks a link or title, or when
    a NewsPost with the same hashed link (api_id) or the same title already
    exists. Progress and a final added/skipped report are printed to stdout.

    Returns:
        None. Errors are printed, never raised, so a scheduled run cannot
        crash the caller.
    """
    print("\n" + "="*60)
    print("🚀 VIDESH CHALO: Smart RSS News Fetcher v2.3 (Full URL Support)")
    print(f"📅 Time: {datetime.now()}")
    print("="*60)

    try:
        headers = {'User-Agent': random.choice(USER_AGENTS)}
        entries = _fetch_entries(RSS_URL, headers)

        if not entries:
            print("ℹ️ Trying Fallback...")
            entries = _fetch_entries(FALLBACK_RSS_URL, headers)

        if not entries:
            print("ℹ️ No news entries found. Try later.")
            return

        count_added = 0
        count_skipped = 0

        for entry in entries[:20]:
            raw_link = getattr(entry, 'link', None)
            title = getattr(entry, 'title', None)
            # A malformed entry must not abort the rest of the batch.
            if not raw_link or not title:
                count_skipped += 1
                continue

            # Hash the link so api_id stays short however long the URL is.
            unique_api_id = generate_short_id(raw_link)

            # Duplicate check: same link hash or same headline.
            if (NewsPost.objects.filter(api_id=unique_api_id).exists()
                    or NewsPost.objects.filter(title=title).exists()):
                count_skipped += 1
                continue

            print(f"Processing: {title[:50]}...")

            detected_category = _detect_category(title)
            final_slug = _build_slug(title, unique_api_id)
            # Strip markup from the feed description and cap it at 300 chars.
            description = getattr(entry, 'description', "") or ""
            summary = _HTML_TAG_RE.sub('', description).strip()[:300]
            source = _source_name(entry)

            # Save the post; the full, untrimmed URL goes into source_url.
            try:
                NewsPost.objects.create(
                    title=title,
                    slug=final_slug,
                    category=detected_category,
                    content=f"Source: {source}<br><br>{summary}<br><br>Read full details at the source link.",
                    short_description=summary,
                    source_name=source,
                    source_url=raw_link,
                    api_id=unique_api_id,
                    status='draft'
                )
                count_added += 1
                print(f"   ✅ Saved Full URL: (Cat: {detected_category})")
            except Exception as inner_e:
                # Best effort: report the failed row and continue with the next.
                print(f"   ❌ DB SAVE ERROR (Check source_url max_length): {inner_e}")

        print(f"\n📊 REPORT: Added: {count_added} | Skipped: {count_skipped}\n")

    except Exception as e:
        # Top-level boundary of the script: report and exit quietly.
        print(f"❌ CRITICAL ERROR: {str(e)}")

# Script entry point: run one fetch cycle when executed directly (not on import).
if __name__ == "__main__":
    fetch_msn_news()