import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

def determine_weights_by_cluster(cluster):
    """Choose AHP criteria weights from a cluster label.

    The label is matched case-insensitively by substring; "smart" is checked
    before "premium". The returned list follows the same
    ``[price, description, rating]`` order that the ranking functions in this
    file use for ``criteria_weights``.

    Args:
        cluster: Cluster label. Anything that is not a string (``None``, the
            float NaN pandas uses for missing values, numeric codes) is
            treated as an unlabeled cluster and gets the default weights.

    Returns:
        A new three-element list of weights.
    """
    # Only strings can carry a keyword; calling .lower() on NaN or a number
    # would raise AttributeError, so fall back to an empty label instead.
    label = cluster.lower() if isinstance(cluster, str) else ""

    if "smart" in label:
        return [0.5, 0.3, 0.2]  # prioritize price
    if "premium" in label:
        return [0.2, 0.3, 0.5]  # prioritize rating
    return [0.3, 0.3, 0.4]  # default: near-balanced, slight emphasis on rating

def calculate_priority_vector(matrix):
    """Return the normalized principal eigenvector of a square matrix.

    The eigenvalue with the largest real part is selected, the real part of
    its eigenvector is taken, and that vector is divided by the sum of its
    components so the result sums to 1.

    Args:
        matrix: Square array-like accepted by ``np.linalg.eig``.

    Returns:
        1-D NumPy array of priorities, one per row of ``matrix``.
    """
    eigenvalues, eigenvectors = np.linalg.eig(matrix)

    # Column k of `eigenvectors` belongs to eigenvalues[k].
    dominant = np.argmax(np.real(eigenvalues))
    principal = np.real(eigenvectors[:, dominant])

    # Dividing by the component sum also cancels the arbitrary sign that the
    # eigen-solver may attach to the vector.
    total = principal.sum()
    return principal / total

def build_pairwise_matrix(values, benefit=True):
    """Build a pairwise ratio-comparison matrix from raw criterion values.

    For a benefit criterion (higher is better) entry ``(i, j)`` is
    ``values[i] / values[j]``; for a cost criterion (lower is better) it is
    ``values[j] / values[i]``. Whenever the divisor is zero the entry is set
    to 1 instead of dividing.

    Note that the zero fallback applies to the divisor only, so with zeros in
    ``values`` the matrix is not reciprocal: a zero numerator still yields 0
    while the mirrored entry falls back to 1.

    Args:
        values: 1-D sequence of numeric values, one per alternative.
        benefit: ``True`` for a benefit criterion, ``False`` for a cost
            criterion.

    Returns:
        ``(n, n)`` float NumPy array, where ``n == len(values)``.
    """
    vals = np.asarray(values, dtype=float)
    n = vals.shape[0]

    numerators = vals[:, np.newaxis]    # varies along rows    -> values[i]
    denominators = vals[np.newaxis, :]  # varies along columns -> values[j]

    # Start from all ones and overwrite only the cells whose divisor is
    # non-zero; masked cells keep the fallback value 1 and are never divided,
    # so no divide-by-zero warning is emitted.
    matrix = np.ones((n, n))
    np.divide(numerators, denominators, out=matrix, where=denominators != 0)

    if benefit:
        return matrix
    # cost[i, j] = values[j] / values[i] (fallback when values[i] == 0), which
    # is exactly benefit[j, i]; copy so the result is a plain contiguous array.
    return matrix.T.copy()

def ahp_rank_products(df, selected_product_id, price_margin=0.10, criteria_weights=None, max_items=30):
    """Rank products comparable to one selected product with an AHP-style score.

    Candidates are rows of ``df`` that share the selected product's
    ``cluster_label`` and ``item_category_detail`` and whose
    ``price_actual_original`` lies within ``price_margin`` (as a fraction) of
    the selected product's price. The selected product itself is always part
    of the candidate set and is therefore also present in the result.

    Each candidate is scored on three criteria, combined with
    ``criteria_weights``:
      * price (cost criterion, lower is better),
      * TF-IDF cosine similarity of ``specification + title`` to the selected
        product (benefit criterion),
      * ``item_rating`` (benefit criterion).

    Args:
        df: Product DataFrame. The columns read here are ``_id``,
            ``cluster_label``, ``item_category_detail``,
            ``price_actual_original``, ``specification``, ``title``,
            ``item_rating`` plus the display columns returned below.
            NOTE(review): the price filter compares the raw column values, so
            it presumably expects ``price_actual_original`` to be numeric;
            confirm against the data loader.
        selected_product_id: ``_id`` of the reference product.
        price_margin: Relative half-width of the accepted price window.
        criteria_weights: Three weights ordered ``[price, description,
            rating]``; normalized to sum to 1. Defaults to ``[0.4, 0.3, 0.3]``.
        max_items: Maximum number of candidates (including the selected
            product) that are compared.

    Returns:
        DataFrame sorted by ``AHP_score`` descending with the columns
        ``base_product_id``, ``_id``, ``title``, ``price_actual_display``,
        ``item_rating_display``, ``total_sold_display``, ``AHP_score`` and
        ``seller_name``.

    Raises:
        ValueError: If ``selected_product_id`` is not in ``df``, or if the
            selected product is missing from the candidate set after
            truncation (e.g. ``max_items`` < 1). Errors raised by the TF-IDF
            vectorizer are propagated unchanged.
    """
    if criteria_weights is None:
        criteria_weights = [0.4, 0.3, 0.3]  # [price, description, rating]

    selected_product_row = df[df['_id'] == selected_product_id]
    if selected_product_row.empty:
        raise ValueError("Selected product ID not found in DataFrame.")
    selected_product = selected_product_row.iloc[0]

    selected_price = selected_product.get('price_actual_original', 0)
    same_cluster = selected_product.get('cluster_label')
    same_category = selected_product.get('item_category_detail')

    min_price = selected_price * (1 - price_margin)
    max_price = selected_price * (1 + price_margin)

    candidates = df[
        (df['cluster_label'] == same_cluster) &
        (df['item_category_detail'] == same_category) &
        (df['price_actual_original'] >= min_price) &
        (df['price_actual_original'] <= max_price)
    ]

    # Put the selected product first BEFORE truncating to max_items. If it
    # stayed at its original position, head(max_items) could cut it off
    # whenever more than max_items candidates precede it in df.
    other_candidates = candidates[candidates['_id'] != selected_product_id]
    filtered_df = pd.concat(
        [selected_product_row.iloc[[0]], other_candidates],
        ignore_index=True,
    )
    filtered_df = filtered_df.drop_duplicates(subset='_id')
    filtered_df = filtered_df.head(max_items).reset_index(drop=True)

    selected_index_list = filtered_df.index[filtered_df['_id'] == selected_product_id].tolist()
    if not selected_index_list:
        raise ValueError("Selected product not found in filtered_df after insertion.")
    selected_index = selected_index_list[0]

    # Description criterion: similarity of every candidate's text to the
    # selected product's text (the selected product scores 1.0 against itself
    # unless its text has no usable terms).
    descriptions = filtered_df['specification'].fillna('') + ' ' + filtered_df['title'].fillna('')
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(descriptions)
    similarity_matrix = cosine_similarity(tfidf_matrix)
    desc_similarity = similarity_matrix[selected_index]

    # Unparseable or missing prices/ratings become 0.
    prices = pd.to_numeric(filtered_df['price_actual_original'], errors='coerce').fillna(0).values
    ratings = pd.to_numeric(filtered_df['item_rating'], errors='coerce').fillna(0).values

    price_matrix = build_pairwise_matrix(prices, benefit=False)
    desc_matrix = build_pairwise_matrix(desc_similarity, benefit=True)
    rating_matrix = build_pairwise_matrix(ratings, benefit=True)

    price_priority = calculate_priority_vector(price_matrix)
    desc_priority = calculate_priority_vector(desc_matrix)
    rating_priority = calculate_priority_vector(rating_matrix)

    # Normalize so callers may pass weights on any positive scale.
    criteria_weights = np.array(criteria_weights)
    criteria_weights = criteria_weights / criteria_weights.sum()

    final_scores = (
        criteria_weights[0] * price_priority +
        criteria_weights[1] * desc_priority +
        criteria_weights[2] * rating_priority
    )

    filtered_df['AHP_score'] = final_scores
    filtered_df['base_product_id'] = selected_product_id

    ranked_df = filtered_df.sort_values(by='AHP_score', ascending=False).reset_index(drop=True)

    return ranked_df[[
        'base_product_id',
        '_id',
        'title',
        'price_actual_display',
        'item_rating_display',
        'total_sold_display',
        'AHP_score',
        'seller_name'
    ]]

def ahp_rank_all_products(df, price_margin=0.10, criteria_weights=None, max_items=30):
    """Run ``ahp_rank_products`` for every distinct ``_id`` in ``df``.

    Products whose ranking raises are reported on stdout and skipped, so one
    bad product does not abort the whole batch.

    Args:
        df: Product DataFrame, passed unchanged to ``ahp_rank_products``.
        price_margin: Forwarded to ``ahp_rank_products``.
        criteria_weights: Forwarded to ``ahp_rank_products``.
        max_items: Forwarded to ``ahp_rank_products``.

    Returns:
        All per-product rankings concatenated into one DataFrame with a fresh
        integer index, or an empty DataFrame if no product could be ranked.
    """
    collected = []
    progress = tqdm(df['_id'].unique(), desc="Ranking products")

    for product_id in progress:
        try:
            per_product = ahp_rank_products(df, product_id, price_margin, criteria_weights, max_items)
        except Exception as e:
            # Best-effort batch: report the failure and move on.
            print(f"Skipping product {product_id}: {e}")
        else:
            collected.append(per_product)

    if not collected:
        return pd.DataFrame()
    return pd.concat(collected, ignore_index=True)

def ahp_rank_selected_products(df, selected_ids, criteria_weights=None):
    """Rank an explicit set of products against each other with an AHP-style score.

    Each product is scored on three criteria, combined with
    ``criteria_weights``:
      * ``price_actual_original`` (cost criterion, lower is better),
      * its mean TF-IDF cosine similarity (``specification + title``) to all
        selected products, itself included (benefit criterion),
      * ``item_rating`` (benefit criterion).

    Args:
        df: Product DataFrame containing ``_id``, ``specification``,
            ``title``, ``price_actual_original`` and ``item_rating``.
        selected_ids: List-like of ``_id`` values to compare.
        criteria_weights: Three weights ordered ``[price, description,
            rating]``; normalized to sum to 1. Defaults to ``[0.4, 0.3, 0.3]``.

    Returns:
        The matching rows of ``df`` (one per ``_id``, all original columns)
        with an added ``AHP_score`` column, sorted by score descending and
        re-indexed from 0.

    Raises:
        ValueError: If fewer than two distinct selected products are found in
            ``df``. Errors raised by the TF-IDF vectorizer are propagated
            unchanged.
    """
    if criteria_weights is None:
        criteria_weights = [0.4, 0.3, 0.3]  # [price, description, rating]

    # De-duplicate by _id (as ahp_rank_products does) so that repeated rows of
    # one product neither satisfy the "at least two" check nor get ranked
    # against themselves. The trailing copy() makes the later column
    # assignment operate on an independent frame.
    selected_df = df[df['_id'].isin(selected_ids)].drop_duplicates(subset='_id').copy()
    if len(selected_df) < 2:
        raise ValueError("Selected products not found or insufficient.")

    descriptions = selected_df['specification'].fillna('') + ' ' + selected_df['title'].fillna('')
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(descriptions)
    # Column-wise mean: how similar each product is, on average, to the group.
    desc_similarity_vector = cosine_similarity(tfidf_matrix).mean(axis=0)

    # Unparseable or missing prices/ratings become 0.
    prices = pd.to_numeric(selected_df['price_actual_original'], errors='coerce').fillna(0).values
    ratings = pd.to_numeric(selected_df['item_rating'], errors='coerce').fillna(0).values

    price_matrix = build_pairwise_matrix(prices, benefit=False)
    desc_matrix = build_pairwise_matrix(desc_similarity_vector, benefit=True)
    rating_matrix = build_pairwise_matrix(ratings, benefit=True)

    price_priority = calculate_priority_vector(price_matrix)
    desc_priority = calculate_priority_vector(desc_matrix)
    rating_priority = calculate_priority_vector(rating_matrix)

    # Normalize so callers may pass weights on any positive scale.
    criteria_weights = np.array(criteria_weights)
    criteria_weights = criteria_weights / criteria_weights.sum()

    final_scores = (
        criteria_weights[0] * price_priority +
        criteria_weights[1] * desc_priority +
        criteria_weights[2] * rating_priority
    )

    selected_df['AHP_score'] = final_scores
    return selected_df.sort_values(by='AHP_score', ascending=False).reset_index(drop=True)