# projects/python_generator/core/data_fetcher.py

import pandas as pd
import random
from processing.formatters import get_raw_value
from config.table_definitions import SEARCH_COLUMNS_MAP_REG_NR

def fetch_main_company_data(df_register: pd.DataFrame, reg_nr_str: str):
    """
    Return the first row of the 'register' DataFrame whose 'regcode' equals the given number.

    Args:
        df_register: Register table; a match requires a 'regcode' column.
        reg_nr_str: Registration number to look up. It is converted with
            ``str()`` before being compared against the column values.

    Returns:
        The first matching row as a ``pd.Series``, or ``None`` when the frame
        is ``None``/empty, has no 'regcode' column, or holds no matching row.
    """
    if df_register is None or df_register.empty or 'regcode' not in df_register.columns:
        # Without a 'regcode' column the company cannot be found; report
        # "not found" rather than letting the column lookup raise KeyError.
        return None

    company_row = df_register[df_register['regcode'] == str(reg_nr_str)]
    if company_row.empty:
        return None
    return company_row.iloc[0]

def fetch_data_for_table(df: pd.DataFrame, reg_nr_str: str, columns_to_search: list):
    """
    Select the rows of ``df`` in which any of the listed columns equals the registration number.

    Args:
        df: Table to filter.
        reg_nr_str: Registration number; converted with ``str()`` before comparison.
        columns_to_search: Column names to compare against the number. Names
            that are not present in ``df`` are ignored.

    Returns:
        The matching rows of ``df`` (original index preserved). An empty
        ``pd.DataFrame()`` is returned when ``df`` is ``None``/empty or no
        columns are given.
    """
    if df is None or df.empty or not columns_to_search:
        return pd.DataFrame()

    target = str(reg_nr_str)

    # The mask must share df's index. A mask on a fresh 0..n-1 index would be
    # label-aligned with the per-column comparisons, which misplaces or drops
    # matches whenever df carries any other index (filtered, concatenated, ...).
    mask = pd.Series(False, index=df.index)
    for col in columns_to_search:
        if col in df.columns:
            mask = mask | (df[col] == target)
    return df[mask]

def sort_historical_names(df: pd.DataFrame):
    """
    Sort a DataFrame of historical names by its 'date_to' column, newest first.

    'date_to' values are parsed with ``pd.to_datetime(..., errors='coerce')``;
    values that cannot be parsed become NaT and are placed last.

    Args:
        df: Table of historical names. It is not modified.

    Returns:
        A new, sorted DataFrame with the same columns as ``df``. ``df`` itself
        is returned unchanged when it is ``None``, empty, or has no 'date_to'
        column.
    """
    if df is None or df.empty or 'date_to' not in df.columns:
        return df

    # assign() works on a copy, so the temporary sort key never leaks into the
    # caller's frame (and no SettingWithCopy warning is triggered on slices).
    keyed = df.assign(sort_key_date=pd.to_datetime(df['date_to'], errors='coerce'))
    ordered = keyed.sort_values(by='sort_key_date', ascending=False, na_position='last')
    return ordered.drop(columns=['sort_key_date'])

def fetch_all_data_for_reg_nr(dataframes: dict, reg_nr_str: str, table_names: list):
    """
    Collect all records for one registration number from the supplied tables.

    Three passes are made:
      1. Every table in ``table_names`` that exists in ``dataframes`` and has
         an entry in ``SEARCH_COLUMNS_MAP_REG_NR`` is filtered by the number.
         'register_name_history' rows are additionally sorted.
      2. If 'financial_statements' rows were found, rows of the detail tables
         whose 'statement_id' is among the statement ids are attached.
      3. For 'members' / 'stockholders', rows of the corresponding joint-owner
         table whose 'member_id' is among the parent ids are attached.

    Args:
        dataframes: Mapping of table name to DataFrame (entries may be ``None``).
        reg_nr_str: Registration number; converted with ``str()``.
        table_names: Names of the primary tables to search.

    Returns:
        Dict mapping table name to a list of record dicts. Tables without any
        matching rows are left out.
    """
    def _id_set(rows):
        # String form of each row's 'id'; rows with a falsy 'id' are skipped.
        return {str(get_raw_value(row, 'id')) for row in rows if get_raw_value(row, 'id')}

    def _usable(frame, required_column):
        # A frame can be searched only if it exists, has rows and has the column.
        return frame is not None and not frame.empty and required_column in frame.columns

    reg_nr = str(reg_nr_str)
    collected = {}

    # Pass 1: primary tables, driven by the dynamic list of table names.
    for name in table_names:
        if name not in dataframes or name not in SEARCH_COLUMNS_MAP_REG_NR:
            continue
        frame = dataframes[name]
        if frame is None or frame.empty:
            continue
        search_columns = SEARCH_COLUMNS_MAP_REG_NR[name]
        if not search_columns:
            continue

        rows = fetch_data_for_table(frame, reg_nr, search_columns).copy()
        if rows.empty:
            continue
        if name == 'register_name_history':
            rows = sort_historical_names(rows)
        if not rows.empty:
            collected[name] = rows.to_dict(orient='records')

    # Pass 2: financial detail tables linked through 'statement_id'.
    if 'financial_statements' in collected:
        statement_ids = _id_set(collected['financial_statements'])
        if statement_ids:
            wanted_statements = list(statement_ids)
            for detail_name in ('income_statements', 'balance_sheets', 'cash_flow_statements'):
                detail = dataframes.get(detail_name)
                if not _usable(detail, 'statement_id'):
                    continue
                linked = detail[detail['statement_id'].isin(wanted_statements)]
                if not linked.empty:
                    collected[detail_name] = linked.to_dict(orient='records')

    # Pass 3: joint owners linked to members / stockholders through 'member_id'.
    joint_owner_links = (
        ('members', 'members_joint_owners'),
        ('stockholders', 'stockholders_joint_owners'),
    )
    for parent_name, joint_name in joint_owner_links:
        joint = dataframes.get(joint_name)
        if parent_name not in collected or not _usable(joint, 'member_id'):
            continue
        parent_ids = _id_set(collected[parent_name])
        if not parent_ids:
            continue
        joint_rows = joint[joint['member_id'].isin(list(parent_ids))]
        if not joint_rows.empty:
            collected[joint_name] = joint_rows.to_dict(orient='records')

    return collected


def fetch_member_as_entity_data(df_members: pd.DataFrame, reg_nr_str: str):
    """
    Find the member records in which the given company is itself the member.

    Rows are matched on the 'legal_entity_registration_number' column, compared
    against ``str(reg_nr_str)``.

    Returns:
        A list of record dicts, or an empty list when the frame is
        ``None``/empty, lacks the column, or has no matching rows.
    """
    column = 'legal_entity_registration_number'
    if df_members is None or df_members.empty:
        return []
    if column not in df_members.columns:
        return []

    matches = df_members[df_members[column] == str(reg_nr_str)]
    if matches.empty:
        return []
    return matches.to_dict(orient='records')

def fetch_super_id(df_register: pd.DataFrame, reg_nr_str: str):
    """
    Get the 'super_id' of the register row matching a registration number.

    The value is read from the first matching row through
    ``get_raw_value(row, 'super_id')`` and converted with ``int(float(...))``.

    Args:
        df_register: Register table; a match requires a 'regcode' column.
        reg_nr_str: Registration number; converted with ``str()`` before comparison.

    Returns:
        The super id as ``int``, or ``None`` when the frame is ``None``/empty,
        has no 'regcode' column, holds no matching row, or the stored value is
        ``None`` or cannot be converted to an integer.
    """
    if df_register is None or df_register.empty or 'regcode' not in df_register.columns:
        # No 'regcode' column means no possible match; avoid a KeyError.
        return None

    row = df_register[df_register['regcode'] == str(reg_nr_str)]
    if row.empty:
        return None

    super_id_val = get_raw_value(row.iloc[0], 'super_id')
    if super_id_val is None:
        return None
    try:
        # Going through float() also accepts float-like inputs such as '12.0'.
        return int(float(super_id_val))
    except (ValueError, TypeError, OverflowError):
        # ValueError: NaN or non-numeric text; OverflowError: +/-infinity;
        # TypeError: objects float() cannot handle.
        return None

def _get_active_companies_df(df_register: pd.DataFrame):
    """
    Filter the register DataFrame down to the rows treated as active.

    A row is kept when its 'closed' value is neither 'L' nor 'R' and its
    'terminated' value is missing, an empty string, or '0000-00-00'.

    Returns:
        The filtered rows of ``df_register``, or an empty ``pd.DataFrame()``
        when the input is ``None`` or empty.
    """
    if df_register is None or df_register.empty:
        return pd.DataFrame()

    terminated = df_register['terminated']
    not_closed = ~df_register['closed'].isin(['L', 'R'])
    # '' and '0000-00-00' are treated the same as a missing termination value.
    not_terminated = terminated.isna() | terminated.isin(['', '0000-00-00'])
    return df_register[not_closed & not_terminated]

def fetch_regcodes_by_super_ids(df_register: pd.DataFrame, super_ids_list: list, active_only: bool = False):
    """
    Find all registration numbers whose 'super_id' is in the given list.

    Both the 'super_id' column and the requested ids are converted to numbers
    (unparseable values become NaN and are ignored). If that numeric path
    raises, the ids are compared as strings instead.

    Args:
        df_register: Register table with 'super_id' and 'regcode' columns.
        super_ids_list: Super ids to look for.
        active_only: When true, only rows kept by ``_get_active_companies_df``
            are searched.

    Returns:
        List of registration numbers as strings; missing or blank values are
        skipped. Empty list when the frame is ``None``/empty, the id list is
        empty, or none of the ids is numeric.
    """
    if df_register is None or df_register.empty or not super_ids_list:
        return []

    if active_only:
        df_to_search = _get_active_companies_df(df_register)
    else:
        df_to_search = df_register

    try:
        numeric_column = pd.to_numeric(df_to_search['super_id'], errors='coerce')
        wanted_ids = []
        for raw_id in super_ids_list:
            as_number = pd.to_numeric(raw_id, errors='coerce')
            if pd.notna(as_number):
                wanted_ids.append(as_number)
        if not wanted_ids:
            return []
        mask = numeric_column.isin(wanted_ids)
    except Exception:
        # Fallback: plain string comparison when numeric conversion fails.
        as_strings = [str(s) for s in super_ids_list]
        mask = df_to_search['super_id'].isin(as_strings)

    regcodes = []
    for rc in df_to_search.loc[mask, 'regcode'].tolist():
        if pd.notna(rc) and str(rc).strip():
            regcodes.append(str(rc))
    return regcodes

def fetch_random_regcodes(df_register: pd.DataFrame, limit: int = 10, active_only: bool = False):
    """
    Pick random, valid registration numbers from the 'register' DataFrame.

    A registration number is valid when it matches ``^\\d{11}$``. Duplicates
    are removed before sampling.

    Args:
        df_register: Register table with a 'regcode' column.
        limit: Maximum number of registration numbers to return.
        active_only: When true, only rows kept by ``_get_active_companies_df``
            are sampled.

    Returns:
        Up to ``limit`` distinct registration numbers in random order. Empty
        list when the frame is ``None``/empty, has no 'regcode' column, or
        contains no valid number.
    """
    if df_register is None or df_register.empty or 'regcode' not in df_register.columns:
        return []

    df_to_sample = _get_active_companies_df(df_register) if active_only else df_register

    candidates = df_to_sample['regcode'].dropna()
    if candidates.empty:
        # Nothing left to validate; this also avoids using .str on an
        # all-NaN numeric column, which has no string accessor.
        return []

    # na=False: non-string entries count as "no match" instead of yielding NaN,
    # which would make the boolean indexing below raise.
    is_valid = candidates.str.match(r'^\d{11}$', na=False)
    valid_regcodes = candidates[is_valid].unique()
    if len(valid_regcodes) == 0:
        return []

    actual_limit = min(limit, len(valid_regcodes))
    return random.sample(list(valid_regcodes), actual_limit)