diff --git a/oai.py b/oai.py index 746f759..b7b38b8 100644 --- a/oai.py +++ b/oai.py @@ -4532,12 +4532,24 @@ def chat(): current_index = len(session_history) - 1 if conversation_memory_enabled: memory_start_index = 0 + + # Recalculate session totals from loaded message history total_input_tokens = 0 total_output_tokens = 0 total_cost = 0.0 message_count = 0 + + for msg in session_history: + # Handle both old format (no cost data) and new format (with cost data) + total_input_tokens += msg.get('prompt_tokens', 0) + total_output_tokens += msg.get('completion_tokens', 0) + total_cost += msg.get('msg_cost', 0.0) + message_count += 1 + console.print(f"[bold green]Conversation '{conversation_name}' loaded with {len(session_history)} messages.[/]") - app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages") + if total_cost > 0: + console.print(f"[dim cyan]Restored session totals: {total_input_tokens + total_output_tokens} tokens, ${total_cost:.4f} cost[/]") + app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages, restored cost: ${total_cost:.4f}") continue elif user_input.lower().startswith("/delete"): @@ -4680,8 +4692,8 @@ def chat(): total_output_tokens = 0 total_cost = 0.0 message_count = 0 - console.print("[bold green]Conversation context reset.[/]") - app_logger.info("Conversation context reset by user") + console.print("[bold green]Conversation context reset. Totals cleared.[/]") + app_logger.info("Conversation context reset by user - all totals reset to 0") continue elif user_input.lower().startswith("/info"): @@ -5776,10 +5788,13 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed.""" stream_interrupted = False if is_streaming: + # Store the last chunk to get usage data after streaming completes + last_chunk = None try: with Live("", console=console, refresh_per_second=10, auto_refresh=True) as live: try: for chunk in response: + last_chunk = chunk # Keep track of last chunk for usage data if hasattr(chunk, 'error') and chunk.error: console.print(f"\n[bold red]Stream error: {chunk.error.message}[/]") app_logger.error(f"Stream error: {chunk.error.message}") @@ -5835,6 +5850,14 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed.""" pass continue # Now it's safe to continue + + # For streaming, try to get usage from last chunk + if last_chunk and hasattr(last_chunk, 'usage'): + response.usage = last_chunk.usage + app_logger.debug("Extracted usage data from last streaming chunk") + elif last_chunk: + app_logger.warning("Last streaming chunk has no usage data") + else: full_response = response.choices[0].message.content if response.choices else "" # Clear any processing messages before showing response @@ -5847,13 +5870,54 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed.""" console.print() console.print(Panel(md, title="[bold green]AI Response[/]", title_align="left", border_style="green")) - session_history.append({'prompt': user_input, 'response': full_response}) - current_index = len(session_history) - 1 - + # Extract usage data BEFORE appending to history usage = getattr(response, 'usage', None) - input_tokens = usage.input_tokens if usage and hasattr(usage, 'input_tokens') else 0 - output_tokens = usage.output_tokens if usage and hasattr(usage, 'output_tokens') else 0 - msg_cost = usage.total_cost_usd if usage and hasattr(usage, 'total_cost_usd') else estimate_cost(input_tokens, output_tokens) + + # DEBUG: Log what OpenRouter actually returns + if usage: + app_logger.debug(f"Usage object type: {type(usage)}") + app_logger.debug(f"Usage attributes: {dir(usage)}") + if hasattr(usage, '__dict__'): + app_logger.debug(f"Usage dict: {usage.__dict__}") + else: + app_logger.warning("No usage object in response!") + + # Try both attribute naming conventions (OpenAI standard vs Anthropic) + input_tokens = 0 + output_tokens = 0 + + if usage: + # Try prompt_tokens/completion_tokens (OpenAI/OpenRouter standard) + if hasattr(usage, 'prompt_tokens'): + input_tokens = usage.prompt_tokens or 0 + elif hasattr(usage, 'input_tokens'): + input_tokens = usage.input_tokens or 0 + + if hasattr(usage, 'completion_tokens'): + output_tokens = usage.completion_tokens or 0 + elif hasattr(usage, 'output_tokens'): + output_tokens = usage.output_tokens or 0 + + app_logger.debug(f"Extracted tokens: input={input_tokens}, output={output_tokens}") + + # Get cost from API or estimate + msg_cost = 0.0 + if usage and hasattr(usage, 'total_cost_usd') and usage.total_cost_usd: + msg_cost = float(usage.total_cost_usd) + app_logger.debug(f"Using API cost: ${msg_cost:.6f}") + else: + msg_cost = estimate_cost(input_tokens, output_tokens) + app_logger.debug(f"Estimated cost: ${msg_cost:.6f} (from {input_tokens} input + {output_tokens} output tokens)") + + # NOW append to history with cost data + session_history.append({ + 'prompt': user_input, + 'response': full_response, + 'msg_cost': msg_cost, + 'prompt_tokens': input_tokens, + 'completion_tokens': output_tokens + }) + current_index = len(session_history) - 1 total_input_tokens += input_tokens total_output_tokens += output_tokens