bug fixing
This commit is contained in:
@@ -48,7 +48,6 @@ class RSSFetcher:
|
||||
List of Article objects from the feed
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Fetching RSS feed: {source.name}")
|
||||
response = await self.client.get(str(source.url))
|
||||
response.raise_for_status()
|
||||
|
||||
@@ -56,7 +55,7 @@ class RSSFetcher:
|
||||
feed = feedparser.parse(response.text)
|
||||
|
||||
if feed.bozo:
|
||||
logger.warning(f"Feed parsing warning for {source.name}: {feed.bozo_exception}")
|
||||
logger.debug(f"Feed parsing warning for {source.name}: {feed.bozo_exception}")
|
||||
|
||||
articles = []
|
||||
cutoff_time = datetime.now(timezone.utc) - timedelta(hours=self.hours_lookback)
|
||||
@@ -67,10 +66,9 @@ class RSSFetcher:
|
||||
if article and article.published >= cutoff_time:
|
||||
articles.append(article)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse entry from {source.name}: {e}")
|
||||
logger.debug(f"Failed to parse entry from {source.name}: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"Fetched {len(articles)} articles from {source.name}")
|
||||
return articles
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
@@ -158,5 +156,4 @@ class RSSFetcher:
|
||||
articles = await self.fetch(source)
|
||||
all_articles.extend(articles)
|
||||
|
||||
logger.info(f"Total articles fetched from all sources: {len(all_articles)}")
|
||||
return all_articles
|
||||
|
||||
@@ -28,7 +28,7 @@ class OpenRouterClient:
|
||||
)
|
||||
|
||||
self.model = config.ai.model
|
||||
logger.info(f"Initialized OpenRouter client with model: {self.model}")
|
||||
logger.debug(f"Initialized OpenRouter client with model: {self.model}")
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Article relevance filtering using AI"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
from ..storage.models import Article
|
||||
@@ -87,7 +88,7 @@ class ArticleFilter:
|
||||
self, articles: list[Article], max_articles: Optional[int] = None
|
||||
) -> list[tuple[Article, float]]:
|
||||
"""
|
||||
Filter and rank articles by relevance
|
||||
Filter and rank articles by relevance (processes articles concurrently)
|
||||
|
||||
Args:
|
||||
articles: Articles to filter
|
||||
@@ -98,11 +99,27 @@ class ArticleFilter:
|
||||
"""
|
||||
scored_articles: list[tuple[Article, float]] = []
|
||||
|
||||
for article in articles:
|
||||
is_relevant, score = await self.is_relevant(article)
|
||||
# Process articles concurrently in batches to avoid rate limits
|
||||
batch_size = 20 # Process 20 at a time (increased for powerful servers)
|
||||
|
||||
if is_relevant and score is not None:
|
||||
scored_articles.append((article, score))
|
||||
for i in range(0, len(articles), batch_size):
|
||||
batch = articles[i : i + batch_size]
|
||||
logger.debug(f"Processing batch {i // batch_size + 1} ({len(batch)} articles)")
|
||||
|
||||
# Score all articles in batch concurrently
|
||||
tasks = [self.is_relevant(article) for article in batch]
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Collect successful results
|
||||
for article, result in zip(batch, results):
|
||||
if isinstance(result, BaseException):
|
||||
logger.error(f"Error scoring article '{article.title}': {result}")
|
||||
continue
|
||||
|
||||
# result is a tuple: (is_relevant, score)
|
||||
is_relevant, score = result
|
||||
if is_relevant and score is not None:
|
||||
scored_articles.append((article, score))
|
||||
|
||||
# Sort by score descending
|
||||
scored_articles.sort(key=lambda x: x[1], reverse=True)
|
||||
@@ -111,7 +128,7 @@ class ArticleFilter:
|
||||
if max_articles:
|
||||
scored_articles = scored_articles[:max_articles]
|
||||
|
||||
logger.info(
|
||||
logger.debug(
|
||||
f"Filtered {len(articles)} articles down to {len(scored_articles)} relevant ones"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Article summarization using AI"""
|
||||
|
||||
import asyncio
|
||||
|
||||
from ..storage.models import Article
|
||||
from ..logger import get_logger
|
||||
from .client import OpenRouterClient
|
||||
@@ -54,7 +56,7 @@ class ArticleSummarizer:
|
||||
|
||||
async def summarize_batch(self, articles: list[Article]) -> dict[str, str]:
|
||||
"""
|
||||
Summarize multiple articles
|
||||
Summarize multiple articles concurrently
|
||||
|
||||
Args:
|
||||
articles: List of articles to summarize
|
||||
@@ -64,9 +66,25 @@ class ArticleSummarizer:
|
||||
"""
|
||||
summaries = {}
|
||||
|
||||
for article in articles:
|
||||
summary = await self.summarize(article)
|
||||
summaries[article.id] = summary
|
||||
# Process in batches to avoid overwhelming the API
|
||||
batch_size = 20 # Increased for powerful servers
|
||||
|
||||
logger.info(f"Summarized {len(summaries)} articles")
|
||||
for i in range(0, len(articles), batch_size):
|
||||
batch = articles[i : i + batch_size]
|
||||
logger.debug(f"Summarizing batch {i // batch_size + 1} ({len(batch)} articles)")
|
||||
|
||||
# Summarize all articles in batch concurrently
|
||||
tasks = [self.summarize(article) for article in batch]
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Collect results
|
||||
for article, result in zip(batch, results):
|
||||
if isinstance(result, BaseException):
|
||||
logger.error(f"Error summarizing '{article.title}': {result}")
|
||||
# Use fallback summary
|
||||
result = article.summary if article.summary else article.content[:200] + "..."
|
||||
|
||||
summaries[article.id] = result
|
||||
|
||||
logger.debug(f"Summarized {len(summaries)} articles")
|
||||
return summaries
|
||||
|
||||
@@ -66,7 +66,7 @@ class EmailGenerator:
|
||||
# Generate plain text version
|
||||
text = self._generate_text_version(entries, date_str, subject)
|
||||
|
||||
logger.info(f"Generated email with {len(entries)} articles")
|
||||
logger.debug(f"Generated email with {len(entries)} articles")
|
||||
|
||||
return html_inlined, text
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ class EmailSender:
|
||||
|
||||
# Send email
|
||||
server.send_message(msg)
|
||||
logger.info(f"Email sent successfully to {self.config.to}")
|
||||
logger.debug(f"Email sent successfully to {self.config.to}")
|
||||
return True
|
||||
|
||||
finally:
|
||||
|
||||
41
src/main.py
41
src/main.py
@@ -21,10 +21,6 @@ async def main():
|
||||
setup_logger()
|
||||
logger = get_logger()
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("News Agent starting...")
|
||||
logger.info("=" * 60)
|
||||
|
||||
try:
|
||||
# Load configuration
|
||||
config = get_config()
|
||||
@@ -39,17 +35,18 @@ async def main():
|
||||
# Initialize RSS fetcher
|
||||
fetcher = RSSFetcher()
|
||||
|
||||
# Fetch articles from all sources
|
||||
logger.info(f"Fetching from {len(config.rss_sources)} RSS sources...")
|
||||
# Fetch articles from all sources (silently)
|
||||
articles = await fetcher.fetch_all(config.rss_sources)
|
||||
|
||||
if not articles:
|
||||
logger.warning("No articles fetched from any source")
|
||||
await fetcher.close()
|
||||
return
|
||||
|
||||
# Save articles to database (deduplication)
|
||||
new_articles_count = await db.save_articles(articles)
|
||||
|
||||
# Log only the summary
|
||||
logger.info(f"Total articles fetched from all sources: {len(articles)}")
|
||||
logger.info(
|
||||
f"Saved {new_articles_count} new articles (filtered {len(articles) - new_articles_count} duplicates)"
|
||||
)
|
||||
@@ -60,24 +57,19 @@ async def main():
|
||||
unprocessed = await db.get_unprocessed_articles()
|
||||
|
||||
if not unprocessed:
|
||||
logger.info("No new articles to process")
|
||||
return
|
||||
|
||||
logger.info(f"Processing {len(unprocessed)} new articles with AI...")
|
||||
|
||||
# Initialize AI components
|
||||
ai_client = OpenRouterClient()
|
||||
filter_ai = ArticleFilter(ai_client)
|
||||
summarizer = ArticleSummarizer(ai_client)
|
||||
|
||||
# Filter articles by relevance
|
||||
logger.info("Filtering articles by relevance...")
|
||||
# Filter articles by relevance (silently)
|
||||
filtered_articles = await filter_ai.filter_articles(
|
||||
unprocessed, max_articles=config.ai.filtering.max_articles
|
||||
)
|
||||
|
||||
if not filtered_articles:
|
||||
logger.warning("No relevant articles found after filtering")
|
||||
# Mark all as processed but not included
|
||||
for article in unprocessed:
|
||||
await db.update_article_processing(
|
||||
@@ -85,14 +77,15 @@ async def main():
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(f"Selected {len(filtered_articles)} relevant articles")
|
||||
# Summarize filtered articles (using batch processing for speed, silently)
|
||||
# Extract just the articles for batch summarization
|
||||
articles_to_summarize = [article for article, score in filtered_articles]
|
||||
summaries_dict = await summarizer.summarize_batch(articles_to_summarize)
|
||||
|
||||
# Summarize filtered articles
|
||||
logger.info("Generating AI summaries...")
|
||||
# Create digest entries with summaries
|
||||
digest_entries = []
|
||||
|
||||
for article, score in filtered_articles:
|
||||
summary = await summarizer.summarize(article)
|
||||
summary = summaries_dict[article.id]
|
||||
|
||||
# Update database
|
||||
await db.update_article_processing(
|
||||
@@ -116,8 +109,7 @@ async def main():
|
||||
article.id, relevance_score=0.0, ai_summary="", included=False
|
||||
)
|
||||
|
||||
# Generate email
|
||||
logger.info("Generating email digest...")
|
||||
# Generate email (silently)
|
||||
generator = EmailGenerator()
|
||||
|
||||
date_str = datetime.now().strftime("%A, %B %d, %Y")
|
||||
@@ -127,16 +119,11 @@ async def main():
|
||||
digest_entries, date_str, subject
|
||||
)
|
||||
|
||||
# Send email
|
||||
logger.info("Sending email...")
|
||||
# Send email (silently)
|
||||
sender = EmailSender()
|
||||
success = sender.send(subject, html_content, text_content)
|
||||
|
||||
if success:
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Daily digest sent successfully with {len(digest_entries)} articles!")
|
||||
logger.info("=" * 60)
|
||||
else:
|
||||
if not success:
|
||||
logger.error("Failed to send email")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -58,7 +58,7 @@ class Database:
|
||||
|
||||
await db.commit()
|
||||
|
||||
logger.info(f"Database initialized at {self.db_path}")
|
||||
logger.debug(f"Database initialized at {self.db_path}")
|
||||
|
||||
async def article_exists(self, article_id: str) -> bool:
|
||||
"""Check if article already exists in database"""
|
||||
@@ -173,7 +173,7 @@ class Database:
|
||||
await db.commit()
|
||||
|
||||
if deleted > 0:
|
||||
logger.info(f"Cleaned up {deleted} old articles")
|
||||
logger.debug(f"Cleaned up {deleted} old articles")
|
||||
|
||||
def _row_to_article(self, row: aiosqlite.Row) -> Article:
|
||||
"""Convert database row to Article model"""
|
||||
|
||||
Reference in New Issue
Block a user