Trend Prediction¶
Use data analysis to predict emerging trends before they go mainstream.
Early Trend Detection¶
import asyncio
import re
from collections import Counter, defaultdict
from datetime import datetime, timedelta

from xeepy import Xeepy


class TrendPredictor:
    """Detect emerging trends before they peak"""

    def __init__(self):
        self.keyword_history = defaultdict(list)
        self.velocity_threshold = 2.0  # 2x growth = trending

    async def scan_niche(
        self,
        seed_keywords: list,
        niche_accounts: list,
        hours: int = 24
    ):
        """
        Scan a niche for emerging keywords and topics.

        Strategy:
        1. Monitor seed keywords for new associated terms
        2. Track what niche leaders are talking about
        3. Identify keywords with accelerating velocity
        """
        async with Xeepy() as x:
            all_text = []

            # Gather recent content from the niche
            for keyword in seed_keywords:
                results = await x.scrape.search(
                    keyword,
                    limit=200,
                    since_hours=hours
                )
                all_text.extend([r.text for r in results])

            # Get tweets from niche leaders
            for account in niche_accounts:
                tweets = await x.scrape.tweets(account, limit=50)
                all_text.extend([t.text for t in tweets])

            # Extract and count keywords
            keywords = self._extract_keywords(all_text)

            # Calculate velocity for each keyword
            trending = []
            for keyword, count in keywords.most_common(100):
                velocity = self._calculate_velocity(keyword, count)
                if velocity >= self.velocity_threshold:
                    trending.append({
                        "keyword": keyword,
                        "mentions": count,
                        "velocity": velocity,
                        "predicted_peak": self._predict_peak(velocity)
                    })

            return sorted(trending, key=lambda t: -t["velocity"])

    def _extract_keywords(self, texts: list) -> Counter:
        """Extract meaningful keywords from text"""
        keywords = Counter()

        # Stop words to ignore
        stop_words = {
            "the", "a", "an", "is", "are", "was", "were", "be", "been",
            "have", "has", "had", "do", "does", "did", "will", "would",
            "could", "should", "may", "might", "must", "this", "that",
            "these", "those", "i", "you", "he", "she", "it", "we", "they",
            "what", "which", "who", "when", "where", "why", "how", "all",
            "each", "every", "both", "few", "more", "most", "other", "some",
            "such", "no", "not", "only", "own", "same", "so", "than", "too",
            "very", "just", "can", "now", "new", "one", "get", "got", "like",
            "make", "know", "think", "see", "come", "want", "use", "find",
            "give", "tell", "try", "leave", "call", "keep", "let", "begin",
            "seem", "help", "show", "hear", "play", "run", "move", "live",
            "believe", "bring", "happen", "write", "sit", "stand", "lose",
            "pay", "meet", "include", "continue", "set", "learn", "change",
            "lead", "understand", "watch", "follow", "stop", "create", "speak",
            "read", "spend", "grow", "open", "walk", "win", "teach", "offer",
            "remember", "consider", "appear", "buy", "wait", "serve", "die",
            "send", "build", "stay", "fall", "cut", "reach", "kill", "remain",
            "https", "http", "com", "www", "amp", "rt"
        }

        for text in texts:
            # Clean and tokenize
            text = text.lower()
            text = re.sub(r'https?://\S+', '', text)  # Remove URLs
            text = re.sub(r'@\w+', '', text)          # Remove mentions
            words = re.findall(r'\b[a-z]{3,15}\b', text)
            for word in words:
                if word not in stop_words:
                    keywords[word] += 1

            # Extract hashtags separately (high signal)
            hashtags = re.findall(r'#(\w+)', text)
            for tag in hashtags:
                keywords[f"#{tag.lower()}"] += 2  # Weight hashtags higher

        return keywords

    def _calculate_velocity(self, keyword: str, current_count: int) -> float:
        """Calculate growth velocity of a keyword"""
        history = self.keyword_history[keyword]

        if not history:
            # First observation
            self.keyword_history[keyword].append({
                "timestamp": datetime.now(),
                "count": current_count
            })
            return 1.0

        # Compare to previous observation
        last = history[-1]
        hours_elapsed = (datetime.now() - last["timestamp"]).total_seconds() / 3600
        if hours_elapsed < 1:
            return 1.0

        # Calculate hourly growth rate
        if last["count"] > 0:
            growth_rate = current_count / last["count"]
        else:
            growth_rate = current_count

        # Normalize by time
        velocity = growth_rate / max(hours_elapsed, 1)

        # Update history
        history.append({
            "timestamp": datetime.now(),
            "count": current_count
        })

        # Keep last 10 observations
        self.keyword_history[keyword] = history[-10:]

        return velocity

    def _predict_peak(self, velocity: float) -> str:
        """Estimate when trend will peak"""
        if velocity >= 5.0:
            return "6-12 hours"
        elif velocity >= 3.0:
            return "12-24 hours"
        elif velocity >= 2.0:
            return "24-48 hours"
        else:
            return "48+ hours"


# Usage
async def find_emerging_trends():
    predictor = TrendPredictor()

    trends = await predictor.scan_niche(
        seed_keywords=["AI", "GPT", "LLM", "machine learning"],
        niche_accounts=["OpenAI", "AnthropicAI", "GoogleAI"],
        hours=24
    )

    print("🔮 EMERGING TRENDS")
    print("=" * 60)

    for trend in trends[:15]:
        peak = trend["predicted_peak"]
        velocity = trend["velocity"]

        # Visual indicator
        if velocity >= 5:
            indicator = "🔥🔥🔥"
        elif velocity >= 3:
            indicator = "🔥🔥"
        else:
            indicator = "🔥"

        print(f"\n{indicator} {trend['keyword']}")
        print(f"   Mentions: {trend['mentions']}")
        print(f"   Velocity: {velocity:.1f}x")
        print(f"   Peak in: {peak}")

asyncio.run(find_emerging_trends())
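Note that _calculate_velocity needs at least two observations of a keyword before it can report real growth, so the very first scan only establishes a baseline. Because keyword_history lives in memory, that baseline is lost when the process exits. A minimal sketch of persisting it between runs, assuming a hypothetical keyword_history.json file alongside your script:

import json

HISTORY_FILE = "keyword_history.json"  # hypothetical path, adjust as needed

def save_history(predictor: TrendPredictor) -> None:
    """Serialize keyword history so velocity survives restarts."""
    data = {
        keyword: [
            {"timestamp": obs["timestamp"].isoformat(), "count": obs["count"]}
            for obs in observations
        ]
        for keyword, observations in predictor.keyword_history.items()
    }
    with open(HISTORY_FILE, "w") as f:
        json.dump(data, f)

def load_history(predictor: TrendPredictor) -> None:
    """Restore history from a previous run, if one exists."""
    try:
        with open(HISTORY_FILE) as f:
            data = json.load(f)
    except FileNotFoundError:
        return  # First run: no baseline yet
    for keyword, observations in data.items():
        predictor.keyword_history[keyword] = [
            {"timestamp": datetime.fromisoformat(obs["timestamp"]), "count": obs["count"]}
            for obs in observations
        ]

Call load_history(predictor) before scan_niche and save_history(predictor) after it, and every run from the second onward can measure real velocity.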
Hashtag Trend Analysis¶
async def analyze_hashtag_trajectory(hashtag: str, days: int = 7):
    """
    Analyze a hashtag's growth trajectory to predict if it will trend.
    """
    async with Xeepy() as x:
        # Build a daily time series by searching successive windows
        time_series = []

        for day_offset in range(days, 0, -1):
            since = datetime.now() - timedelta(days=day_offset)
            until = datetime.now() - timedelta(days=day_offset - 1)

            results = await x.scrape.search(
                f"#{hashtag}",
                since=since.strftime("%Y-%m-%d"),
                until=until.strftime("%Y-%m-%d"),
                limit=500
            )

            time_series.append({
                "date": since.date(),
                "count": len(results),
                "avg_engagement": sum(r.like_count + r.retweet_count for r in results) / max(len(results), 1)
            })

        # Analyze trajectory
        counts = [t["count"] for t in time_series]

        # Calculate trend direction: recent 3 days vs. first 3 days
        if len(counts) >= 3:
            recent_avg = sum(counts[-3:]) / 3
            earlier_avg = sum(counts[:3]) / 3
            if earlier_avg > 0:
                growth_rate = (recent_avg - earlier_avg) / earlier_avg * 100
            else:
                growth_rate = 100 if recent_avg > 0 else 0
        else:
            growth_rate = 0

        # Predict future direction
        if growth_rate > 50:
            prediction = "📈 ACCELERATING - Likely to trend soon"
        elif growth_rate > 20:
            prediction = "↗️ GROWING - Building momentum"
        elif growth_rate > -10:
            prediction = "➡️ STABLE - Consistent usage"
        elif growth_rate > -30:
            prediction = "↘️ DECLINING - Losing interest"
        else:
            prediction = "📉 FALLING - Trend is over"

        print(f"\n#{hashtag} Trajectory Analysis")
        print("=" * 50)
        print(f"{days}-day Growth: {growth_rate:+.1f}%")
        print(f"Prediction: {prediction}")
        print("\nDaily breakdown:")
        for t in time_series:
            bar = "█" * min(int(t["count"] / 10), 30)
            print(f"  {t['date']}: {bar} {t['count']}")

        return {
            "hashtag": hashtag,
            "time_series": time_series,
            "growth_rate": growth_rate,
            "prediction": prediction
        }

asyncio.run(analyze_hashtag_trajectory("AIagents"))
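The trajectory logic compares the average of the last three days against the first three. For example, daily counts of [10, 12, 11, 20, 35, 50, 80] give an earlier average of 11 and a recent average of 55, a growth rate of +400%, which lands in the ACCELERATING bucket. If you want to sanity-check the thresholds without scraping, here is a pure-Python version of the same logic (classify_growth is a hypothetical helper, not part of xeepy):

def classify_growth(counts: list) -> tuple:
    """Pure version of the trajectory logic above, for offline testing."""
    if len(counts) >= 3:
        recent_avg = sum(counts[-3:]) / 3
        earlier_avg = sum(counts[:3]) / 3
        growth = ((recent_avg - earlier_avg) / earlier_avg * 100
                  if earlier_avg > 0 else (100 if recent_avg > 0 else 0))
    else:
        growth = 0
    if growth > 50:
        label = "ACCELERATING"
    elif growth > 20:
        label = "GROWING"
    elif growth > -10:
        label = "STABLE"
    elif growth > -30:
        label = "DECLINING"
    else:
        label = "FALLING"
    return growth, label

# Synthetic sanity checks
assert classify_growth([10, 12, 11, 20, 35, 50, 80])[1] == "ACCELERATING"
assert classify_growth([50, 48, 52, 49, 51, 50, 50])[1] == "STABLE"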
Viral Content Predictor¶
async def predict_viral_potential(tweet_url: str):
    """
    Analyze a tweet's early metrics to predict viral potential.

    Based on research: tweets that go viral typically show
    specific patterns in their first 1-2 hours.
    """
    async with Xeepy() as x:
        tweet = await x.scrape.tweet(tweet_url)
        author = await x.scrape.profile(tweet.author_username)

        # Calculate metrics
        hours_since_post = (datetime.now() - tweet.created_at).total_seconds() / 3600
        if hours_since_post < 0.5:
            print("⚠️ Too early to predict (wait 30+ minutes)")
            return None

        # Engagement velocity
        likes_per_hour = tweet.like_count / max(hours_since_post, 0.5)
        retweets_per_hour = tweet.retweet_count / max(hours_since_post, 0.5)
        replies_per_hour = tweet.reply_count / max(hours_since_post, 0.5)

        # Relative to author's typical performance
        expected_likes = author.followers_count * 0.001  # ~0.1% engagement baseline
        performance_ratio = tweet.like_count / max(expected_likes, 1)

        # Viral signals
        signals = []
        score = 0

        # High velocity
        if likes_per_hour > 100:
            signals.append("🔥 High like velocity (100+/hr)")
            score += 30
        elif likes_per_hour > 50:
            signals.append("✨ Good like velocity (50+/hr)")
            score += 20

        # Retweet ratio (retweets spread content)
        rt_ratio = tweet.retweet_count / max(tweet.like_count, 1)
        if rt_ratio > 0.3:
            signals.append("🔄 High retweet ratio (spreading)")
            score += 25
        elif rt_ratio > 0.15:
            signals.append("📤 Good retweet ratio")
            score += 15

        # Reply engagement (conversations = algorithm boost)
        if replies_per_hour > 20:
            signals.append("💬 High reply engagement")
            score += 20

        # Outperforming baseline
        if performance_ratio > 10:
            signals.append("📊 10x above baseline!")
            score += 25
        elif performance_ratio > 5:
            signals.append("📈 5x above baseline")
            score += 15

        # Quote tweets (high signal)
        if tweet.quote_count > tweet.retweet_count * 0.2:
            signals.append("💭 High quote engagement")
            score += 10

        # The individual signals can sum past 100, so cap to the /100 scale
        score = min(score, 100)

        # Predict outcome
        if score >= 70:
            prediction = "🚀 HIGH VIRAL POTENTIAL - Act now!"
            recommendation = "Engage immediately, ride the wave"
        elif score >= 50:
            prediction = "📈 GOOD POTENTIAL - Worth watching"
            recommendation = "Monitor closely, engage if relevant"
        elif score >= 30:
            prediction = "🌱 MODERATE POTENTIAL"
            recommendation = "May grow slowly, selective engagement"
        else:
            prediction = "📊 LOW VIRAL POTENTIAL"
            recommendation = "Standard content, normal engagement"

        print("\n🔮 VIRAL POTENTIAL ANALYSIS")
        print("=" * 50)
        print(f"Tweet: {tweet.text[:100]}...")
        print(f"Author: @{tweet.author_username} ({author.followers_count:,} followers)")
        print(f"Age: {hours_since_post:.1f} hours")
        print("\nCurrent Metrics:")
        print(f"  ❤️ {tweet.like_count:,} likes ({likes_per_hour:.0f}/hr)")
        print(f"  🔄 {tweet.retweet_count:,} retweets ({retweets_per_hour:.0f}/hr)")
        print(f"  💬 {tweet.reply_count:,} replies ({replies_per_hour:.0f}/hr)")
        print("\nViral Signals:")
        for signal in signals:
            print(f"  {signal}")
        print(f"\nViral Score: {score}/100")
        print(f"Prediction: {prediction}")
        print(f"Recommendation: {recommendation}")

        return {
            "score": score,
            "prediction": prediction,
            "signals": signals,
            "metrics": {
                "likes_per_hour": likes_per_hour,
                "retweets_per_hour": retweets_per_hour,
                "performance_ratio": performance_ratio
            }
        }

asyncio.run(predict_viral_potential("https://x.com/user/status/123"))
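A single reading is only a snapshot. Since early momentum is the core signal, it can help to re-score a promising tweet a few times over its first hours and watch whether the score is rising. A minimal polling sketch that reuses predict_viral_potential above (watch_tweet is a hypothetical helper):

async def watch_tweet(tweet_url: str, checks: int = 4, interval_minutes: int = 30):
    """Re-score a tweet at intervals to see whether momentum is building."""
    scores = []
    for i in range(checks):
        result = await predict_viral_potential(tweet_url)
        if result is not None:
            scores.append(result["score"])
            # Rising scores across checks = momentum building
            if len(scores) >= 2 and scores[-1] > scores[-2]:
                print(f"⬆️ Score rising: {scores[-2]} → {scores[-1]}")
        if i < checks - 1:
            await asyncio.sleep(interval_minutes * 60)
    return scores

# asyncio.run(watch_tweet("https://x.com/user/status/123"))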
Topic Lifecycle Prediction¶
async def analyze_topic_lifecycle(topic: str):
    """
    Determine where a topic is in its lifecycle:
    - Emerging (early adopters)
    - Growing (rapid adoption)
    - Mainstream (peak awareness)
    - Declining (interest waning)
    - Niche (stable but small)
    """
    async with Xeepy() as x:
        # Get tweets from different time periods
        now = datetime.now()
        periods = []

        for weeks_ago in [4, 3, 2, 1, 0]:
            since = now - timedelta(weeks=weeks_ago + 1)
            until = now - timedelta(weeks=weeks_ago) if weeks_ago > 0 else now

            results = await x.scrape.search(
                topic,
                since=since.strftime("%Y-%m-%d"),
                until=until.strftime("%Y-%m-%d"),
                limit=500
            )

            # Analyze this period
            unique_authors = len(set(r.author_username for r in results))
            avg_likes = sum(r.like_count for r in results) / max(len(results), 1)

            periods.append({
                "week": weeks_ago,
                "volume": len(results),
                "unique_authors": unique_authors,
                "avg_engagement": avg_likes
            })

        # Analyze trajectory
        volumes = [p["volume"] for p in periods]
        authors = [p["unique_authors"] for p in periods]

        # Week-over-week growth
        if volumes[-2] > 0:
            volume_growth = (volumes[-1] - volumes[-2]) / volumes[-2] * 100
        else:
            volume_growth = 100 if volumes[-1] > 0 else 0

        # Author diversity (more authors = broader adoption)
        author_ratio = authors[-1] / max(volumes[-1], 1) * 100  # % unique

        # Determine lifecycle stage
        if volume_growth > 50 and author_ratio > 50:
            stage = "🚀 EMERGING"
            advice = "Get in early! High potential for first-mover advantage."
        elif volume_growth > 20 and volumes[-1] > volumes[0]:
            stage = "📈 GROWING"
            advice = "Active growth phase. Good time to establish presence."
        elif -10 <= volume_growth <= 20 and volumes[-1] > 100:
            stage = "🎯 MAINSTREAM"
            advice = "Peak awareness. Content here needs to be exceptional."
        elif volume_growth < -20:
            stage = "📉 DECLINING"
            advice = "Interest waning. Consider pivoting to related topics."
        else:
            stage = "🔬 NICHE"
            advice = "Small but stable audience. Good for targeted engagement."

        print(f"\n📊 TOPIC LIFECYCLE ANALYSIS: {topic}")
        print("=" * 50)
        print(f"\nLifecycle Stage: {stage}")
        print(f"Volume Growth: {volume_growth:+.1f}%")
        print(f"Author Diversity: {author_ratio:.0f}%")
        print("\nWeekly Volume:")
        for p in periods:
            bar = "█" * min(int(p["volume"] / 10), 30)
            week_label = "This week" if p["week"] == 0 else f"{p['week']} weeks ago"
            print(f"  {week_label:15}: {bar} {p['volume']}")
        print(f"\n💡 Advice: {advice}")

        return {
            "topic": topic,
            "stage": stage,
            "volume_growth": volume_growth,
            "advice": advice,
            "periods": periods
        }

asyncio.run(analyze_topic_lifecycle("AI agents"))
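To put author diversity in perspective: 400 tweets from 300 distinct authors (75% diversity) suggests broad adoption, while 400 tweets from 40 authors (10%) points to a small community posting repeatedly. When weighing several related topics within one niche, one option is to run the analysis over each and rank them, as in this sketch (compare_topics is a hypothetical helper):

async def compare_topics(topics: list):
    """Run the lifecycle analysis over several topics and rank by growth."""
    results = []
    for topic in topics:
        results.append(await analyze_topic_lifecycle(topic))
    ranked = sorted(results, key=lambda r: -r["volume_growth"])
    print("\n🏁 TOPICS RANKED BY VOLUME GROWTH")
    for r in ranked:
        print(f"  {r['volume_growth']:+7.1f}%  {r['stage']}  {r['topic']}")
    return ranked

# asyncio.run(compare_topics(["AI agents", "RAG", "prompt engineering"]))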
Continuous Trend Monitoring¶
async def continuous_trend_monitor(
    niches: dict,  # {"niche_name": {"keywords": [...], "accounts": [...]}}
    check_interval_hours: int = 6
):
    """
    Run continuous trend monitoring across multiple niches.
    """
    predictor = TrendPredictor()

    async with Xeepy() as x:
        while True:
            all_trends = {}

            for niche_name, config in niches.items():
                print(f"\n🔍 Scanning {niche_name}...")
                trends = await predictor.scan_niche(
                    seed_keywords=config["keywords"],
                    niche_accounts=config["accounts"],
                    hours=24
                )
                all_trends[niche_name] = trends[:10]  # Top 10 per niche

            # Generate report
            report = f"""
🔮 TREND PREDICTION REPORT
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}
{'=' * 50}
"""
            for niche, trends in all_trends.items():
                report += f"\n\n📊 {niche.upper()}\n"
                report += "-" * 30 + "\n"
                for t in trends[:5]:
                    velocity = t["velocity"]
                    emoji = "🔥" if velocity >= 3 else "✨" if velocity >= 2 else "📈"
                    report += f"{emoji} {t['keyword']}: {velocity:.1f}x velocity\n"

            # Send to notifications
            await x.notify.discord(
                webhook_url="...",
                content=report
            )

            print(report)
            print(f"\n⏰ Next scan in {check_interval_hours} hours...")
            await asyncio.sleep(check_interval_hours * 3600)


# Configure and run
niches = {
    "AI/ML": {
        "keywords": ["AI", "GPT", "LLM", "machine learning"],
        "accounts": ["OpenAI", "AnthropicAI"]
    },
    "Crypto": {
        "keywords": ["Bitcoin", "Ethereum", "DeFi", "Web3"],
        "accounts": ["VitalikButerin", "caborik"]
    },
    "Startups": {
        "keywords": ["startup", "founder", "YC", "Series A"],
        "accounts": ["ycombinator", "paulg"]
    }
}

asyncio.run(continuous_trend_monitor(niches, check_interval_hours=6))
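One caveat: the while True loop above dies on the first network error. For a monitor meant to run for days, consider wrapping each scan in a retry with backoff, for example (resilient_scan is a hypothetical helper):

async def resilient_scan(predictor: TrendPredictor, config: dict, retries: int = 3):
    """Retry a niche scan with exponential backoff instead of crashing the monitor."""
    for attempt in range(retries):
        try:
            return await predictor.scan_niche(
                seed_keywords=config["keywords"],
                niche_accounts=config["accounts"],
                hours=24
            )
        except Exception as e:
            wait = 60 * 2 ** attempt  # 1, 2, then 4 minutes
            print(f"⚠️ Scan failed ({e}); retrying in {wait}s...")
            await asyncio.sleep(wait)
    return []  # Give up on this niche for this cycle

Swap the predictor.scan_niche call inside the monitor loop for await resilient_scan(predictor, config) to get this behavior.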
Best Practices¶
Trend Prediction Tips
- Velocity matters more than absolute numbers
- Track multiple signals (volume, authors, engagement); one way to blend them is sketched after this list
- Compare within niches, not across them
- Act fast on emerging trends
- Validate with multiple data points
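As a starting point for blending signals, here is a sketch that combines volume growth, author diversity, and engagement growth into a single 0-100 score (composite_trend_score is a hypothetical helper; the weights are illustrative assumptions, not tuned values):

def composite_trend_score(volume_growth: float, author_ratio: float, engagement_growth: float) -> float:
    """Blend three trend signals into one 0-100 score. Weights are illustrative."""
    volume_part = min(max(volume_growth, 0) / 100, 1.0) * 50      # volume velocity dominates
    author_part = min(author_ratio / 100, 1.0) * 30               # breadth of adoption
    engagement_part = min(max(engagement_growth, 0) / 100, 1.0) * 20
    return volume_part + author_part + engagement_part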
Limitations
- Past performance doesn't guarantee future trends
- External events can cause sudden shifts
- Sample sizes may be limited
- Use as one input, not the only decision factor
Next Steps¶
- Viral Content Hunting - Find viral content early
- Network Analysis - Map influence networks
- Sentiment Analysis - Understand market mood