HashtagScraper¶
Scrapes tweets containing specific hashtags.
Import¶
Class Signature¶
class HashtagScraper:
def __init__(
self,
browser_manager: BrowserManager,
rate_limiter: Optional[RateLimiter] = None
)
Parameters¶
| Parameter | Type | Default | Description |
|---|---|---|---|
| `browser_manager` | `BrowserManager` | Required | Browser manager instance |
| `rate_limiter` | `Optional[RateLimiter]` | `None` | Rate limiter instance |
Methods¶
| Method | Returns | Description |
|---|---|---|
| `scrape(hashtag, limit)` | `ScrapeResult[Tweet]` | Get tweets with hashtag |
| `scrape_latest(hashtag)` | `ScrapeResult[Tweet]` | Latest hashtag tweets |
| `scrape_top(hashtag)` | `ScrapeResult[Tweet]` | Top/popular tweets |
| `scrape_multiple(hashtags)` | `Dict[str, ScrapeResult[Tweet]]` | Scrape multiple hashtags |
| `get_stats(hashtag)` | `HashtagStats` | Hashtag statistics |
scrape¶
async def scrape(
self,
hashtag: str,
limit: int = 100,
sort_by: str = "latest",
language: Optional[str] = None,
cursor: Optional[str] = None
) -> ScrapeResult[Tweet]
Scrape tweets containing a specific hashtag.
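Parameters:
- `hashtag`: Hashtag to search for (with or without the leading `#`)
- `limit`: Maximum number of tweets to fetch (default `100`)
- `sort_by`: Sort order, either `"latest"` or `"top"` (default `"latest"`)
- `language`: Optional language code to filter by, e.g. `"en"` (default `None`)
- `cursor`: Optional pagination cursor (default `None`)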
scrape_multiple¶
async def scrape_multiple(
self,
hashtags: List[str],
limit_per_hashtag: int = 50
) -> Dict[str, ScrapeResult[Tweet]]
Scrape multiple hashtags in parallel.
get_stats¶
Get statistics about a hashtag. Returns a `HashtagStats` object.
HashtagStats Object¶
@dataclass
class HashtagStats:
    """Statistics computed over a sample of tweets for a single hashtag."""

    hashtag: str                  # The hashtag
    sample_size: int              # Tweets analyzed
    avg_likes: float              # Average likes
    avg_retweets: float           # Average retweets
    avg_replies: float            # Average replies
    top_authors: List[str]        # Most active authors
    related_hashtags: List[str]   # Co-occurring hashtags
    peak_hours: List[int]         # Most active hours (UTC)
    language_distribution: Dict   # Languages breakdown
Usage Examples¶
Basic Hashtag Scraping¶
import asyncio

from xeepy import Xeepy


async def main():
    """Scrape up to 100 #python tweets and print author, text preview and engagement."""
    async with Xeepy() as x:
        result = await x.scrape.hashtag("#python", limit=100)
        for tweet in result.items:
            print(f"@{tweet.author.username}: {tweet.text[:80]}...")
            print(f" ❤️ {tweet.like_count} | 🔄 {tweet.retweet_count}")


# asyncio must be imported for this entry point to run.
asyncio.run(main())
Latest vs Top Tweets¶
import asyncio

from xeepy import Xeepy


def average_likes(tweets):
    """Return the mean like count of ``tweets``, or 0.0 when there are none."""
    count = len(tweets)
    if not count:
        # A scrape can return no tweets; avoid a ZeroDivisionError.
        return 0.0
    return sum(t.like_count for t in tweets) / count


async def main():
    """Compare average likes of the latest versus the top tweets for #AI."""
    async with Xeepy() as x:
        # Get latest tweets
        latest = await x.scrape.hashtag(
            "#AI",
            limit=50,
            sort_by="latest"
        )
        # Get top/popular tweets
        top = await x.scrape.hashtag(
            "#AI",
            limit=50,
            sort_by="top"
        )
        print(f"Latest avg likes: {average_likes(latest.items):.0f}")
        print(f"Top avg likes: {average_likes(top.items):.0f}")


asyncio.run(main())
Multiple Hashtags¶
import asyncio

from xeepy import Xeepy


async def main():
    """Scrape several hashtags and print tweet count and average likes for each."""
    async with Xeepy() as x:
        hashtags = ["#python", "#javascript", "#rust", "#golang"]
        results = await x.scrape.hashtags(hashtags, limit_per_hashtag=100)
        for hashtag, result in results.items():
            tweet_count = len(result.items)
            # A hashtag may return no tweets; report 0 instead of dividing by zero.
            avg_likes = (
                sum(t.like_count for t in result.items) / tweet_count
                if tweet_count
                else 0.0
            )
            print(f"{hashtag}: {tweet_count} tweets, avg {avg_likes:.0f} likes")


asyncio.run(main())
Hashtag Analytics¶
import asyncio

from xeepy import Xeepy


async def analyze_hashtag(hashtag: str):
    """Fetch statistics for ``hashtag`` from a 200-tweet sample and print a report."""
    async with Xeepy() as x:
        stats = await x.scrape.hashtag_stats(hashtag, sample_size=200)
        print(f"Hashtag Analysis: {stats.hashtag}")
        print("=" * 50)
        print(f"Sample size: {stats.sample_size} tweets")
        print(f"Avg likes: {stats.avg_likes:.1f}")
        print(f"Avg retweets: {stats.avg_retweets:.1f}")
        print(f"Avg replies: {stats.avg_replies:.1f}")
        # Only the first five entries of each list are shown.
        print(f"\nTop authors: {', '.join(stats.top_authors[:5])}")
        print(f"Related hashtags: {', '.join(stats.related_hashtags[:5])}")
        print(f"Peak hours (UTC): {stats.peak_hours[:5]}")


asyncio.run(analyze_hashtag("#MachineLearning"))
Filter by Language¶
import asyncio

from xeepy import Xeepy


async def main():
    """Scrape two hashtags with a language filter and print how many tweets each returned."""
    async with Xeepy() as x:
        # English tweets only
        result = await x.scrape.hashtag(
            "#news",
            limit=200,
            language="en"
        )
        # Spanish tweets
        result_es = await x.scrape.hashtag(
            "#noticias",
            limit=200,
            language="es"
        )
        print(f"English tweets: {len(result.items)}")
        print(f"Spanish tweets: {len(result_es.items)}")


asyncio.run(main())
Find Trending Content¶
import asyncio
from datetime import datetime, timedelta

from xeepy import Xeepy


async def find_viral_hashtag_content(hashtag: str, min_likes: int = 1000):
    """Print the ten most-liked tweets for ``hashtag`` with at least ``min_likes`` likes.

    Scrapes up to 500 tweets, keeps those whose like count is greater than or
    equal to ``min_likes``, and lists them from most to least liked.
    """
    async with Xeepy() as x:
        result = await x.scrape.hashtag(hashtag, limit=500)
        viral = [
            t for t in result.items
            if t.like_count >= min_likes
        ]
        viral.sort(key=lambda t: t.like_count, reverse=True)
        # The filter is inclusive, so the header says ">=" rather than ">".
        print(f"Viral tweets for {hashtag} (>={min_likes} likes):")
        for tweet in viral[:10]:
            print(f"\n@{tweet.author.username} ({tweet.like_count:,} likes)")
            print(f" {tweet.text[:100]}...")


asyncio.run(find_viral_hashtag_content("#startup", min_likes=500))
See Also¶
- Tweet Model - Tweet data structure
- SearchScraper - General search
- FollowActions - Follow by hashtag