Saturday, December 16, 2023
Tag Generator
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import spacy
from collections import Counter
# Define stopwords and maximum number of hashtags.
# Use a set for stop_words: membership is tested once per token inside
# generate_hashtags, and set lookup is O(1) vs O(n) for a list.
stop_words = set(stopwords.words("english"))
max_hashtags = 5
# Load spaCy's small English model once at import time (loading is slow,
# so it must not happen per call).
nlp = spacy.load("en_core_web_sm")
def generate_hashtags(text):
    """
    Generates hashtags from a given text.

    Combines content words (stopwords removed) with spaCy named
    entities, keeps only alphanumeric candidates of reasonable length,
    and returns the most frequent ones.

    Args:
        text: The text to analyze.

    Returns:
        A list of at most ``max_hashtags`` hashtag strings, most
        frequent first.
    """
    # Lowercase and tokenize the text.
    tokens = word_tokenize(text.lower())
    # Remove stopwords so only content words remain.
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # Extract named entities from the filtered text.
    doc = nlp(" ".join(filtered_tokens))
    entities = [str(ent) for ent in doc.ents]
    # Candidate hashtags: individual keywords plus named entities.
    potential_hashtags = filtered_tokens + entities
    # Keep only alphanumeric candidates (spaces allowed, since they are
    # normalized to underscores) that are not overly long.
    # BUG FIX: the original checked isalnum() on the *underscored* form,
    # which silently rejected every multi-word entity ('_' is not alnum).
    valid_hashtags = [
        candidate.replace(" ", "_")
        for candidate in potential_hashtags
        if candidate.replace(" ", "").isalnum() and len(candidate) <= 25
    ]
    # BUG FIX: count occurrences *before* deduplicating. The original
    # ran the list through set() first, so every count was 1 and
    # most_common() returned an arbitrary selection rather than the most
    # frequent hashtags. Counter deduplicates while counting.
    hashtag_counts = Counter(valid_hashtags)
    top_hashtags = hashtag_counts.most_common(max_hashtags)
    # Return just the hashtag strings, most frequent first.
    return [hashtag for hashtag, _count in top_hashtags]
# Example usage: run the generator on a sample sentence and show the result.
sample_text = "This is a beautiful article about the Great Barrier Reef and its diverse marine life."
generated = generate_hashtags(sample_text)
print(f"Generated hashtags: {generated}")
Subscribe to:
Post Comments (Atom)
Screen Recorder
Screen Recorder Screen Recorder Tool Start Recording Stop Recording...
-
import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servl...
-
Screen Recorder Screen Recorder Tool Start Recording Stop Recording...
-
import random class ArticleGenerator: def __init__(self): self.templates = [ "The {adj1} {noun} {verb} over th...
No comments:
Post a Comment