Saturday, December 16, 2023

tag generator

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import spacy
from collections import Counter

# Define stopwords and maximum number of hashtags
stop_words = set(stopwords.words("english"))
max_hashtags = 5

# Load spaCy model
nlp = spacy.load("en_core_web_sm")


def generate_hashtags(text):
    """
    Generates hashtags from a given text.

    Args:
        text: The text to analyze.

    Returns:
        A list of generated hashtags.
    """
    # Lowercase and tokenize the text
    tokens = word_tokenize(text.lower())

    # Remove stopwords
    filtered_tokens = [token for token in tokens if token not in stop_words]

    # Extract named entities from the original text (NER works better on the
    # original casing than on a lowercased, stopword-stripped string)
    doc = nlp(text)
    entities = [ent.text.lower() for ent in doc.ents]

    # Combine keywords and entities
    potential_hashtags = filtered_tokens + entities

    # Join multi-word entities with underscores and drop invalid candidates
    valid_hashtags = [
        hashtag.replace(" ", "_")
        for hashtag in potential_hashtags
        if hashtag.replace(" ", "").isalnum() and len(hashtag) <= 25
    ]

    # Count occurrences (before deduplicating, so frequencies are meaningful)
    # and select the top hashtags
    hashtag_counts = Counter(valid_hashtags)
    top_hashtags = hashtag_counts.most_common(max_hashtags)

    # Return a list of top hashtags
    return [hashtag for hashtag, count in top_hashtags]


# Example usage
text = "This is a beautiful article about the Great Barrier Reef and its diverse marine life."
hashtags = generate_hashtags(text)
print(f"Generated hashtags: {hashtags}")
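
Running the generator assumes the NLTK tokenizer data, the NLTK stopword list, and the small English spaCy model are already installed. A one-time setup along these lines should cover it (a minimal sketch; exact resource names can vary slightly across NLTK and spaCy versions):

# One-time setup for the resources the tag generator relies on
import nltk
import spacy.cli

nltk.download("punkt")        # tokenizer models used by word_tokenize
nltk.download("stopwords")    # English stopword list
spacy.cli.download("en_core_web_sm")  # small English pipeline with NER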


screen recorder

[Embedded tool: Screen Recorder, with Start Recording and Stop Recording buttons] ...
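
Only the widget's button labels survive here. As a rough, stand-alone sketch of the same start/stop recording idea (a hypothetical Python analogue built on the mss and opencv-python packages, not the post's actual embedded widget):

# Hypothetical stand-in for the post's screen recorder widget: grab the
# primary monitor for a fixed duration and write the frames to a video file.
import time

import cv2
import mss
import numpy as np


def record_screen(output_path="recording.avi", duration=5.0, fps=10):
    """Capture the primary monitor for `duration` seconds and save a video."""
    with mss.mss() as sct:
        monitor = sct.monitors[1]  # primary monitor
        width, height = monitor["width"], monitor["height"]
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        start = time.time()          # "Start Recording"
        while time.time() - start < duration:  # "Stop Recording" after duration
            frame = np.array(sct.grab(monitor))            # BGRA screenshot
            frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
            writer.write(frame)
            time.sleep(1.0 / fps)

        writer.release()


record_screen()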