Extracting News From Top News APIs and Visualizing Top News Keywords With a Word Cloud in Python
1. Import the necessary libraries:
import requests
from wordcloud import WordCloud
import matplotlib.pyplot as plt
2. Define a function called Topnews() that will extract the top news from multiple news APIs and create a word cloud:
def Topnews():
# API keys for multiple news sources
api_keys = {
"bbc": "Api_number",
"cnn": "Api_number",
"fox": "Api_number"
}
This dictionary contains the API keys for multiple news sources. Replace each "Api_number" placeholder with your own API key.
# URLs for multiple news sources
urls = {
"bbc": "https://newsapi.org/v1/articles?source=bbc-news&sortBy=top&apiKey=" + api_keys["bbc"],
"cnn": "https://newsapi.org/v1/articles?source=cnn&sortBy=top&apiKey=" + api_keys["cnn"],
"ap": "https://newsapi.org/v1/articles?source=associated-press&sortBy=top&apiKey=" + api_keys["fox"]
}
This dictionary contains the URLs for multiple news sources. You can replace these URLs with your own URLs.
# Get the top news from each source
results = []
for source in urls:
open_page = requests.get(urls[source]).json()
article = open_page["articles"]
for ar in article:
results.append(ar["title"])
This loop gets the top news from each source and appends the title of every article to a list called results.
# Combine all the results into one string
text = ' '.join(results)
This line combines all the results into one string called text.
# Create the word cloud
wordcloud = WordCloud(width = 800, height = 800,
background_color ='white',
min_font_size = 10).generate(text)
This statement creates a word cloud from the combined text using the wordcloud library.
# Plot the WordCloud image
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)
plt.show()
This code plots the word cloud image using matplotlib.
3. Call the Topnews() function:
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Topnews()
Please find the full code snippet below:
import requests
from wordcloud import WordCloud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
def Topnews():
    """Fetch top headlines from several news sources and show a word cloud.

    Queries the NewsAPI ``v1/articles`` endpoint once per source (BBC News,
    CNN, Associated Press), collects every article title, joins the titles
    into a single string and renders a word cloud of it with matplotlib.

    The API key is taken from the ``NEWSAPI_KEY`` environment variable when
    it is set and non-empty; otherwise the key embedded below is used.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If no headline could be collected from any source.
    """
    import os

    # NOTE(review): this key is committed in source; prefer supplying your own
    # through the NEWSAPI_KEY environment variable.
    fallback_key = "509ca54a17e9429b8b7cc33cc1c50aa3"
    api_key = os.environ.get("NEWSAPI_KEY") or fallback_key

    # Short label -> NewsAPI source identifier.
    sources = {
        "bbc": "bbc-news",
        "cnn": "cnn",
        "ap": "associated-press",
    }
    # NOTE(review): this is the v1 endpoint used by the original code --
    # confirm it is still served before relying on it.
    endpoint = "https://newsapi.org/v1/articles"

    # Get the top news from each source.
    headlines = []
    for source_id in sources.values():
        response = requests.get(
            endpoint,
            # Let requests build and encode the query string.
            params={"source": source_id, "sortBy": "top", "apiKey": api_key},
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        # Surface HTTP errors (bad key, rate limit, ...) directly instead of
        # failing later with a KeyError on a missing "articles" field.
        response.raise_for_status()
        articles = response.json().get("articles") or []
        # Skip articles whose title is missing or null; str.join needs strings.
        headlines.extend(
            article["title"] for article in articles if article.get("title")
        )

    if not headlines:
        raise ValueError("No headlines were returned by any news source.")

    # Combine all the results into one string.
    headlines_text = ' '.join(headlines)

    # Create the word cloud, ignoring common English stop words.
    cloud = WordCloud(
        background_color='black',
        stopwords=set(STOPWORDS),
        max_words=100,
        max_font_size=60,
        scale=3,
    ).generate(headlines_text)

    # Plot the word cloud image without axes.
    plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    plt.imshow(cloud)
    plt.show()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Topnews()
Please find the GitHub link below:
https://github.com/bashamsc/News_API_Extraction_Word_Cloud.git