For this task, our objective is to visualize the key points and themes from a public lecture delivered by the Secretary General of the National Energy Council. The lecture, held on May 29, 2024, at FISIP UPNVJ, covered critical topics in national energy policy and strategy. To achieve this, we first attended and recorded the lecture; the recording is available here: Kuliah umum.mp3.
Next, we transcribed the recording into a text file, which can be accessed here: transkrip.txt. From this transcript we created a word cloud that highlights the words and phrases used most frequently during the lecture. This visualization offers a quick way to grasp the main topics and issues emphasized by the speaker, and thus valuable insight into the current discourse on national energy policy.
# Load the required libraries
library(tm)
library(wordcloud)
library(RColorBrewer)
# Read the text file
text <- readLines("resources/transkrip.txt", encoding = "UTF-8")
# Create a corpus
corpus <- Corpus(VectorSource(text))
# Define a list of Indonesian stopwords, plus "speaker" to drop the speaker
# labels added by the transcription (digits are already handled by removeNumbers below)
stopwords_id <- c("yang", "dan", "di", "ke", "dari", "ini", "itu", "dengan", "untuk", "pada", "adalah", "sebagai", "dalam", "tidak", "akan", "atau", "juga", "oleh", "sudah", "kami", "karena", "tersebut", "dapat", "antara", "saya", "ia", "namun", "lebih", "seperti", "bagi", "mereka", "setelah", "jika", "saat", "hingga", "kita", "terus", "speaker")
# Text cleansing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lowercase
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords_id) # Remove Indonesian stopwords
corpus <- tm_map(corpus, stripWhitespace) # Strip whitespace
# Create a term-document matrix
tdm <- TermDocumentMatrix(corpus)
# Convert the term-document matrix into a matrix
m <- as.matrix(tdm)
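# Note: converting to a dense matrix is fine for a single transcript, but can
# be memory-intensive for much larger corpora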
# Get word frequencies
word_freqs <- sort(rowSums(m), decreasing = TRUE)
# Create a data frame with words and their frequencies
word_freqs_df <- data.frame(word = names(word_freqs), freq = word_freqs)
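# Optional sanity check: look at the ten most frequent terms before plotting
# (the exact terms depend on the transcript contents)
head(word_freqs_df, 10)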
# Generate the word cloud
set.seed(1234) # for reproducibility
wordcloud(words = word_freqs_df$word, freq = word_freqs_df$freq,
          min.freq = 2, max.words = 200,
          random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
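To keep a copy of the figure alongside the report, the same call can be redirected to an image file. The sketch below is optional and assumes a local output filename; any base-R graphics device would work.
# Optionally save the word cloud to a PNG file (assumed output path; adjust as needed)
png("wordcloud_kuliah_umum.png", width = 800, height = 800)
set.seed(1234) # same seed so the layout matches the on-screen plot
wordcloud(words = word_freqs_df$word, freq = word_freqs_df$freq,
          min.freq = 2, max.words = 200,
          random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
dev.off()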