NLP-prac

Invoker
1 min read
*POEM*

import nltk
from nltk import sent_tokenize
from nltk import word_tokenize

# Fetch the tokenizer data first: word_tokenize() below needs the
# 'punkt_tab' resource and fails with a LookupError when it is missing.
nltk.download('punkt_tab')

# Load the whole input text. The context manager closes the file handle,
# and the explicit encoding avoids platform-dependent decoding.
with open('/content/mytext.txt', encoding='utf-8') as textfile:
    text = textfile.read()
print(text)

# Split the text into word and punctuation tokens and show them.
words = word_tokenize(text)
print(words)

# Filtering stop words: keep only the tokens whose case-folded form is
# not in NLTK's English stop-word list.
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
filtered_lists = [
    token for token in words if token.casefold() not in stop_words
]

print(filtered_lists)

# Stemming: re-tokenize the text and run every token through the
# Porter stemmer.
from nltk.stem import PorterStemmer
string_for_stemming = text
words = word_tokenize(string_for_stemming)
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, words))
print(stemmed_words)

# Part-of-speech tagging: tag each token and print the tagged result.
from nltk import pos_tag
# The tagger resource is downloaded before pos_tag() is called.
nltk.download('averaged_perceptron_tagger_eng')
# `words` is the token list produced by the word_tokenize() call above.
string_for_pos = words
pos = pos_tag(string_for_pos)
print(pos)


# Download the tag-set data, then call NLTK's help routine for the
# Penn Treebank (UPenn) tag set -- presumably prints the tag reference.
nltk.download('tagsets_json')
nltk.help.upenn_tagset()

***********************STREAMLIT CODE**********************

import nltk
import streamlit as st
from nltk import pos_tag
from nltk import sent_tokenize
from nltk import word_tokenize
from nltk.stem import PorterStemmer

# NLTK resources used by the tokenizer and the part-of-speech tagger.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

# Banner shown at the top of the page.
st.success("INVOKER THE GREAT")

# File picker restricted to .txt files; the result is None until the
# user has uploaded something.
upload_file = st.file_uploader('upload a text file', type=['txt'])
# The uploader yields an uploaded-file object (or None), never the literal
# True, so comparing with `== True` could not succeed. Test for None instead.
if upload_file is not None:
    st.success("Success")
    # getvalue() returns the full contents without moving the stream
    # position, so later reads of the same upload still see the data.
    text = upload_file.getvalue().decode("utf-8")
    st.text(text)

# "tokenize" button: show the word tokens of the uploaded text.
tokenize = st.button("tokenize")
if tokenize:
    if upload_file is None:
        # Clicking before an upload used to crash on None.read().
        st.warning("Please upload a text file first.")
    else:
        # getvalue() does not depend on the current stream position,
        # unlike read(), which returns nothing once the stream is consumed.
        text = upload_file.getvalue().decode("utf-8")
        words = word_tokenize(text)
        st.success(words)


# "stemmer" button: show the Porter stem of every token in the upload.
stemmer = st.button("stemmer")
if stemmer:
    if upload_file is None:
        # Clicking before an upload used to crash on None.read().
        st.warning("Please upload a text file first.")
    else:
        # getvalue() does not depend on the current stream position,
        # unlike read(), which returns nothing once the stream is consumed.
        text = upload_file.getvalue().decode("utf-8")
        # Separate name so the button state in `stemmer` is not overwritten.
        porter = PorterStemmer()
        string_for_stemming = text
        words = word_tokenize(string_for_stemming)
        stemmed_words = [porter.stem(word) for word in words]

        st.success(stemmed_words)

# "pos" button: show the part-of-speech tag of every token in the upload.
pos = st.button("pos")
if pos:
    if upload_file is None:
        # Clicking before an upload used to crash on None.read().
        st.warning("Please upload a text file first.")
    else:
        # getvalue() does not depend on the current stream position,
        # unlike read(), which returns nothing once the stream is consumed.
        text = upload_file.getvalue().decode("utf-8")
        words = word_tokenize(text)

        string_for_pos = words
        # Separate name so the button state in `pos` is not overwritten.
        tagged = pos_tag(string_for_pos)
        st.success(tagged)
0
Subscribe to my newsletter

Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Invoker
Invoker