Monday 1 January 2018

Natural Language Toolkit (NLTK) Python setup for Windows

How to install and configure NLTK on a Windows system. You can follow the steps below:

First of all, check which Python version is installed



If you haven't configured it yet, you can download Python (https://www.python.org/downloads) and install it first from the given path.

Let's start the installation process. Install numpy using the following command



Then, install NLTK using the following command



Now, download the NLTK packages using the command below

>>> import nltk



Once you run the download command, it will open an installer dialog from which you have to select the packages to install. You can select "all" to download all packages.




Once the download is complete, it will show like this



Once all packages are installed, click Close to return to the command prompt.

The prompt will show True

Now, you can verify whether it was successfully installed or not.



Yeah! It works great! You can post your queries or installation issues!

4 comments:

  1. Important Links

    Use for grammar detection
    http://rwet.decontextualize.com/book/textblob/

    Beautiful parse
    https://www.dataquest.io/blog/web-scraping-tutorial-python/

    Beautiful Soup Collecting data
    https://www.dataquest.io/blog/web-scraping-beautifulsoup/

    ReplyDelete
  2. Sample for website scraping

    import urllib3
    import nltk
    from bs4 import BeautifulSoup
    from nltk.collections import *
    import requests
    import dateparser

    prefixes = ["jan.", "feb.", "mar.", "apr.", "may.", "jun.", "jul.", "sept.", "oct.", "nov.", "dec.", "january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"]

    #Fucaton will check wheter it's month valid data or not eg. "Feb. 1:"
    #Return 1 = success, 0 = fail
    def check_start_with_syntax(p_tags_text):
    if p_tags_text.lower().startswith(tuple(prefixes)):
    return 1
    else:
    return 0

    #Fuction will parse the date e.g Feb.1, July 29/30
    #return start date yyyy/mm/dd hh:mm:ss end date yyyy/mm/dd hh:mm:ss
    def prase_event_date(p_tags_text):
    event_parse_dt = 'un-formated'
    event_parse_dt = p_tags_text[0:p_tags_text.index(':')]
    #Check for start date & end date
    #Let check date with this formate 29/30
    if(event_parse_dt.find('/')==-1):
    return dateparser.parse(event_parse_dt), dateparser.parse(event_parse_dt)
    else:
    event_parse_month_temp = str(event_parse_dt[0:p_tags_text.index(' ')]).strip()
    event_parse_days_temp = str(event_parse_dt.replace(event_parse_month_temp,'')).strip()
    event_parse_day_split = event_parse_days_temp.split('/')
    return dateparser.parse(str(event_parse_month_temp+' '+event_parse_day_split[0])), dateparser.parse(str(event_parse_month_temp+' '+event_parse_day_split[1]))


    url = "https://calendar.html"
    request = requests.get(url)

    #This will print
    #print(request)

    #This will print the status code >> 200
    #print(request.status_code)

    reponse_data = request.text
    soup = BeautifulSoup(reponse_data,"html.parser")

    #Get full article
    article_containers = soup.find_all('div',class_ = 'article-content')
    print(len(article_containers))

    #Get all praragraph
    article_all_paragraph = soup.find_all('p')
    print(len(article_all_paragraph))

    #Find all P
    for p_tags in soup.find_all('p'):
    #result = check_start_with_syntax(p_tags.get_text())
    if(check_start_with_syntax(str(p_tags.get_text()).strip())==1):
    print('\n')
    start_date, end_date = prase_event_date(str(p_tags.get_text()).strip())
    print(start_date)
    print(end_date)
    print(p_tags.get_text())
    else:
    #Nothing to do
    nothing = 'nothing to do'















    ReplyDelete
  3. import urllib3
    import nltk
    from bs4 import BeautifulSoup
    from nltk.collections import *
    import requests
    import dateparser

    prefixes = ["jan.", "feb.", "mar.", "apr.", "may.", "jun.", "jul.", "sept.", "oct.", "nov.", "dec.", "january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"]

    #Fucaton will check wheter it's month valid data or not eg. "Feb. 1:"
    #Return 1 = success, 0 = fail
    def check_start_with_syntax(p_tags_text):
    if p_tags_text.lower().startswith(tuple(prefixes)):
    return 1
    else:
    return 0

    #Fuction will parse the date e.g Feb.1, July 29/30
    #return start date yyyy/mm/dd hh:mm:ss end date yyyy/mm/dd hh:mm:ss
    def prase_event_date(p_tags_text):
    event_parse_dt = 'un-formated'
    print(p_tags_text)
    if(p_tags_text.find(':')==-1):
    return dateparser.parse(event_parse_dt), dateparser.parse(event_parse_dt)
    else:
    event_parse_dt = p_tags_text[0:p_tags_text.find(':')]
    #Check for start date & end date
    #Let check date with this formate 29/30
    if(event_parse_dt.find('/')==-1):
    return dateparser.parse(event_parse_dt), dateparser.parse(event_parse_dt)
    else:
    event_parse_month_temp = str(event_parse_dt[0:p_tags_text.index(' ')]).strip()
    event_parse_days_temp = str(event_parse_dt.replace(event_parse_month_temp,'')).strip()
    event_parse_day_split = event_parse_days_temp.split('/')
    return dateparser.parse(str(event_parse_month_temp+' '+event_parse_day_split[0])), dateparser.parse(str(event_parse_month_temp+' '+event_parse_day_split[1]))





    #Let code begin
    filepath = 'InputText.txt'
    with open(filepath) as fp:
    line = fp.readline()
    while line:
    line = fp.readline()
    if(len(line)!=0):
    #print(line.strip())
    if(check_start_with_syntax(str(line).strip())==1):
    print('\n')
    start_date, end_date = prase_event_date(str(line).strip())
    print(start_date)
    print(end_date)
    print(str(line).strip())
    else:
    #Nothing to do
    nothing = 'nothing to do'



    url = "https://www.space.com/32286-space-calendar.html"
    request = requests.get(url)

    #This will print
    print(request)

    #This will print the status code >> 200
    #print(request.status_code)

    reponse_data = request.text
    soup = BeautifulSoup(reponse_data,"html.parser")

    #Get full article
    article_containers = soup.find_all('div',class_ = 'article-content')
    print(len(article_containers))

    #Get all praragraph
    article_all_paragraph = soup.find_all('p')
    print(len(article_all_paragraph))

    #Find all P
    #for p_tags in soup.find_all('p'):
    # #result = check_start_with_syntax(p_tags.get_text())
    # if(check_start_with_syntax(str(p_tags.get_text()).strip())==1):
    # print('\n')
    # start_date, end_date = prase_event_date(str(p_tags.get_text()).strip())
    # print(start_date)
    # print(end_date)
    # print(p_tags.get_text())
    # else:
    # #Nothing to do
    # nothing = 'nothing to do'















    ReplyDelete
  4. How to parse words

    import nltk
    from nltk.util import ngrams
    from collections import Counter
    from itertools import chain

    wordSet="gmt a.m at"
    n = 1
    ngrams1= ngrams(wordSet.split(" "), n)

    #Let code begin
    filepath = 'InputText.txt'
    with open(filepath) as fp:
    line = fp.readline()
    while line:
    line = fp.readline()
    if(len(line)!=0):
    #print(line.strip())
    ngrams2= ngrams(line.strip().split(" "), n)
    counter= Counter(chain(ngrams2,ngrams1))
    print([k[0] for k,v in counter.items() if v>1])















    ReplyDelete