In [1]:
import requests
import pandas as pd
import re
import numpy as np
import sklearn 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import os
from argparse import ArgumentParser
from bs4 import BeautifulSoup
import urllib.request
from spellchecker import SpellChecker

In [2]:
url = 'https://www.menuco.co'

In [3]:
response = requests.get(url)

In [4]:
response

<Response [200]>

In [5]:
response.status_code

200

In [6]:
menuco_html = response.text

In [7]:
menuco_html

'<!DOCTYPE html>\r\n<html lang="en-US">\r\n<head itemscope="itemscope" itemtype="http://schema.org/WebSite">\r\n\r\n\t<meta charset="UTF-8">\r\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0" />\r\n\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\r\n\t<link rel="profile" href="https://gmpg.org/xfn/11">\r\n\t<meta name="mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">\n<link rel="pingback" href="https://menuco.co/xmlrpc.php" />\n<meta name=\'robots\' content=\'index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1\' />\n<meta itemprop="name" content="Menuco Ana" />\n<meta itemprop="url" content="https://menuco.co/" />\n\r\n\t<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->\r\n\t<title>Home Page - Menuco Ana</title>\r\n\t<link rel="canonical" href="https:

## Extract subpage urls

In [11]:
whole_list=[]
html_page = urllib.request.urlopen("http://www.menuco.co")
soup = BeautifulSoup(html_page, "html.parser")
for link in soup.findAll('a'):
    if link not in whole_list:
        whole_list.append(link.get('href'))
    else:
        pass
    #print(link.get('href'))

In [12]:
whole_list

['#primary',
 '#content',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register/',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register',
 'https://www.youtube.com/watch?v=KEc3aGjN228',
 '#',
 '#',
 '#',
 '#wrap',
 'https://menuco.co/privacy-policy/',
 'https://menuco.co/member-tos-page']

In [131]:
subpages = pd.DataFrame(whole_list, columns = ['link'])
subpages

Unnamed: 0,link
0,#primary
1,#content
2,https://menuco.co/
3,https://menuco.co/
4,https://menuco.co/subscription-plan
5,https://menuco.co/contact
6,https://menuco.co/login/
7,https://menuco.co/register/
8,https://menuco.co/
9,https://menuco.co/


In [24]:
subpages=subpages[subpages['link'].str.contains("//menuco.co/")]

In [25]:
subpages=subpages.link.unique()

In [26]:
subpages

array(['https://menuco.co/', 'https://menuco.co/subscription-plan',
       'https://menuco.co/contact', 'https://menuco.co/login/',
       'https://menuco.co/register/', 'https://menuco.co/register',
       'https://menuco.co/privacy-policy/',
       'https://menuco.co/member-tos-page'], dtype=object)

## Extract whole html including subpages

## Extract text from html

In [30]:
soup = BeautifulSoup(menuco_html, 'html.parser')

In [121]:
x=[]
for i in soup.find_all("p"):
    i.get_text()
    x.append(i.get_text())
        
    print(i.get_text())


The Next-Gen QR code menu system,
Scan and see the menu in your native language. 
							
  Forget the physical menus or traditional QR code apps, get ready for the future by Menuco…
Your customers only scan the barcode.
Menuco detects the language they prefer and shows the menu in that language.
Provide to your customers your menu in more than 90+ languages through Menuco.
Aren`t your all customers speaking with you in the same language? Then let them speak in their native languages. Menuco makes communication easier.
  See a sample menu
✓ 90+ Languages
✓  Speech translation (2 Hours/Month)
✓ First menu registration support
✓ Menu revisions support
✓ Fast support
✓ 90+ Languages
✓  Speech translation (15 Hours/Month)
✓ First menu registration support
✓ Menu revision support (2 Times/Year)
✓ Fast support
✓ 90+ Languages
✓  Speech translation (100 Hours/Month)
✓ First menu registration support
✓ Menu revision support (10 Times/Year)
✓ Fast support
  It`s easy to manage and use Menuco.


In [None]:
y=

In [122]:
x

['',
 'The Next-Gen QR code menu system,\nScan and see the menu in your native language. \n\t\t\t\t\t\t\t',
 '  Forget the physical menus or traditional QR code apps, get ready for the future by Menuco…',
 'Your customers only scan the barcode.',
 'Menuco detects the language they prefer and shows the menu in that language.',
 'Provide to your customers your menu in more than 90+ languages through Menuco.',
 'Aren`t your all customers speaking with you in the same language? Then let them speak in their native languages. Menuco makes communication easier.',
 '  See a sample menu',
 '✓ 90+ Languages',
 '✓ \xa0Speech translation (2 Hours/Month)',
 '✓ First menu registration support',
 '✓ Menu revisions support',
 '✓ Fast support',
 '✓ 90+ Languages',
 '✓\xa0 Speech translation (15 Hours/Month)',
 '✓ First menu registration support',
 '✓ Menu revision support (2 Times/Year)',
 '✓\xa0Fast support',
 '✓ 90+ Languages',
 '✓\xa0 Speech translation (100 Hours/Month)',
 '✓ First menu registratio

## Check grammar of the text

In [35]:
dir(SpellChecker)

['_SpellChecker__edit_distance_alt',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_case_sensitive',
 '_check_if_should_check',
 '_distance',
 '_tokenizer',
 '_word_frequency',
 'candidates',
 'correction',
 'distance',
 'edit_distance_1',
 'edit_distance_2',
 'export',
 'known',
 'languages',
 'split_words',
 'unknown',
 'word_frequency',
 'word_probability',
 'word_usage_frequency']

In [36]:
spell=SpellChecker()

In [38]:
text="Forget the physical menus or traditional QR code apps, get ready for the future by Menuco...
Your customers only scan the barcode.
Menuco detects the language they prefer and shows the menu in that language.
Provide to your customers your menu in more than 90+ languages through Menuco.
Aren`t your all customers speaking with you in the same language? Then let them speak in their native languages. Menuco makes communication easier."

SyntaxError: EOL while scanning string literal (Temp/ipykernel_22956/3977623322.py, line 1)

In [42]:
my_text="Menuco detects the languaga they prefer and shows the menu in that language."

In [39]:
docx=['calandar', 'misspel', 'necessary','bussiness', 'know']

In [43]:
for word in my_text:
    print(f'{word}:{spell.correction(word)}')

M:i
e:e
n:i
u:u
c:i
o:o
 :i
d:i
e:e
t:i
e:e
c:i
t:i
s:i
 :i
t:i
h:i
e:e
 :i
l:i
a:a
n:i
g:i
u:u
a:a
g:i
a:a
 :i
t:i
h:i
e:e
y:y
 :i
p:i
r:i
e:e
f:i
e:e
r:i
 :i
a:a
n:i
d:i
 :i
s:i
h:i
o:o
w:i
s:i
 :i
t:i
h:i
e:e
 :i
m:i
e:e
n:i
u:u
 :i
i:i
n:i
 :i
t:i
h:i
a:a
t:i
 :i
l:i
a:a
n:i
g:i
u:u
a:a
g:i
e:e
.:.


In [41]:
for word in docx:
    print(f'{word}:{spell.candidates(word)}')

calandar:{'calendar'}
misspel:{'misspelt', 'misspell'}
necessary:{'necessary'}
bussiness:{'bussiness'}
know:{'know'}


## Split the words

In [103]:
# Python3 program to Convert single
# indexed list into multiple indexed list

def convert(lst):
    return ([i for item in lst for i in item.split()])
lst = ['So this project saunds a little diffacult in terms of implementation']
print( convert(lst))


['So', 'this', 'project', 'saunds', 'a', 'little', 'diffacult', 'in', 'terms', 'of', 'implementation']


In [123]:
splitted_words=convert(x)

In [124]:
splitted_words

['The',
 'Next-Gen',
 'QR',
 'code',
 'menu',
 'system,',
 'Scan',
 'and',
 'see',
 'the',
 'menu',
 'in',
 'your',
 'native',
 'language.',
 'Forget',
 'the',
 'physical',
 'menus',
 'or',
 'traditional',
 'QR',
 'code',
 'apps,',
 'get',
 'ready',
 'for',
 'the',
 'future',
 'by',
 'Menuco…',
 'Your',
 'customers',
 'only',
 'scan',
 'the',
 'barcode.',
 'Menuco',
 'detects',
 'the',
 'language',
 'they',
 'prefer',
 'and',
 'shows',
 'the',
 'menu',
 'in',
 'that',
 'language.',
 'Provide',
 'to',
 'your',
 'customers',
 'your',
 'menu',
 'in',
 'more',
 'than',
 '90+',
 'languages',
 'through',
 'Menuco.',
 'Aren`t',
 'your',
 'all',
 'customers',
 'speaking',
 'with',
 'you',
 'in',
 'the',
 'same',
 'language?',
 'Then',
 'let',
 'them',
 'speak',
 'in',
 'their',
 'native',
 'languages.',
 'Menuco',
 'makes',
 'communication',
 'easier.',
 'See',
 'a',
 'sample',
 'menu',
 '✓',
 '90+',
 'Languages',
 '✓',
 'Speech',
 'translation',
 '(2',
 'Hours/Month)',
 '✓',
 'First',
 'menu'

In [125]:
for word in splitted_words:
    a=[]
    if word != spell.correction(word):
        a.append(spell.correction(word))
    print(f'{word}:{a}')
    #print(spell.correction(word))
    #print(f'{word}:{spell.correction(word)}')

The:[]
Next-Gen:[]
QR:['or']
code:[]
menu:[]
system,:['system']
Scan:[]
and:[]
see:[]
the:[]
menu:[]
in:[]
your:[]
native:[]
language.:['language']
Forget:[]
the:[]
physical:[]
menus:[]
or:[]
traditional:[]
QR:['or']
code:[]
apps,:['apps']
get:[]
ready:[]
for:[]
the:[]
future:[]
by:[]
Menuco…:['menudo']
Your:[]
customers:[]
only:[]
scan:[]
the:[]
barcode.:['barcode']
Menuco:['menudo']
detects:[]
the:[]
language:[]
they:[]
prefer:[]
and:[]
shows:[]
the:[]
menu:[]
in:[]
that:[]
language.:['language']
Provide:[]
to:[]
your:[]
customers:[]
your:[]
menu:[]
in:[]
more:[]
than:[]
90+:[]
languages:[]
through:[]
Menuco.:['menudo']
Aren`t:["aren't"]
your:[]
all:[]
customers:[]
speaking:[]
with:[]
you:[]
in:[]
the:[]
same:[]
language?:['language']
Then:[]
let:[]
them:[]
speak:[]
in:[]
their:[]
native:[]
languages.:['languages']
Menuco:['menudo']
makes:[]
communication:[]
easier.:['easier']
See:[]
a:[]
sample:[]
menu:[]
✓:['i']
90+:[]
Languages:[]
✓:['i']
Speech:[]
translation:[]
(2:['i']
Hours/Mo

In [136]:
with open ('menuco_html.txt', 'w', encoding='utf-8') as f:
    f.write(menuco_html)

In [7]:
menuco_html

'<!DOCTYPE html>\r\n<html lang="en-US">\r\n<head itemscope="itemscope" itemtype="http://schema.org/WebSite">\r\n\r\n\t<meta charset="UTF-8">\r\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0" />\r\n\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\r\n\t<link rel="profile" href="https://gmpg.org/xfn/11">\r\n\t<meta name="mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">\n<link rel="pingback" href="https://menuco.co/xmlrpc.php" />\n<meta name=\'robots\' content=\'index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1\' />\n<meta itemprop="name" content="Menuco Ana" />\n<meta itemprop="url" content="https://menuco.co/" />\n\r\n\t<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->\r\n\t<title>Home Page - Menuco Ana</title>\r\n\t<link rel="canonical" href="https:

In [8]:
url_list=[]
html_page = urllib.request.urlopen("http://www.menuco.co")
soup = BeautifulSoup(html_page, "html.parser")
for link in soup.findAll('a'):
    if link not in url_list:
        url_list.append(link.get('href'))
    else:
        pass
    #print(link.get('href'))

In [9]:
url_list

['#primary',
 '#content',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register/',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register',
 'https://www.youtube.com/watch?v=KEc3aGjN228',
 '#',
 '#',
 '#',
 '#wrap',
 'https://menuco.co/privacy-policy/',
 'https://menuco.co/member-tos-page']

In [10]:
df = pd.DataFrame(url_list, columns = ['link'])

In [11]:
df

Unnamed: 0,link
0,#primary
1,#content
2,https://menuco.co/
3,https://menuco.co/
4,https://menuco.co/subscription-plan
5,https://menuco.co/contact
6,https://menuco.co/login/
7,https://menuco.co/register/
8,https://menuco.co/
9,https://menuco.co/


In [12]:
df2=df[df['link'].str.contains("//menuco.co/")]

In [13]:
df3=df2.link.unique()

In [14]:
df3

array(['https://menuco.co/', 'https://menuco.co/subscription-plan',
       'https://menuco.co/contact', 'https://menuco.co/login/',
       'https://menuco.co/register/', 'https://menuco.co/register',
       'https://menuco.co/privacy-policy/',
       'https://menuco.co/member-tos-page'], dtype=object)

In [27]:
for i in df3:
    url=i
    response=requests.get(url)
    page_html=response.text
    file_name=1 
    with open (f"{file_name}.txt", "w", encoding='utf-8') as f:
        f.write(page_html)
        file_name=file_name+1
   

In [110]:
with open ("1.txt", 'r', encoding='utf-8') as f:
            text=f.read()

In [111]:
text

'<!DOCTYPE html>\n\n<html lang="en-US">\n\n<head >\n\n\n\n\t<meta charset="UTF-8">\n\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0" />\n\n\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\n\n\t<link rel="profile" href="https://gmpg.org/xfn/11">\n\n\t<meta name="mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">\n<link rel="pingback" href="https://menuco.co/xmlrpc.php" />\n<meta name=\'robots\' content=\'index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1\' />\n\n\n\t<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->\n\n\t<title>Member TOS Page - Menuco Ana</title>\n\n\t<link rel="canonical" href="https://menuco.co/member-tos-page/" />\n\n\t<meta property="og:locale" content="en_US" />\n\n\t<meta property="og:type" content="article" />\n\n\t<meta propert

In [113]:
soup = BeautifulSoup(text, 'html.parser')


In [14]:
text3 = soup.get_text()
text3

'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome Page - Menuco Ana\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\nSkip links Skip to primary navigation Skip to content\n\n\n\n\n\n\nExplore\n\n\n\n\n\n\n\n\n\nDrag\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\nHome Page\nPrice List\nContact\n \n\n\n\nLogin\n\n\n\n\n\nRegister\n\n\n \n\n\n\nToggle navigation\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome Page\nPrice List\nContact\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMultilingual \nOnline Menu \n\t\t\t\t\t\t\tThe Next-Gen QR code menu system,\nScan and see the menu in your native language. \n\t\t\t\t\t\t\tLOG-IN \n\t\t\t\t\t\t\tSIGN UP \n\t\t\t\t\t\t\t\n\n\n\n \n\n\n\n\n\n\n\n Language barrier: no more!\n  Forget the physical menus or traditional QR code apps, get ready for the future by Menuco…\n\n\n\nSmart language detection Your customers only scan the barcode.\nMenuco detects the language they prefe

In [15]:
text4 = re.sub(r"(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", text3)
text4

'Home Page  Menuco Ana  Skip links Skip to primary navigation Skip to contentExploreDrag Home PagePrice ListContact LoginRegister Toggle navigationHome PagePrice ListContactMultilingual Online Menu \t\t\t\t\t\t\tThe NextGen QR code menu systemScan and see the menu in your native language \t\t\t\t\t\t\tLOGIN \t\t\t\t\t\t\tSIGN UP \t\t\t\t\t\t\t  Language barrier no more  Forget the physical menus or traditional QR code apps get ready for the future by MenucoSmart language detection Your customers only scan the barcodeMenuco detects the language they prefer and shows the menu in that language90 Language Provide to your customers your menu in more than 90 languages through MenucoPush to talk speech translator Arent your all customers speaking with you in the same language Then let them speak in their native languages Menuco makes communication easier  Scan the barcode  See a sample menu  Choose your planSilver1590 Month 90 Languages Speech translation 2 HoursMonth First menu registration 

In [114]:
for para in soup.find_all("p"):
    print(para.get_text())

We are here to answer any question you may have.
Last updated: August 12, 2021
Please read these terms and conditions carefully before using Our Service.
The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.
For the purposes of these Terms and Conditions:
These are the Terms and Conditions governing the use of this Service and the agreement that operates between You and the Company. These Terms and Conditions set out the rights and obligations of all users regarding the use of the Service.
Your access to and use of the Service is conditioned on Your acceptance of and compliance with these Terms and Conditions. These Terms and Conditions apply to all visitors, users and others who access or use the Service.
By accessing or using the Service You agree to be bound by these Terms and Conditions. If You disagree with any part 

In [None]:
def my_lyrics(songs):
    lyric_list=[]
    for song in songs:
        song_soup=BeautifulSoup(song, "html.parser")
        lyric=song_soup.find('pre')
        lyric=lyric.get_text()
        lyric=str(lyric).replace("\\n"," ")
        lyric=str(lyric).replace("\n","")
        lyric=str(lyric).lower()
        lyric_list.append(lyric)
    return lyric_list

In [None]:
url_list=[]
html_page = urllib.request.urlopen("http://www.menuco.co")
soup = BeautifulSoup(html_page, "html.parser")
for link in soup.findAll('a'):
    if link not in url_list:
        url_list.append(link.get('href'))
    else:
        pass
    #print(link.get('href'))

In [135]:
def getlink(url): 
    url_list=[]
    for link in soup.findAll('a'):
        a=link.str.contains("//menuco.co/")
        url_list.append(link.get('href'))
    r = requests.get(url) 
    return url_list
  
linkdata = getlink("https://www.menuco.co") 

AttributeError: 'NoneType' object has no attribute 'contains'

In [126]:
def getlink(url): 
    url_list=[]
    for link in soup.findAll('a'):
        link.str.contains("//menuco.co/")
        url_list.append(link.get('href'))
        if link not in url_list:
            url_list.append(link.get('href'))
        else:
            pass
    r = requests.get(url) 
    return url_list
  
linkdata = getlink("https://www.menuco.co") 

In [127]:
linkdata

['#primary',
 '#content',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register/',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register',
 'https://www.youtube.com/watch?v=KEc3aGjN228',
 '#',
 '#',
 '#',
 '#wrap',
 'https://menuco.co/privacy-policy/',
 'https://menuco.co/member-tos-page']

In [None]:
df = pd.DataFrame(url_list, columns = ['link'])

In [None]:
df2=df[df['link'].str.contains("//menuco.co/")]

In [None]:
df3=df2.link.unique()

In [133]:
str(linkdata).contains("//menuco.co/")

AttributeError: 'str' object has no attribute 'contains'

In [119]:
def getdata(url): 
    r = requests.get(url) 
    return r.text 
  
htmldata = getdata("https://www.menuco.co") 


In [None]:
soup = BeautifulSoup(htmldata, 'html.parser') 
data = '' 
for data in soup.find_all("p"): 
    print(data.get_text()) 

In [9]:
url='https://www.menuco.co'

In [31]:
def get_html():
    response = requests.get(url)
    response
    response.status_code
    test_html = response.text
    print(test_html)
    return test_html

In [32]:
print(get_html())

<!DOCTYPE html>
<html lang="en-US">
<head itemscope="itemscope" itemtype="http://schema.org/WebSite">

	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<meta http-equiv="X-UA-Compatible" content="IE=edge">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name="mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">
<link rel="pingback" href="https://menuco.co/xmlrpc.php" />
<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />
<meta itemprop="name" content="Menuco Ana" />
<meta itemprop="url" content="https://menuco.co/" />

	<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->
	<title>Home Page - Menuco Ana</title>
	<link rel="canonical" href="https://menuco.co/" />
	<meta property="og:loca

In [48]:
soup = BeautifulSoup(get_html(), 'html.parser')

<!DOCTYPE html>
<html lang="en-US">
<head itemscope="itemscope" itemtype="http://schema.org/WebSite">

	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<meta http-equiv="X-UA-Compatible" content="IE=edge">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name="mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">
<link rel="pingback" href="https://menuco.co/xmlrpc.php" />
<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />
<meta itemprop="name" content="Menuco Ana" />
<meta itemprop="url" content="https://menuco.co/" />

	<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->
	<title>Home Page - Menuco Ana</title>
	<link rel="canonical" href="https://menuco.co/" />
	<meta property="og:loca

In [45]:
print(get_text1())

<!DOCTYPE html>
<html lang="en-US">
<head itemscope="itemscope" itemtype="http://schema.org/WebSite">

	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<meta http-equiv="X-UA-Compatible" content="IE=edge">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name="mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">
<link rel="pingback" href="https://menuco.co/xmlrpc.php" />
<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />
<meta itemprop="name" content="Menuco Ana" />
<meta itemprop="url" content="https://menuco.co/" />

	<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->
	<title>Home Page - Menuco Ana</title>
	<link rel="canonical" href="https://menuco.co/" />
	<meta property="og:loca

NameError: name 'my_text' is not defined

In [49]:
def clean_text():
    text = soup.get_text(separator=' ')
    cleaned_text = re.sub(r"(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", text)

    return cleaned_text

In [50]:
clean_text()

'               Home Page  Menuco Ana                                                                        Skip links  Skip to primary navigation  Skip to content        Explore           Drag                                  Home Page  Price List  Contact        Login       Register          Toggle navigation                 Home Page  Price List  Contact                        Multilingual  Online Menu \t\t\t\t\t\t\t The NextGen QR code menu system Scan and see the menu in your native language \t\t\t\t\t\t\t LOGIN \t\t\t\t\t\t\t SIGN UP \t\t\t\t\t\t\t                 Language barrier  no more    Forget the physical menus or traditional QR code apps get ready for the future by Menuco     Smart language detection   Your customers only scan the barcode  Menuco detects the language they prefer and shows the menu in that language        90 Language   Provide to your customers your menu in more than 90 languages through Menuco        Push to talk speech translator   Arent your all custom

In [51]:
dir(SpellChecker)

['_SpellChecker__edit_distance_alt',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_case_sensitive',
 '_check_if_should_check',
 '_distance',
 '_tokenizer',
 '_word_frequency',
 'candidates',
 'correction',
 'distance',
 'edit_distance_1',
 'edit_distance_2',
 'export',
 'known',
 'languages',
 'split_words',
 'unknown',
 'word_frequency',
 'word_probability',
 'word_usage_frequency']

In [58]:
def check():
    dir(SpellChecker)
    spell=SpellChecker()
    splitted_text = clean_text().split()
    print(splitted_text)
    for word in splitted_text:
        a=[]
        if word != spell.correction(word):
            a.append(spell.correction(word))
        print(f'{word}:{a}')
    return check

In [59]:
check()

['Home', 'Page', 'Menuco', 'Ana', 'Skip', 'links', 'Skip', 'to', 'primary', 'navigation', 'Skip', 'to', 'content', 'Explore', 'Drag', 'Home', 'Page', 'Price', 'List', 'Contact', 'Login', 'Register', 'Toggle', 'navigation', 'Home', 'Page', 'Price', 'List', 'Contact', 'Multilingual', 'Online', 'Menu', 'The', 'NextGen', 'QR', 'code', 'menu', 'system', 'Scan', 'and', 'see', 'the', 'menu', 'in', 'your', 'native', 'language', 'LOGIN', 'SIGN', 'UP', 'Language', 'barrier', 'no', 'more', 'Forget', 'the', 'physical', 'menus', 'or', 'traditional', 'QR', 'code', 'apps', 'get', 'ready', 'for', 'the', 'future', 'by', 'Menuco', 'Smart', 'language', 'detection', 'Your', 'customers', 'only', 'scan', 'the', 'barcode', 'Menuco', 'detects', 'the', 'language', 'they', 'prefer', 'and', 'shows', 'the', 'menu', 'in', 'that', 'language', '90', 'Language', 'Provide', 'to', 'your', 'customers', 'your', 'menu', 'in', 'more', 'than', '90', 'languages', 'through', 'Menuco', 'Push', 'to', 'talk', 'speech', 'translat

bestinclass:[]
experience:[]
strategy:[]
that:[]
builds:[]
brands:[]
and:[]
drives:[]
transactions:[]
Learn:[]
more:[]
Nasl:['nail']
balarm:['alarm']
Aadaki:['sasaki']
admlar:['amar']
takip:['takin']
et:[]
Paketini:[]
se:[]
ve:[]
restaurantn:['restaurant']
kaydet:['cadet']
yelik:['yell']
ilemleri:[]
sadece:['sauce']
birka:['burka']
dakika:['dareka']
paketini:[]
se:[]
ve:[]
bilgilerini:[]
girip:['grip']
yeliini:['yelling']
tamamla:['tamara']
Meny:['many']
ykle:['kyle']
Standart:['standard']
ve:[]
Premum:['premium']
paketlerini:[]
setiysen:[]
menn:['mean']
destek:['desk']
ekibimize:[]
gnder:['under']
ksa:['sa']
srede:['suede']
sistemi:['sister']
kullanma:[]
hazr:['hair']
hale:[]
getirelim:[]
Kullanmaya:[]
bala:[]
Eer:[]
masalara:['mascara']
yerletirmek:[]
iin:[]
QR:['or']
kodlu:['kodou']
men:[]
kartlarna:[]
ihtiyacn:[]
varsa:['versa']
bizden:['bidden']
destek:['desk']
alabilirsiniz:[]
NELER:['never']
VAR:[]
Otomatik:[]
Dil:[]
Alglama:['llama']
Ceating:['eating']
brand:[]
identities:[]
di

<function __main__.check()>

In [60]:
dir(SpellChecker)
def check():
    #dir(SpellChecker)
    spell=SpellChecker()
    cleaned_text=clean_text()
    splitted_text = cleaned_text.split()
    print(splitted_text)
    for word in splitted_text:
        a=[]
        if word != spell.correction(word):
            a.append(spell.correction(word))
        print(f'{word}:{a}')
    return check

In [61]:
check()

['Home', 'Page', 'Menuco', 'Ana', 'Skip', 'links', 'Skip', 'to', 'primary', 'navigation', 'Skip', 'to', 'content', 'Explore', 'Drag', 'Home', 'Page', 'Price', 'List', 'Contact', 'Login', 'Register', 'Toggle', 'navigation', 'Home', 'Page', 'Price', 'List', 'Contact', 'Multilingual', 'Online', 'Menu', 'The', 'NextGen', 'QR', 'code', 'menu', 'system', 'Scan', 'and', 'see', 'the', 'menu', 'in', 'your', 'native', 'language', 'LOGIN', 'SIGN', 'UP', 'Language', 'barrier', 'no', 'more', 'Forget', 'the', 'physical', 'menus', 'or', 'traditional', 'QR', 'code', 'apps', 'get', 'ready', 'for', 'the', 'future', 'by', 'Menuco', 'Smart', 'language', 'detection', 'Your', 'customers', 'only', 'scan', 'the', 'barcode', 'Menuco', 'detects', 'the', 'language', 'they', 'prefer', 'and', 'shows', 'the', 'menu', 'in', 'that', 'language', '90', 'Language', 'Provide', 'to', 'your', 'customers', 'your', 'menu', 'in', 'more', 'than', '90', 'languages', 'through', 'Menuco', 'Push', 'to', 'talk', 'speech', 'translat

bestinclass:[]
experience:[]
strategy:[]
that:[]
builds:[]
brands:[]
and:[]
drives:[]
transactions:[]
Learn:[]
more:[]
Nasl:['nail']
balarm:['alarm']
Aadaki:['sasaki']
admlar:['amar']
takip:['takin']
et:[]
Paketini:[]
se:[]
ve:[]
restaurantn:['restaurant']
kaydet:['cadet']
yelik:['yell']
ilemleri:[]
sadece:['sauce']
birka:['burka']
dakika:['dareka']
paketini:[]
se:[]
ve:[]
bilgilerini:[]
girip:['grip']
yeliini:['yelling']
tamamla:['tamara']
Meny:['many']
ykle:['kyle']
Standart:['standard']
ve:[]
Premum:['premium']
paketlerini:[]
setiysen:[]
menn:['mean']
destek:['desk']
ekibimize:[]
gnder:['under']
ksa:['sa']
srede:['suede']
sistemi:['sister']
kullanma:[]
hazr:['hair']
hale:[]
getirelim:[]
Kullanmaya:[]
bala:[]
Eer:[]
masalara:['mascara']
yerletirmek:[]
iin:[]
QR:['or']
kodlu:['kodou']
men:[]
kartlarna:[]
ihtiyacn:[]
varsa:['versa']
bizden:['bidden']
destek:['desk']
alabilirsiniz:[]
NELER:['never']
VAR:[]
Otomatik:[]
Dil:[]
Alglama:['llama']
Ceating:['eating']
brand:[]
identities:[]
di

<function __main__.check()>

In [62]:
url5='https://www.menuco.co'

In [63]:
response = requests.get(url5)

In [64]:
test_html = response.text

In [65]:
test_html

'<!DOCTYPE html>\r\n<html lang="en-US">\r\n<head itemscope="itemscope" itemtype="http://schema.org/WebSite">\r\n\r\n\t<meta charset="UTF-8">\r\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0" />\r\n\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\r\n\t<link rel="profile" href="https://gmpg.org/xfn/11">\r\n\t<meta name="mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-capable" content="yes">\n<meta name="apple-mobile-web-app-title" content="Menuco Ana - Eat Joyfully">\n<link rel="pingback" href="https://menuco.co/xmlrpc.php" />\n<meta name=\'robots\' content=\'index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1\' />\n<meta itemprop="name" content="Menuco Ana" />\n<meta itemprop="url" content="https://menuco.co/" />\n\r\n\t<!-- This site is optimized with the Yoast SEO plugin v17.1 - https://yoast.com/wordpress/plugins/seo/ -->\r\n\t<title>Home Page - Menuco Ana</title>\r\n\t<link rel="canonical" href="https:

In [76]:
def get_html():
    #url=get_url()
    #full_url='http://'+url
    #print(url)
    #print(full_url)
    response = requests.get('https://analyticsindiamag.com/how-to-use-bert-transformer-for-grammar-checking/')
    #response.status_code
    test_html = response.text
    print(test_html)
    return test_html
get_html()

<!doctype html>
<html lang="en-US">
<head>
	<meta charset="UTF-8">
		<meta name="viewport" content="width=device-width, initial-scale=1">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />

	<!-- This site is optimized with the Yoast SEO Premium plugin v18.4 (Yoast SEO v18.7) - https://yoast.com/wordpress/plugins/seo/ -->
	<title>How To Use BERT Transformer For Grammar Checking?</title>
	<link rel="canonical" href="https://analyticsindiamag.com/how-to-use-bert-transformer-for-grammar-checking/" />
	<meta property="og:locale" content="en_US" />
	<meta property="og:type" content="article" />
	<meta property="og:title" content="How To Use BERT Transformer For Grammar Checking?" />
	<meta property="og:description" content="With the advent of AI, we are witnessing some of the remarkable things which were once deemed impossible now being completely achievable. AI has found its way

'<!doctype html>\n<html lang="en-US">\n<head>\n\t<meta charset="UTF-8">\n\t\t<meta name="viewport" content="width=device-width, initial-scale=1">\n\t<link rel="profile" href="https://gmpg.org/xfn/11">\n\t<meta name=\'robots\' content=\'index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1\' />\n\n\t<!-- This site is optimized with the Yoast SEO Premium plugin v18.4 (Yoast SEO v18.7) - https://yoast.com/wordpress/plugins/seo/ -->\n\t<title>How To Use BERT Transformer For Grammar Checking?</title>\n\t<link rel="canonical" href="https://analyticsindiamag.com/how-to-use-bert-transformer-for-grammar-checking/" />\n\t<meta property="og:locale" content="en_US" />\n\t<meta property="og:type" content="article" />\n\t<meta property="og:title" content="How To Use BERT Transformer For Grammar Checking?" />\n\t<meta property="og:description" content="With the advent of AI, we are witnessing some of the remarkable things which were once deemed impossible now being completely a

In [77]:
def get_text():
    soup = BeautifulSoup(get_html(), 'html.parser')
    return my_text

In [78]:
get_text()

<!doctype html>
<html lang="en-US">
<head>
	<meta charset="UTF-8">
		<meta name="viewport" content="width=device-width, initial-scale=1">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />

	<!-- This site is optimized with the Yoast SEO Premium plugin v18.4 (Yoast SEO v18.7) - https://yoast.com/wordpress/plugins/seo/ -->
	<title>How To Use BERT Transformer For Grammar Checking?</title>
	<link rel="canonical" href="https://analyticsindiamag.com/how-to-use-bert-transformer-for-grammar-checking/" />
	<meta property="og:locale" content="en_US" />
	<meta property="og:type" content="article" />
	<meta property="og:title" content="How To Use BERT Transformer For Grammar Checking?" />
	<meta property="og:description" content="With the advent of AI, we are witnessing some of the remarkable things which were once deemed impossible now being completely achievable. AI has found its way

NameError: name 'my_text' is not defined

In [79]:
soup = BeautifulSoup(get_html(), 'html.parser')
def get_text():
    #soup = BeautifulSoup(get_html(), 'html.parser')
    text = soup.get_text(separator=' ')
    return text
get_text()

<!doctype html>
<html lang="en-US">
<head>
	<meta charset="UTF-8">
		<meta name="viewport" content="width=device-width, initial-scale=1">
	<link rel="profile" href="https://gmpg.org/xfn/11">
	<meta name='robots' content='index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1' />

	<!-- This site is optimized with the Yoast SEO Premium plugin v18.4 (Yoast SEO v18.7) - https://yoast.com/wordpress/plugins/seo/ -->
	<title>How To Use BERT Transformer For Grammar Checking?</title>
	<link rel="canonical" href="https://analyticsindiamag.com/how-to-use-bert-transformer-for-grammar-checking/" />
	<meta property="og:locale" content="en_US" />
	<meta property="og:type" content="article" />
	<meta property="og:title" content="How To Use BERT Transformer For Grammar Checking?" />
	<meta property="og:description" content="With the advent of AI, we are witnessing some of the remarkable things which were once deemed impossible now being completely achievable. AI has found its way

'\n \n \n \n \n \n \n \n How To Use BERT Transformer For Grammar Checking? \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n   \n \n \n \n \n \n \n \n \n \n   \n \n \n \n \n \n \n   \n   \n \n \n \n \n \n \n   \n \n \n \n \n \n \n \n \n \n \n \n   \n Opinions \n Tech Policy \n Developers Corner \n Tech Events \n Careers \n Research \n People & Technology \n Tech Startups \n Education \n News \n   \n \n   Menu \n \n \n Opinions \n Tech Policy \n Developers Corner \n Tech Events \n Careers \n Research \n People & Technology \n Tech Startups \n Education \n News \n   \n \n \n \n \n \n \n \n \n \n \n \n \n \n   \n \n \n \n \n \n \n \n \n \n   \n \n \n \n \n \n \n \n \n \n \n \n \n \n Who we are \n Advertise \n Hiring Services \n Events \n Videos \n Hackathons \n Council \n Best Firm \n Discussion \n Contact us \n   \n \n   Menu \n \n \n Who we are \n Advertise \n Hiring Services \n Events \n

In [None]:
#dirs=["adele/", "beatles/"]
texts=[]
for i in all_links:
    files=os.listdir(i)
    #print(i)
    for file in files:
        with open (i+file, 'r') as f:
            text=f.read()
            texts.append(text)

In [None]:
dirs=["adele/", "beatles/"]
texts=[]
for i in dirs:
    files=os.listdir(i)
    #print(i)
    for file in files:
        with open (i+file, 'r') as f:
            text=f.read()
            texts.append(text)

In [57]:
html_page = urllib.request.urlopen("http://www.menuco.co")
soup = BeautifulSoup(html_page, "html.parser")
for link in soup.findAll('a'):
    print(link.get('href'))

#primary
#content
https://menuco.co/
https://menuco.co/
https://menuco.co/subscription-plan
https://menuco.co/contact
https://menuco.co/login/
https://menuco.co/register/
https://menuco.co/
https://menuco.co/
https://menuco.co/subscription-plan
https://menuco.co/contact
https://menuco.co/login/
https://menuco.co/register
https://www.youtube.com/watch?v=KEc3aGjN228
#
#
#
#wrap
https://menuco.co/privacy-policy/
https://menuco.co/member-tos-page


In [61]:
lyric_list=[]
html_page = urllib.request.urlopen("http://www.menuco.co")
soup = BeautifulSoup(html_page, "html.parser")
for link in soup.findAll('a'):
    lyric_list.append(link.get('href'))
    #print(link.get('href'))

In [62]:
lyric_list

['#primary',
 '#content',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register/',
 'https://menuco.co/',
 'https://menuco.co/',
 'https://menuco.co/subscription-plan',
 'https://menuco.co/contact',
 'https://menuco.co/login/',
 'https://menuco.co/register',
 'https://www.youtube.com/watch?v=KEc3aGjN228',
 '#',
 '#',
 '#',
 '#wrap',
 'https://menuco.co/privacy-policy/',
 'https://menuco.co/member-tos-page']

In [106]:
x = pd.DataFrame(lyric_list)
x.head()

Unnamed: 0,0
0,#primary
1,#content
2,https://menuco.co/
3,https://menuco.co/
4,https://menuco.co/subscription-plan


In [109]:
# function to get unique values
def unique(lyric_list):
    x = np.array(lyric_list)
    #x=re.findall(r'(https://menuco\S+")')
    print(np.unique(x))



In [110]:
df=pd.DataFrame(unique(lyric_list))

['#' '#content' '#primary' '#wrap' 'https://menuco.co/'
 'https://menuco.co/contact' 'https://menuco.co/login/'
 'https://menuco.co/member-tos-page' 'https://menuco.co/privacy-policy/'
 'https://menuco.co/register' 'https://menuco.co/register/'
 'https://menuco.co/subscription-plan'
 'https://www.youtube.com/watch?v=KEc3aGjN228']


In [115]:
lyric_list.str.startswith('https', na=False)

AttributeError: 'list' object has no attribute 'str'

In [96]:
all_links=re.findall(r'(https://menuco\S+")', df)

TypeError: expected string or bytes-like object

In [116]:
#------------

In [None]:
soup = bs4.BeautifulSoup(html, 'html.parser')

In [None]:
ls_lines = []
for el in soup.find_all("p"):
    ls_lines.append(el.get_text().strip())

In [None]:
def my_lyrics(songs):
    lyric_list=[]
    for song in songs:
        song_soup=BeautifulSoup(song, "html.parser")
        lyric=song_soup.find('pre')
        lyric=lyric.get_text()
        lyric=str(lyric).replace("\\n"," ")
        lyric=str(lyric).replace("\n","")
        lyric=str(lyric).lower()
        lyric_list.append(lyric)
    return lyric_list

In [14]:
def find(adele_songs_html):
  
    # findall() has been used 
    # with valid conditions for urls in string
    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    url = re.findall(regex,adele_songs_html)      
    return [x[0] for x in url]

In [17]:
print("Urls: ", find(adele_songs_html))

Urls:  ['https://www.lyrics.com/', 'https://static.stands4.com', 'https://maxcdn.bootstrapcdn.com/bootstrap/3.3.2/css/bootstrap.min.css', 'https://fonts.googleapis.com', 'https://fonts.gstatic.com', 'fonts.googleapis.com/css2?family=Yanone+Kaffeesatz:wght@400;700&family=Oxygen:wght@400;700&family=Droid+Serif:ital,wght@0,400;0,700;1,400;1,700&family=Droid+Sans+Mono&family=Alegreya+Sans:wght@800&family=Original+Surfer&display=swap', 'fonts.googleapis.com/css2?family=Yanone+Kaffeesatz:wght@400;700&family=Oxygen:wght@400;700&family=Droid+Serif:ital,wght@0,400;0,700;1,400;1,700&family=Droid+Sans+Mono&family=Alegreya+Sans:wght@800&family=Original+Surfer&display=swap', 'fonts.googleapis.com/css2?family=Yanone+Kaffeesatz:wght@400;700&family=Oxygen:wght@400;700&family=Droid+Serif:ital,wght@0,400;0,700;1,400;1,700&family=Droid+Sans+Mono&family=Alegreya+Sans:wght@800&family=Original+Surfer&display=swap', 'https://static.stands4.com/app_common/css/lyrc.css?v=1.8.47', 'https://static.stands4.com/ap

In [24]:
df1["urls"]=pd.DataFrame(re.findall(regex, adele_songs_html))

NameError: name 'regex' is not defined

In [19]:
df1

Unnamed: 0,0
0,https://www.lyrics.com/
1,https://static.stands4.com
2,https://maxcdn.bootstrapcdn.com/bootstrap/3.3....
3,https://fonts.googleapis.com
4,https://fonts.gstatic.com
...,...
252,https://static.stands4.com/app_common/js/libs/...
253,https://maxcdn.bootstrapcdn.com/bootstrap/3.3....
254,https://static.stands4.com/app_lyrics/js/lyrc....
255,https://www.googletagmanager.com/gtag/js?id=UA...


In [26]:
url=(re.findall(r'(lyrics\S+")', adele_songs_html))

In [27]:
url

['lyrics.com/"',
 'lyrics/open-search.xml"',
 'lyrics.com/"',
 'lyrics.com/\';"',
 'lyrics..."',
 'lyrics.com/\';"',
 'lyrics.com/images/artist/861756_adele.png"',
 'lyrics.com/images/artist/861756_adele.png"',
 'lyrics.com/forgotpass.php\';"',
 'lyrics.com/addlyric.php?aid=861756\';"',
 'lyrics.com/images/artist/349078_beyonce.png"',
 'lyrics.com/images/artist/sub/53352_helena-barrington.png"',
 'lyrics.com/images/artist/sub/4122_michael-j.png"',
 'lyrics.com/images/artist/sub/52333_amalia-maldonado.png"',
 'lyrics.com/images/artist/sub/30381_the-immaculate-crows.png"',
 'lyrics.com/images/artist/sub/48424_don-jaymor.png"',
 'lyrics.com/images/artist/sub/54657_michael-frederick.png"',
 'lyrics.com/images/artist/sub/54680_dree-motion.png"',
 'lyrics.com/lyrics-quiz"',
 'lyrics.com/lyrics-quiz"',
 'lyricscom/lemhemhgkggcfkeepaanfpkbjnkgfhpi"',
 'lyricscom/lemhemhgkggcfkeepaanfpkbjnkgfhpi"',
 'lyricscom/lemhemhgkggcfkeepaanfpkbjnkgfhpi\')"',
 'lyrics-com/"',
 'lyrics-com/"',
 'lyrics-com

In [None]:
for i in range(10):
    my_song_url=f'https://lyrics.com{short_list_adele[i]}'
    response=requests.get(my_song_url)
    my_song_html=response.text
    song_number=re.findall('\d+',short_list_adele[i])
    #print(song_number)
    with open (f"{song_number}.txt", "w", encoding='utf-8') as f:
        f.write(my_song_html)
    print(f'https://lyrics.com{short_list_adele[i]}')

In [None]:
dirs=["adele/", "beatles/"]
texts=[]
for i in dirs:
    files=os.listdir(i)
    #print(i)
    for file in files:
        with open (i+file, 'r') as f:
            text=f.read()
            texts.append(text)

In [6]:
!pip install --upgrade language_tool_python

Collecting language_tool_python
  Downloading language_tool_python-2.7.0-py3-none-any.whl (34 kB)
Installing collected packages: language_tool_python
Successfully installed language_tool_python-2.7.0


In [1]:
import language_tool_python


In [5]:
!pip install --upgrade 3to2

!pip install --upgrade language-check

Collecting language-check

  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [36 lines of output]
  Traceback (most recent call last):
    File "<string>", line 2, in <module>
    File "<pip-setuptools-caller>", line 34, in <module>
    File "C:\Users\Guest1\AppData\Local\Temp\pip-install-t0yockz_\language-check_af334a62f9c14bc9bc05dc5ed5152393\setup.py", line 595, in <module>
      sys.exit(main())
    File "C:\Users\Guest1\AppData\Local\Temp\pip-install-t0yockz_\language-check_af334a62f9c14bc9bc05dc5ed5152393\setup.py", line 590, in main
      run_setup_hooks(config)
    File "C:\Users\Guest1\AppData\Local\Temp\pip-install-t0yockz_\language-check_af334a62f9c14bc9bc05dc5ed5152393\setup.py", line 561, in run_setup_hooks
      language_tool_hook(config)
    File "C:\Users\Guest1\AppData\Local\Temp\pip-install-t0yockz_\language-check_af334a62f9c14bc9bc05dc5ed5152393\setup.py", line 584, in language_tool_hook
      download_lt()
    File "C:\Users\G


  Using cached language-check-1.1.tar.gz (33 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: language-check
  Building wheel for language-check (setup.py): started
  Building wheel for language-check (setup.py): finished with status 'error'
  Running setup.py clean for language-check
Failed to build language-check
Installing collected packages: language-check
  Running setup.py install for language-check: started
  Running setup.py install for language-check: finished with status 'error'


In [4]:
import grammar_check as gc

ModuleNotFoundError: No module named 'grammar_check'

In [2]:

tl = language_tool_python.LanguageTool('en-US')

txt = "good mooorning sirr and medam my namee anderen i am from amerecia !"
m = tl.check(txt)
len(m)

Downloading LanguageTool 5.6: 100%|██████████| 220M/220M [01:22<00:00, 2.67MB/s] 
Unzipping C:\Users\Guest1\AppData\Local\Temp\tmpku7u5lma.zip to C:\Users\Guest1\.cache\language_tool_python.
Downloaded https://www.languagetool.org/download/LanguageTool-5.6.zip to C:\Users\Guest1\.cache\language_tool_python.


8

In [10]:
tool = language_tool_python.LanguageTool('en-US')

ModuleNotFoundError: No java install detected. Please install java to use language-tool-python.

In [3]:
text = """LanguageTool offers spell and grammar checking. Just paste your text here and click the 'Check Text' button. Click the colored phrases for details on potential errors. or use this text too see an few of of the problems that LanguageTool can detecd. What do you thinks of grammar checkers? Please not that they are not perfect. Style issues get a blue marker: It's 5 P.M. in the afternoon. The weather was nice on Thursday, 27 June 2017"""
 
 
# get the matches
matches = tool.check(text)
 
matches

NameError: name 'tool' is not defined

In [142]:
import requests
import json

In [143]:
api_key = "964c5a4bba744df882cd962c498d860d"
example_text = "Hollo, wrld" # the text to be spell-checked
endpoint = "https://api.bing.microsoft.com/v7.0/SpellCheck"

In [144]:
data = {'text': example_text}

In [145]:
params = {
    'mkt':'en-us',
    'mode':'proof'
    }

In [146]:
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Ocp-Apim-Subscription-Key': api_key,
    }

In [147]:
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Ocp-Apim-Subscription-Key': api_key,
    }

In [148]:
response = requests.post(endpoint, headers=headers, params=params, data=data)

In [149]:
json_response = response.json()
print(json.dumps(json_response, indent=4))

{
    "_type": "SpellCheck",
    "flaggedTokens": [
        {
            "offset": 0,
            "token": "Hollo",
            "type": "UnknownToken",
            "suggestions": [
                {
                    "suggestion": "Hello",
                    "score": 0.9150883204586714
                },
                {
                    "suggestion": "Hollow",
                    "score": 0.7866692176934047
                }
            ]
        },
        {
            "offset": 7,
            "token": "wrld",
            "type": "UnknownToken",
            "suggestions": [
                {
                    "suggestion": "world",
                    "score": 0.9150883204586714
                }
            ]
        }
    ]
}


In [39]:
!pip install pyspellchecker
!pip install textblob
!pip install autocorrect
!pip install pattern

Collecting autocorrect
  Downloading autocorrect-2.6.1.tar.gz (622 kB)
     ------------------------------------ 622.8/622.8 KB 739.8 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: autocorrect
  Building wheel for autocorrect (setup.py): started
  Building wheel for autocorrect (setup.py): finished with status 'done'
  Created wheel for autocorrect: filename=autocorrect-2.6.1-py3-none-any.whl size=622382 sha256=c487cf448cecbe2b2300999e944e36b3a6c545fe624d25eb157bd192fa25097d
  Stored in directory: c:\users\guest1\appdata\local\pip\cache\wheels\ab\0f\23\3c010c3fd877b962146e7765f9e9b08026cac8b035094c5750
Successfully built autocorrect
Installing collected packages: autocorrect
Successfully installed autocorrect-2.6.1
Collecting pattern
  Downloading Pattern-3.6.0.tar.gz (22.2 MB)
     ---------------------------------------- 22.2/22.2 MB 1.9 MB/s eta 0:00:00
  Preparing metadat

In [150]:
from spellchecker import SpellChecker

In [151]:
dir(SpellChecker)

['_SpellChecker__edit_distance_alt',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_case_sensitive',
 '_check_if_should_check',
 '_distance',
 '_tokenizer',
 '_word_frequency',
 'candidates',
 'correction',
 'distance',
 'edit_distance_1',
 'edit_distance_2',
 'export',
 'known',
 'languages',
 'split_words',
 'unknown',
 'word_frequency',
 'word_probability',
 'word_usage_frequency']

In [152]:
spell=SpellChecker()

In [153]:
text="Hollo wrld"

In [156]:
docx=['calandar', 'misspel', 'necessary','bussiness', 'know']

In [157]:
for word in docx:
    print(f'{word}:{spell.correction(word)}')

calandar:calendar
misspel:misspell
necessary:necessary
bussiness:bussiness
know:know


In [158]:
for word in docx:
    print(f'{word}:{spell.candidates(word)}')

calandar:{'calendar'}
misspel:{'misspell', 'misspelt'}
necessary:{'necessary'}
bussiness:{'bussiness'}
know:{'know'}


## gingerit

In [None]:
from gingerit.gingerit import GingerIt

In [None]:
def sent():
    if request.method == "GET":
        return render_template("index.html")
    else:
        
        if not request.form["SENT"]:
            return redirect("/")

In [None]:

def sent_correct():
    if request.method == 'POST':
        text = request.form["SENT"]
        parser = GingerIt()
        print(parser.parse(text)['corrections'])bb
        result=parser.parse(text)['result']
        return render_template('index.html', output1=result)

In [8]:
import language_tool_python

In [9]:
tool = language_tool_python.LanguageTool('en-US')
text = 'A sentence with a error in the Hitchhiker’s Guide tot he Galaxy'
tool.correct(text)

'A sentence with an error in the Hitchhiker’s Guide to the Galaxy'