In [1]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def check_url_availability(url, timeout=1.5):
    """
    Checks if a given URL is reachable.
    Returns True if the URL responds with a status code < 400, False otherwise.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if not 400 <= response.status_code < 600:
            return True
        else:
            return False
    except requests.exceptions.RequestException:
        return False

def filter_unreachable_urls(input_file, output_file, max_threads=10):
    """
    Opens each URL from the DataFrame, removes rows where the URL doesn't respond or is unavailable,
    and saves the updated DataFrame to a new file. This function uses parallel processing to speed up the process.
    """
    try:
        # Read the dataset
        df = pd.read_csv(input_file)
    except Exception as e:
        print(f"Error reading input file: {e}")
        return

    if 'url' not in df.columns:
        print("Dataset does not contain a 'url' column.")
        return

    # Create a list of URLs to check
    urls = df['url'].tolist()

    # Use ThreadPoolExecutor to check URLs concurrently
    reachable_mask = []
    with ThreadPoolExecutor(max_threads) as executor:
        future_to_url = {executor.submit(check_url_availability, url): url for url in urls}

        # Collect results as the futures complete
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                is_available = future.result()
                reachable_mask.append(is_available)
                print(f"URL: {url} | Available: {is_available}")
            except Exception as e:
                reachable_mask.append(False)
                print(f"Error checking URL {url}: {e}")

    # Filter rows based on reachable URLs
    df['is_available'] = reachable_mask

    # Save the updated DataFrame
    try:
        df.to_csv(output_file, index=False)
        print(f"\nUpdated dataset saved to {output_file}")
    except Exception as e:
        print(f"Error saving output file: {e}")

def main():
    input_file = "urlpart1.csv"  # Input file path
    output_file = "filtered_urls_1_1.csv"  # Output file path

    filter_unreachable_urls(input_file, output_file)

if __name__ == "__main__":
    main()


URL: https://www.manoramaonline.com/news/latest-news/2020/09/16/india-covid-19-tally-crosses-50-lakh-mark.html | Available: True
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36713 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36715 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36709 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36712 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36708 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36714 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36710 | Available: False
URL: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36707 | Available: False
URL: https://flutrackers.com/forum/forum/local-regional-communities-and-organizations/cidrap/893149-cidrap-india-s-covid-19-total-tops-5-million-as-cases-rise-in-europ

In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from readability import Document
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
import argostranslate.package
import argostranslate.translate
from langdetect import detect

# Global counters with thread safety
success_count = 0
failure_count = 0
lock = Lock()

language_translations = {}

argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()

# Initialize and configure Argos Translate
def initialize_argos_translate(from_code):
    """
    Initializes Argos Translate by downloading and installing necessary language packages.
    """

    to_code = "en"      # Translate to English
    if from_code in language_translations:
        return language_translations[from_code]

    package_to_install = next(
        filter(
            lambda x: x.from_code == from_code and x.to_code == to_code, available_packages
        )
    )
    if package_to_install:
        argostranslate.package.install_from_path(package_to_install.download())
        language_translations[from_code] = argostranslate.translate  # Save translation object for reuse
        print("Argos Translate package installed successfully.")
        return language_translations[from_code]
    else:
        print("No Argos Translate package available for the specified language pair.")

# Translate using Argos Translate
def detect_and_translate_to_english(text):
    """
    Detects the language of the given text and translates it to English if necessary.
    """
    try:
        lang = detect(text)
        print(lang)
        if lang == 'en':
            return text  # No need to translate
        else:
            print(f"Translating from {lang} to English...")

            translation_function = initialize_argos_translate(lang)
            if translation_function:
                return translation_function.translate(text, lang, "en")
            else:
                return "Translation failed"
    except Exception as e:
        print(f"Error during translation: {e}")
        return "Translation failed"

def fetch_main_content(url, timeout=1.2):
    """
    Fetches and extracts the main content of a webpage using readability and BeautifulSoup.
    Returns the main content as plain text or specific flags for errors.
    If a 4xx error occurs, returns "ClientError".
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTP error for bad responses (4xx or 5xx)

        # Parse the page using BeautifulSoup and readability
        doc = Document(response.text)
        main_content_html = doc.summary()
        main_content_text = doc.title() + "\n\n" + BeautifulSoup(main_content_html, 'html.parser').get_text()
        # print(main_content_text)

        # Detect and translate if not in English
        return detect_and_translate_to_english(main_content_text.strip())
    except requests.exceptions.HTTPError as e:
        if 400 <= e.response.status_code < 600:
            return "ClientError"  # Flag for client errors
        else:
            return "Unknown"
    except Exception:
        return "Unknown"  # Return "Unknown" for other errors

def process_url(url):
    """
    Processes a single URL and updates the global counters for success or failure.
    """
    global success_count, failure_count
    content = fetch_main_content(url)
    # print(content)
    with lock:  # Lock to safely update global counters
        if content == "Unknown" or content == "ClientError":
            failure_count += 1
        else:
            success_count += 1
        print(f"Processed: {url} | Success: {success_count} | Failed: {failure_count}")
    return content

def process_dataset(input_file, output_file, max_threads=9):
    """
    Reads a dataset, extracts main content for URLs using multithreading, 
    removes rows with client errors (4xx), and writes updated data to a new file.
    """
    global success_count, failure_count
    try:
        df = pd.read_csv(input_file)
        df = df[df['is_available'] == True]
    except Exception as e:
        print(f"Error reading input file: {e}")
        return

    if 'url' not in df.columns:
        print("Dataset does not contain a 'url' column.")
        return

    urls = df['url'].tolist()
    contents = [None] * len(urls)  # Initialize a list to store content in order

    # Use ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor(max_threads) as executor:
        # Submit tasks with their indices to ensure order
        future_to_index = {executor.submit(process_url, url): i for i, url in enumerate(urls)}

        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                contents[index] = future.result()  # Store the result at the correct index
            except Exception as e:
                contents[index] = "Unknown"  # Assign "Unknown" for failures
                with lock:  # Safely update failure count
                    failure_count += 1
                print(f"Error processing URL: {urls[index]} | {e}")

    # Add extracted content as a new column
    df['extracted'] = contents

    # Save updated dataset
    try:
        df.to_csv(output_file, index=False)
        print(f"\nUpdated dataset saved to {output_file}")
    except Exception as e:
        print(f"Error saving output file: {e}")

    # Print final summary
    print("\nFinal Summary:")
    print(f"Total URLs: {len(urls)}")
    print(f"Successfully processed: {success_count}")
    print(f"Failed to process: {failure_count}")


def main():
    input_file = "part1.csv"  # Input file path
    output_file = "argos_translate_1.csv"  # Output file path


    process_dataset(input_file, output_file)

if __name__ == "__main__":
    main()

Processed: https://aawsat.com/home/article/3575126/%25D8%25A7%25D9%2584%25D8%25B5%25D9%258A%25D9%2586-%25D8%25AA%25D8%25B3%25D8%25AC%25D9%2584-%25D8%25A3%25D8%25B9%25D9%2584%25D9%2589-%25D8%25A5%25D8%25B5%25D8%25A7%25D8%25A8%25D8%25A7%25D8%25AA-%25D8%25A8%25D9%2581%25D9%258A%25D8%25B1%25D9%2588%25D8%25B3-%25D9%2583%25D9%2588%25D8%25B1%25D9%2588%25D9%2586%25D8%25A7-%25D9%2585%25D9%2586%25D8%25B0-%25D8%25B9%25D8%25A7%25D9%2585%25D9%258A%25D9%2586 | Success: 0 | Failed: 1
Error during translation: Need to load profiles.
Processed: https://zonadeprensard.com/los-temores-sobre-la-escasez-de-pollo-y-huevos-en-los-ee-uu-antes-de-la-pascua-a-medida-que-se-propaga-la-gripe-aviar-hacen-que-los-precios-se-disparen/ | Success: 1 | Failed: 1
af
Translating from af to English...
Error during translation: 
Processed: https://zonadeprensard.com/los-temores-sobre-la-escasez-de-pollo-y-huevos-en-los-ee-uu-antes-de-la-pascua-a-medida-que-se-propaga-la-gripe-aviar-hacen-que-los-precios-se-disparen/ | Succ

  checkpoint = torch.load(filename, lambda storage, loc: storage)


Processed: https://www.foodsafetynews.com/2022/04/avian-flu-outbreaks-in-u-s-take-more-than-22-8-million-birds-in-24-states-so-far/ | Success: 10 | Failed: 4
Processed: https://www.bharian.com.my/dunia/eropah/2022/04/942881/kes-covid-19-babitkan-warga-emas-meningkat-di-england | Success: 10 | Failed: 5
ml
Translating from ml to English...
Error during translation: 
Processed: https://www.madhyamam.com/health/news/against-infectious-diseases-beware-dmo-973320 | Success: 11 | Failed: 5
pt
Translating from pt to English...
zh-cn
Translating from zh-cn to English...
Error during translation: 
Processed: http://www.nbd.com.cn/articles/2022-04-06/2204363.html | Success: 12 | Failed: 5
Processed: https://zeenews.india.com/bengali/photos/india-logs-795-new-covid-19-cases-and-58-deaths-in-the-last-24-hours-427229 | Success: 12 | Failed: 6
Argos Translate package installed successfully.


  checkpoint = torch.load(filename, lambda storage, loc: storage)


Processed: https://www.pecontracoronavirus.pe.gov.br/boletim-secretaria-estadual-de-saude-novo-coronavirus-196/ | Success: 12 | Failed: 7
Processed: https://www.france24.com/es/minuto-a-minuto/20220406-china-registra-un-r%25C3%25A9cord-de-casos-diarios-de-covid-19-con-m%25C3%25A1s-de-20-000 | Success: 12 | Failed: 8
Processed: https://www.cronista.com/informacion-gral/gripe-hubo-un-brote-de-influenza-pero-que-pasa-con-el-covid/ | Success: 12 | Failed: 9
en
Processed: https://www.wyso.org/local-and-statewide-news/2022-04-04/highly-contagious-avian-flu-strain-detected-in-central-ohio-chicken-flock | Success: 13 | Failed: 9
en
Processed: https://www.abc.net.au/news/2022-04-06/covid-19-case-numbers-from-around-the-states-and-territories/100968984 | Success: 14 | Failed: 9
en
Processed: https://www.abc.net.au/news/2022-04-06/covid-19-case-numbers-from-around-the-states-and-territories/100968984 | Success: 15 | Failed: 9
vi
Translating from vi to English...
Error during translation: 
Process

error getting summary: 
Traceback (most recent call last):
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 213, in summary
    self._html(True)
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 148, in _html
    self.html = self._parse(self.input)
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 157, in _parse
    doc, self.encoding = build_doc(input)
                         ^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\htmls.py", line 21, in build_doc
    doc = lxml.html.document_fromstring(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\lxml\html\__init__.py", line 738, in document_fromstring
    raise etree.ParserError(
lxml.etree.ParserError: Document is empty


Processed: https://news.ch7.com/detail/596201 | Success: 3988 | Failed: 2171
uk
Translating from uk to English...
Processed: https://www.telemundo47.com/noticias/local/confirman-cuatro-muertes-vinculadas-al-brote-de-la-enfermedad-del-legionario-en-hogar-de-ancianos-de-manhattan/2325010/ | Success: 3988 | Failed: 2172
Processed: https://www.naidunia.com/madhya-pradesh/gwalior-increasing-outbreak-of-dengue-in-gwalior-fogging-is-not-happening-in-the-city-mosquitoes-are-growing-7838867 | Success: 3989 | Failed: 2172
Processed: https://www.vanguardngr.com/2022/09/gombe-declares-cholera-outbreak-records-10-deaths/ | Success: 3989 | Failed: 2173
Processed: https://www.yahoo.com/video/nys-health-officials-4-deaths-035725198.html | Success: 3989 | Failed: 2174
hi
Translating from hi to English...
vi
Translating from vi to English...
Error during translation: 
Processed: https://suckhoedoisong.vn/so-ca-mac-sot-xuat-huyet-tai-quang-binh-tang-cao-ghi-nhan-hon-3-nghin-ca-169220923153653026.htm | Su

error getting summary: 
Traceback (most recent call last):
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 213, in summary
    self._html(True)
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 148, in _html
    self.html = self._parse(self.input)
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 157, in _parse
    doc, self.encoding = build_doc(input)
                         ^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\htmls.py", line 21, in build_doc
    doc = lxml.html.document_fromstring(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\lxml\html\__init__.py", line 738, in document_fromstring
    raise etree.ParserError(
lxml.etree.ParserError: Document is empty


Processed: https://laodongthudo.vn/quang-binh-ghi-nhan-hon-3-nghin-ca-mac-sot-xuat-huyet-146462.html | Success: 3994 | Failed: 2183
vi
Translating from vi to English...
Error during translation: 
Processed: https://laodongthudo.vn/quang-binh-ghi-nhan-hon-3-nghin-ca-mac-sot-xuat-huyet-146462.html | Success: 3995 | Failed: 2183
vi
Translating from vi to English...
Error during translation: 
Processed: https://laodongthudo.vn/quang-binh-ghi-nhan-hon-3-nghin-ca-mac-sot-xuat-huyet-146462.html | Success: 3996 | Failed: 2183
vi
Translating from vi to English...
Error during translation: 
Processed: https://laodongthudo.vn/quang-binh-ghi-nhan-hon-3-nghin-ca-mac-sot-xuat-huyet-146462.html | Success: 3997 | Failed: 2183
vi
Translating from vi to English...
Error during translation: 
Processed: https://laodongthudo.vn/quang-binh-ghi-nhan-hon-3-nghin-ca-mac-sot-xuat-huyet-146462.html | Success: 3998 | Failed: 2183
vi
Translating from vi to English...
Error during translation: 
Processed: https://l

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from readability import Document
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from googletrans import Translator
from langdetect import detect

# Global counters with thread safety
success_count = 0
failure_count = 0
lock = Lock()

translator = Translator()

# Translate using Google Translate
def detect_and_translate_to_english(text):
    """
    Detects the language of the given text and translates it to English if necessary.
    """
    try:
        detected = translator.detect(text)
        lang_detected = detected.lang  # Access the detected language code
        print(f"Detected language: {lang_detected}")
        if lang_detected == 'en':
            return text  # No need to translate
        else:
            print(f"Translating from {lang_detected} to English...")
            translated_text = translator.translate(text, src=lang_detected, dest='en').text
            return translated_text
    except Exception as e:
        print(f"Error during translation: {e}")
        return "Translation failed"

def fetch_main_content(url, timeout=1.2):
    """
    Fetches and extracts the main content of a webpage using readability and BeautifulSoup.
    Returns the main content as plain text or specific flags for errors.
    If a 4xx error occurs, returns "ClientError".
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTP error for bad responses (4xx or 5xx)

        # Parse the page using BeautifulSoup and readability
        doc = Document(response.text)
        main_content_html = doc.summary()
        main_content_text = doc.title() + "\n\n" + BeautifulSoup(main_content_html, 'html.parser').get_text()


        # Detect and translate if not in English
        return detect_and_translate_to_english(main_content_text.strip())
    except requests.exceptions.HTTPError as e:
        if 400 <= e.response.status_code < 600:
            return "ClientError"  # Flag for client errors
        else:
            return "Unknown"
    except Exception:
        return "Unknown"  # Return "Unknown" for other errors

def process_url(url):
    """
    Processes a single URL and updates the global counters for success or failure.
    """
    global success_count, failure_count
    content = fetch_main_content(url)
    # print(content)
    with lock:  # Lock to safely update global counters
        if content == "Unknown" or content == "ClientError":
            failure_count += 1
        else:
            success_count += 1
        print(f"Processed: {url} | Success: {success_count} | Failed: {failure_count}")
    return content

def process_dataset(input_file, output_file, max_threads=9):
    """
    Reads a dataset, extracts main content for URLs using multithreading, 
    removes rows with client errors (4xx), and writes updated data to a new file.
    """
    global success_count, failure_count
    try:
        df = pd.read_csv(input_file)
    except Exception as e:
        print(f"Error reading input file: {e}")
        return

    if 'url' not in df.columns:
        print("Dataset does not contain a 'url' column.")
        return

    urls = df['url'].tolist()
    contents = []

    # Use ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor(max_threads) as executor:
        future_to_url = {executor.submit(process_url, url): url for url in urls}

        for future in as_completed(future_to_url):
            try:
                contents.append(future.result())
            except Exception as e:
                contents.append("Unknown")
                with lock:  # Safely update failure count
                    failure_count += 1
                print(f"Error processing URL: {future_to_url[future]} | {e}")

    # Add extracted content as a new column
    df['extracted'] = contents

    # Save updated dataset
    try:
        df.to_csv(output_file, index=False)
        print(f"\nUpdated dataset saved to {output_file}")
    except Exception as e:
        print(f"Error saving output file: {e}")

    # Print final summary
    print("\nFinal Summary:")
    print(f"Total URLs: {len(urls)}")
    print(f"Successfully processed: {success_count}")
    print(f"Failed to process: {failure_count}")

def main():
    input_file = "filter_1.csv"  # Input file path
    output_file = "googletranslate.csv"  # Output file path

    process_dataset(input_file, output_file)

if __name__ == "__main__":
    main()


Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36707 | Success: 0 | Failed: 1
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36713 | Success: 0 | Failed: 2
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36708 | Success: 0 | Failed: 3
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36709 | Success: 0 | Failed: 4
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36714 | Success: 0 | Failed: 5
Detected language: ml
Translating from ml to English...
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36712 | Success: 0 | Failed: 6
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36710 | Success: 0 | Failed: 7
Error during translation: the JSON object must be str, bytes or bytearray, not NoneTypeProcessed: https://www.manoramaonline.com/news/latest-news/2020/09/16/india-covid-19-tally-crosses-50-lakh-mark.html | Success: 1 | Failed: 7

Processed: htt

error getting summary: 
Traceback (most recent call last):
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 213, in summary
    self._html(True)
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 148, in _html
    self.html = self._parse(self.input)
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 157, in _parse
    doc, self.encoding = build_doc(input)
                         ^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\htmls.py", line 21, in build_doc
    doc = lxml.html.document_fromstring(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\lxml\html\__init__.py", line 738, in document_fromstring
    raise etree.ParserError(
lxml.etree.ParserError: Document is empty


Processed: http://wakeweekly.com/stories/facility-reports-2nd-outbreak,223261 | Success: 848 | Failed: 797
Processed: https://chicago.suntimes.com/2020/12/30/22206622/colorado-guardsman-1st-reported-us-case-virus-variant | Success: 848 | Failed: 798
Processed: http://www.chinanews.com/gj/2020/10-24/9321187.shtml | Success: 848 | Failed: 799
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=36983 | Success: 848 | Failed: 800
Processed: https://www.promedmail.org/promed-post/?id=7971718 | Success: 848 | Failed: 801
Processed: https://www.promedmail.org/promed-post/?id=8025563 | Success: 848 | Failed: 802
Processed: https://www.promedmail.org/promed-post/?id=7915703 | Success: 848 | Failed: 803
Detected language: id
Translating from id to English...
Detected language: en
Processed: https://www.poultryworld.net/Health/Articles/2020/12/Thousands-of-turkeys-culled-as-bird-flu-strikes-again-678185E/ | Success: 849 | Failed: 803
Processed: http://hindi.cri.cn/news/southasia/

error getting summary: 
Traceback (most recent call last):
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 213, in summary
    self._html(True)
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 148, in _html
    self.html = self._parse(self.input)
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\readability.py", line 157, in _parse
    doc, self.encoding = build_doc(input)
                         ^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\readability\htmls.py", line 21, in build_doc
    doc = lxml.html.document_fromstring(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\VJ\anaconda3\envs\workshop\Lib\site-packages\lxml\html\__init__.py", line 738, in document_fromstring
    raise etree.ParserError(
lxml.etree.ParserError: Document is empty


Processed: http://news.cnwest.com/bwyc/a/2021/02/08/19492028.html | Success: 1443 | Failed: 1491
Processed: http://www.businessworld.in/article/COVID-19-patients-at-higher-risk-of-tuberculosis-Russian-Health-Ministry-warns/08-02-2021-374939 | Success: 1443 | Failed: 1492
Processed: https://www.patrika.com/surat-news/corona-infection-free-dadra-nagar-haveli-6677240/ | Success: 1443 | Failed: 1493
Processed: http://www.xinhuanet.com/politics/2021-02/05/c_1127067935.htm | Success: 1444 | Failed: 1493
Detected language: hi
Translating from hi to English...
Processed: http://outbreaknewstoday.com/montana-reports-1st-uk-covid-19-variant-b-1-1-7-cases-75696/ | Success: 1444 | Failed: 1494
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=38025 | Success: 1444 | Failed: 1495
Processed: http://outbreaks.globalincidentmap.com/eventdetail.php?ID=38023 | Success: 1444 | Failed: 1496
Processed: https://www.amarujala.com/uttar-pradesh/siddharthnagar/a-corona-infected-was-found-in-