In [3]:
import feedparser

In [4]:
?feedparser

In [11]:
APJL_URL = "./0004-637X"



dict_keys(['feed', 'entries', 'bozo', 'encoding', 'version', 'namespaces'])

In [14]:
feed = feedparser.parse(APJL_URL)
feed.keys()
entrylist = []
for ent in feed["entries"]:
    entrylist.append(ent)

In [16]:
len(entrylist)

23

In [17]:
ent = entrylist[0]

In [18]:
ent.keys()

dict_keys(['id', 'title', 'title_detail', 'links', 'link', 'summary', 'summary_detail', 'authors', 'author', 'author_detail', 'updated', 'updated_parsed', 'dc_source', 'iop_authors', 'iop_citation', 'iop_pdf', 'prism_coverdisplaydate', 'prism_number', 'prism_volume', 'prism_publicationname', 'prism_startingpage', 'prism_doi'])

In [19]:
ent['id']

'http://iopscience.iop.org/0004-637X/869/1/24'

In [20]:
ent['title']

'Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy\r\nSpectroscopic Instrument'

In [33]:
ent['summary'].replace("\n", " ") \
    .replace("\r", " ")

'The Dark Energy Spectroscopic Instrument (DESI) is a Stage IV ground-based dark energy experiment  that will be employed on the Mayall 4 m Telescope to study the expansion history of the universe. In  the era of massively multiplexed fiber-fed spectrographs, DESI will push the boundaries of fiber  spectroscopy with a design capable of taking 5000 simultaneous spectra over 360 to 980 nm. The  instrument utilizes a suite of three-channel spectrographs, where volume-phase holographic (VPH)  gratings provide dispersions. Thirty-six VPH gratings were produced and their performances were  evaluated at the Lawrence Berkeley National Laboratory. We present the design and the evaluation  tests for the production run of the VPH gratings, verifying the incidence angle, area-weighted  efficiency, and wavefront errors (WFEs). We also present the specialized test set-up developed  on-site to assess the grating performances. Measurements of the VPH gratings show high consistency  in area-weighted ef

In [133]:
from flask import Flask, jsonify, Response
import feedparser
from html.parser import HTMLParser
import dateutil.parser
import datetime
import logging
import json

def simplecleanup(string):
    return string.replace("\n", " ") \
            .replace("\r", " ") \
            .replace("∼", "around ")

class ArticleEntry_APJL:
    """
        a class for an article entry
    """
    Nauthor_display = 3 # only show the first three authors

    def __init__(self, entry, updateDate_str, index=0):
        self.entry = entry
        self.updateDate_str = updateDate_str
        self.index = index
        self.id = str(uuid.uuid3(uuid.NAMESPACE_URL, self.entry['id']))
        self.title = simplecleanup(self.entry['title'])
        self.link = self.entry['link']
        self.updateDate = self.generate_updateDate()

        self.summary = self.process_summary(self.entry['summary'])

        self.generate_author_list()
        self.maintext = self.generate_mainText_from_summary()

    def generate_updateDate(self):
        updateDate_datetime = dateutil.parser.parse(self.updateDate_str)
        updateDate_datetime += datetime.timedelta(seconds=int(self.index))

        return updateDate_datetime.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    def generate_author_list(self):
        self.author_list_full = self.entry['author'].split(", ")
        self.Nauthor = len(self.author_list_full)

        na_short = min(self.Nauthor, self.Nauthor_display) # show 3 or less authors
        self.author_list_short = []
        for i in range(na_short):
            self.author_list_short.append(self.author_list_full[i])

        if na_short < self.Nauthor:
            self.author_list_short.append("et al")
    
    def generate_mainText_from_summary(self):
        """
            convert the "summary" from arxiv rss (v2) into mainText format.
        """
        
        maintext = self.title + "by " + ", ".join(self.author_list_short) + ". " \
            + self.summary

        return maintext

    def process_summary(self, raw_summary):
        return raw_summary.replace("$"," ") \
            .replace("\r", " ") \
            .replace("Msun", " solar masses") \
            .replace("M_odot", "solar masses") \
            .replace("<", "less than") \
            .replace(">", "greater than") \
            .replace("\n", " ") \
            .replace("~", " about ") \
            .replace("\\rm", " ") \
            .replace("\\", " ") \
            .replace("sim", "about") \
            .replace("mu m", "micro-meters") \
            .replace("_{", " ") \
            .replace("}", " ") \
            .replace("_", " ") \


def construct_alexa_dict(entry, updateDate_str, index):
    """
        1st version without author names..
        
        Example
    """
    this = ArticleEntry_APJL(entry, updateDate_str, index)
    data_dict = {}
    data_dict["uid"] = this.id
    data_dict["updateDate"] = this.updateDate
    data_dict["titleText"] = this.title
    data_dict["redirectionUrl"] = this.link
    data_dict["mainText"] = this.maintext
    
    return data_dict

In [134]:
for i in range(len(entrylist)):
    print(entrylist[i]['date'])
    print(simplecleanup(entrylist[i]['title']), "\n")
    print(simplecleanup(entrylist[i]['summary']), "\n")

2018-12-07T00:00:00Z
Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy  Spectroscopic Instrument 

The Dark Energy Spectroscopic Instrument (DESI) is a Stage IV ground-based dark energy experiment  that will be employed on the Mayall 4 m Telescope to study the expansion history of the universe. In  the era of massively multiplexed fiber-fed spectrographs, DESI will push the boundaries of fiber  spectroscopy with a design capable of taking 5000 simultaneous spectra over 360 to 980 nm. The  instrument utilizes a suite of three-channel spectrographs, where volume-phase holographic (VPH)  gratings provide dispersions. Thirty-six VPH gratings were produced and their performances were  evaluated at the Lawrence Berkeley National Laboratory. We present the design and the evaluation  tests for the production run of the VPH gratings, verifying the incidence angle, area-weighted  efficiency, and wavefront errors (WFEs). We also present the specialized test s

In [135]:
ent['title']

'Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy\r\nSpectroscopic Instrument'

In [136]:


this = ArticleEntry_APJL(ent, ent['date'], 1)

In [137]:
this.id

'0e942dea-fa05-303d-8098-ec420d33a32c'

In [138]:
this.author_list_short

['Yuzo Ishikawa', 'Martin M. Sirk', 'Jerry Edelstein', 'et al']

In [139]:
def main():
    feed = feedparser.parse(APJL_URL)
    entrylist = []
    for ent in feed["entries"]:
        entrylist.append(ent)

    json_list = [ construct_alexa_dict(entrylist[i], entrylist[i]['updated'], i) for i in range(5)]
    output = json.dumps(json_list)
    return output

In [140]:
main()

'[{"uid": "0e942dea-fa05-303d-8098-ec420d33a32c", "updateDate": "2018-12-07T00:00:00.000000Z", "titleText": "Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy  Spectroscopic Instrument", "redirectionUrl": "http://iopscience.iop.org/0004-637X/869/1/24", "mainText": "Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy  Spectroscopic Instrumentby Yuzo Ishikawa, Martin M. Sirk, Jerry Edelstein, et al. The Dark Energy Spectroscopic Instrument (DESI) is a Stage IV ground-based dark energy experiment  that will be employed on the Mayall 4 m Telescope to study the expansion history of the universe. In  the era of massively multiplexed fiber-fed spectrographs, DESI will push the boundaries of fiber  spectroscopy with a design capable of taking 5000 aboutultaneous spectra over 360 to 980 nm. The  instrument utilizes a suite of three-channel spectrographs, where volume-phase holographic (VPH)  gratings provide dispersions. 

In [124]:
ent

{'id': 'http://iopscience.iop.org/0004-637X/869/1/24',
 'title': 'Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy\r\nSpectroscopic Instrument',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': '',
  'value': 'Comprehensive Measurements of the Volume-phase Holographic Gratings for the Dark Energy\r\nSpectroscopic Instrument'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'http://iopscience.iop.org/0004-637X/869/1/24'}],
 'link': 'http://iopscience.iop.org/0004-637X/869/1/24',
 'summary': 'The Dark Energy Spectroscopic Instrument (DESI) is a Stage IV ground-based dark energy experiment\r\nthat will be employed on the Mayall 4 m Telescope to study the expansion history of the universe. In\r\nthe era of massively multiplexed fiber-fed spectrographs, DESI will push the boundaries of fiber\r\nspectroscopy with a design capable of taking 5000 simultaneous spectra over 360 to 980 nm. The\r\ninstrument utilizes a

In [109]:
import uuid

In [115]:
str(uuid.uuid3(uuid.NAMESPACE_URL, ent['id']))

'0e942dea-fa05-303d-8098-ec420d33a32c'