# Installing Dependencies

First, we install the required packages listed in `requirements.txt`.

In [1]:
!pip install -r requirements.txt



Here we add `src` to our PYTHONPATH if it is not already there.

In [3]:
import os

# Get the current PYTHONPATH, or an empty string if it is not set.
pythonpath = os.environ.get('PYTHONPATH', '')

# Only add 'src' if it is not already one of the PYTHONPATH entries.
# os.pathsep is the platform's path-list separator (':' on POSIX, ';' on
# Windows), so the check and the update also work outside Linux/macOS.
# NOTE(review): editing os.environ presumably only affects processes started
# afterwards (e.g. `!` shell commands), not the sys.path of the kernel that is
# already running - confirm whether sys.path needs to be extended as well.
if 'src' not in pythonpath.split(os.pathsep):
    os.environ['PYTHONPATH'] = f'{pythonpath}{os.pathsep}src' if pythonpath else 'src'

print(os.environ.get('PYTHONPATH', 'PYTHONPATH not set'))


src


Now we install the refined package.

In [4]:
!pip install 'https://github.com/amazon-science/ReFinED/archive/refs/tags/V1.zip'

Collecting https://github.com/amazon-science/ReFinED/archive/refs/tags/V1.zip
  Using cached https://github.com/amazon-science/ReFinED/archive/refs/tags/V1.zip
  Preparing metadata (setup.py) ... [?25ldone


# Testing the Model with WikiData entity set

We will download a pretrained model. Note that we also download precomputed text descriptions to save processing time, and we download the model files now so that we do not have to download them again later.

In [1]:
from refined.inference.processor import Refined

# Load the pretrained 'wikipedia_model' with the "wikipedia" entity set,
# using precomputed descriptions and downloading any required files.
refined = Refined.from_pretrained(
    model_name='wikipedia_model',
    entity_set="wikipedia",
    use_precomputed_descriptions=True,
    download_files=True,
)
print("Done loading in Model")

Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight

Done loading in Model


## Testing some smaller pieces of text

In [2]:
# Short sentence in a sports context; print the entity spans the model links.
sentence = "England won the FIFA World Cup in 1966."
spans = refined.process_text(sentence)
print(spans)

[['England', Entity(wikidata_entity_id=Q47762, wikipedia_entity_title=England national football team), 'ORG'], ['FIFA World Cup', Entity(wikidata_entity_id=Q19317, wikipedia_entity_title=FIFA World Cup), 'EVENT']]




In [3]:
# Surname-only mention ("Roosevelt"); print the entity spans the model links.
sentence = "President Roosevelt created the New Deal."
spans = refined.process_text(sentence)
print(spans)

[['Roosevelt', Entity(wikidata_entity_id=Q8007, wikipedia_entity_title=Franklin D. Roosevelt), 'PERSON'], ['New Deal', Entity(wikidata_entity_id=Q186356, wikipedia_entity_title=New Deal), None]]


In [40]:
# Full-name mention plus a common-noun concept; print the linked entity spans.
sentence = "Martin Luther King Jr. was a leading figure in the civil rights movement."
spans = refined.process_text(sentence)
print(spans)

[['Martin Luther King Jr.', Entity(wikidata_entity_id=Q8027, wikipedia_entity_title=Martin Luther King Jr.), 'PERSON'], ['civil rights movement', Entity(wikidata_entity_id=Q48537, wikipedia_entity_title=Civil rights movement), None]]


In [41]:
# Ambiguous name ("George Bush"); print which entity the model picks.
sentence = "George Bush was President"
spans = refined.process_text(sentence)
print(spans)

[['George Bush', Entity(wikidata_entity_id=Q23505, wikipedia_entity_title=George H. W. Bush), 'PERSON']]


Testing on cases from refined_demo.py

In [43]:
from refined.data_types.base_types import Span

# Visual divider printed between examples.
SEPARATOR = '\n' + '****' * 10 + '\n'


def make_span(text, mention):
    """Build a Span for the first occurrence of `mention` in `text`.

    `start` and `ln` are derived from the text instead of being hard-coded.
    The previous hard-coded values used ln=10 for 'Joe Biden', which is only
    9 characters long (assumes `ln` is the mention's character length, as the
    'Scranton' span with start=22, ln=8 indicates - TODO confirm against the
    Span definition).

    Raises:
        ValueError: if `mention` does not occur in `text`.
    """
    return Span(text=mention, start=text.index(mention), ln=len(mention))


def run_example(text, spans=None):
    """Run entity linking on `text`, print the text, the result and a divider.

    Args:
        text: the sentence to process.
        spans: optional list of pre-identified mention Spans; when omitted,
            `refined.process_text` is called with the text only.

    Returns:
        Whatever `refined.process_text` returns for this input.
    """
    if spans is None:
        linked = refined.process_text(text)
    else:
        linked = refined.process_text(text, spans=spans)
    print(text)
    print(linked)
    print(SEPARATOR)
    return linked


# (text, mentions) pairs. `mentions` is None when no mention spans are
# provided, otherwise a tuple of mention strings that occur in the text.
examples = [
    # Difficult disambiguation example
    ('Michael Jordan is a Professor of Computer Science at UC Berkeley.', None),
    # Example where entity mention spans are provided
    ("Joe Biden was born in Scranton.", ('Joe Biden', 'Scranton')),
    # Example with numeric value
    ('The population of England is 55,000,000.', None),
    # Example with currency
    ("The net worth of Elon Musk is $200B.", None),
    # Example with time
    ("It takes 60 minutes bake a potato.", None),
    # Example with an ordinal
    ("The first book in the Harry Potter series is Harry Potter and the Philosopher's Stone.", None),
    # Example with age
    ("Barack Obama was 48 years old when he became president of the United States.", None),
    # Example with percentage
    ("The rural population of England was 10% in 2020.", None),
    # Example with height (quantity)
    ("Joe Biden is 1.82m tall.", None),
    # Example with Wikidata entity that is not in Wikipedia
    ("Andreas Hecht is a professor.", None),
]

print(SEPARATOR)
for text, mentions in examples:
    provided_spans = None if mentions is None else [make_span(text, mention) for mention in mentions]
    spans = run_example(text, spans=provided_spans)

# Batched example
texts = ["Andreas Hecht is a professor.", "Michael Jordan is a Professor of Computer Science at UC Berkeley."]
docs = refined.process_text_batch(texts)
for doc in docs:
    print(f'Document: {doc.text}, spans: {doc.spans}')
# This divider carries one extra trailing newline, as in the original demo output.
print(SEPARATOR + '\n')

# Batched example with spans
texts = ["Joe Biden was born in Scranton."] * 2
# Build fresh Span objects for every document (no sharing between documents),
# otherwise in-place modifications can cause issues.
spanss = [[make_span(doc_text, mention) for mention in ('Joe Biden', 'Scranton')] for doc_text in texts]
docs = refined.process_text_batch(texts=texts, spanss=spanss)
for doc in docs:
    print(f'Document: {doc.text}, spans: {doc.spans}')
print(SEPARATOR)


****************************************

Michael Jordan is a Professor of Computer Science at UC Berkeley.
[['Michael Jordan', Entity(wikidata_entity_id=Q41421, wikipedia_entity_title=Michael Jordan), 'PERSON'], ['UC Berkeley', Entity(wikidata_entity_id=Q168756, wikipedia_entity_title=University of California, Berkeley), 'ORG']]

****************************************

Joe Biden was born in Scranton.
[['Joe Biden', Entity(wikidata_entity_id=Q6279, wikipedia_entity_title=Joe Biden), 'PERSON'], ['Scranton', Entity(wikidata_entity_id=Q271395, wikipedia_entity_title=Scranton, Pennsylvania), 'FAC']]

****************************************

The population of England is 55,000,000.
[['population', Entity(wikidata_entity_id=Q2625603, wikipedia_entity_title=Population), None], ['England', Entity(wikidata_entity_id=Q21, wikipedia_entity_title=England), 'GPE']]

****************************************

The net worth of Elon Musk is $200B.
[['Elon Musk', Entity(wikidata_entity_id=Q317521, w

## Testing on longer text

Here I test the model on a segment from a recent UCLA press release: https://newsroom.ucla.edu/releases/gene-block-to-step-down-as-ucla-chancellor

In [44]:
# Longer input for entity linking: an excerpt from the UCLA press release about
# Gene Block stepping down as chancellor. The string is kept verbatim
# (including the leading/trailing newline and typographic quotes).
text = """
In 2006, soon after receiving the news that he would become UCLA’s next chancellor, Gene Block and his wife, Carol, finished their meal with a fortune cookie. The message? “In order to cross a great river, you have to take a great leap.”

Having leapt across countless rivers since, Block will step down from his role as UCLA’s top executive on July 31, 2024, after 17 years of transformative leadership.

In an era when most university leaders’ tenures last less than six years, Block has become an elder statesman in higher education and a guiding force not only for UCLA but for California and the nation. The choice to call it a day, he admitted, was bittersweet.

“This decision was by no means an easy one,” he wrote today in a message to the Bruin community. “But I have the greatest confidence in UCLA’s future, and I feel that the time is right — for me, for my family and for our campus.”

University of California President Michael Drake praised Block’s service and contributions to the campus.

“For nearly two decades, Chancellor Block has helped UCLA grow into a powerhouse of excellence, opportunity and access. He has been a dear friend and a dedicated partner in tackling many of the university’s challenges,” Drake said. “Chancellor Block’s efforts to forge new community partnerships, enhance the student experience and grow the research enterprise will benefit UCLA and the state of California for years to come.”

When he arrived on campus in August 2007 after nearly three decades at the University of Virginia, Block insisted that his goal for UCLA was “to make a great place even better.”

He has delivered on that promise, and then some. Under his stewardship, research funding has doubled, and the campus’s annual budget has increased from $4 billion to nearly $11 billion. Enrollment has grown by 9,000 students, and the campus has added more than a dozen new housing facilities.

The Block legacy

Visit our tribute site to view photos and video, see a year-by-year timeline of Gene Block’s tenure and leave a message for the chancellor.

The campus has risen from No. 4 to No. 1 among public universities in U.S. News & World Report’s annual rankings, and UCLA Health is consistently rated among the nation’s top five hospitals. In athletics, Bruin teams captured 21 NCAA championships during Block’s tenure, boosting UCLA’s total to 121 — second-most among all colleges.

The Centennial Campaign for UCLA, at the time of its launch the largest fundraising effort ever undertaken by a public university, raised nearly $5.5 billion between 2014 and 2019, helping to bolster student scholarships, faculty support, research, capital projects and the campus’s endowment.

UCLA acquired new properties in the South Bay and downtown Los Angeles to better serve students and strengthen ties to the region’s diverse communities, opened nearly 25 newly constructed buildings on campus and in Westwood, and grew from the seventh-largest to the fourth-largest employer in Los Angeles County.

And UCLA Health expanded access to its world-class care with the addition of some 200 clinics throughout Southern California, the acquisition of a Mid-Wilshire property for a new state-of-the-art psychiatric hospital and the launch of initiatives including the Homeless Healthcare Collaborative, which provides free mobile medical and behavioral care to unhoused people in Los Angeles.

Service has always been one of the three components of UCLA’s mission, along with research and education. But the ways in which students, staff and faculty serve the Los Angeles and global communities — through volunteerism and community-engaged scholarship — took on new dimensions during Block’s tenure. In 2009, his team initiated UCLA Volunteer Day, with more than 4,000 Bruins fanning out across Los Angeles for a day of hands-on community service. The program has become a fall ritual, with some 8,000 Bruins participating each year in the nation’s largest service project for new university students.
"""


In [45]:
%time
spans = refined.process_text(text, apply_class_check = True)

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 18.8 µs


In [46]:
# Bare expression: the notebook shows the current value of `spans` as the cell output.
spans

[['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['Gene Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['Carol', Entity not linked to a knowledge base, 'PERSON'],
 ['Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['California', Entity(wikidata_entity_id=Q99, wikipedia_entity_title=California), 'GPE'],
 ['Bruin', Entity not linked to a knowledge base, None],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['University of California', E

Testing the time needed to process a segment of Biden's 2023 State of the Union address. It contains around 4,500 words and has many entities to link. I took the excerpt from here: https://www.whitehouse.gov/briefing-room/speeches-remarks/2023/02/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery/#:~:text=We%20are%20the%20only%20country,ever%20created%20in%20four%20years.

In [47]:
text = """
Mr. Speaker. Madam Vice President. Our First Lady and Second Gentleman.

Members of Congress and the Cabinet. Leaders of our military.

Mr. Chief Justice, Associate Justices, and retired Justices of the Supreme Court.

And you, my fellow Americans.

I start tonight by congratulating the members of the 118th Congress and the new Speaker of the House, Kevin McCarthy.

Mr. Speaker, I look forward to working together.

I also want to congratulate the new leader of the House Democrats and the first Black House Minority Leader in history, Hakeem Jeffries.

Congratulations to the longest serving Senate Leader in history, Mitch McConnell.

And congratulations to Chuck Schumer for another term as Senate Majority Leader, this time with an even bigger majority.

And I want to give special recognition to someone who I think will be considered the greatest Speaker in the history of this country, Nancy Pelosi.

The story of America is a story of progress and resilience. Of always moving forward. Of never giving up.

A story that is unique among all nations.

We are the only country that has emerged from every crisis stronger than when we entered it.

That is what we are doing again.

Two years ago, our economy was reeling.

As I stand here tonight, we have created a record 12 million new jobs, more jobs created in two years than any president has ever created in four years.

Two years ago, COVID had shut down our businesses, closed our schools, and robbed us of so much.

Today, COVID no longer controls our lives.

And two years ago, our democracy faced its greatest threat since the Civil War.

Today, though bruised, our democracy remains unbowed and unbroken.

As we gather here tonight, we are writing the next chapter in the great American story, a story of progress and resilience. When world leaders ask me to define America, I define our country in one word: Possibilities.

You know, we’re often told that Democrats and Republicans can’t work together.

But over these past two years, we proved the cynics and the naysayers wrong.

Yes, we disagreed plenty. And yes, there were times when Democrats had to go it alone.

But time and again, Democrats and Republicans came together.

Came together to defend a stronger and safer Europe.

Came together to pass a once-in-a-generation infrastructure law, building bridges to connect our nation and people.

Came together to pass one of the most significant laws ever, helping veterans exposed to toxic burn pits.

In fact, I signed over 300 bipartisan laws since becoming President. From reauthorizing the Violence Against Women Act, to the Electoral Count Reform Act, to the Respect for Marriage Act that protects the right to marry the person you love.

To my Republican friends, if we could work together in the last Congress, there is no reason we can’t work together in this new Congress.

The people sent us a clear message. Fighting for the sake of fighting, power for the sake of power, conflict for the sake of conflict, gets us nowhere.

And that’s always been my vision for our country.

To restore the soul of the nation.

To rebuild the backbone of America, the middle class.

To unite the country.

We’ve been sent here to finish the job.

For decades, the middle class was hollowed out.

Too many good-paying manufacturing jobs moved overseas. Factories at home closed down.

Once-thriving cities and towns became shadows of what they used to be.

And along the way, something else was lost.

Pride. That sense of self-worth.

I ran for President to fundamentally change things, to make sure the economy works for everyone so we can all feel pride in what we do.

To build an economy from the bottom up and the middle out, not from the top down. Because when the middle class does well, the poor have a ladder up and the wealthy still do very well. We all do well.

As my Dad used to say, a job is about a lot more than a paycheck. It’s about your dignity. It’s about respect. It’s about being able to look your kid in the eye and say, “Honey –it’s going to be OK,” and mean it.

So, let’s look at the results. Unemployment rate at 3.4%, a 50-year low. Near record low unemployment for Black and Hispanic workers.

We’ve already created 800,000 good-paying manufacturing jobs, the fastest growth in 40 years.

Where is it written that America can’t lead the world in manufacturing again?

For too many decades, we imported products and exported jobs.

Now, thanks to all we’ve done, we’re exporting American products and creating American jobs.

Inflation has been a global problem because of the pandemic that disrupted supply chains and Putin’s war that disrupted energy and food supplies.

But we’re better positioned than any country on Earth.

We have more to do, but here at home, inflation is coming down.

Here at home, gas prices are down $1.50 a gallon since their peak.

Food inflation is coming down.

Inflation has fallen every month for the last six months while take home pay has gone up.

Additionally, over the last two years, a record 10 million Americans applied to start a new small business.

Every time somebody starts a small business, it’s an act of hope.

And the Vice President will continue her work to ensure more small businesses can access capital and the historic laws we enacted.

Standing here last year, I shared with you a story of American genius and possibility.

Semiconductors, the small computer chips the size of your fingertip that power everything from cellphones to automobiles, and so much more. These chips were invented right here in America.

America used to make nearly 40% of the world’s chips.

But in the last few decades, we lost our edge and we’re down to producing only 10%. We all saw what happened during the pandemic when chip factories overseas shut down.

Today’s automobiles need up to 3,000 chips each, but American automakers couldn’t make enough cars because there weren’t enough chips.

Car prices went up. So did everything from refrigerators to cellphones.

We can never let that happen again.

That’s why we came together to pass the bipartisan CHIPS and Science Act.

We’re making sure the supply chain for America begins in America.

We’ve already created 800,000 manufacturing jobs even without this law.

With this new law, we will create hundreds of thousands of new jobs across the country.

That’s going to come from companies that have announced more than $300 billion in investments in American manufacturing in the last two years.

Outside of Columbus, Ohio, Intel is building semiconductor factories on a thousand acres – a literal field of dreams.

That’ll create 10,000 jobs. 7,000 construction jobs. 3,000 jobs once the factories are finished.

Jobs paying $130,000 a year, and many don’t require a college degree.

Jobs where people don’t have to leave home in search of opportunity.

And it’s just getting started.

Think about the new homes, new small businesses, and so much more that will come to life.

Talk to mayors and Governors, Democrats and Republicans, and they’ll tell you what this means to their communities.

We’re seeing these fields of dreams transform the heartland.

But to maintain the strongest economy in the world, we also need the best infrastructure in the world.

We used to be #1 in the world in infrastructure, then we fell to #13th.

Now we’re coming back because we came together to pass the Bipartisan Infrastructure Law, the largest investment in infrastructure since President Eisenhower’s Interstate Highway System.

Already, we’ve funded over 20,000 projects, including at major airports from Boston to Atlanta to Portland.

These projects will put hundreds of thousands of people to work rebuilding our highways, bridges, railroads, tunnels, ports and airports, clean water, and high-speed internet across America.

Urban. Suburban. Rural. Tribal.

And we’re just getting started. I sincerely thank my Republican friends who voted for the law.

And to my Republican friends who voted against it but still ask to fund projects in their districts, don’t worry.

I promised to be the president for all Americans.

We’ll fund your projects. And I’ll see you at the ground-breaking.

This law will help further unite all of America.

Major projects like the Brent Spence bridge between Kentucky and Ohio over the Ohio River. Built 60 years ago. Badly in need of repairs.

One of the nation’s most congested freight routes carrying $2 billion worth of freight every day. Folks have been talking about fixing it for decades, but we’re finally going to get it done.

I went there last month with Democrats and Republicans from both states to deliver $1.6 billion for this project.

While I was there, I met an ironworker named Sara, who is here tonight.

For 30 years, she’s been a proud member of Ironworkers Local 44, known as the “cowboys of the sky” who built the Cincinnati skyline.

Sara said she can’t wait to be ten stories above the Ohio River building that new bridge. That’s pride.

That’s what we’re also building – Pride.

We’re also replacing poisonous lead pipes that go into 10 million homes and 400,000 schools and childcare centers, so every child in America can drink clean water.

We’re making sure that every community has access to affordable, high-speed internet.

No parent should have to drive to a McDonald’s parking lot so their kid can do their homework online.

And when we do these projects, we’re going to Buy American.

Buy American has been the law of the land since 1933. But for too long, past administrations have found ways to get around it.

Not anymore.

Tonight, I’m also announcing new standards to require all construction materials used in federal infrastructure projects to be made in America.

American-made lumber, glass, drywall, fiber optic cables.

And on my watch, American roads, American bridges, and American highways will be made with American products.

My economic plan is about investing in places and people that have been forgotten. Amid the economic upheaval of the past four decades, too many people have been left behind or treated like they’re invisible.

Maybe that’s you, watching at home.

You remember the jobs that went away. And you wonder whether a path even exists anymore for you and your children to get ahead without moving away.

I get it.

That’s why we’re building an economy where no one is left behind.

Jobs are coming back, pride is coming back, because of the choices we made in the last two years. This is a blue-collar blueprint to rebuild America and make a real difference in your lives.

For example, too many of you lay in bed at night staring at the ceiling, wondering what will happen if your spouse gets cancer, your child gets sick, or if something happens to you.

Will you have the money to pay your medical bills? Will you have to sell the house?

I get it. With the Inflation Reduction Act that I signed into law, we’re taking on powerful interests to bring your health care costs down so you can sleep better at night.

You know, we pay more for prescription drugs than any major country on Earth.

For example, one in ten Americans has diabetes.

Every day, millions need insulin to control their diabetes so they can stay alive. Insulin has been around for 100 years. It costs drug companies just $10 a vial to make.

But, Big Pharma has been unfairly charging people hundreds of dollars – and making record profits.

Not anymore.

We capped the cost of insulin at $35 a month for seniors on Medicare.

But there are millions of other Americans who are not on Medicare, including 200,000 young people with Type I diabetes who need insulin to save their lives.

Let’s finish the job this time.

Let’s cap the cost of insulin at $35 a month for every American who needs it.

This law also caps out-of-pocket drug costs for seniors on Medicare at a maximum $2,000 per year when there are in fact many drugs, like expensive cancer drugs, that can cost up to $10,000, $12,000, and $14,000 a year.

If drug prices rise faster than inflation, drug companies will have to pay Medicare back the difference.

And we’re finally giving Medicare the power to negotiate drug prices. Bringing down prescription drug costs doesn’t just save seniors money.

It will cut the federal deficit, saving tax payers hundreds of billions of dollars on the prescription drugs the government buys for Medicare.

Why wouldn’t we want to do that?

Now, some members here are threatening to repeal the Inflation Reduction Act.

Make no mistake, if you try to do anything to raise the cost of prescription drugs, I will veto it.

I’m pleased to say that more Americans have health insurance now than ever in history.

A record 16 million people are enrolled under the Affordable Care Act.

Thanks to the law I signed last year, millions are saving $800 a year on their premiums.

But the way that law was written, that benefit expires after 2025.

Let’s finish the job, make those savings permanent, and expand coverage to those left off Medicaid.

Look, the Inflation Reduction Act is also the most significant investment ever to tackle the climate crisis.

Lowering utility bills, creating American jobs, and leading the world to a clean energy future.

I’ve visited the devastating aftermaths of record floods and droughts, storms and wildfires.

In addition to emergency recovery from Puerto Rico to Florida to Idaho, we are rebuilding for the long term.

New electric grids able to weather the next major storm.

Roads and water systems to withstand the next big flood.

Clean energy to cut pollution and create jobs in communities too often left behind.

We’re building 500,000 electric vehicle charging stations installed across the country by tens of thousands of IBEW workers.

And helping families save more than $1,000 a year with tax credits for the purchase of electric vehicles and energy-efficient appliances.

Historic conservation efforts to be responsible stewards of our lands.

Let’s face reality.

The climate crisis doesn’t care if your state is red or blue. It is an existential threat.

We have an obligation to our children and grandchildren to confront it. I’m proud of how America is at last stepping up to the challenge.

But there’s so much more to do.

We will finish the job.

And we pay for these investments in our future by finally making the wealthiest and the biggest corporations begin to pay their fair share.

I’m a capitalist. But just pay your fair share.

And I think a lot of you at home agree with me that our present tax system is simply unfair.

The idea that in 2020, 55 of the biggest companies in America made $40 billion in profits and paid zero in federal income taxes?

That’s simply not fair.

But now, because of the law I signed, billion-dollar companies have to pay a minimum of 15%.

Just 15%.

That’s less than a nurse pays. Let me be clear.

Under my plan, nobody earning less than $400,000 a year will pay an additional penny in taxes.

Nobody. Not one penny.

But there’s more to do.

Let’s finish the job. Reward work, not just wealth. Pass my proposal for a billionaire minimum tax.

Because no billionaire should pay a lower tax rate than a school teacher or a firefighter.

You may have noticed that Big Oil just reported record profits.

Last year, they made $200 billion in the midst of a global energy crisis.

It’s outrageous.

They invested too little of that profit to increase domestic production and keep gas prices down.

Instead, they used those record profits to buy back their own stock, rewarding their CEOs and shareholders.

Corporations ought to do the right thing.

That’s why I propose that we quadruple the tax on corporate stock buybacks to encourage long term investments instead.

They will still make a considerable profit.

Let’s finish the job and close the loopholes that allow the very wealthy to avoid paying their taxes.

Instead of cutting the number of audits of wealthy tax payers, I signed a law that will reduce the deficit by $114 billion by cracking down on wealthy tax cheats.

That’s being fiscally responsible.

In the last two years, my administration cut the deficit by more than $1.7 trillion – the largest deficit reduction in American history.

Under the previous administration, America’s deficit went up four years in a row.

Because of those record deficits, no president added more to the national debt in any four years than my predecessor.

Nearly 25% of the entire national debt, a debt that took 200 years to accumulate, was added by that administration alone.

How did Congress respond to all that debt?

They lifted the debt ceiling three times without preconditions or crisis.

They paid America’s bills to prevent economic disaster for our country.

Tonight, I’m asking this Congress to follow suit.

Let us commit here tonight that the full faith and credit of the United States of America will never, ever be questioned.

Some of my Republican friends want to take the economy hostage unless I agree to their economic plans. All of you at home should know what their plans are.

Instead of making the wealthy pay their fair share, some Republicans want Medicare and Social Security to sunset every five years.

That means if Congress doesn’t vote to keep them, those programs will go away.

Other Republicans say if we don’t cut Social Security and Medicare, they’ll let America default on its debt for the first time in our history.

I won’t let that happen.

Social Security and Medicare are a lifeline for millions of seniors.

Americans have been paying into them with every single paycheck since they started working.

So tonight, let’s all agree to stand up for seniors. Stand up and show them we will not cut Social Security. We will not cut Medicare.

Those benefits belong to the American people. They earned them.

If anyone tries to cut Social Security, I will stop them. And if anyone tries to cut Medicare, I will stop them.

I will not allow them to be taken away.

Not today. Not tomorrow. Not ever.

Next month when I offer my fiscal plan, I ask my Republican friends to offer their plan.

We can sit down together and discuss both plans together.

My plan will lower the deficit by $2 trillion.

I won’t cut a single Social Security or Medicare benefit.

In fact, I will extend the Medicare Trust Fund by at least two decades.

I will not raise taxes on anyone making under $400,000 a year. And I will pay for the ideas I’ve talked about tonight by making the wealthy and big corporations begin to pay their fair share.

Look, here’s the deal. Big corporations aren’t just taking advantage of the tax code. They’re taking advantage of you, the American consumer.

Here’s my message to all of you out there: I have your back. We’re already preventing insurance companies from sending surprise medical bills, stopping 1 million surprise bills a month.

We’re protecting seniors’ lives and life savings by cracking down on nursing homes that commit fraud, endanger patient safety, or prescribe drugs they don’t need.

Millions of Americans can now save thousands of dollars because they can finally get hearing aids over-the-counter without a prescription.

Capitalism without competition is not capitalism. It is exploitation.

Last year I cracked down on foreign shipping companies that were making you pay higher prices for everyday goods coming into our country.

I signed a bipartisan bill that cut shipping costs by 90%, helping American farmers, businesses, and consumers.

Let’s finish the job.

Pass bipartisan legislation to strengthen antitrust enforcement and prevent big online platforms from giving their own products an unfair advantage.

My administration is also taking on “junk” fees, those hidden surcharges too many businesses use to make you pay more.

For example, we’re making airlines show you the full ticket price upfront and refund your money if your flight is cancelled or delayed.

We’ve reduced exorbitant bank overdraft fees, saving consumers more than $1 billion a year.

We’re cutting credit card late fees by 75%, from $30 to $8.

Junk fees may not matter to the very wealthy, but they matter to most folks in homes like the one I grew up in. They add up to hundreds of dollars a month.

They make it harder for you to pay the bills or afford that family trip.

I know how unfair it feels when a company overcharges you and gets away with it.

Not anymore.

We’ve written a bill to stop all that. It’s called the Junk Fee Prevention Act.

We’ll ban surprise “resort fees” that hotels tack on to your bill. These fees can cost you up to $90 a night at hotels that aren’t even resorts.

We’ll make cable internet and cellphone companies stop charging you up to $200 or more when you decide to switch to another provider.

We’ll cap service fees on tickets to concerts and sporting events and make companies disclose all fees upfront.

And we’ll prohibit airlines from charging up to $50 roundtrip for families just to sit together.

Baggage fees are bad enough – they can’t just treat your child like a piece of luggage.

Americans are tired of being played for suckers.

Pass the Junk Fee Prevention Act so companies stop ripping us off.

For too long, workers have been getting stiffed.

Not anymore.

We’re beginning to restore the dignity of work.

For example, 30 million workers had to sign non-compete agreements when they took a job. So a cashier at a burger place can’t cross the street to take the same job at another burger place to make a couple bucks more.

Not anymore.

We’re banning those agreements so companies have to compete for workers and pay them what they’re worth.

I’m so sick and tired of companies breaking the law by preventing workers from organizing.

Pass the PRO Act because workers have a right to form a union. And let’s guarantee all workers a living wage.

Let’s also make sure working parents can afford to raise a family with sick days, paid family and medical leave, and affordable child care that will enable millions more people to go to work.

Let’s also restore the full Child Tax Credit, which gave tens of millions of parents some breathing room and cut child poverty in half.

Remember when we passed the Fair Labor Standards Act, it was called “The minimum wage law” for a reason. Workers were guaranteed the minimum they could be paid.

Guess what? It’s long past time to raise the minimum wage to $15 an hour. No one, no one working 40 hours a week should live below the poverty line.

Look, I know some of you think I’m too focused on worker rights.

Too concerned about how to get people the health care they need.

Too committed to reducing costs for families.

Too much on the side of the consumer, the student, the worker.

Too focused on making life easier for the nurse on the night shift and the teacher preparing for class.

To that I say, “Thank you.”

Thank you. That’s my job. That’s my job.

I was elected to stand up for you – to stand up for the hard-working people that built this country.

And that’s exactly what I’m going to do. That’s exactly what I’m going to do.

Let’s also finish the job of delivering health care as a right to all Americans.

We now have historic coverage, reaching more Americans than ever before.

Over 16 million Americans are now enrolled under the Affordable Care Act.

More than have ever been covered in the history of America.

Hundreds of millions of prescriptions have been filled at no cost to American seniors.

And I will not go back.

Not only that, we are delivering the most significant steps to lower drug prices in decades.

Our new law lowers prescription drug costs by 50% by allowing Medicare to negotiate prices directly with the drug companies.

I know you think I’m kidding. I’m not. We’re also putting a cap on out-of-pocket expenses for seniors on Medicare. No senior should pay more than $2,000 a year for their prescriptions.

Millions of Americans will save thousands of dollars a year.

Under the new law, prescription drugs will finally be more affordable. They’ll be available. They’ll be accessible.

We will deliver lower drug costs to all Americans. Lower premiums for you. Lower deductibles for you.

Lower copays and lower out-of-pocket costs. That’s not a political statement. That’s a fact.

So, let’s finish the job and end health care for you. By lowering health care costs for all Americans. Let’s finish the job, end health care for you.

And let’s also provide health care for more Americans.

That’s why I’m calling on Congress to expand Medicaid to cover the 2 million Americans who are uninsured and eligible.

We’re going to lower prescription drug costs by taking on the greed of the pharmaceutical industry and add dental, vision, and hearing coverage to Medicare.

And let’s lower the age of Medicare eligibility to 60 years old, giving 23 million seniors the peace of mind that comes with Medicare.

One more thing, let’s also lower the cost of hearing aids, which are essential, as well as the other incredible health care products, because they’ve been out of reach for so many of our seniors.

I know I talk a lot about workers and their rights and how to get more money in their pockets. That’s because I grew up in a family of workers. My dad had a blue-collar job in Scranton, Pennsylvania.

He used to say, “Joey, a job is about a lot more than a paycheck. It’s about your dignity. It’s about respect. It’s about being able to look your child in the eye and say, ‘Honey, it’s going to be okay.’”

That’s what I believe.

Let’s keep going. Let’s keep moving. Let’s finish the job.

You know, we all want the same thing.

You know, we all want the same thing. A better life for ourselves and our kids. Safer streets. A cleaner planet. A more just world.

We all want to make sure every child has the same opportunities in this country we all love.

Over the past two years, we’ve already done so much.

Over the past two years, we’ve already done so much. We passed a law to reduce the price of prescription drugs. We’re putting our kids in better schools. We’re protecting the air we breathe and the water we drink.

We’re delivering assistance to millions of Americans who need it most. We’re supporting small businesses and creating jobs. We’re building a fairer and more just economy.

We’ve rebuilt our alliances, restored our standing in the world, and confronted the challenges of our time.

And now, together, we’re going to finish the job.

Thank you. God bless you. And may God bless the United States of America.
"""


In [48]:
%time
spans = refined.process_text(text, apply_class_check = True)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 6.91 µs


In [49]:
# Bare expression: the notebook renders the current value of `spans` as the cell output.
spans

[['Congress', Entity(wikidata_entity_id=Q11268, wikipedia_entity_title=United States Congress), 'ORG'],
 ['Cabinet', Entity(wikidata_entity_id=Q639738, wikipedia_entity_title=Cabinet of the United States), 'ORG'],
 ['the Supreme Court', Entity(wikidata_entity_id=Q11201, wikipedia_entity_title=Supreme Court of the United States), 'ORG'],
 ['Americans', Entity(wikidata_entity_id=Q846570, wikipedia_entity_title=Americans), 'ORG'],
 ['Congress', Entity(wikidata_entity_id=Q11268, wikipedia_entity_title=United States Congress), 'ORG'],
 ['House', Entity(wikidata_entity_id=Q11701, wikipedia_entity_title=United States House of Representatives), 'ORG'],
 ['Kevin McCarthy', Entity(wikidata_entity_id=Q766866, wikipedia_entity_title=Kevin McCarthy (California politician)), 'PERSON'],
 ['House', Entity(wikidata_entity_id=Q11701, wikipedia_entity_title=United States House of Representatives), 'ORG'],
 ['Democrats', Entity(wikidata_entity_id=Q29552, wikipedia_entity_title=Democratic Party (United Sta

## Evaluating the model with the Wikipedia entity set

In [51]:
from refined.evaluation.evaluation import eval_all

# Evaluate the currently loaded `refined` instance; the captured output reports
# per-dataset metrics for AIDA and MSNBC.
# NOTE(review): `el=True` presumably selects end-to-end entity linking (the output also
# reports mention-detection "MD" scores) - confirm against the ReFinED documentation.
results_numbers = eval_all(refined=refined, el=True)

Evaluating on AIDA: 0it [00:00, ?it/s]

*****************************


Dataset name: AIDA

****************
************
f1: 0.7826
accuracy: 0.8121
gold_recall: 0.9785
p: 0.7552
r: 0.8121
num_gold_spans: 4464
************
*******MD*****
MD_f1: 0.9295, (p: 0.9238, r: 0.9352)
*****************

*****************************




Evaluating on MSNBC: 0it [00:00, ?it/s]

*****************************


Dataset name: MSNBC

****************
************
f1: 0.7343
accuracy: 0.7727
gold_recall: 0.9954
p: 0.6996
r: 0.7727
num_gold_spans: 651
************
*******MD*****
MD_f1: 0.7971, (p: 0.7649, r: 0.8322)
*****************

*****************************




# Testing the model with the Wikidata entity set

In [2]:
from refined.inference.processor import Refined

# Load the 'wikipedia_model' checkpoint against the Wikidata entity set.
# This rebinds `refined`, so every cell executed afterwards uses this instance.
refined = Refined.from_pretrained(
    model_name='wikipedia_model',
    entity_set="wikidata",
    use_precomputed_descriptions=True,
    download_files=True,
)
print("Done loading in Model")

Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight

Done loading in Model


## Testing on short text

In [None]:
# Link the entities in a single short sentence and print the returned spans.
spans = refined.process_text("England won the FIFA World Cup in 1966.")

print(spans)

In [28]:
# The sentence only gives the surname "Roosevelt"; print the spans to see which entity is chosen.
spans = refined.process_text("President Roosevelt created the New Deal.")

print(spans)

[['Roosevelt', Entity(wikidata_entity_id=Q8007, wikipedia_entity_title=Franklin D. Roosevelt), 'PERSON'], ['New Deal', Entity(wikidata_entity_id=Q186356, wikipedia_entity_title=New Deal), None]]


In [29]:
# Sentence with a full person name and a lower-case mention ("civil rights movement").
spans = refined.process_text("Martin Luther King Jr. was a leading figure in the civil rights movement.")

print(spans)

[['Martin Luther King Jr.', Entity(wikidata_entity_id=Q8027, wikipedia_entity_title=Martin Luther King Jr.), 'PERSON'], ['civil rights movement', Entity(wikidata_entity_id=Q48537, wikipedia_entity_title=Civil rights movement), None]]


In [30]:
# "George Bush" carries no middle initials; print the spans to see which entity the model picks.
spans = refined.process_text("George Bush was President")

print(spans)

[['George Bush', Entity(wikidata_entity_id=Q23505, wikipedia_entity_title=George H. W. Bush), 'PERSON']]


Testing on cases from refined_demo.py

In [32]:
from refined.data_types.base_types import Span

# Banner printed between examples: blank line, 40 asterisks, blank line.
SEPARATOR = '\n' + '****' * 10 + '\n'


def mention_span(document, mention):
    """Build a Span covering the first occurrence of `mention` in `document`.

    `start` and `ln` are derived from the text instead of being typed by hand, so they
    always agree with the mention. The hand-written `ln=10` previously used for the
    9-character mention 'Joe Biden' was off by one.

    Args:
        document: the full text that will be passed to the model.
        mention: the exact substring to mark as an entity mention.

    Returns:
        Span(text=mention, start=<character offset of mention>, ln=len(mention)).

    Raises:
        ValueError: if `mention` does not occur in `document` (raised by str.index).
    """
    return Span(text=mention, start=document.index(mention), ln=len(mention))


biden_text = "Joe Biden was born in Scranton."

# Each entry is (text, extra keyword arguments for refined.process_text); entries run in order.
# NOTE(review): the numeric/currency/time/... examples below return no value spans in the
# captured output (e.g. nothing for "55,000,000"). This may be because 'wikipedia_model'
# is loaded rather than a numbers-aware variant - confirm against the ReFinED README.
single_text_examples = [
    # Difficult disambiguation example
    ('Michael Jordan is a Professor of Computer Science at UC Berkeley.', {}),
    # Example where entity mention spans are provided
    (biden_text, {'spans': [mention_span(biden_text, 'Joe Biden'),
                            mention_span(biden_text, 'Scranton')]}),
    # Example with numeric value
    ('The population of England is 55,000,000.', {}),
    # Example with currency
    ("The net worth of Elon Musk is $200B.", {}),
    # Example with time
    ("It takes 60 minutes bake a potato.", {}),
    # Example with an ordinal
    ("The first book in the Harry Potter series is Harry Potter and the Philosopher's Stone.", {}),
    # Example with age
    ("Barack Obama was 48 years old when he became president of the United States.", {}),
    # Example with percentage
    ("The rural population of England was 10% in 2020.", {}),
    # Example with height (quantity)
    ("Joe Biden is 1.82m tall.", {}),
    # Example with Wikidata entity that is not in Wikipedia
    ("Andreas Hecht is a professor.", {}),
]

# One banner up front, then text / spans / banner for every example.
print(SEPARATOR)
for text, extra_kwargs in single_text_examples:
    spans = refined.process_text(text, **extra_kwargs)
    print(text)
    print(spans)
    print(SEPARATOR)

# Batched example
texts = ["Andreas Hecht is a professor.", "Michael Jordan is a Professor of Computer Science at UC Berkeley."]
docs = refined.process_text_batch(texts)
for doc in docs:
    print(f'Document: {doc.text}, spans: {doc.spans}')
print(SEPARATOR + '\n')

# Batched example with spans
texts = [biden_text] * 2
# Build fresh Span objects for every document; sharing them between documents can cause
# issues because of in-place modifications.
spanss = [[mention_span(biden_text, 'Joe Biden'), mention_span(biden_text, 'Scranton')] for _ in texts]
docs = refined.process_text_batch(texts=texts, spanss=spanss)
for doc in docs:
    print(f'Document: {doc.text}, spans: {doc.spans}')
print(SEPARATOR)


****************************************

Michael Jordan is a Professor of Computer Science at UC Berkeley.
[['Michael Jordan', Entity(wikidata_entity_id=Q41421, wikipedia_entity_title=Michael Jordan), 'PERSON'], ['UC Berkeley', Entity(wikidata_entity_id=Q168756, wikipedia_entity_title=University of California, Berkeley), 'ORG']]

****************************************

Joe Biden was born in Scranton.
[['Joe Biden', Entity(wikidata_entity_id=Q6279, wikipedia_entity_title=Joe Biden), 'PERSON'], ['Scranton', Entity(wikidata_entity_id=Q271395, wikipedia_entity_title=Scranton, Pennsylvania), 'FAC']]

****************************************

The population of England is 55,000,000.
[['population', Entity(wikidata_entity_id=Q2625603, wikipedia_entity_title=Population), None], ['England', Entity(wikidata_entity_id=Q21, wikipedia_entity_title=England), 'GPE']]

****************************************

The net worth of Elon Musk is $200B.
[['Elon Musk', Entity(wikidata_entity_id=Q317521, w

## Testing on longer text

Here I test the model on a segment from a recent UCLA press release: https://newsroom.ucla.edu/releases/gene-block-to-step-down-as-ucla-chancellor

In [14]:
# Input document: excerpt of the UCLA press release linked above.
# The triple-quoted literal begins and ends with a newline, which is part of the string.
text = """
In 2006, soon after receiving the news that he would become UCLA’s next chancellor, Gene Block and his wife, Carol, finished their meal with a fortune cookie. The message? “In order to cross a great river, you have to take a great leap.”

Having leapt across countless rivers since, Block will step down from his role as UCLA’s top executive on July 31, 2024, after 17 years of transformative leadership.

In an era when most university leaders’ tenures last less than six years, Block has become an elder statesman in higher education and a guiding force not only for UCLA but for California and the nation. The choice to call it a day, he admitted, was bittersweet.

“This decision was by no means an easy one,” he wrote today in a message to the Bruin community. “But I have the greatest confidence in UCLA’s future, and I feel that the time is right — for me, for my family and for our campus.”

University of California President Michael Drake praised Block’s service and contributions to the campus.

“For nearly two decades, Chancellor Block has helped UCLA grow into a powerhouse of excellence, opportunity and access. He has been a dear friend and a dedicated partner in tackling many of the university’s challenges,” Drake said. “Chancellor Block’s efforts to forge new community partnerships, enhance the student experience and grow the research enterprise will benefit UCLA and the state of California for years to come.”

When he arrived on campus in August 2007 after nearly three decades at the University of Virginia, Block insisted that his goal for UCLA was “to make a great place even better.”

He has delivered on that promise, and then some. Under his stewardship, research funding has doubled, and the campus’s annual budget has increased from $4 billion to nearly $11 billion. Enrollment has grown by 9,000 students, and the campus has added more than a dozen new housing facilities.

The Block legacy

Visit our tribute site to view photos and video, see a year-by-year timeline of Gene Block’s tenure and leave a message for the chancellor.

The campus has risen from No. 4 to No. 1 among public universities in U.S. News & World Report’s annual rankings, and UCLA Health is consistently rated among the nation’s top five hospitals. In athletics, Bruin teams captured 21 NCAA championships during Block’s tenure, boosting UCLA’s total to 121 — second-most among all colleges.

The Centennial Campaign for UCLA, at the time of its launch the largest fundraising effort ever undertaken by a public university, raised nearly $5.5 billion between 2014 and 2019, helping to bolster student scholarships, faculty support, research, capital projects and the campus’s endowment.

UCLA acquired new properties in the South Bay and downtown Los Angeles to better serve students and strengthen ties to the region’s diverse communities, opened nearly 25 newly constructed buildings on campus and in Westwood, and grew from the seventh-largest to the fourth-largest employer in Los Angeles County.

And UCLA Health expanded access to its world-class care with the addition of some 200 clinics throughout Southern California, the acquisition of a Mid-Wilshire property for a new state-of-the-art psychiatric hospital and the launch of initiatives including the Homeless Healthcare Collaborative, which provides free mobile medical and behavioral care to unhoused people in Los Angeles.

Service has always been one of the three components of UCLA’s mission, along with research and education. But the ways in which students, staff and faculty serve the Los Angeles and global communities — through volunteerism and community-engaged scholarship — took on new dimensions during Block’s tenure. In 2009, his team initiated UCLA Volunteer Day, with more than 4,000 Bruins fanning out across Los Angeles for a day of hands-on community service. The program has become a fall ritual, with some 8,000 Bruins participating each year in the nation’s largest service project for new university students.
"""


In [33]:
%time
spans = refined.process_text(text, apply_class_check = True)

CPU times: user 6 µs, sys: 2 µs, total: 8 µs
Wall time: 21 µs


In [16]:
# Bare expression: the notebook renders the current value of `spans` as the cell output.
spans

[['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['Gene Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['Carol', Entity not linked to a knowledge base, 'PERSON'],
 ['Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['California', Entity(wikidata_entity_id=Q99, wikipedia_entity_title=California), 'GPE'],
 ['Bruin', Entity not linked to a knowledge base, None],
 ['UCLA', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'ORG'],
 ['University of California', E

Next, I measure the time needed to process a segment of Biden's 2023 State of the Union address. It contains around 4,500 words and has many entities to link. The excerpt is taken from: https://www.whitehouse.gov/briefing-room/speeches-remarks/2023/02/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery/#:~:text=We%20are%20the%20only%20country,ever%20created%20in%20four%20years.

In [10]:
text = """
Mr. Speaker. Madam Vice President. Our First Lady and Second Gentleman.

Members of Congress and the Cabinet. Leaders of our military.

Mr. Chief Justice, Associate Justices, and retired Justices of the Supreme Court.

And you, my fellow Americans.

I start tonight by congratulating the members of the 118th Congress and the new Speaker of the House, Kevin McCarthy.

Mr. Speaker, I look forward to working together.

I also want to congratulate the new leader of the House Democrats and the first Black House Minority Leader in history, Hakeem Jeffries.

Congratulations to the longest serving Senate Leader in history, Mitch McConnell.

And congratulations to Chuck Schumer for another term as Senate Majority Leader, this time with an even bigger majority.

And I want to give special recognition to someone who I think will be considered the greatest Speaker in the history of this country, Nancy Pelosi.

The story of America is a story of progress and resilience. Of always moving forward. Of never giving up.

A story that is unique among all nations.

We are the only country that has emerged from every crisis stronger than when we entered it.

That is what we are doing again.

Two years ago, our economy was reeling.

As I stand here tonight, we have created a record 12 million new jobs, more jobs created in two years than any president has ever created in four years.

Two years ago, COVID had shut down our businesses, closed our schools, and robbed us of so much.

Today, COVID no longer controls our lives.

And two years ago, our democracy faced its greatest threat since the Civil War.

Today, though bruised, our democracy remains unbowed and unbroken.

As we gather here tonight, we are writing the next chapter in the great American story, a story of progress and resilience. When world leaders ask me to define America, I define our country in one word: Possibilities.

You know, we’re often told that Democrats and Republicans can’t work together.

But over these past two years, we proved the cynics and the naysayers wrong.

Yes, we disagreed plenty. And yes, there were times when Democrats had to go it alone.

But time and again, Democrats and Republicans came together.

Came together to defend a stronger and safer Europe.

Came together to pass a once-in-a-generation infrastructure law, building bridges to connect our nation and people.

Came together to pass one of the most significant laws ever, helping veterans exposed to toxic burn pits.

In fact, I signed over 300 bipartisan laws since becoming President. From reauthorizing the Violence Against Women Act, to the Electoral Count Reform Act, to the Respect for Marriage Act that protects the right to marry the person you love.

To my Republican friends, if we could work together in the last Congress, there is no reason we can’t work together in this new Congress.

The people sent us a clear message. Fighting for the sake of fighting, power for the sake of power, conflict for the sake of conflict, gets us nowhere.

And that’s always been my vision for our country.

To restore the soul of the nation.

To rebuild the backbone of America, the middle class.

To unite the country.

We’ve been sent here to finish the job.

For decades, the middle class was hollowed out.

Too many good-paying manufacturing jobs moved overseas. Factories at home closed down.

Once-thriving cities and towns became shadows of what they used to be.

And along the way, something else was lost.

Pride. That sense of self-worth.

I ran for President to fundamentally change things, to make sure the economy works for everyone so we can all feel pride in what we do.

To build an economy from the bottom up and the middle out, not from the top down. Because when the middle class does well, the poor have a ladder up and the wealthy still do very well. We all do well.

As my Dad used to say, a job is about a lot more than a paycheck. It’s about your dignity. It’s about respect. It’s about being able to look your kid in the eye and say, “Honey –it’s going to be OK,” and mean it.

So, let’s look at the results. Unemployment rate at 3.4%, a 50-year low. Near record low unemployment for Black and Hispanic workers.

We’ve already created 800,000 good-paying manufacturing jobs, the fastest growth in 40 years.

Where is it written that America can’t lead the world in manufacturing again?

For too many decades, we imported products and exported jobs.

Now, thanks to all we’ve done, we’re exporting American products and creating American jobs.

Inflation has been a global problem because of the pandemic that disrupted supply chains and Putin’s war that disrupted energy and food supplies.

But we’re better positioned than any country on Earth.

We have more to do, but here at home, inflation is coming down.

Here at home, gas prices are down $1.50 a gallon since their peak.

Food inflation is coming down.

Inflation has fallen every month for the last six months while take home pay has gone up.

Additionally, over the last two years, a record 10 million Americans applied to start a new small business.

Every time somebody starts a small business, it’s an act of hope.

And the Vice President will continue her work to ensure more small businesses can access capital and the historic laws we enacted.

Standing here last year, I shared with you a story of American genius and possibility.

Semiconductors, the small computer chips the size of your fingertip that power everything from cellphones to automobiles, and so much more. These chips were invented right here in America.

America used to make nearly 40% of the world’s chips.

But in the last few decades, we lost our edge and we’re down to producing only 10%. We all saw what happened during the pandemic when chip factories overseas shut down.

Today’s automobiles need up to 3,000 chips each, but American automakers couldn’t make enough cars because there weren’t enough chips.

Car prices went up. So did everything from refrigerators to cellphones.

We can never let that happen again.

That’s why we came together to pass the bipartisan CHIPS and Science Act.

We’re making sure the supply chain for America begins in America.

We’ve already created 800,000 manufacturing jobs even without this law.

With this new law, we will create hundreds of thousands of new jobs across the country.

That’s going to come from companies that have announced more than $300 billion in investments in American manufacturing in the last two years.

Outside of Columbus, Ohio, Intel is building semiconductor factories on a thousand acres – a literal field of dreams.

That’ll create 10,000 jobs. 7,000 construction jobs. 3,000 jobs once the factories are finished.

Jobs paying $130,000 a year, and many don’t require a college degree.

Jobs where people don’t have to leave home in search of opportunity.

And it’s just getting started.

Think about the new homes, new small businesses, and so much more that will come to life.

Talk to mayors and Governors, Democrats and Republicans, and they’ll tell you what this means to their communities.

We’re seeing these fields of dreams transform the heartland.

But to maintain the strongest economy in the world, we also need the best infrastructure in the world.

We used to be #1 in the world in infrastructure, then we fell to #13th.

Now we’re coming back because we came together to pass the Bipartisan Infrastructure Law, the largest investment in infrastructure since President Eisenhower’s Interstate Highway System.

Already, we’ve funded over 20,000 projects, including at major airports from Boston to Atlanta to Portland.

These projects will put hundreds of thousands of people to work rebuilding our highways, bridges, railroads, tunnels, ports and airports, clean water, and high-speed internet across America.

Urban. Suburban. Rural. Tribal.

And we’re just getting started. I sincerely thank my Republican friends who voted for the law.

And to my Republican friends who voted against it but still ask to fund projects in their districts, don’t worry.

I promised to be the president for all Americans.

We’ll fund your projects. And I’ll see you at the ground-breaking.

This law will help further unite all of America.

Major projects like the Brent Spence bridge between Kentucky and Ohio over the Ohio River. Built 60 years ago. Badly in need of repairs.

One of the nation’s most congested freight routes carrying $2 billion worth of freight every day. Folks have been talking about fixing it for decades, but we’re finally going to get it done.

I went there last month with Democrats and Republicans from both states to deliver $1.6 billion for this project.

While I was there, I met an ironworker named Sara, who is here tonight.

For 30 years, she’s been a proud member of Ironworkers Local 44, known as the “cowboys of the sky” who built the Cincinnati skyline.

Sara said she can’t wait to be ten stories above the Ohio River building that new bridge. That’s pride.

That’s what we’re also building – Pride.

We’re also replacing poisonous lead pipes that go into 10 million homes and 400,000 schools and childcare centers, so every child in America can drink clean water.

We’re making sure that every community has access to affordable, high-speed internet.

No parent should have to drive to a McDonald’s parking lot so their kid can do their homework online.

And when we do these projects, we’re going to Buy American.

Buy American has been the law of the land since 1933. But for too long, past administrations have found ways to get around it.

Not anymore.

Tonight, I’m also announcing new standards to require all construction materials used in federal infrastructure projects to be made in America.

American-made lumber, glass, drywall, fiber optic cables.

And on my watch, American roads, American bridges, and American highways will be made with American products.

My economic plan is about investing in places and people that have been forgotten. Amid the economic upheaval of the past four decades, too many people have been left behind or treated like they’re invisible.

Maybe that’s you, watching at home.

You remember the jobs that went away. And you wonder whether a path even exists anymore for you and your children to get ahead without moving away.

I get it.

That’s why we’re building an economy where no one is left behind.

Jobs are coming back, pride is coming back, because of the choices we made in the last two years. This is a blue-collar blueprint to rebuild America and make a real difference in your lives.

For example, too many of you lay in bed at night staring at the ceiling, wondering what will happen if your spouse gets cancer, your child gets sick, or if something happens to you.

Will you have the money to pay your medical bills? Will you have to sell the house?

I get it. With the Inflation Reduction Act that I signed into law, we’re taking on powerful interests to bring your health care costs down so you can sleep better at night.

You know, we pay more for prescription drugs than any major country on Earth.

For example, one in ten Americans has diabetes.

Every day, millions need insulin to control their diabetes so they can stay alive. Insulin has been around for 100 years. It costs drug companies just $10 a vial to make.

But, Big Pharma has been unfairly charging people hundreds of dollars – and making record profits.

Not anymore.

We capped the cost of insulin at $35 a month for seniors on Medicare.

But there are millions of other Americans who are not on Medicare, including 200,000 young people with Type I diabetes who need insulin to save their lives.

Let’s finish the job this time.

Let’s cap the cost of insulin at $35 a month for every American who needs it.

This law also caps out-of-pocket drug costs for seniors on Medicare at a maximum $2,000 per year when there are in fact many drugs, like expensive cancer drugs, that can cost up to $10,000, $12,000, and $14,000 a year.

If drug prices rise faster than inflation, drug companies will have to pay Medicare back the difference.

And we’re finally giving Medicare the power to negotiate drug prices. Bringing down prescription drug costs doesn’t just save seniors money.

It will cut the federal deficit, saving tax payers hundreds of billions of dollars on the prescription drugs the government buys for Medicare.

Why wouldn’t we want to do that?

Now, some members here are threatening to repeal the Inflation Reduction Act.

Make no mistake, if you try to do anything to raise the cost of prescription drugs, I will veto it.

I’m pleased to say that more Americans have health insurance now than ever in history.

A record 16 million people are enrolled under the Affordable Care Act.

Thanks to the law I signed last year, millions are saving $800 a year on their premiums.

But the way that law was written, that benefit expires after 2025.

Let’s finish the job, make those savings permanent, and expand coverage to those left off Medicaid.

Look, the Inflation Reduction Act is also the most significant investment ever to tackle the climate crisis.

Lowering utility bills, creating American jobs, and leading the world to a clean energy future.

I’ve visited the devastating aftermaths of record floods and droughts, storms and wildfires.

In addition to emergency recovery from Puerto Rico to Florida to Idaho, we are rebuilding for the long term.

New electric grids able to weather the next major storm.

Roads and water systems to withstand the next big flood.

Clean energy to cut pollution and create jobs in communities too often left behind.

We’re building 500,000 electric vehicle charging stations installed across the country by tens of thousands of IBEW workers.

And helping families save more than $1,000 a year with tax credits for the purchase of electric vehicles and energy-efficient appliances.

Historic conservation efforts to be responsible stewards of our lands.

Let’s face reality.

The climate crisis doesn’t care if your state is red or blue. It is an existential threat.

We have an obligation to our children and grandchildren to confront it. I’m proud of how America is at last stepping up to the challenge.

But there’s so much more to do.

We will finish the job.

And we pay for these investments in our future by finally making the wealthiest and the biggest corporations begin to pay their fair share.

I’m a capitalist. But just pay your fair share.

And I think a lot of you at home agree with me that our present tax system is simply unfair.

The idea that in 2020, 55 of the biggest companies in America made $40 billion in profits and paid zero in federal income taxes?

That’s simply not fair.

But now, because of the law I signed, billion-dollar companies have to pay a minimum of 15%.

Just 15%.

That’s less than a nurse pays. Let me be clear.

Under my plan, nobody earning less than $400,000 a year will pay an additional penny in taxes.

Nobody. Not one penny.

But there’s more to do.

Let’s finish the job. Reward work, not just wealth. Pass my proposal for a billionaire minimum tax.

Because no billionaire should pay a lower tax rate than a school teacher or a firefighter.

You may have noticed that Big Oil just reported record profits.

Last year, they made $200 billion in the midst of a global energy crisis.

It’s outrageous.

They invested too little of that profit to increase domestic production and keep gas prices down.

Instead, they used those record profits to buy back their own stock, rewarding their CEOs and shareholders.

Corporations ought to do the right thing.

That’s why I propose that we quadruple the tax on corporate stock buybacks to encourage long term investments instead.

They will still make a considerable profit.

Let’s finish the job and close the loopholes that allow the very wealthy to avoid paying their taxes.

Instead of cutting the number of audits of wealthy tax payers, I signed a law that will reduce the deficit by $114 billion by cracking down on wealthy tax cheats.

That’s being fiscally responsible.

In the last two years, my administration cut the deficit by more than $1.7 trillion – the largest deficit reduction in American history.

Under the previous administration, America’s deficit went up four years in a row.

Because of those record deficits, no president added more to the national debt in any four years than my predecessor.

Nearly 25% of the entire national debt, a debt that took 200 years to accumulate, was added by that administration alone.

How did Congress respond to all that debt?

They lifted the debt ceiling three times without preconditions or crisis.

They paid America’s bills to prevent economic disaster for our country.

Tonight, I’m asking this Congress to follow suit.

Let us commit here tonight that the full faith and credit of the United States of America will never, ever be questioned.

Some of my Republican friends want to take the economy hostage unless I agree to their economic plans. All of you at home should know what their plans are.

Instead of making the wealthy pay their fair share, some Republicans want Medicare and Social Security to sunset every five years.

That means if Congress doesn’t vote to keep them, those programs will go away.

Other Republicans say if we don’t cut Social Security and Medicare, they’ll let America default on its debt for the first time in our history.

I won’t let that happen.

Social Security and Medicare are a lifeline for millions of seniors.

Americans have been paying into them with every single paycheck since they started working.

So tonight, let’s all agree to stand up for seniors. Stand up and show them we will not cut Social Security. We will not cut Medicare.

Those benefits belong to the American people. They earned them.

If anyone tries to cut Social Security, I will stop them. And if anyone tries to cut Medicare, I will stop them.

I will not allow them to be taken away.

Not today. Not tomorrow. Not ever.

Next month when I offer my fiscal plan, I ask my Republican friends to offer their plan.

We can sit down together and discuss both plans together.

My plan will lower the deficit by $2 trillion.

I won’t cut a single Social Security or Medicare benefit.

In fact, I will extend the Medicare Trust Fund by at least two decades.

I will not raise taxes on anyone making under $400,000 a year. And I will pay for the ideas I’ve talked about tonight by making the wealthy and big corporations begin to pay their fair share.

Look, here’s the deal. Big corporations aren’t just taking advantage of the tax code. They’re taking advantage of you, the American consumer.

Here’s my message to all of you out there: I have your back. We’re already preventing insurance companies from sending surprise medical bills, stopping 1 million surprise bills a month.

We’re protecting seniors’ lives and life savings by cracking down on nursing homes that commit fraud, endanger patient safety, or prescribe drugs they don’t need.

Millions of Americans can now save thousands of dollars because they can finally get hearing aids over-the-counter without a prescription.

Capitalism without competition is not capitalism. It is exploitation.

Last year I cracked down on foreign shipping companies that were making you pay higher prices for everyday goods coming into our country.

I signed a bipartisan bill that cut shipping costs by 90%, helping American farmers, businesses, and consumers.

Let’s finish the job.

Pass bipartisan legislation to strengthen antitrust enforcement and prevent big online platforms from giving their own products an unfair advantage.

My administration is also taking on “junk” fees, those hidden surcharges too many businesses use to make you pay more.

For example, we’re making airlines show you the full ticket price upfront and refund your money if your flight is cancelled or delayed.

We’ve reduced exorbitant bank overdraft fees, saving consumers more than $1 billion a year.

We’re cutting credit card late fees by 75%, from $30 to $8.

Junk fees may not matter to the very wealthy, but they matter to most folks in homes like the one I grew up in. They add up to hundreds of dollars a month.

They make it harder for you to pay the bills or afford that family trip.

I know how unfair it feels when a company overcharges you and gets away with it.

Not anymore.

We’ve written a bill to stop all that. It’s called the Junk Fee Prevention Act.

We’ll ban surprise “resort fees” that hotels tack on to your bill. These fees can cost you up to $90 a night at hotels that aren’t even resorts.

We’ll make cable internet and cellphone companies stop charging you up to $200 or more when you decide to switch to another provider.

We’ll cap service fees on tickets to concerts and sporting events and make companies disclose all fees upfront.

And we’ll prohibit airlines from charging up to $50 roundtrip for families just to sit together.

Baggage fees are bad enough – they can’t just treat your child like a piece of luggage.

Americans are tired of being played for suckers.

Pass the Junk Fee Prevention Act so companies stop ripping us off.

For too long, workers have been getting stiffed.

Not anymore.

We’re beginning to restore the dignity of work.

For example, 30 million workers had to sign non-compete agreements when they took a job. So a cashier at a burger place can’t cross the street to take the same job at another burger place to make a couple bucks more.

Not anymore.

We’re banning those agreements so companies have to compete for workers and pay them what they’re worth.

I’m so sick and tired of companies breaking the law by preventing workers from organizing.

Pass the PRO Act because workers have a right to form a union. And let’s guarantee all workers a living wage.

Let’s also make sure working parents can afford to raise a family with sick days, paid family and medical leave, and affordable child care that will enable millions more people to go to work.

Let’s also restore the full Child Tax Credit, which gave tens of millions of parents some breathing room and cut child poverty in half.

Remember when we passed the Fair Labor Standards Act, it was called “The minimum wage law” for a reason. Workers were guaranteed the minimum they could be paid.

Guess what? It’s long past time to raise the minimum wage to $15 an hour. No one, no one working 40 hours a week should live below the poverty line.

Look, I know some of you think I’m too focused on worker rights.

Too concerned about how to get people the health care they need.

Too committed to reducing costs for families.

Too much on the side of the consumer, the student, the worker.

Too focused on making life easier for the nurse on the night shift and the teacher preparing for class.

To that I say, “Thank you.”

Thank you. That’s my job. That’s my job.

I was elected to stand up for you – to stand up for the hard-working people that built this country.

And that’s exactly what I’m going to do. That’s exactly what I’m going to do.

Let’s also finish the job of delivering health care as a right to all Americans.

We now have historic coverage, reaching more Americans than ever before.

Over 16 million Americans are now enrolled under the Affordable Care Act.

More than have ever been covered in the history of America.

Hundreds of millions of prescriptions have been filled at no cost to American seniors.

And I will not go back.

Not only that, we are delivering the most significant steps to lower drug prices in decades.

Our new law lowers prescription drug costs by 50% by allowing Medicare to negotiate prices directly with the drug companies.

I know you think I’m kidding. I’m not. We’re also putting a cap on out-of-pocket expenses for seniors on Medicare. No senior should pay more than $2,000 a year for their prescriptions.

Millions of Americans will save thousands of dollars a year.

Under the new law, prescription drugs will finally be more affordable. They’ll be available. They’ll be accessible.

We will deliver lower drug costs to all Americans. Lower premiums for you. Lower deductibles for you.

Lower copays and lower out-of-pocket costs. That’s not a political statement. That’s a fact.

So, let’s finish the job and end health care for you. By lowering health care costs for all Americans. Let’s finish the job, end health care for you.

And let’s also provide health care for more Americans.

That’s why I’m calling on Congress to expand Medicaid to cover the 2 million Americans who are uninsured and eligible.

We’re going to lower prescription drug costs by taking on the greed of the pharmaceutical industry and add dental, vision, and hearing coverage to Medicare.

And let’s lower the age of Medicare eligibility to 60 years old, giving 23 million seniors the peace of mind that comes with Medicare.

One more thing, let’s also lower the cost of hearing aids, which are essential, as well as the other incredible health care products, because they’ve been out of reach for so many of our seniors.

I know I talk a lot about workers and their rights and how to get more money in their pockets. That’s because I grew up in a family of workers. My dad had a blue-collar job in Scranton, Pennsylvania.

He used to say, “Joey, a job is about a lot more than a paycheck. It’s about your dignity. It’s about respect. It’s about being able to look your child in the eye and say, ‘Honey, it’s going to be okay.’”

That’s what I believe.

Let’s keep going. Let’s keep moving. Let’s finish the job.

You know, we all want the same thing.

You know, we all want the same thing. A better life for ourselves and our kids. Safer streets. A cleaner planet. A more just world.

We all want to make sure every child has the same opportunities in this country we all love.

Over the past two years, we’ve already done so much.

Over the past two years, we’ve already done so much. We passed a law to reduce the price of prescription drugs. We’re putting our kids in better schools. We’re protecting the air we breathe and the water we drink.

We’re delivering assistance to millions of Americans who need it most. We’re supporting small businesses and creating jobs. We’re building a fairer and more just economy.

We’ve rebuilt our alliances, restored our standing in the world, and confronted the challenges of our time.

And now, together, we’re going to finish the job.

Thank you. God bless you. And may God bless the United States of America.
"""


In [34]:
%time
spans = refined.process_text(text, apply_class_check = True)

CPU times: user 3 µs, sys: 2 µs, total: 5 µs
Wall time: 7.87 µs


In [12]:
# Bare expression: renders the current `spans` value as this cell's output.
spans

[['Congress', Entity(wikidata_entity_id=Q11268, wikipedia_entity_title=United States Congress), 'ORG'],
 ['Cabinet', Entity(wikidata_entity_id=Q639738, wikipedia_entity_title=Cabinet of the United States), 'ORG'],
 ['the Supreme Court', Entity(wikidata_entity_id=Q11201, wikipedia_entity_title=Supreme Court of the United States), 'ORG'],
 ['Americans', Entity(wikidata_entity_id=Q846570, wikipedia_entity_title=Americans), 'ORG'],
 ['Congress', Entity(wikidata_entity_id=Q11268, wikipedia_entity_title=United States Congress), 'ORG'],
 ['House', Entity(wikidata_entity_id=Q11701, wikipedia_entity_title=United States House of Representatives), 'ORG'],
 ['Kevin McCarthy', Entity(wikidata_entity_id=Q766866, wikipedia_entity_title=Kevin McCarthy (California politician)), 'PERSON'],
 ['House', Entity(wikidata_entity_id=Q11701, wikipedia_entity_title=United States House of Representatives), 'ORG'],
 ['Democrats', Entity(wikidata_entity_id=Q29552, wikipedia_entity_title=Democratic Party (United Sta

In [None]:
It only takes 14.1 microseconds to process the text, w

## Performing Evaluation of model with Wikidata entity set

In [54]:
# Import the evaluation entry point that ships with the ReFinED package.
from refined.evaluation.evaluation import eval_all

# Evaluate the `refined` model loaded by an earlier cell and keep the returned results.
# NOTE(review): `el=True` presumably selects end-to-end entity linking (the printed
# report includes mention-detection "MD" scores) — confirm against the ReFinED docs.
results_numbers = eval_all(refined=refined, el=True)

Evaluating on AIDA: 0it [00:00, ?it/s]

*****************************


Dataset name: AIDA

****************
************
f1: 0.7643
accuracy: 0.7791
gold_recall: 0.9787
p: 0.7501
r: 0.7791
num_gold_spans: 4464
************
*******MD*****
MD_f1: 0.9295, (p: 0.9238, r: 0.9352)
*****************

*****************************




Evaluating on MSNBC: 0it [00:00, ?it/s]

*****************************


Dataset name: MSNBC

****************
************
f1: 0.7278
accuracy: 0.7558
gold_recall: 0.9923
p: 0.7019
r: 0.7558
num_gold_spans: 651
************
*******MD*****
MD_f1: 0.7971, (p: 0.7649, r: 0.8322)
*****************

*****************************




# WikiData Lookups

In [1]:
!pip install Wikidata



In [2]:
from wikidata.client import Client

In [3]:
# Create a client from the `wikidata.client.Client` class using its default arguments.
client = Client()

In [None]:
def wikidata_lookup():
    """Placeholder for a Wikidata lookup helper; the logic is not written yet.

    A ``def`` line without a body is a syntax error and stops a full
    "Run All" of the notebook, so the stub is given an explicit body that
    fails loudly when called instead.

    Raises:
        NotImplementedError: always, until the lookup is implemented.
    """
    # TODO: implement the lookup (inputs and return value still to be decided).
    raise NotImplementedError("wikidata_lookup is not implemented yet")

In [3]:
# Run the model on a short example sentence; `refined` is not defined in this
# cell, so it must already have been created by an earlier cell.
spans = refined.process_text("President Roosevelt created the New Deal.")

# Print the result so each detected mention and its linked entity is visible.
print(spans)

[['Roosevelt', Entity(wikidata_entity_id=Q8007, wikipedia_entity_title=Franklin D. Roosevelt), 'PERSON'], ['New Deal', Entity(wikidata_entity_id=Q186356, wikipedia_entity_title=New Deal), None]]




In [12]:
# Show the entity predicted for the first span (index 0).
spans[0].predicted_entity

Entity(wikidata_entity_id=Q8007, wikipedia_entity_title=Franklin D. Roosevelt)

In [13]:
# Show the entity-linking confidence score stored on the first span.
spans[0].entity_linking_model_confidence_score

0.9806

In [20]:
# Keep the Wikidata entity id of the first span's predicted entity for building a URL.
# NOTE(review): this fails if `predicted_entity` is None — confirm that cannot happen here.
qid = spans[0].predicted_entity.wikidata_entity_id

In [21]:
# URL prefix of Wikidata entity pages; an entity id is appended to it in a later cell.
wikidata_url = "https://www.wikidata.org/wiki/"

In [22]:
# Concatenate the URL prefix and the entity id; the resulting string is the cell output.
wikidata_url + qid

'https://www.wikidata.org/wiki/Q8007'

# Entity and Span information

In [6]:
from refined.inference.processor import Refined

# Rebind `refined` to the `wikipedia_model_with_numbers` checkpoint using the
# `wikidata` entity set, with precomputed descriptions and file download enabled.
refined = Refined.from_pretrained(
    model_name="wikipedia_model_with_numbers",
    entity_set="wikidata",
    use_precomputed_descriptions=True,
    download_files=True,
)
print("Done loading in Model")

Downloading /Users/lukas/.cache/refined/wikipedia_model_with_numbers/precomputed
Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at /Users/lukas/.cache/refined/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.

Done loading in Model


In [8]:
# Example sentence, split across adjacent literals purely for line length;
# Python joins them into the same single string.
text = (
    "Gene David Block (born August 17, 1948) is an American biologist "
    "who has served as the current and 6th chancellor of the "
    "University of California, Los Angeles since August 2007"
)
# Process the sentence with the loaded model and print the resulting spans.
spans = refined.process_text(text)
print(spans)

[['Gene David Block', Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 'PERSON'], ['August 17, 1948', Entity(parsed_string=[timepoint: ["1948/8/17"]]), 'DATE'], ['American', Entity(wikidata_entity_id=Q30, wikipedia_entity_title=United States), 'ORG'], ['6th', None, 'ORDINAL'], ['University of California, Los Angeles', Entity(wikidata_entity_id=Q174710, wikipedia_entity_title=University of California, Los Angeles), 'FAC'], ['August 2007', Entity(parsed_string=[timepoint: ["2007/8"]]), 'DATE']]




In [31]:
for span in spans:
    # Emit every inspected attribute of the span on its own line with a single
    # print call; the trailing empty string produces the blank separator line
    # between consecutive spans.
    print(
        "Mention in text: " + span.text,
        f"Start of mention: {span.start}",
        f"Mention length: {span.ln}",
        f"Document ID: {span.doc_id}",
        f"Gold entity: {span.gold_entity}",
        f"Candidate Entities: {span.candidate_entities}",
        f"Predicted Entity: {span.predicted_entity}",
        f"Confidence: {span.entity_linking_model_confidence_score}",
        f"Top entity candidates: {span.top_k_predicted_entities}",
        f"Predicted Entity types: {span.predicted_entity_types}",
        f"Coarse type: {span.coarse_type}",
        f"Coarse mention type: {span.coarse_mention_type}",
        f"Date: {span.date}",
        f"Failed Class Check: {span.failed_class_check}",
        f"Pruned Candidates: {span.pruned_candidates}",
        "",
        sep="\n",
    )

Mention in text: Gene David Block
Start of mention: 0
Mention length: 16
Document ID: 749119476
Gold entity: None
Candidate Entities: [('Q5531151', 1.0)]
Predicted Entity: Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block)
Confidence: 0.947
Top entity candidates: [(Entity(wikidata_entity_id=Q5531151, wikipedia_entity_title=Gene D. Block), 0.947), (Entity(wikipedia_entity_title=Gene D. Block), 0.053)]
Predicted Entity types: [('Q18805', 'naturalist', 0.8659)]
Coarse type: MENTION
Coarse mention type: PERSON
Date: None
Failed Class Check: None
Pruned Candidates: None

Mention in text: August 17, 1948
Start of mention: 23
Mention length: 15
Document ID: 749119476
Gold entity: None
Candidate Entities: None
Predicted Entity: Entity(parsed_string=[timepoint: ["1948/8/17"]])
Confidence: 1.0
Top entity candidates: None
Predicted Entity types: None
Coarse type: DATE
Coarse mention type: DATE
Date: Date(text='August 17, 1948', day=17, month=8, year=1948, offset=0, known_fo

In [32]:
# Inspect the entity predicted for the first span (index 0): its Python type
# followed by each of its identifying fields, one per output line.
entity = spans[0].predicted_entity
print(
    type(entity),
    f"Wikidata ID: {entity.wikidata_entity_id}",
    f"Wikipedia title: {entity.wikipedia_entity_title}",
    f"Human Readable Name: {entity.human_readable_name}",
    f"Parsed string: {entity.parsed_string}",
    sep="\n",
)

<class 'refined.data_types.base_types.Entity'>
Wikidata ID: Q5531151
Wikipedia title: Gene D. Block
Human Readable Name: None
Parsed string: None


In [33]:
# Inspect the entity predicted for the span at index 4: its Python type
# followed by each of its identifying fields, one per output line.
entity = spans[4].predicted_entity
print(
    type(entity),
    f"Wikidata ID: {entity.wikidata_entity_id}",
    f"Wikipedia title: {entity.wikipedia_entity_title}",
    f"Human Readable Name: {entity.human_readable_name}",
    f"Parsed string: {entity.parsed_string}",
    sep="\n",
)

<class 'refined.data_types.base_types.Entity'>
Wikidata ID: Q174710
Wikipedia title: University of California, Los Angeles
Human Readable Name: None
Parsed string: None


In [35]:
# Inspect the date object attached to the span at index 1: its Python type
# followed by the original text and the parsed components, one per output line.
date = spans[1].date
print(
    type(date),
    f"Text: {date.text}",
    f"Day: {date.day}",
    f"Month: {date.month}",
    f"Year: {date.year}",
    f"Known Format: {date.known_format}",
    sep="\n",
)

<class 'refined.utilities.numeric_handling.date_utils.Date'>
Text: August 17, 1948
Day: 17
Month: 8
Year: 1948
Known Format: True
