In [None]:
!python -m spacy download nl_core_news_lg

In [1]:
from lint_ii import ReadabilityAnalysis

### ReadabilityAnalysis

In [2]:
# Build a ReadabilityAnalysis object from a short Dutch sample text.
# The sample is written as adjacent string literals (one per sentence);
# Python joins them into a single string.

text = (
    "De Oudegracht is het sfeervolle hart van de stad. "
    "In de middeleeuwen was het hier een drukte van belang met de aan- en afvoer van goederen. "
    "Nu is het een prachtige plek om te winkelen en te lunchen of te dineren in de oude stadskastelen."
)

ra = ReadabilityAnalysis.from_text(text)

Loading Dutch language model from spaCy... ✓ nl_core_news_lg


### Visualization

In [3]:
# Visualizer: evaluating the object as the last expression of a cell renders
# its visualization (works only in notebooks).

ra

### Scores

In [4]:
# LiNT-II score of the whole document

ra.lint.score

48.20593518603563

In [5]:
# Difficulty level of the whole document

ra.lint.level

3

In [6]:
# LiNT-II score of each sentence (one value per sentence)

ra.lint_scores_per_sentence

[18.511612982419507, 54.27056340066443, 63.24402181810589]

### Detailed analysis

In [7]:
# Build a dictionary with a detailed analysis: document-level stats under
# 'document_stats' and per-sentence stats under 'sentence_stats'.

detailed_analysis = ra.get_detailed_analysis()

In [8]:
# Document-level statistics

detailed_analysis['document_stats']

{'sentence_count': 3,
 'document_lint_score': 48.20593518603563,
 'document_difficulty_level': 3,
 'min_lint_score': 18.511612982419507,
 'max_lint_score': 63.24402181810589}

In [17]:
# Detailed analysis of the first sentence (index 0 of 'sentence_stats')

detailed_analysis['sentence_stats'][0]

{'text': 'De Oudegracht is het sfeervolle hart van de stad.',
 'score': 18.511612982419507,
 'level': 1,
 'mean_log_word_frequency': 5.364349123825101,
 'top_n_least_freq_words': [('hart', 5.293120582960477),
  ('stad', 5.435577664689725)],
 'proportion_concrete_nouns': 0.5,
 'concrete_nouns': ['stad'],
 'abstract_nouns': [],
 'undefined_nouns': ['hart'],
 'unknown_nouns': ['oudegracht'],
 'sent_length': 9,
 'max_sdl': 3,
 'sdls': [{'token': 'de', 'dep_length': 0, 'heads': ['Oudegracht']},
  {'token': 'oudegracht', 'dep_length': 3, 'heads': ['hart']},
  {'token': 'is', 'dep_length': 2, 'heads': ['hart']},
  {'token': 'het', 'dep_length': 1, 'heads': ['hart']},
  {'token': 'sfeervolle', 'dep_length': 0, 'heads': ['hart']},
  {'token': 'hart', 'dep_length': 0, 'heads': ['hart']},
  {'token': 'van', 'dep_length': 1, 'heads': ['stad']},
  {'token': 'de', 'dep_length': 0, 'heads': ['stad']},
  {'token': 'stad', 'dep_length': 2, 'heads': ['hart']}],
 'content_words_per_clause': 4.0,
 'conten

### Other properties (examples)

For the full list of available properties, see the docstrings of the following classes:

- `WordFeatures`
- `SentenceAnalysis`
- `ReadabilityAnalysis`

In [10]:
# Print the class docstring, which documents the available attributes and properties

help(ReadabilityAnalysis)

Help on class ReadabilityAnalysis in module lint_ii.core.readability_analysis:

class ReadabilityAnalysis(lint_ii.visualization.html.LintIIVisualizer)
 |  ReadabilityAnalysis(sentences: list[lint_ii.core.sentence_analysis.SentenceAnalysis]) -> None
 |  
 |  Document-level readability analysis for Dutch texts using the LiNT-II formula.
 |  
 |  This class analyzes documents by aggregating sentence-level features and 
 |  computing readability scores based on four linguistic features: word frequency, 
 |  syntactic dependency length, content words per clause, and proportion of concrete nouns.
 |  
 |  Parameters
 |  ----------
 |  sentences : list[SentenceAnalysis]
 |      List of sentence-level analysis objects. Each sentence must be a 
 |      SentenceAnalysis instance containing linguistic features and metadata.
 |  
 |  Attributes & Properties
 |  -----------------------
 |  sentences : list[SentenceAnalysis]
 |      The input sentence analyses.
 |  word_features : list[WordFeatures]

In [11]:
# Mean log word frequency for the document

ra.mean_log_word_frequency

4.208347333820788

In [13]:
# Print the content words of every sentence, one list per line.

for sent in ra.sentences:
    content_word_texts = [word.text for word in sent.content_words]
    print(content_word_texts)

['oudegracht', 'sfeervolle', 'hart', 'stad']
['middeleeuwen', 'drukte', 'belang', 'afvoer', 'goederen']
['prachtige', 'plek', 'winkelen', 'lunchen', 'dineren', 'oude', 'stadskastelen']


In [15]:
# Map each word's text to its word frequency, skipping words whose
# frequency is None. A word that occurs more than once keeps a single
# entry, because the dictionary is keyed by the word text.

frequencies = {}
for feat in ra.word_features:
    freq = feat.word_frequency
    if freq is None:
        continue
    frequencies[feat.text] = freq
print(frequencies)

{'hart': 5.293120582960477, 'stad': 5.435577664689725, 'middeleeuwen': 3.423686536423184, 'drukte': 3.96775458077346, 'belang': 4.509063243912051, 'afvoer': 3.3845344124610364, 'goederen': 3.7985612410265284, 'prachtige': 4.7805033384066125, 'plek': 5.249034299064351, 'winkelen': 4.177454440810221, 'dineren': 3.893254653252401, 'oude': 5.436741798693928, 'stadskastelen': 1.359228547196266}
