In [None]:
#Application of frictionless principles to the relevant datafile
#output is JSON file describing the relevant datafile

#Please download frictionless at frictionlessdata.io/software/#software-toolkit

In [1]:
#import frictionless packages
import frictionless
from frictionless import describe

import pprint
pp = pprint.PrettyPrinter(depth=4)

In [2]:
#'describe' datasets and print
TX_DNP = describe('https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv')

pp.pprint(TX_DNP)

{'encoding': 'utf-8-sig',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx_digital-newspaper-program_issues',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'name': 'county', 'type': 'string'},
                       {'name': 'state', 'type': 'string'},
                       {'name': 'year_start', 'type': 'integer'},
                       {'name': 'year-end', 'type': 'integer'},
                       {'name': 'count', 'type': 'integer'}]},
 'scheme': 'https'}


In [3]:
#applying more detail to each column
TX_DNP.schema.get_field("county").title = "County"
TX_DNP.schema.get_field("county").description = "Texas county name"

TX_DNP.schema.get_field("state").title = "State, Texas"
TX_DNP.schema.get_field("state").description = "Indicator for State - this dataset is only relevant to Texas"

TX_DNP.schema.get_field("year_start").title = "Start Year"
TX_DNP.schema.get_field("year_start").description = "Year indicating the beginning of each decade for the corresponding count of issues in the relevant county"

TX_DNP.schema.get_field("year-end").title = "End Year"
TX_DNP.schema.get_field("year-end").description = "Year indicating the end of each decade for the corresponding count of issues in the relevant county"

TX_DNP.schema.get_field("count").title = "Issues Count"
TX_DNP.schema.get_field("count").description = "Number of newspaper issues released for each county within the specified decade, regardless of title, according to the Texas Digital Newspaper Program (TDNP)"


pp.pprint(TX_DNP)

{'encoding': 'utf-8-sig',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx_digital-newspaper-program_issues',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'county',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                                       'issues in the relevant co

In [4]:
#handling missing values
TX_DNP.schema.missing_values = [""]
pp.pprint(TX_DNP)

{'encoding': 'utf-8-sig',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx_digital-newspaper-program_issues',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'county',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                                       'issues in the relevant co

In [5]:
#apply constraint field descriptors to the tables
TX_DNP.schema.get_field("state").constraints["enum"] = ['TX']

pp.pprint(TX_DNP)

{'encoding': 'utf-8-sig',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx_digital-newspaper-program_issues',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'county',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                   

In [6]:
#add table description
TX_DNP.description = "This dataset contains the number of newspaper issues released for each county within the decade specified by the start year and end year columns, regardless of title, according to the Texas Digital Newspaper Program (TDNP) "
pp.pprint(TX_DNP)

{'description': 'This dataset contains the number of newspaper issues released '
                'for each county within the decade specified by the start year '
                'and end year columns, regardless of title, according to the '
                'Texas Digital Newspaper Program (TDNP) ',
 'encoding': 'utf-8-sig',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx_digital-newspaper-program_issues',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'county',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
            

In [7]:
#print JSON file
TX_DNP.to_json("tableschema.json")

'{\n  "path": "https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Digital-Newspaper-Program_Issues/TX_Digital-Newspaper-Program_Issues.csv",\n  "name": "tx_digital-newspaper-program_issues",\n  "profile": "tabular-data-resource",\n  "scheme": "https",\n  "format": "csv",\n  "hashing": "md5",\n  "encoding": "utf-8-sig",\n  "schema": {\n    "fields": [\n      {\n        "name": "county",\n        "type": "string",\n        "title": "County",\n        "description": "Texas county name"\n      },\n      {\n        "name": "state",\n        "type": "string",\n        "title": "State, Texas",\n        "description": "Indicator for State - this dataset is only relevant to Texas",\n        "constraints": {\n          "enum": [\n            "TX"\n          ]\n        }\n      },\n      {\n        "name": "year_start",\n        "type": "integer",\n        "title": "Start Year",\n        "description": "Year indicating the beginning of each decade for the corresponding co

In [8]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 3A43-426E

 Directory of C:\Users\Dream Machine\Desktop\LEADING20211018\TX-History-Photographic-Dataset_Items

11/03/2021  06:40 PM    <DIR>          .
11/03/2021  06:40 PM    <DIR>          ..
11/03/2021  06:27 PM    <DIR>          .ipynb_checkpoints
11/03/2021  06:26 PM            21,274 Frictionless-App-Script_TX-History-Photographic-Dataset_Items.ipynb
11/03/2021  06:40 PM             1,870 tableschema.json
               2 File(s)         23,144 bytes
               3 Dir(s)  50,567,770,112 bytes free
