transkribus
Update 'hierarchy' metadata field
import requests,json,csv,sys
from tqdm import tqdm
auth = 'Bearer .............. '
url = 'https://transkribus.eu/TrpServer/rest/collections/1904438/list'
headers = {
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
'Connection': 'keep-alive',
'Origin': 'https://app.transkribus.org',
'Referer': 'https://app.transkribus.org/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'Sec-GPC': '1',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
'accept': 'application/json, text/plain, */*',
'authorization': auth,
'content-type': 'application/json',
'sec-ch-ua': '"Brave";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"'
}
def listDocuments(colId):
url = f'https://transkribus.eu/TrpServer/rest/collections/{colId}/list'
return requests.get(url, headers=headers).json()
def setDocumentHierarchy(doc, hierarchy):
colId = doc["mainColId"]
docId = doc["docId"]
url = f'https://transkribus.eu/TrpServer/rest/collections/{colId}/{docId}/metadata'
data = {
"type": "trpDocMetadata",
"docId": docId,
"title": doc["title"],
"hierarchy": hierarchy #doc["collectionList"]["colList"][0]["colName"]
}
response = requests.post(url, headers=headers, json=data)
if response.status_code!=200:
print("Error",response.response.text)
sys.exit()
###############################################
for doc in tqdm(listDocuments(colId = 1904438)):
setDocumentHierarchy(doc, "34-4 - Notarissen in de stad Utrecht 1560-1905")
List documents within collection and nrOfNew pages (not transcribed yet)
import requests,json,os,sys
collection_id = 297657 # Burgerlijke Stand
htr_model_id = 58997 # Dutch Demeter 1
base_url = 'https://transkribus.eu/TrpServer/rest'
username = os.getenv("TRANSKRIBUS_USER")
password = os.getenv("TRANSKRIBUS_PASS")
auth_response = requests.post(f"{base_url}/auth/login", data={'user': username, 'pw': password}, headers = {'Accept': 'application/json'})
auth_token = auth_response.json()['sessionId']
headers = {
#'Authorization': f'Bearer {auth_token}', # something wrong here?
'Content-Type': 'application/json',
'Accept': 'application/json',
'Cookie': f'JSESSIONID={auth_token}'
}
documents_response = requests.get(f"{base_url}/collections/{collection_id}/list", headers = headers)
documents_info = documents_response.json()
for doc in documents_info:
doc_id = doc["docId"]
documents_response = requests.get(f"{base_url}/collections/{collection_id}/{doc_id}/fulldoc", headers = headers)
doc_info = documents_response.json()["md"]
title = doc_info["title"]
print(title, doc_info["nrOfNew"]))