sparql

custom functions in RDFLIB to call from SPARQL

#!/usr/bin/env python3

from rdflib import Graph, URIRef, Literal
from rdflib.plugins.sparql.operators import custom_function

g = Graph()

@custom_function(URIRef("http://example.org/myCustomFunction"))
def myCustomFunction(a,b):
    return Literal(a+b)

query = """
SELECT ?result WHERE {
    BIND(<http://example.org/myCustomFunction>(5,6) AS ?result)
}
"""

for row in g.query(query):
    print(f"Result: {row.result}")

HackaLOD 2023 sparql query Utrecht Time Machine

select distinct ?work ?workLabel ?depicts ?depictsLabel ?part ?partLabel ?coords ?heading ?image
where {
  {
    ?work wdt:P31/wdt:P279* wd:Q110304307 ;
          wdt:P180 wd:Q803 ;
          p:P180 ?statement .
    ?statement ps:P180 ?depicts.
    }

  optional { ?statement pq:P2677 ?part. }
  optional { ?work wdt:P18 ?image. }
  optional { ?work wdt:P1259 ?coords. }
  optional { ?work wdt:P7787 ?heading. }
  optional { ?work wdt:P170 ?creatorLabel. }
  optional { ?work wdt:P571 ?inceptionFull. }
  bind(year(?inceptionFull) as ?inception)
  filter (bound(?coords) && bound(?heading)) #alleen mét coords en heading

  service wikibase:label { bd:serviceParam wikibase:language "en" .}
}

Parent Tree

PREFIX schema: <https://schema.org/>
PREFIX id: <https://hetutrechtsarchief.nl/id/>
PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>

SELECT ?node ?parent ?nodeLabel ?parentLabel { 
  values (?node) { (id:609C5B9947B74642E0534701000A17FD) }
  {
    select distinct ?node ?parent ?nodeLabel ?parentLabel where {
      ?node rico:isOrWasIncludedIn* ?parent .
      ?parent rico:isOrWasIncludedIn/schema:name ?parentLabel .
      ?parent schema:name ?nodeLabel .
    }
  } 
}

get properties as JSON data for a wikidataID

grouping by rdf:type

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?type ?label (COUNT(?type) AS ?count)
WHERE {
  ?subject a ?type .
  OPTIONAL { ?type rdfs:label ?label }
}
GROUP BY ?type ?label
ORDER BY DESC(?count)

grouping by predicate

SELECT ?predicate (COUNT(?predicate) AS ?count)
WHERE {
  ?subject ?predicate ?object .
}
GROUP BY ?predicate
ORDER BY DESC(?count)

wikidata query using wikibase:around service to get items around a geopoint

#defaultView:Map
SELECT DISTINCT ?img ?distance ?place ?placeLabel ?location WHERE {

   SERVICE wikibase:around { 
     ?item wdt:P625 ?location . 
     ?item wdt:P131 wd:Q803. # place is in Utrecht
     ?item wdt:P18 ?img . # must have image

     # That are in a circle with a centre of with a point
     bd:serviceParam wikibase:center "Point(5.104219854148524,52.10037982790537)"^^geo:wktLiteral   . 
     bd:serviceParam wikibase:radius ".2" .  # 200m
     bd:serviceParam wikibase:distance ?distance .
   } .
   SERVICE wikibase:label {   bd:serviceParam wikibase:language "en" . }
}
ORDER BY ?distance

bind a single value to a variable

BIND( wd:Q30 AS ?country )

run multiple saved multipage sparql queries and write result as json per query

#!/usr/bin/env python3
import requests, json, csv
api = "https://api.data.netwerkdigitaalerfgoed.nl/queries/hetutrechtsarchief/"

queries = ["wo2-documenten", "wo2-brontypes", "wo2-personen", "wo2-adressen-per-document", "wo2-adressen", "wo2-persoon-op-adres-per-document"]

for query in queries:
    rows = []
    for i in range(1,5): # max 5 pages... must be a better way
        url = api+query+"/run.json?pageSize=10000&page="+str(i)
        response = requests.get(url)
        data = response.json()
        rows = rows + data

    json.dump(rows, open(f"{query}.json","w"), indent=2)

show map of all things that have a coordinate within 5km of Utrecht (Q803) using GEOSPARQL

PREFIX geof: <http://www.opengis.net/def/geosparql/function/>
#defaultView:Map
SELECT ?place ?placeLabel (SAMPLE(?location) as ?localisation) WHERE {
  wd:Q803 wdt:P625 ?utrecht.
  ?place wdt:P17 wd:Q55 . #NL
  ?place wdt:P625 ?location.
  FILTER((geof:distance(?location, ?utrecht)) <5)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "nl". }
}
group by ?place ?placeLabel

easier/faster way without GEOSPARQL is:

#defaultView:Map
SELECT  ?item ?itemLabel (SAMPLE(?localisation) AS ?localisation) WHERE {
  ?item wdt:P131 wd:Q803.
  ?item wdt:P625 ?localisation.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "nl". }
}
GROUP BY ?item ?itemLabel 

VALUES

...  
  values (?zoekterm) {
    ("Keijzerstraat") 
    ("Drift")
  }

order by random

...
BIND(SHA512(CONCAT(STR(RAND()), STR(?s))) AS ?random) . # order by random to get a varried sample set within the first 10.000 results
} ORDER BY ?random

request saved query in jsonld form from triply with paging (limit/pageSize, offset/page)

zie ook: https://triply.cc/blog/2021-03-new-features

Supported RDF media types: application/n-triples, application/n-quads, application/trig, text/turtle, application/ld+json, application/json, application/x-triply+json.

Supported RDF extensions: nt, nq, trig, ttl, jsonld, json, triply. Adding an extension to the end of the url overrides header-based content negotiation.

import requests, json
url = "https://api.data.netwerkdigitaalerfgoed.nl/queries/hetutrechtsarchief/wo2-all-documents/run.json?pageSize=10000&page=2"

response = requests.get(url)
# response = requests.get(url, headers={"Accept":"application/ld+json"})

data = response.json()

print(json.dumps(data,indent=2))

https://api.data.netwerkdigitaalerfgoed.nl/queries/hetutrechtsarchief/wo2-all-documents/run.json?pageSize=5&page=2

sparql construct test

PREFIX wgs84: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX sordef: <https://data.kkg.kadaster.nl/sor/model/def/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX def: <https://hetutrechtsarchief.nl/def/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

CONSTRUCT {
  ?doc rdf:type def:Document ;
     rdfs:label ?doc_label ;
     def:adres ?adres ;
     foaf:depiction ?depiction ;
     def:typeBron ?bron ;
     def:adres ?adres .
  ?adres rdf:type sordef:Nummeraanduiding ;
     rdfs:label ?adres_label ;
     wgs84:asWKT ?geo .
}
WHERE {
  ?doc a def:Document .
  OPTIONAL { ?doc rdfs:label ?doc_label }
  OPTIONAL { ?doc def:adresvermelding/sordef:Nummeraanduiding ?adres }
}
limit 10000 offset 1

geof:sfWithin (filter results with a boundingbox)

data:

@prefix geo: <http://www.opengis.net/ont/geosparql#> .
@prefix def: <https://hetutrechtsarchief.nl/def/> .
@prefix hua: <https://hetutrechtsarchief.nl/id/> .

hua:D8884A3B2E6CA8F6E0538F04000A374B
  a def:Adresvermelding ;
  geo:asWKT "POINT (9.1825624999999995 45.4652869999999965)"^^geo:wktLiteral .

query:

PREFIX def: <https://hetutrechtsarchief.nl/def/>
prefix geo: <http://www.opengis.net/ont/geosparql#>
prefix geof: <http://www.opengis.net/def/function/geosparql/>

select ?zoekveld ?match ?x {

  ?x a def:Adresvermelding ; 
     geo:asWKT ?match.

  bind('Polygon((10 45, 10 46, 9 46, 9 45, 10 45))'^^geo:wktLiteral as ?zoekveld)

  filter(geof:sfWithin(?match, ?zoekveld))
}
limit 10

source: https://triplydb.com/academy/-/queries/geo-within-query/5

everything that has as location a quadrangle instance EXCLUDE everything with a lunar coordinate

SELECT distinct ?item ?itemLabel ?itemDescription ?typeLabel ?pic ?dt ?lq ?lqLabel WHERE {
  ?lq wdt:P31 wd:Q56551180 .  # instance of quadrangle on the moon
  ?item wdt:P276 ?lq  .       # anything that has as location a quadrangle instance 
  ?item wdt:P31 ?type .
  OPTIONAL { ?item wdt:P18 ?pic } .
  OPTIONAL { ?item wdt:P619 ?dt } .

  MINUS { SELECT ?item WHERE { ?item wdt:P8981 ?coord . } } # EXCLUDE everything with a lunar coordinate

  FILTER (?type NOT IN ( wd:Q13406463 ) )  # excude Wikipedia articles

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}

CSV bestand met aantal keer dat een Wikidata item per jaar voorkomt in de dataset van HUA

Onderstaande loop voert meerdere queries uit (in blokken van 10.000 resultaten) uit op de triplestore van Het Utrechts Archief. De output kun je redirecten naar 1 groot csv bestand. Er wordt geen rekening gehouden met tijdsperiodes of onzekere datums

echo > data.csv

for i in `seq 0 10000 200000`
do
 query='PREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E%0APREFIX%20dct%3A%20%3Chttp%3A%2F%2Fpurl.org%2Fdc%2Fterms%2F%3E%0APREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT%20%3Fwd%2C%20%3Fyear%2C%20count%28%3Fyear%29%20as%20%3Faantal%20WHERE%20%7B%0A%20%20%3Fsub%20dct%3Aspatial%20%3Fwd%20.%0A%20%20%3Fsub%20dct%3Adate%20%3Fyear%20.%0A%20%20filter%20regex%28%3Fwd%2C%20%22wikidata%22%29%0A%7D%20%0A%23order%20by%20desc%28%3Faantal%29%0Alimit%2010000%0Aoffset%20'$i

 echo $query
 echo

 curl https://data.netwerkdigitaalerfgoed.nl/_api/datasets/hetutrechtsarchief/Dataset/services/Dataset/sparql -X POST --data query=$query -H "Accept: text/csv" | awk 'FNR>1' >> data.csv  # awk zorgt dat de header telkens worden weggefilterd
done

Ivar's SPARQL queries

Query via CURL als text/csv

# er zijn op het moment van schrijven +100.000 afbeeldingen in de beeldbank waarvan 
# een of meerdere 'oude nummers' bekend zijn. Onderstaande loop voert meerdere queries 
# (in blokken van 10.000 resultaten) uit op de triplestore van Het Utrechts Archief.
# de output kun je redirecten naar 1 groot csv bestand.

# Usage:
# ./download-csv.sh

output_base=output/HUA-catnr-oudnr

echo "Catalogusnummer,Oudnummer" > $output_base.csv

for i in `seq 0 10000 120000`
do 
  curl https://data.netwerkdigitaalerfgoed.nl/_api/datasets/hetutrechtsarchief/mi2rdf/services/mi2rdf/sparql -X POST --data 'query=PREFIX%20xsd%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0APREFIX%20rdf%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F1999%2F02%2F22-rdf-syntax-ns%23%3E%0APREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX%20soort%3A%20%3Chttps%3A%2F%2Farchief.io%2Fsoort%23%3E%0APREFIX%20v%3A%20%3Chttps%3A%2F%2Farchief.io%2Fveld%23%3E%0A%0ASELECT%20%20%3FCatalogusnummer%2C%20%3Foudnummer%20WHERE%20%7B%0A%0A%20%20SELECT%20%3FCatalogusnummer%2C%20%3Foudnummer%20%20%7B%0A%20%20%20%20%7B%20%3Fsub%20v%3Anummer%20%3FCatalogusnummer%20%3B%20v%3Aoudnummer_1%20%3F_oudnummer%7D%0A%20%20%20%20UNION%0A%20%20%20%20%7B%20%3Fsub%20v%3Anummer%20%3FCatalogusnummer%20%3B%20v%3Aoudnummer_2%20%3F_oudnummer%7D%20%0A%20%20%20%20UNION%0A%20%20%20%20%7B%20%3Fsub%20v%3Anummer%20%3FCatalogusnummer%20%3B%20v%3Aoudnummer_3%20%3F_oudnummer%7D%0A%20%20%20%20BIND%28REPLACE%28%3F_oudnummer%2C%22%5Cn%22%2C%22%22%29%20as%20%3Foudnummer%29%20%23%20remove%20linefeed%20from%20%3F_oudnumber%0A%20%20%7D%0A%20%20ORDER%20BY%20%3Foudnummer%0A%7D%0A%0ALIMIT%2010000%0AOFFSET%200'$i \
   -H "Accept: text/csv" | awk 'FNR>1' >> $output_base.csv # zorgt dat de header telkens worden weggefilterd
done

#convert to Markdown
csv2md $output_base.csv > $output_base.md  

#convert to HTML
marked $output_base.md > $output_base.html 

# Convert to PDF
markdown-pdf $output_base.md 

Opvragen 'oudnummer_*' en via UNION als losse triples

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX soort: <https://archief.io/soort#>
PREFIX v: <https://archief.io/veld#>

SELECT  ?Catalogusnummer, ?oudnummer WHERE {  # subselect voor ORDER BY icm OFFSET
  SELECT ?Catalogusnummer, ?oudnummer  {
    { ?sub v:nummer ?Catalogusnummer ; v:oudnummer_1 ?_oudnummer}
    UNION
    { ?sub v:nummer ?Catalogusnummer ; v:oudnummer_2 ?_oudnummer} 
    UNION
    { ?sub v:nummer ?Catalogusnummer ; v:oudnummer_3 ?_oudnummer}
    BIND(REPLACE(?_oudnummer,"\n","") as ?oudnummer) # remove linefeed from ?_oudnumber
  }
  ORDER BY ?oudnummer
}

LIMIT 10000
OFFSET 0

Lekker bezig voor Wikimedia

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX v: <https://archief.io/veld#>

SELECT * WHERE {
  GRAPH <https://data.netwerkdigitaalerfgoed.nl/MI2RDF/mi2rdf/graphs/HUA-beeldbank-april2020-9bestanden> {
    ?sub v:Deelcollectie "Nederlandse Spoorwegen" . 
    BIND(REPLACE(str(?sub), "https://archief.io/id/", "https://hetutrechtsarchief.nl/beeld/") AS ?shownAt)
    OPTIONAL { ?sub v:Catalogusnummer ?Catalogusnummer }
    OPTIONAL { ?sub v:Beschrijving ?Beschrijving }
    OPTIONAL { ?sub v:LBTWD ?LBTWD }
    OPTIONAL { ?sub v:THWTW ?THWTW }
    OPTIONAL { ?sub v:Afmeting ?Afmeting }
    OPTIONAL { ?sub v:Afmeting_2 ?Afmeting_2 }
    OPTIONAL { ?sub v:Opmerkingen ?Opmerkingen }
    OPTIONAL { ?sub v:Opschrift ?Opschrift }
    OPTIONAL { ?sub v:Datering_vroegst ?Datering_vroegst }
    OPTIONAL { ?sub v:Datering_laatst ?Datering_laatst }
    OPTIONAL { ?sub v:fnc_lic ?licentie }
    OPTIONAL { ?sub v:Auteursrechthouder ?Auteursrechthouder }
    OPTIONAL { ?sub v:CXTWD_VERVAARDIGER ?CXTWD_VERVAARDIGER }   
    OPTIONAL { ?sub v:cxtwd_uitgdruk ?cxtwd_uitgdruk }   
    OPTIONAL { ?sub v:Materiaal ?Materiaal }
    OPTIONAL { ?sub v:Techniek ?Techniek }
  } 
  FILTER regex(?licentie, "CC BY 4.0|CC0 1.0|Publiek Domein 1.0", "i")
}

#LIMIT 10000
#OFFSET 20000

Alle verschillende licenties

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX v: <https://archief.io/veld#>

SELECT distinct(?licentie) WHERE {
  ?sub ?pred ?obj .
  ?sub v:fnc_lic ?licentie 
} LIMIT 10

Alle vervaardigers

PREFIX v: <https://archief.io/veld#>
SELECT distinct ?obj WHERE {
  ?sub v:CXTWD_VERVAARDIGER ?obj .
} 

Alle straatnamen met daarachter bijbehorende plaats

PREFIX v: <https://archief.io/veld#>
SELECT distinct ?straat, ?plaats WHERE {
  ?sub v:CXTWD_STRAATNAAM ?straat .
  ?sub v:CXTWD_PLAATSNAAM ?plaats .
} 

Alle trefwoorden

PREFIX v: <https://archief.io/veld#>
SELECT distinct ?trefwoord WHERE {
  ?sub v:tr ?trefwoord
} 

Alle veldnamen maar alleen binnen 1 specifieke GRAPH (en evt binnen Deelcollectie)

PREFIX v: <https://archief.io/veld#>

SELECT distinct ?veldnaam WHERE {
  GRAPH <https://data.netwerkdigitaalerfgoed.nl/MI2RDF/mi2rdf/graphs/HUA-beeldbank-april2020-9bestanden> {
    # ?sub v:Deelcollectie "Nederlandse Spoorwegen" .   
    ?sub ?veldnaam ?obj
  }
} ORDER BY ?veldnaam

Alle vervaardigers (van alleen Beeldbank Waterlands Archief) gesorteerd op aantal afbeeldingen

PREFIX v: <https://archief.io/veld#>
SELECT count(*) as ?aantal, ?vervaardiger WHERE {
  GRAPH <https://data.netwerkdigitaalerfgoed.nl/MI2RDF/mi2rdf/graphs/BEELDBANK_14_131_MAISI_EXP-8066> {
   ?sub v:CXTWD_VERVAARDIGER ?vervaardiger
  }
} ORDER BY DESC(?aantal)

Alle velden v:tr én v:TR met UNION

PREFIX v: <https://archief.io/veld#>
SELECT count(*) as ?aantal, ?waarde WHERE {
  GRAPH <https://data.netwerkdigitaalerfgoed.nl/MI2RDF/mi2rdf/graphs/BEELDBANK_14_131_MAISI_EXP-8066> {

    { ?sub v:TR ?waarde } union { ?sub v:tr ?waarde } 

  }
} ORDER BY DESC(?aantal)

Persoon op afbeelding gesorteerd op aantal

PREFIX RiCo: <https://www.ica.org/standards/RiC/ontology#>
PREFIX v: <https://archief.io/veld#>
PREFIX soort: <https://archief.io/soort#>

SELECT count(*) as ?aantal, ?naam WHERE {
  GRAPH <https://data.netwerkdigitaalerfgoed.nl/MI2RDF/mi2rdf/graphs/BEELDBANK_14_131_MAISI_EXP-8066> {
   ?sub v:aet soort:psafb .
   ?sub RiCo:title ?naam
  }
} ORDER BY DESC(?aantal)