Skip to content

Commit

Permalink
Improved Uri shortening algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
mdesalvo committed Mar 16, 2019
1 parent 9008f93 commit 0c6c75a
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 215 deletions.
88 changes: 0 additions & 88 deletions RDFSharp/Model/RDFModelUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -337,94 +337,6 @@ internal static List<RDFNamespace> GetGraphNamespaces(RDFGraph graph)
}
return result.Distinct().ToList();
}

/// <summary>
/// Finds if the given token contains a recognizable namespace and, if so, abbreviates it with its prefix.
/// It also prepares the result in a format useful for serialization.
/// </summary>
/// <param name="token">
/// The textual token to abbreviate. Null or whitespace-only tokens yield an empty string;
/// tokens starting with "bnode:" are rewritten to start with "_:"; tokens starting with "?"
/// are returned unchanged.
/// </param>
/// <param name="prefixes">
/// The namespaces to search. When null, the lookup falls back to <c>RDFNamespaceRegister</c>.
/// </param>
/// <returns>
/// The token unchanged when its text before the first ':' is a known prefix; otherwise
/// "prefix:local" for the first namespace whose abbreviation is accepted as an XSD QName;
/// otherwise the token wrapped in angle brackets (for tokens containing "^^", only the
/// part following "^^" is wrapped).
/// </returns>
internal static String AbbreviateUri(String token, List<RDFNamespace> prefixes = null)
{
    //Null/Space token: give empty result
    if (String.IsNullOrWhiteSpace(token))
    {
        return String.Empty;
    }

    //Blank token: abbreviate it with "_" (only the leading marker is rewritten,
    //so a "bnode:" sequence occurring later in the identifier is left untouched)
    const String blankMarker = "bnode:";
    if (token.StartsWith(blankMarker, StringComparison.Ordinal))
    {
        return "_:" + token.Substring(blankMarker.Length);
    }

    //Variable token: do not modify
    if (token.StartsWith("?", StringComparison.Ordinal))
    {
        return token;
    }

    //Prefixed token: check if it starts with a known prefix, if so just return it
    String prefixToSearch = token.Split(':')[0];
    Boolean startsWithKnownPrefix = (prefixes == null
        ? RDFNamespaceRegister.GetByPrefix(prefixToSearch) != null
        : prefixes.Exists(pf => pf.NamespacePrefix.Equals(prefixToSearch, StringComparison.OrdinalIgnoreCase)));
    if (startsWithKnownPrefix)
    {
        return token;
    }

    //Uri token: search a known namespace, if found replace it with its prefix
    List<RDFNamespace> namespacesToSearch = (prefixes ?? RDFNamespaceRegister.Instance.Register);
    foreach (RDFNamespace ns in namespacesToSearch)
    {
        String nsUri = ns.ToString();

        //A token which IS the namespace (ignoring case) is never abbreviated;
        //the namespace must be an ordinal (culture-independent) prefix of the token
        if (token.Equals(nsUri, StringComparison.OrdinalIgnoreCase)
                || !token.StartsWith(nsUri, StringComparison.Ordinal))
        {
            continue;
        }

        //Replace only the leading namespace with "prefix:" and drop trailing slashes
        String candidate = (ns.NamespacePrefix + ":" + token.Substring(nsUri.Length)).TrimEnd('/');

        //Accept the abbreviation only if it has generated a valid XSD QName
        try
        {
            new RDFTypedLiteral(candidate, RDFModelEnums.RDFDatatypes.XSD_QNAME);
            return candidate;
        }
        catch
        {
            //Deliberate best-effort: any failure while building the QName literal
            //means this namespace is not usable, so the next one is tried
        }
    }

    //No namespace produced a valid abbreviation, let's emit the token in full form:
    if (token.Contains("^^"))
    {
        //token carries a "^^" datatype separator: wrap what follows it in angle brackets
        return token.Replace("^^", "^^<") + ">";
    }
    return "<" + token + ">"; //token is an absolute uri
}
#endregion

#region Datatypes
Expand Down
12 changes: 7 additions & 5 deletions RDFSharp/Model/Serializers/RDFTurtle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ limitations under the License.
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using RDFSharp.Query;

namespace RDFSharp.Model
{
Expand Down Expand Up @@ -63,7 +64,8 @@ internal static void Serialize(RDFGraph graph, Stream outputStream)

#region prefixes
//Write the namespaces collected by the graph
foreach (var ns in RDFModelUtilities.GetGraphNamespaces(graph).OrderBy(n => n.NamespacePrefix))
var prefixes = RDFModelUtilities.GetGraphNamespaces(graph);
foreach (var ns in prefixes.OrderBy(n => n.NamespacePrefix))
{
sw.WriteLine("@prefix " + ns.NamespacePrefix + ": <" + ns.NamespaceUri + ">.");
}
Expand Down Expand Up @@ -111,7 +113,7 @@ orderby triple.Subject.ToString(), triple.Predicate.ToString()
actualPred = String.Empty;
if (!actualSubj.StartsWith("_:"))
{
abbreviatedSubj = RDFModelUtilities.AbbreviateUri(actualSubj);
abbreviatedSubj = RDFQueryUtilities.PrintRDFPatternMember(RDFQueryUtilities.ParseRDFPatternMember(actualSubj), prefixes);
}
else
{
Expand All @@ -136,7 +138,7 @@ orderby triple.Subject.ToString(), triple.Predicate.ToString()
result.Append(spaceConst.PadRight(abbreviatedSubj.Length + 1)); //pretty-printing spaces to align the predList
}
actualPred = triple.Predicate.ToString();
abbreviatedPred = RDFModelUtilities.AbbreviateUri(actualPred);
abbreviatedPred = RDFQueryUtilities.PrintRDFPatternMember(RDFQueryUtilities.ParseRDFPatternMember(actualPred), prefixes);
//Turtle goody for "rdf:type" shortcutting to "a"
if (abbreviatedPred == RDFVocabulary.RDF.PREFIX + ":type")
{
Expand All @@ -153,7 +155,7 @@ orderby triple.Subject.ToString(), triple.Predicate.ToString()
String obj = triple.Object.ToString();
if (!obj.StartsWith("_:"))
{
result.Append(RDFModelUtilities.AbbreviateUri(obj));
result.Append(RDFQueryUtilities.PrintRDFPatternMember(RDFQueryUtilities.ParseRDFPatternMember(obj), prefixes));
}
else
{
Expand All @@ -175,7 +177,7 @@ orderby triple.Subject.ToString(), triple.Predicate.ToString()

if (triple.Object is RDFTypedLiteral)
{
String tLit = litValDelim + ((RDFTypedLiteral)triple.Object).Value.Replace("\\", "\\\\") + litValDelim + "^^" + RDFModelUtilities.AbbreviateUri(RDFModelUtilities.GetDatatypeFromEnum(((RDFTypedLiteral)triple.Object).Datatype));
String tLit = litValDelim + ((RDFTypedLiteral)triple.Object).Value.Replace("\\", "\\\\") + litValDelim + "^^" + RDFQueryUtilities.PrintRDFPatternMember(RDFQueryUtilities.ParseRDFPatternMember(RDFModelUtilities.GetDatatypeFromEnum(((RDFTypedLiteral)triple.Object).Datatype)), prefixes);
result.Append(tLit);
}
else
Expand Down
6 changes: 2 additions & 4 deletions RDFSharp/Query/Filters/RDFComparisonFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,8 @@ public override String ToString()
}
internal override String ToString(List<RDFNamespace> prefixes)
{
String leftValue = (prefixes != null && prefixes.Any() ? RDFModelUtilities.AbbreviateUri(this.LeftMember.ToString(), prefixes) :
RDFQueryUtilities.PrintRDFPatternMember(this.LeftMember));
String rightValue = (prefixes != null && prefixes.Any() ? RDFModelUtilities.AbbreviateUri(this.RightMember.ToString(), prefixes) :
RDFQueryUtilities.PrintRDFPatternMember(this.RightMember));
String leftValue = RDFQueryUtilities.PrintRDFPatternMember(this.LeftMember, prefixes);
String rightValue = RDFQueryUtilities.PrintRDFPatternMember(this.RightMember, prefixes);
switch (this.ComparisonFlavor)
{
case RDFQueryEnums.RDFComparisonFlavors.LessThan:
Expand Down
9 changes: 1 addition & 8 deletions RDFSharp/Query/Filters/RDFDatatypeFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,7 @@ public override String ToString()
}
internal override String ToString(List<RDFNamespace> prefixes)
{
if (prefixes != null && prefixes.Any())
{
return "FILTER ( DATATYPE(" + this.Variable + ") = " + RDFModelUtilities.AbbreviateUri(RDFModelUtilities.GetDatatypeFromEnum(this.Datatype), prefixes) + " )";
}
else
{
return "FILTER ( DATATYPE(" + this.Variable + ") = <" + RDFModelUtilities.GetDatatypeFromEnum(this.Datatype) + "> )";
}
return "FILTER ( DATATYPE(" + this.Variable + ") = " + RDFQueryUtilities.PrintRDFPatternMember(RDFQueryUtilities.ParseRDFPatternMember(RDFModelUtilities.GetDatatypeFromEnum(this.Datatype)), prefixes) + " )";
}
#endregion

Expand Down
9 changes: 1 addition & 8 deletions RDFSharp/Query/Filters/RDFSameTermFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,7 @@ public override String ToString()
}
internal override String ToString(List<RDFNamespace> prefixes)
{
if (prefixes != null && prefixes.Any())
{
return "FILTER ( SAMETERM(" + this.Variable + ", " + RDFModelUtilities.AbbreviateUri(this.RDFTerm.ToString(), prefixes) + ") )";
}
else
{
return "FILTER ( SAMETERM(" + this.Variable + ", " + RDFQueryUtilities.PrintRDFPatternMember(this.RDFTerm) + ") )";
}
return "FILTER ( SAMETERM(" + this.Variable + ", " + RDFQueryUtilities.PrintRDFPatternMember(this.RDFTerm, prefixes) + ") )";
}
#endregion

Expand Down
9 changes: 1 addition & 8 deletions RDFSharp/Query/Queries/Describe/RDFDescribeQuery.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,7 @@ public override String ToString()
#region TERMS
if (this.DescribeTerms.Any())
{
if (this.Prefixes.Any())
{
this.DescribeTerms.ForEach(t => query.Append(" " + RDFModelUtilities.AbbreviateUri(t.ToString(), this.Prefixes)));
}
else
{
this.DescribeTerms.ForEach(t => query.Append(" " + RDFQueryUtilities.PrintRDFPatternMember(t)));
}
this.DescribeTerms.ForEach(t => query.Append(" " + RDFQueryUtilities.PrintRDFPatternMember(t, this.Prefixes)));
}
else
{
Expand Down
31 changes: 4 additions & 27 deletions RDFSharp/Query/RDFPattern.cs
Original file line number Diff line number Diff line change
Expand Up @@ -198,37 +198,14 @@ public override String ToString()
}
internal String ToString(List<RDFNamespace> prefixes)
{
String subj = null;
String pred = null;
String obj = null;

//If prefixes are given, try to use them for abbreviating pattern terms
if (prefixes != null && prefixes.Any())
{
subj = RDFModelUtilities.AbbreviateUri(this.Subject.ToString(), prefixes);
pred = RDFModelUtilities.AbbreviateUri(this.Predicate.ToString(), prefixes);
obj = RDFModelUtilities.AbbreviateUri(this.Object.ToString(), prefixes);
}
//Otherwise pretty-print pattern terms
else
{
subj = RDFQueryUtilities.PrintRDFPatternMember(this.Subject);
pred = RDFQueryUtilities.PrintRDFPatternMember(this.Predicate);
obj = RDFQueryUtilities.PrintRDFPatternMember(this.Object);
}
String subj = RDFQueryUtilities.PrintRDFPatternMember(this.Subject, prefixes);
String pred = RDFQueryUtilities.PrintRDFPatternMember(this.Predicate, prefixes);
String obj = RDFQueryUtilities.PrintRDFPatternMember(this.Object, prefixes);

//CSPO pattern
if (this.Context != null)
{
String ctx = null;
if (prefixes != null && prefixes.Any())
{
ctx = RDFModelUtilities.AbbreviateUri(this.Context.ToString(), prefixes);
}
else
{
ctx = RDFQueryUtilities.PrintRDFPatternMember(this.Context);
}
String ctx = RDFQueryUtilities.PrintRDFPatternMember(this.Context, prefixes);
if (this.IsOptional)
{
return "OPTIONAL { GRAPH " + ctx + " { " + subj + " " + pred + " " + obj + " } }";
Expand Down
66 changes: 10 additions & 56 deletions RDFSharp/Query/RDFPropertyPath.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,11 @@ public override String ToString()
}
internal String ToString(List<RDFNamespace> prefixes)
{
if (prefixes != null && prefixes.Any())
{
return RDFModelUtilities.AbbreviateUri(this.Start.ToString(), prefixes) +
" " +
this.GetStepString(prefixes) +
" " +
RDFModelUtilities.AbbreviateUri(this.End.ToString(), prefixes);
}
else
{
return this.Start +
" " +
this.GetStepString(new List<RDFNamespace>()) +
" " +
this.End;
}
return RDFQueryUtilities.PrintRDFPatternMember(this.Start, prefixes) +
" " +
this.GetStepString(prefixes) +
" " +
RDFQueryUtilities.PrintRDFPatternMember(this.End, prefixes);
}
#endregion

Expand Down Expand Up @@ -183,14 +172,7 @@ internal String GetStepString(List<RDFNamespace> prefixes)
}

var propPath = this.Steps[0].StepProperty;
if (prefixes != null && prefixes.Any())
{
result.Append(RDFModelUtilities.AbbreviateUri(propPath.ToString(), prefixes));
}
else
{
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath));
}
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath, prefixes));

}
#endregion
Expand Down Expand Up @@ -224,26 +206,12 @@ internal String GetStepString(List<RDFNamespace> prefixes)
var propPath = this.Steps[i].StepProperty;
if (i < this.Steps.Count - 1)
{
if (prefixes != null && prefixes.Any())
{
result.Append(RDFModelUtilities.AbbreviateUri(propPath.ToString(), prefixes));
}
else
{
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath));
}
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath, prefixes));
result.Append((Char)this.Steps[i].StepFlavor);
}
else
{
if (prefixes != null && prefixes.Any())
{
result.Append(RDFModelUtilities.AbbreviateUri(propPath.ToString(), prefixes));
}
else
{
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath));
}
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath, prefixes));
result.Append(")");
}
}
Expand All @@ -267,26 +235,12 @@ internal String GetStepString(List<RDFNamespace> prefixes)
var propPath = this.Steps[i].StepProperty;
if (i < this.Steps.Count - 1)
{
if (prefixes != null && prefixes.Any())
{
result.Append(RDFModelUtilities.AbbreviateUri(propPath.ToString(), prefixes));
}
else
{
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath));
}
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath, prefixes));
result.Append((Char)this.Steps[i].StepFlavor);
}
else
{
if (prefixes != null && prefixes.Any())
{
result.Append(RDFModelUtilities.AbbreviateUri(propPath.ToString(), prefixes));
}
else
{
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath));
}
result.Append(RDFQueryUtilities.PrintRDFPatternMember(propPath, prefixes));
}
}

Expand Down
Loading

0 comments on commit 0c6c75a

Please sign in to comment.