Browse Source

Upgrade ANSI marshalling to UTF-8 marshalling

UTF-8 is backwards-compatible with (i.e. a strict superset of) ANSI, so we can safely marshal any ANSI string as UTF-8 and thus gain UTF-8 support for free.
Also fixed a memory leak when passing strings to native code: nobody ever freed the memory allocated by the previously used Marshal.StringToHGlobalAnsi/Marshal.StringToHGlobalUni calls.

Signed-off-by: Dimitar Dobrev <dpldobrev@protonmail.com>
pull/1237/head
Dimitar Dobrev 6 years ago
parent
commit
2e4a3a5cf3
  1. 8
      src/Generator/Generators/CSharp/CSharpSources.cs
  2. 4
      src/Generator/Options.cs
  3. 1
      src/Generator/Passes/DelegatesPass.cs
  4. 124
      src/Generator/Types/Std/Stdlib.cs
  5. 24
      src/Runtime/Helpers.cs
  6. 35
      tests/Common/Common.Tests.cs
  7. 6
      tests/Common/Common.cpp
  8. 2
      tests/Common/Common.h

8
src/Generator/Generators/CSharp/CSharpSources.cs

@ -1068,6 +1068,11 @@ namespace CppSharp.Generators.CSharp @@ -1068,6 +1068,11 @@ namespace CppSharp.Generators.CSharp
marshal.Context.ArgumentPrefix}{marshal.Context.Return};");
}
}
if (paramMarshal.HasUsingBlock)
UnindentAndWriteCloseBrace();
if (ctx.HasCodeBlock)
UnindentAndWriteCloseBrace();
}
private void GeneratePropertyGetter<T>(T decl, Class @class,
@ -1802,6 +1807,9 @@ namespace CppSharp.Generators.CSharp @@ -1802,6 +1807,9 @@ namespace CppSharp.Generators.CSharp
{
WriteLine($"return {marshal.Context.ArgumentPrefix}{marshal.Context.Return};");
}
if (ctx.HasCodeBlock)
UnindentAndWriteCloseBrace();
}
if (!isVoid && isSetter)

4
src/Generator/Options.cs

@ -20,8 +20,6 @@ namespace CppSharp @@ -20,8 +20,6 @@ namespace CppSharp
GeneratorKind = GeneratorKind.CSharp;
OutputInteropIncludes = true;
Encoding = Encoding.ASCII;
StripLibPrefix = true;
ExplicitlyPatchedVirtualFunctions = new HashSet<string>();
@ -125,7 +123,7 @@ namespace CppSharp @@ -125,7 +123,7 @@ namespace CppSharp
/// </summary>
public CommentKind? CommentKind;
public Encoding Encoding { get; set; }
public Encoding Encoding { get; set; } = Encoding.UTF8;
public bool IsCSharpGenerator => GeneratorKind == GeneratorKind.CSharp;

1
src/Generator/Passes/DelegatesPass.cs

@ -276,6 +276,7 @@ namespace CppSharp.Passes @@ -276,6 +276,7 @@ namespace CppSharp.Passes
{
// TODO: all of this needs proper general fixing by only leaving type names
return types.Replace("global::System.", string.Empty)
.Replace("[MarshalAs(UnmanagedType.LPUTF8Str)] ", string.Empty)
.Replace("[MarshalAs(UnmanagedType.LPWStr)] ", string.Empty)
.Replace("global::", string.Empty).Replace("*", "Ptr")
.Replace('.', '_').Replace(' ', '_').Replace("::", "_")

124
src/Generator/Types/Std/Stdlib.cs

@ -129,7 +129,7 @@ namespace CppSharp.Types.Std @@ -129,7 +129,7 @@ namespace CppSharp.Types.Std
encoding = Context.Options.Encoding;
string param;
if (Equals(encoding, Encoding.ASCII))
if (Equals(encoding, Encoding.ASCII) || Equals(encoding, Encoding.UTF8))
param = "E_UTF8";
else if (Equals(encoding, Encoding.Unicode) ||
Equals(encoding, Encoding.BigEndianUnicode))
@ -154,8 +154,9 @@ namespace CppSharp.Types.Std @@ -154,8 +154,9 @@ namespace CppSharp.Types.Std
return new CustomType(typePrinter.IntPtrType);
}
if (Context.Options.Encoding == Encoding.ASCII)
return new CustomType("string");
if (Context.Options.Encoding == Encoding.ASCII ||
Context.Options.Encoding == Encoding.UTF8)
return new CustomType("[MarshalAs(UnmanagedType.LPUTF8Str)] string");
if (Context.Options.Encoding == Encoding.Unicode ||
Context.Options.Encoding == Encoding.BigEndianUnicode)
@ -183,19 +184,14 @@ namespace CppSharp.Types.Std @@ -183,19 +184,14 @@ namespace CppSharp.Types.Std
if (substitution != null)
param = $"({substitution.Replacement}) (object) {param}";
if (Equals(Context.Options.Encoding, Encoding.ASCII))
{
ctx.Return.Write($"Marshal.StringToHGlobalAnsi({param})");
return;
}
if (Equals(Context.Options.Encoding, Encoding.Unicode) ||
Equals(Context.Options.Encoding, Encoding.BigEndianUnicode))
{
ctx.Return.Write($"Marshal.StringToHGlobalUni({param})");
return;
}
throw new System.NotSupportedException(
$"{Context.Options.Encoding.EncodingName} is not supported yet.");
string bytes = $"__bytes{ctx.ParameterIndex}";
string bytePtr = $"__bytePtr{ctx.ParameterIndex}";
ctx.Before.WriteLine($@"byte[] {bytes} = global::System.Text.Encoding.{
GetEncodingClass(ctx.Parameter)}.GetBytes({param});");
ctx.Before.WriteLine($"fixed (byte* {bytePtr} = {bytes})");
ctx.HasCodeBlock = true;
ctx.Before.WriteOpenBraceAndIndent();
ctx.Return.Write($"new global::System.IntPtr({bytePtr})");
}
public override void CSharpMarshalToManaged(CSharpMarshalContext ctx)
@ -207,49 +203,93 @@ namespace CppSharp.Types.Std @@ -207,49 +203,93 @@ namespace CppSharp.Types.Std
return;
}
Type type = Type.Desugar();
Type pointee = type.GetPointee().Desugar();
var isChar = type.IsPointerToPrimitiveType(PrimitiveType.Char) ||
(pointee.IsPointerToPrimitiveType(PrimitiveType.Char) &&
ctx.Parameter != null &&
(ctx.Parameter.IsInOut || ctx.Parameter.IsOut));
var encoding = isChar ? Encoding.ASCII : Encoding.Unicode;
if (Equals(encoding, Encoding.ASCII))
encoding = Context.Options.Encoding;
string returnVarName = ctx.ReturnVarName;
string nullPtr = "global::System.IntPtr.Zero";
if (ctx.Function != null)
{
Type returnType = ctx.Function.ReturnType.Type.Desugar();
if (returnType.IsAddress() &&
returnType.GetPointee().Desugar().IsAddress())
{
returnVarName = $"new global::System.IntPtr(*{returnVarName})";
returnVarName = $"*{returnVarName}";
nullPtr = "null";
}
}
if (Equals(encoding, Encoding.ASCII))
TextGenerator textGenerator;
if (ctx.Parameter == null)
{
ctx.Return.Write($"Marshal.PtrToStringAnsi({returnVarName})");
return;
textGenerator = ctx.Before;
textGenerator.WriteLine($"if ({ctx.ReturnVarName} == {nullPtr})");
textGenerator.WriteLineIndent($"return default({ctx.ReturnType});");
}
if (Equals(encoding, Encoding.UTF8))
else
{
ctx.Return.Write($"Marshal.PtrToStringUTF8({returnVarName})");
return;
textGenerator = ctx.Cleanup;
textGenerator.WriteLine($"if ({ctx.ReturnVarName} == {nullPtr})");
textGenerator.WriteOpenBraceAndIndent();
textGenerator.WriteLine($"{ctx.Parameter.Name} = default({Type.Desugar()});");
textGenerator.WriteLine("return;");
textGenerator.UnindentAndWriteCloseBrace();
}
// If we reach this, we know the string is Unicode.
if (isChar || ctx.Context.TargetInfo.WCharWidth == 16)
string encoding = GetEncodingClass(ctx.Parameter);
string type = GetTypeForCodePoint(encoding);
textGenerator.WriteLine($"var __retPtr = ({type}*) {returnVarName};");
textGenerator.WriteLine("int __length = 0;");
textGenerator.WriteLine($"while (*(__retPtr++) != 0) __length += sizeof({type});");
ctx.Return.Write($@"global::System.Text.Encoding.{
encoding}.GetString((byte*) {returnVarName}, __length)");
}
/// <summary>
/// Picks the name of the <c>System.Text.Encoding</c> static property that
/// matches the mapped string type.
/// </summary>
/// <param name="parameter">
/// The parameter being marshalled, or <c>null</c> when there is none.
/// It is only consulted when the mapped type is a pointer to a <c>char</c> pointer.
/// </param>
/// <returns>
/// For non-<c>char</c> strings, "Unicode" when the target's wchar width is 16
/// and "UTF32" otherwise. For <c>char</c> strings, the name corresponding to
/// the encoding configured in the options.
/// </returns>
/// <exception cref="System.NotSupportedException">
/// The configured encoding is not one of ASCII, BigEndianUnicode, Unicode,
/// UTF32, UTF7 or UTF8.
/// </exception>
private string GetEncodingClass(Parameter parameter)
{
    Type desugared = Type.Desugar();
    Type target = desugared.GetPointee().Desugar();

    // A plain char pointer is always a narrow string; a pointer to a char
    // pointer only counts when it belongs to an out or in/out parameter.
    bool narrow = desugared.IsPointerToPrimitiveType(PrimitiveType.Char);
    if (!narrow && target.IsPointerToPrimitiveType(PrimitiveType.Char) && parameter != null)
        narrow = parameter.IsInOut || parameter.IsOut;

    if (!narrow)
    {
        // Wide strings: the encoding follows the width of wchar_t on the target.
        if (Context.TargetInfo.WCharWidth == 16)
            return nameof(Encoding.Unicode);
        return nameof(Encoding.UTF32);
    }

    // The well-known encodings are compared by reference against the
    // instances exposed by the static properties.
    var configured = Context.Options.Encoding;
    if (ReferenceEquals(configured, Encoding.ASCII))
        return nameof(Encoding.ASCII);
    if (ReferenceEquals(configured, Encoding.BigEndianUnicode))
        return nameof(Encoding.BigEndianUnicode);
    if (ReferenceEquals(configured, Encoding.Unicode))
        return nameof(Encoding.Unicode);
    if (ReferenceEquals(configured, Encoding.UTF32))
        return nameof(Encoding.UTF32);
    if (ReferenceEquals(configured, Encoding.UTF7))
        return nameof(Encoding.UTF7);
    if (ReferenceEquals(configured, Encoding.UTF8))
        return nameof(Encoding.UTF8);

    throw new System.NotSupportedException(
        $"{configured.EncodingName} is not supported yet.");
}
private static string GetTypeForCodePoint(string encoding)
{
switch (encoding)
{
ctx.Return.Write($"Marshal.PtrToStringUni({returnVarName})");
return;
case nameof(Encoding.UTF32):
return "int";
case nameof(Encoding.Unicode):
case nameof(Encoding.BigEndianUnicode):
return "short";
default:
return "byte";
}
// If we reach this, we should have an UTF-32 wide string.
const string encodingName = "System.Text.Encoding.UTF32";
ctx.Return.Write($@"CppSharp.Runtime.Helpers.MarshalEncodedString({
returnVarName}, {encodingName})");
}
}

24
src/Runtime/Helpers.cs

@ -1,24 +0,0 @@ @@ -1,24 +0,0 @@
using System;
using System.Runtime.InteropServices;
using System.Text;
namespace CppSharp.Runtime
{
    /// <summary>
    /// Static helper routines for marshalling native strings.
    /// </summary>
    public static class Helpers
    {
        /// <summary>
        /// Decodes a native string whose end is marked by a zero 32-bit value.
        /// </summary>
        /// <param name="ptr">Address of the first byte of the native string.</param>
        /// <param name="encoding">Encoding used to decode the copied bytes.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="ptr"/> is <see cref="IntPtr.Zero"/>;
        /// otherwise the decoded text, excluding the terminator.
        /// </returns>
        public static string MarshalEncodedString(IntPtr ptr, Encoding encoding)
        {
            if (ptr == IntPtr.Zero)
            {
                return null;
            }

            // Walk the buffer four bytes at a time until the zero terminator;
            // the final offset is the payload length in bytes.
            int byteCount = 0;
            for (; Marshal.ReadInt32(ptr, byteCount) != 0; byteCount += sizeof(int))
            {
            }

            byte[] raw = new byte[byteCount];
            Marshal.Copy(ptr, raw, 0, raw.Length);
            return encoding.GetString(raw);
        }
    }
}

35
tests/Common/Common.Tests.cs

@ -829,7 +829,7 @@ This is a very long string. This is a very long string. This is a very long stri @@ -829,7 +829,7 @@ This is a very long string. This is a very long string. This is a very long stri
}
}
[Test]
[Test]
public void TestStdStringPassedByValue()
{
// when C++ memory is deleted, it's only marked as free but not immediately freed
@ -859,6 +859,39 @@ This is a very long string. This is a very long string. This is a very long stri @@ -859,6 +859,39 @@ This is a very long string. This is a very long string. This is a very long stri
}
}
[Test]
public void TestUTF8()
{
var strings = new[] { "ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя" +
"ѐёђѓєѕіїјљњћќѝўџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿҀҁҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩ" +
"ҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӁӂӃӄӅӆӇӈӉӊӋӌӍӎӏӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽ" +
"ӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓ",
"აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶჷჸჹჺ",
"ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև",
"々〆〱〲〳〴〵〻〼ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづて" +
"でとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕ" +
"ゖゝゞゟァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニ" +
"ヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ" +
"ーヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ",
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ" +
"ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵ" +
"ĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊ" +
"ƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝ" +
"ǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳ" +
"ȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽ" +
"ḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚ" +
"ẛẞẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ" +
"ⱠⱡⱢⱣⱤⱥⱦⱧⱨⱩⱪⱫⱬⱭⱱⱲⱳⱴⱵⱶⱷ" };
foreach (var @string in strings)
{
Assert.That(Common.TakeReturnUTF8(@string), Is.EqualTo(@string));
}
}
private class CustomDerivedFromVirtual : AbstractWithVirtualDtor
{
public override void Abstract()

6
tests/Common/Common.cpp

@ -1071,6 +1071,12 @@ void overloadPointer(const void* p, int i) @@ -1071,6 +1071,12 @@ void overloadPointer(const void* p, int i)
{
}
// Stores a copy of the given string in the global UTF8 and returns a pointer
// to the stored copy's character data (not to the caller's buffer).
const char* takeReturnUTF8(const char* utf8)
{
    UTF8.assign(utf8);
    return UTF8.c_str();
}
StructWithCopyCtor::StructWithCopyCtor() {}
StructWithCopyCtor::StructWithCopyCtor(const StructWithCopyCtor& other) : mBits(other.mBits) {}

2
tests/Common/Common.h

@ -1495,6 +1495,8 @@ DLL_API void takeReferenceToVoidStar(const void*& p); @@ -1495,6 +1495,8 @@ DLL_API void takeReferenceToVoidStar(const void*& p);
DLL_API void takeVoidStarStar(void** p);
DLL_API void overloadPointer(void* p, int i = 0);
DLL_API void overloadPointer(const void* p, int i = 0);
DLL_API const char* takeReturnUTF8(const char* utf8);
DLL_API std::string UTF8;
struct DLL_API StructWithCopyCtor
{

Loading…
Cancel
Save