Browse Source

dev: Standardize on utf-8 (no bom) in .editorconfig

pull/3546/head
Peter Crabtree 4 months ago
parent
commit
1d082e7128
  1. 6
      .editorconfig
  2. 171
      BuildTools/bom-classify-encodings.ps1
  3. 208
      BuildTools/bom-strip.ps1
  4. 5
      BuildTools/update-assemblyinfo.ps1

6
.editorconfig

@ -2,12 +2,18 @@
root = true root = true
[*] [*]
charset = utf-8 # standardize on no BOM (except resx, see below)
indent_style = tab indent_style = tab
indent_size = 4 indent_size = 4
guidelines = 110 guidelines = 110
tab_width = 4 tab_width = 4
end_of_line = crlf end_of_line = crlf
# .net tooling writes the BOM to resx files on running dotnet build ILSpy.sln regardless,
# so we should direct text editors to NOT change the file
[*.resx]
charset = utf-8-bom
[*.il] [*.il]
indent_style = space indent_style = space
indent_size = 2 indent_size = 2

171
BuildTools/bom-classify-encodings.ps1

@ -0,0 +1,171 @@
<#
.SYNOPSIS
Classify text files by encoding under the current subtree, respecting .gitignore.
.DESCRIPTION
Enumerates tracked files and untracked-but-not-ignored files (via Git) beneath
PWD. Skips likely-binary files (NUL probe). Classifies remaining files as:
- 'utf8' : valid UTF-8 (no BOM) or empty file
- 'utf8-with-bom' : starts with UTF-8 BOM (EF BB BF)
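- 'other' : text but not valid UTF-8 (e.g., ANSI/legacy code pages); files with NUL bytes in the first 8 KiB, which includes most UTF-16 text, are skipped as binary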
Outputs:
1) Relative paths of files classified as 'other'
2) A table by extension: UTF8 / UTF8-with-BOM / Other / Total
Notes:
- Read-only: this script makes no changes.
- Requires Git and must be run inside a Git work tree.
#>
[CmdletBinding()]
param()
# The script takes no parameters of its own.
# Strict mode: referencing uninitialized variables or missing properties is an error.
Set-StrictMode -Version Latest
# Treat non-terminating errors as terminating so the script stops at the first failure.
$ErrorActionPreference = 'Stop'
# --- Git enumeration ---------------------------------------------------------
function Assert-InGitWorkTree {
    <#
    Throws 'Not in a Git work tree.' unless `git rev-parse --is-inside-work-tree`
    exits with code 0 and prints 'true'. Produces no output on success.
    #>
    # Outside a repository git writes only to stderr, so the captured stdout is
    # $null. Calling .Trim() directly on that would raise a null-method error
    # instead of the message below; string interpolation turns $null into ''.
    $inside = & git rev-parse --is-inside-work-tree 2>$null
    if ($LASTEXITCODE -ne 0 -or "$inside".Trim() -ne 'true') {
        throw 'Not in a Git work tree.'
    }
}
function Get-GitFilesUnderPwd {
    <#
    Returns full paths to tracked + untracked-not-ignored files under PWD.
    Paths are emitted one per pipeline item; entries that no longer exist on
    disk as files (e.g. deleted but still in the index) are skipped.
    Throws if not inside a Git work tree or if `git ls-files` fails.
    #>
    Assert-InGitWorkTree
    $repoRoot = (& git rev-parse --show-toplevel).Trim()

    # Terminate the prefix with a separator so that running in '...\src' does
    # not also match siblings such as '...\src2\file'.
    $pwdPrefix = (Get-Location).Path.TrimEnd('\', '/') +
        [System.IO.Path]::DirectorySeparatorChar

    # cached (tracked) + others (untracked not ignored)
    $nulSeparated = & git -C $repoRoot ls-files -z --cached --others --exclude-standard
    if ($LASTEXITCODE -ne 0) { throw 'git ls-files failed.' }

    # PowerShell splits native output on newlines and yields $null when there is
    # no output at all. Re-join so the NUL split sees one string in every case.
    $relativePaths = ($nulSeparated -join "`n").Split(
        [char[]]@([char]0), [System.StringSplitOptions]::RemoveEmptyEntries)

    foreach ($relPath in $relativePaths) {
        # GetFullPath normalizes directory separators so the prefix comparison
        # does not depend on which slash style git reported.
        $fullPath = [System.IO.Path]::GetFullPath((Join-Path $repoRoot $relPath))
        # Only include files under the current subtree.
        if (-not $fullPath.StartsWith($pwdPrefix,
                [System.StringComparison]::OrdinalIgnoreCase)) {
            continue
        }
        if (Test-Path -LiteralPath $fullPath -PathType Leaf) { $fullPath }
    }
}
# --- Probes ------------------------------------------------------------------
function Test-ProbablyBinary {
    <#
    Heuristic: treat as binary if the first 8 KiB contains any NUL byte.
    Returns $true when a NUL byte is found, otherwise $false. Empty files and
    files that cannot be opened or read also yield $false.
    #>
    param([Parameter(Mandatory)][string]$Path)
    try {
        $stream = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
        try {
            $len = [int][Math]::Min(8192,$stream.Length)
            if ($len -le 0) { return $false }
            $buffer = [byte[]]::new($len)
            # Stream.Read is allowed to return fewer bytes than requested. Keep
            # reading until the buffer is full or the stream ends, and scan only
            # the bytes actually read; unread slots are zero-initialized and
            # would otherwise be mistaken for NUL bytes.
            $filled = 0
            while ($filled -lt $len) {
                $got = $stream.Read($buffer, $filled, $len - $filled)
                if ($got -le 0) { break }
                $filled += $got
            }
            return ([Array]::IndexOf($buffer, [byte]0, 0, $filled) -ge 0)
        }
        finally { $stream.Dispose() }
    }
    catch { return $false }
}
function Get-TextEncodingCategory {
    <#
    Returns 'utf8', 'utf8-with-bom', 'other', or $null for likely-binary.
      - empty file                          -> 'utf8'
      - starts with EF BB BF                -> 'utf8-with-bom'
      - NUL byte within the first 8 KiB     -> $null
      - whole file decodes as strict UTF-8  -> 'utf8'
      - decoding hits an invalid sequence   -> 'other'
    Errors opening or reading the file propagate to the caller.
    #>
    param([Parameter(Mandatory)][string]$Path)
    $stream = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
    try {
        $fileLength = $stream.Length
        if ($fileLength -eq 0) { return 'utf8' }

        # One sample serves both the BOM check and the binary probe.
        # Stream.Read may return fewer bytes than requested, so loop until the
        # sample is full and only inspect the bytes actually read; unread slots
        # are zero-initialized and would look like NUL bytes.
        $sampleLen = [int][Math]::Min(8192,$fileLength)
        $sample = [byte[]]::new($sampleLen)
        $filled = 0
        while ($filled -lt $sampleLen) {
            $got = $stream.Read($sample, $filled, $sampleLen - $filled)
            if ($got -le 0) { break }
            $filled += $got
        }

        # BOM check (EF BB BF)
        if ($filled -ge 3 -and
            $sample[0] -eq 0xEF -and $sample[1] -eq 0xBB -and $sample[2] -eq 0xBF) {
            return 'utf8-with-bom'
        }

        # Quick binary probe before expensive decoding
        if ([Array]::IndexOf($sample, [byte]0, 0, $filled) -ge 0) { return $null }
    }
    finally { $stream.Dispose() }

    # Validate UTF-8 by decoding with throw-on-invalid option (no BOM).
    # The read happens outside the try so that an I/O failure is not reported
    # as a non-UTF-8 file; only a decoding failure means 'other'.
    $bytes = [System.IO.File]::ReadAllBytes($Path)
    $utf8 = [System.Text.UTF8Encoding]::new($false,$true)
    try {
        [void]$utf8.GetString($bytes)
        return 'utf8'
    }
    catch [System.Text.DecoderFallbackException] { return 'other' }
}
# --- Main --------------------------------------------------------------------
# Relative paths of files classified as 'other'. A List avoids re-allocating
# the whole array on every append, which `+=` on a PowerShell array does.
$otherFiles = [System.Collections.Generic.List[string]]::new()
# Lower-cased extension -> per-category counters.
$byExtension = @{}
$allFiles = Get-GitFilesUnderPwd
foreach ($fullPath in $allFiles) {
    # Avoid decoding likely-binary files.
    if (Test-ProbablyBinary $fullPath) { continue }
    $category = Get-TextEncodingCategory $fullPath
    if (-not $category) { continue }

    # Invariant lower-casing keeps the grouping key independent of the
    # current culture.
    $ext = [IO.Path]::GetExtension($fullPath).ToLowerInvariant()
    if (-not $byExtension.ContainsKey($ext)) {
        $byExtension[$ext] = @{ 'utf8' = 0; 'utf8-with-bom' = 0; 'other' = 0 }
    }
    $byExtension[$ext][$category]++

    if ($category -eq 'other') {
        $otherFiles.Add((Resolve-Path -LiteralPath $fullPath -Relative))
    }
}

# 1) Files in 'other'
if ($otherFiles.Count -gt 0) {
    'Files classified as ''other'':'
    $otherFiles | Sort-Object | ForEach-Object { " $_" }
    ''
}

# 2) Table by extension, largest total first, then by extension name.
$rows = foreach ($kv in $byExtension.GetEnumerator()) {
    # Files without an extension are grouped under the empty key.
    $ext = if ($kv.Key) { $kv.Key } else { '[noext]' }
    $u = [int]$kv.Value['utf8']
    $b = [int]$kv.Value['utf8-with-bom']
    $o = [int]$kv.Value['other']
    [PSCustomObject]@{
        Extension = $ext
        UTF8 = $u
        'UTF8-with-BOM' = $b
        Other = $o
        Total = $u + $b + $o
    }
}
$rows |
    Sort-Object -Property (
        @{Expression='Total';Descending=$true},
        @{Expression='Extension';Descending=$false}
    ) |
    Format-Table -AutoSize

208
BuildTools/bom-strip.ps1

@ -0,0 +1,208 @@
<#
.SYNOPSIS
Strip UTF-8 BOM from selected text files under the current subtree, respecting
.gitignore.
.DESCRIPTION
Enumerates tracked and untracked-but-not-ignored files under the current
directory (via Git), filters to texty extensions and dotfiles, skips likely
binary files (NUL probe), and removes a leading UTF-8 BOM (EF BB BF) in place.
Refuses to run if there are uncommitted changes as a safeguard. Use -Force to override.
Supports -WhatIf/-Confirm via ShouldProcess.
#>
[CmdletBinding(SupportsShouldProcess = $true, ConfirmImpact = 'Low')]
param(
# Skips the clean-working-tree check performed by Assert-CleanWorkingTree.
[switch]$Force
)
# Strict mode: referencing uninitialized variables or missing properties is an error.
Set-StrictMode -Version Latest
# Treat non-terminating errors as terminating so the script stops at the first failure.
$ErrorActionPreference = 'Stop'
# --- File sets (ILSpy) ------------------------------------------------------
# Exact file names that are always eligible for BOM stripping.
$Dotfiles = @(
'.gitignore', '.editorconfig', '.gitattributes', '.gitmodules',
'.tgitconfig', '.vsconfig'
)
# File extensions eligible for BOM stripping; matched with -contains, which is
# case-insensitive. '.resx' is not listed.
$AllowedExts = @(
'.bat','.config','.cs','.csproj','.css','.filelist','.fs','.html','.il',
'.ipynb','.js','.json','.less','.manifest','.md','.projitems','.props',
'.ps1','.psd1','.ruleset','.shproj','.sln','.slnf','.svg','.template',
'.tt', '.txt','.vb','.vsct','.vsixlangpack','.wxl','.xaml','.xml','.xshd','.yml'
)
# When true, file names containing no '.' at all are eligible as well.
$IncludeNoExt = $true # include names like LICENSE
# --- Git checks / enumeration -----------------------------------------------
function Assert-InGitWorkTree {
    <#
    Verifies that the current directory is inside a Git work tree.
    Throws 'Not in a Git work tree.' when `git rev-parse --is-inside-work-tree`
    fails or prints anything other than 'true'. Produces no output on success.
    #>
    # Capture first and trim afterwards: outside a repository git prints nothing
    # on stdout, the captured value is $null, and calling .Trim() on it directly
    # would fail with a null-method error before the intended message is thrown.
    $answer = & git rev-parse --is-inside-work-tree 2>$null
    $isInside = ($LASTEXITCODE -eq 0) -and ("$answer".Trim() -eq 'true')
    if (-not $isInside) {
        throw 'Not in a Git work tree.'
    }
}
function Assert-CleanWorkingTree {
    <#
    Safeguard before mutating files: throws when `git status --porcelain`
    reports any entry. Does nothing when the script-level -Force switch is set.
    #>
    if (-not $Force) {
        $porcelain = & git status --porcelain -z
        if ($LASTEXITCODE -ne 0) {
            throw 'git status failed.'
        }
        # Any porcelain output at all means the tree has pending changes.
        $isDirty = -not [string]::IsNullOrEmpty($porcelain)
        if ($isDirty) {
            throw 'Working tree not clean. Commit/stash changes or use -Force.'
        }
    }
}
function Get-GitFilesUnderPwd {
    <#
    Emits the full path of every tracked or untracked-but-not-ignored file
    located under the current directory. Entries that do not exist on disk as
    files are skipped. Throws if not inside a Git work tree or if
    `git ls-files` fails.
    #>
    Assert-InGitWorkTree
    $repoRoot = (& git rev-parse --show-toplevel).Trim()

    # The prefix must end with a separator; otherwise running in '...\src'
    # would also match '...\src2\file'.
    $pwdPrefix = (Get-Location).Path.TrimEnd('\', '/') +
        [System.IO.Path]::DirectorySeparatorChar

    # One call lists tracked (--cached) and untracked-not-ignored (--others)
    # entries together, NUL-separated.
    $listing = & git -C $repoRoot ls-files -z --cached --others --exclude-standard
    if ($LASTEXITCODE -ne 0) { throw 'git ls-files failed.' }

    # PowerShell splits native output on newlines and yields $null when there is
    # no output; re-join so the NUL split always operates on a single string.
    $allRel = ($listing -join "`n").Split(
        [char[]]@([char]0), [System.StringSplitOptions]::RemoveEmptyEntries)

    foreach ($relPath in $allRel) {
        # GetFullPath normalizes directory separators so the prefix comparison
        # does not depend on which slash style git reported.
        $fullPath = [System.IO.Path]::GetFullPath((Join-Path $repoRoot $relPath))
        if (-not $fullPath.StartsWith($pwdPrefix,
                [System.StringComparison]::OrdinalIgnoreCase)) {
            continue
        }
        if (Test-Path -LiteralPath $fullPath -PathType Leaf) {
            $fullPath
        }
    }
}
# --- Probes -----------------------------------------------------------------
function Test-HasUtf8Bom {
    <#
    Returns $true when the file begins with the UTF-8 byte order mark
    (EF BB BF). Files shorter than three bytes, and files that cannot be
    opened or read, yield $false.
    #>
    param([Parameter(Mandatory)][string]$Path)
    $bom = [byte[]](0xEF, 0xBB, 0xBF)
    try {
        $fs = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
        try {
            if ($fs.Length -ge 3) {
                $lead = [byte[]]::new(3)
                [void]$fs.Read($lead,0,3)
                # Compare byte by byte; the first mismatch settles the answer.
                for ($i = 0; $i -lt 3; $i++) {
                    if ($lead[$i] -ne $bom[$i]) { return $false }
                }
                return $true
            }
            return $false
        }
        finally {
            $fs.Dispose()
        }
    }
    catch { return $false }
}
function Test-ProbablyBinary {
    <#
    Binary if the first 8 KiB contains any NUL byte.
    Returns $true when a NUL byte is found, otherwise $false. Empty files and
    files that cannot be opened or read also yield $false.
    #>
    param([Parameter(Mandatory)][string]$Path)
    try {
        $stream = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
        try {
            $len = [int][Math]::Min(8192,$stream.Length)
            if ($len -le 0) { return $false }
            $buffer = [byte[]]::new($len)
            # A single Stream.Read call may deliver fewer bytes than requested.
            # Fill the buffer in a loop and scan only what was read, because
            # untouched slots are zero and would be misread as NUL bytes.
            $read = 0
            do {
                $chunk = $stream.Read($buffer, $read, $len - $read)
                $read += $chunk
            } while ($chunk -gt 0 -and $read -lt $len)
            return ([Array]::IndexOf($buffer, [byte]0, 0, $read) -ge 0)
        }
        finally {
            $stream.Dispose()
        }
    }
    catch { return $false }
}
# --- Mutation ---------------------------------------------------------------
function Remove-Utf8BomInPlace {
    <#
    Removes a leading UTF-8 BOM (EF BB BF) from the file at $Path.
    Returns $true when a BOM was present and has been removed, $false when the
    file is shorter than three bytes or does not start with a BOM (the file is
    then left untouched). I/O errors propagate to the caller.
    #>
    param([Parameter(Mandatory)][string]$Path)
    $bytes = [System.IO.File]::ReadAllBytes($Path)
    $hasBom = $bytes.Length -ge 3 -and
        $bytes[0] -eq 0xEF -and
        $bytes[1] -eq 0xBB -and
        $bytes[2] -eq 0xBF
    if (-not $hasBom) { return $false }

    # Write the existing buffer from offset 3, no extra full-size allocation.
    # FileMode 'Open' (rather than 'Create') overwrites from position 0 without
    # truncating the file first, and does not throw on hidden files the way
    # 'Create' does. SetLength then drops the three stale trailing bytes.
    $stream = [System.IO.File]::Open($Path,'Open','Write','ReadWrite')
    try {
        $stream.Write($bytes, 3, $bytes.Length - 3)
        $stream.SetLength($bytes.Length - 3)
    }
    finally {
        $stream.Dispose()
    }
    return $true
}
# --- Main -------------------------------------------------------------------
Assert-InGitWorkTree
Assert-CleanWorkingTree
$allFiles = Get-GitFilesUnderPwd

# Select candidates: known dotfiles, allowed extensions, and (optionally) names
# without any '.', then keep only files that start with a BOM and do not look
# binary. Pipe characters are placed at line ends because a line that starts
# with '|' is only valid from PowerShell 7.0 on and is a parse error in
# Windows PowerShell 5.1.
$targets = $allFiles |
    Where-Object {
        $fileName = [IO.Path]::GetFileName($_)
        $ext = [IO.Path]::GetExtension($fileName)
        $isDot = $Dotfiles -contains $fileName
        $isNoExt = -not $fileName.Contains('.')
        $isDot -or ($AllowedExts -contains $ext) -or ($IncludeNoExt -and $isNoExt)
    } |
    Where-Object { Test-HasUtf8Bom $_ } |
    Where-Object { -not (Test-ProbablyBinary $_) }

$changed = 0
$byExtension = @{}      # extension -> number of files stripped
$dotfileChanges = 0
foreach ($file in $targets) {
    $relative = Resolve-Path -LiteralPath $file -Relative
    # Honors -WhatIf / -Confirm.
    if (-not $PSCmdlet.ShouldProcess($relative,'Strip UTF-8 BOM')) { continue }
    if (-not (Remove-Utf8BomInPlace -Path $file)) { continue }

    $changed++
    $fileName = [IO.Path]::GetFileName($file)
    if ($Dotfiles -contains $fileName) { $dotfileChanges++ }
    $ext = [IO.Path]::GetExtension($fileName)
    if (-not $byExtension.ContainsKey($ext)) { $byExtension[$ext] = 0 }
    $byExtension[$ext]++
    "stripped BOM: $relative"
}

"Done. Stripped BOM from $changed file(s)."
if ($byExtension.Keys.Count -gt 0) {
    ""
    "By extension:"
    $byExtension.GetEnumerator() | Sort-Object Name | ForEach-Object {
        # Files without an extension are counted under the empty key.
        $key = if ([string]::IsNullOrEmpty($_.Name)) { '[noext]' } else { $_.Name }
        " {0}: {1}" -f $key, $_.Value
    }
}
if ($dotfileChanges -gt 0) {
    " [dotfiles]: $dotfileChanges"
}

5
BuildTools/update-assemblyinfo.ps1

@ -1,4 +1,4 @@
if (-not ($PSVersionTable.PSCompatibleVersions -contains "5.0")) { if (-not ($PSVersionTable.PSCompatibleVersions -contains "5.0")) {
Write-Error "This script requires at least powershell version 5.0!"; Write-Error "This script requires at least powershell version 5.0!";
return 255; return 255;
} }
@ -175,7 +175,8 @@ try {
$out = $out.Replace('$INSERTBUILDCONFIG$', $buildConfig); $out = $out.Replace('$INSERTBUILDCONFIG$', $buildConfig);
if ((-not (Test-File $file.Output)) -or (((Get-Content $file.Output) -Join [System.Environment]::NewLine) -ne $out)) { if ((-not (Test-File $file.Output)) -or (((Get-Content $file.Output) -Join [System.Environment]::NewLine) -ne $out)) {
$out | Out-File -Encoding utf8 $file.Output; $utf8NoBom = New-Object System.Text.UTF8Encoding($false);
[System.IO.File]::WriteAllText($file.Output, $out, $utf8NoBom);
} }
} }

Loading…
Cancel
Save