mirror of https://github.com/icsharpcode/ILSpy.git
4 changed files with 388 additions and 2 deletions
@ -0,0 +1,171 @@ |
|||||||
|
<# |
||||||
|
.SYNOPSIS |
||||||
|
Classify text files by encoding under the current subtree, respecting .gitignore. |
||||||
|
|
||||||
|
.DESCRIPTION |
||||||
|
Enumerates tracked files and untracked-but-not-ignored files (via Git) beneath |
||||||
|
PWD. Skips likely-binary files (NUL probe). Classifies remaining files as: |
||||||
|
- 'utf8' : valid UTF-8 (no BOM) or empty file |
||||||
|
- 'utf8-with-bom' : starts with UTF-8 BOM (EF BB BF) |
||||||
|
- 'other' : text but not valid UTF-8 (e.g., ANSI/legacy code pages; UTF-16 text usually contains NUL bytes and is therefore skipped as likely-binary) |
||||||
|
|
||||||
|
Outputs: |
||||||
|
1) Relative paths of files classified as 'other' |
||||||
|
2) A table by extension: UTF8 / UTF8-with-BOM / Other / Total |
||||||
|
|
||||||
|
Notes: |
||||||
|
- Read-only: this script makes no changes. |
||||||
|
- Requires Git and must be run inside a Git work tree. |
||||||
|
#> |
||||||
|
|
||||||
|
# Advanced-script binding; this script declares no parameters of its own.
[CmdletBinding()]
param()

# Fail fast: latest strict-mode rules, and errors stop the script.
Set-StrictMode -Version Latest
$ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop
||||||
|
|
||||||
|
# --- Git enumeration --------------------------------------------------------- |
||||||
|
function Assert-InGitWorkTree {
    <#
    .SYNOPSIS
    Throws 'Not in a Git work tree.' unless the current directory is inside a
    Git work tree.

    .NOTES
    stderr is discarded, so when git fails (for example outside any
    repository) nothing is captured and the result is $null. The exit code is
    therefore checked before the captured output is touched; calling .Trim()
    directly on the result would raise a null-method error instead of the
    intended message.
    #>
    $output = & git rev-parse --is-inside-work-tree 2>$null
    if ($LASTEXITCODE -ne 0) {
        throw 'Not in a Git work tree.'
    }

    # -join tolerates $null, a single string, or several output lines.
    $inside = (@($output) -join '').Trim()
    if ($inside -ne 'true') {
        throw 'Not in a Git work tree.'
    }
}
||||||
|
|
||||||
|
function Get-GitFilesUnderPwd {
    <#
    .SYNOPSIS
    Returns full paths to tracked + untracked-not-ignored files under PWD.

    .DESCRIPTION
    Asks git (run against the repository root) for cached and
    untracked-but-not-ignored paths, joins each one to the repository root,
    and emits those that lie beneath the current directory and exist as files.

    .OUTPUTS
    System.String. One full path per matching file; nothing when none match.

    .NOTES
    Throws if not inside a Git work tree or if 'git ls-files' fails.
    #>
    Assert-InGitWorkTree

    $repoRoot = (& git rev-parse --show-toplevel).Trim()

    # Subtree prefix. It always ends with a directory separator so that a
    # sibling directory such as 'src2' does not match when PWD is 'src'.
    $separator = [System.IO.Path]::DirectorySeparatorChar
    $pwdPrefix = (Get-Location).ProviderPath.TrimEnd([char[]]'\/') + $separator

    # cached (tracked) + others (untracked not ignored)
    $lsOutput = & git -C $repoRoot ls-files -z --cached --others --exclude-standard
    if ($LASTEXITCODE -ne 0) { throw 'git ls-files failed.' }

    # PowerShell splits native output on newlines and yields $null when there
    # is no output at all; re-joining gives one NUL-separated string either way.
    $nulSeparated = @($lsOutput) -join "`n"

    $relativePaths = $nulSeparated.Split(
        [char]0, [System.StringSplitOptions]::RemoveEmptyEntries)

    foreach ($relPath in $relativePaths) {
        $fullPath = Join-Path $repoRoot $relPath

        # Only include files under the current subtree.
        # NOTE(review): the comparison ignores case, as before; on a
        # case-sensitive file system this could over-match - confirm intent.
        $isUnderPwd = $fullPath.StartsWith(
            $pwdPrefix, [System.StringComparison]::OrdinalIgnoreCase)

        if ($isUnderPwd -and (Test-Path -LiteralPath $fullPath -PathType Leaf)) {
            $fullPath
        }
    }
}
||||||
|
|
||||||
|
# --- Probes ------------------------------------------------------------------ |
||||||
|
function Test-ProbablyBinary {
    <#
    .SYNOPSIS
    NUL-byte heuristic: $true when the first 8 KiB of the file contains a
    zero byte.

    .PARAMETER Path
    File to probe.

    .OUTPUTS
    System.Boolean. $false for an empty file and also when the file cannot be
    opened or read (any error is swallowed).
    #>
    param([Parameter(Mandatory)][string]$Path)

    $probeSize = 8192

    try {
        $fs = [System.IO.File]::Open(
            $Path,
            [System.IO.FileMode]::Open,
            [System.IO.FileAccess]::Read,
            [System.IO.FileShare]::ReadWrite)
        try {
            $count = [int][Math]::Min($probeSize, $fs.Length)
            if ($count -gt 0) {
                $chunk = [byte[]]::new($count)
                [void]$fs.Read($chunk, 0, $count)
                return (0 -in $chunk)
            }

            return $false
        }
        finally {
            $fs.Dispose()
        }
    }
    catch {
        return $false
    }
}
||||||
|
|
||||||
|
function Get-TextEncodingCategory {
    <#
    .SYNOPSIS
    Classifies a file as 'utf8', 'utf8-with-bom' or 'other'; returns $null for
    likely-binary content.

    .PARAMETER Path
    File to classify.

    .OUTPUTS
    'utf8'          - empty file, or the whole file decodes as strict UTF-8.
    'utf8-with-bom' - file starts with EF BB BF (the rest is not validated).
    'other'         - strict UTF-8 decoding (or re-reading the file) failed.
    $null           - a NUL byte was found in the first 8 KiB.
    #>
    param([Parameter(Mandatory)][string]$Path)

    $fs = [System.IO.File]::Open(
        $Path,
        [System.IO.FileMode]::Open,
        [System.IO.FileAccess]::Read,
        [System.IO.FileShare]::ReadWrite)
    try {
        $size = $fs.Length
        if ($size -eq 0) { return 'utf8' }

        # One read covers both the BOM check and the NUL probe.
        $head = [byte[]]::new([int][Math]::Min(8192, $size))
        [void]$fs.Read($head, 0, $head.Length)

        # The BOM check comes first, so a BOM-prefixed file is reported as such
        # even if it also contains NUL bytes.
        $startsWithBom = $head.Length -ge 3 -and
            $head[0] -eq 0xEF -and $head[1] -eq 0xBB -and $head[2] -eq 0xBF
        if ($startsWithBom) { return 'utf8-with-bom' }

        # Quick binary probe before the expensive full decode.
        if (0 -in $head) { return $null }
    }
    finally {
        $fs.Dispose()
    }

    # Strict decoder: no BOM emission, throw on invalid byte sequences.
    try {
        $strictUtf8 = [System.Text.UTF8Encoding]::new($false, $true)
        [void]$strictUtf8.GetString([System.IO.File]::ReadAllBytes($Path))
        return 'utf8'
    }
    catch {
        return 'other'
    }
}
||||||
|
|
||||||
|
# --- Main -------------------------------------------------------------------- |
||||||
|
# Relative paths of files classified as 'other'. A List avoids re-allocating
# the whole array on every append.
$otherFiles = [System.Collections.Generic.List[string]]::new()

# Lower-cased extension -> per-category counters.
$byExtension = @{}

$allFiles = Get-GitFilesUnderPwd

foreach ($fullPath in $allFiles) {
    # Avoid decoding likely-binary files.
    if (Test-ProbablyBinary $fullPath) { continue }

    $category = Get-TextEncodingCategory $fullPath
    if (-not $category) { continue }

    # Invariant lower-casing keeps extension keys stable regardless of the
    # current culture (culture-sensitive casing can alter letters such as 'I').
    $ext = [IO.Path]::GetExtension($fullPath).ToLowerInvariant()
    if (-not $byExtension.ContainsKey($ext)) {
        $byExtension[$ext] = @{ 'utf8' = 0; 'utf8-with-bom' = 0; 'other' = 0 }
    }

    $byExtension[$ext][$category]++

    if ($category -eq 'other') {
        $otherFiles.Add((Resolve-Path -LiteralPath $fullPath -Relative))
    }
}

# 1) Files in 'other'
if ($otherFiles.Count -gt 0) {
    'Files classified as ''other'':'
    $otherFiles | Sort-Object | ForEach-Object { " $_" }
    ''
}

# 2) Table by extension
$rows = foreach ($kv in $byExtension.GetEnumerator()) {
    # An empty key means the file name had no extension.
    $ext = if ($kv.Key) { $kv.Key } else { '[noext]' }
    $u = [int]$kv.Value['utf8']
    $b = [int]$kv.Value['utf8-with-bom']
    $o = [int]$kv.Value['other']

    [PSCustomObject]@{
        Extension       = $ext
        UTF8            = $u
        'UTF8-with-BOM' = $b
        Other           = $o
        Total           = $u + $b + $o
    }
}

# Largest groups first; ties broken alphabetically by extension.
$rows |
    Sort-Object -Property (
        @{Expression='Total';Descending=$true},
        @{Expression='Extension';Descending=$false}
    ) |
    Format-Table -AutoSize
||||||
@ -0,0 +1,208 @@ |
|||||||
|
<# |
||||||
|
.SYNOPSIS |
||||||
|
Strip UTF-8 BOM from selected text files under the current subtree, respecting |
||||||
|
.gitignore. |
||||||
|
|
||||||
|
.DESCRIPTION |
||||||
|
Enumerates tracked and untracked-but-not-ignored files under the current |
||||||
|
directory (via Git), filters to texty extensions and dotfiles, skips likely |
||||||
|
binary files (NUL probe), and removes a leading UTF-8 BOM (EF BB BF) in place. |
||||||
|
|
||||||
|
Refuses to run if there are uncommitted changes as a safeguard. Use -Force to override. |
||||||
|
Supports -WhatIf/-Confirm via ShouldProcess. |
||||||
|
#> |
||||||
|
|
||||||
|
[CmdletBinding(SupportsShouldProcess = $true, ConfirmImpact = 'Low')]
param(
    # Bypass the clean-working-tree safeguard.
    [switch]$Force
)

Set-StrictMode -Version Latest
$ErrorActionPreference = [System.Management.Automation.ActionPreference]::Stop

# --- File sets (ILSpy) ------------------------------------------------------
# Dotfiles that are processed; matched against the whole file name.
$Dotfiles = @(
    '.gitignore'
    '.editorconfig'
    '.gitattributes'
    '.gitmodules'
    '.tgitconfig'
    '.vsconfig'
)

# Extensions (including the leading dot) of files that are processed.
$AllowedExts = @(
    '.bat', '.config', '.cs', '.csproj', '.css'
    '.filelist', '.fs', '.html', '.il', '.ipynb'
    '.js', '.json', '.less', '.manifest', '.md'
    '.projitems', '.props', '.ps1', '.psd1', '.ruleset'
    '.shproj', '.sln', '.slnf', '.svg', '.template'
    '.tt', '.txt', '.vb', '.vsct', '.vsixlangpack'
    '.wxl', '.xaml', '.xml', '.xshd', '.yml'
)

# include names like LICENSE
$IncludeNoExt = $true
||||||
|
|
||||||
|
# --- Git checks / enumeration ----------------------------------------------- |
||||||
|
function Assert-InGitWorkTree {
    <#
    .SYNOPSIS
    Throws 'Not in a Git work tree.' unless the current directory is inside a
    Git work tree.

    .NOTES
    stderr is discarded, so when git fails (for example outside any
    repository) nothing is captured and the result is $null. The exit code is
    therefore checked before the captured output is touched; calling .Trim()
    directly on the result would raise a null-method error instead of the
    intended message.
    #>
    $output = & git rev-parse --is-inside-work-tree 2>$null
    if ($LASTEXITCODE -ne 0) {
        throw 'Not in a Git work tree.'
    }

    # -join tolerates $null, a single string, or several output lines.
    $inside = (@($output) -join '').Trim()
    if ($inside -ne 'true') {
        throw 'Not in a Git work tree.'
    }
}
||||||
|
|
||||||
|
function Assert-CleanWorkingTree {
    <#
    .SYNOPSIS
    Throws when 'git status --porcelain' reports anything, unless the script
    was started with -Force.

    .NOTES
    Reads the script-level $Force switch. Throws 'git status failed.' when git
    exits with a non-zero code.
    #>
    if (-not $Force) {
        $porcelain = & git status --porcelain -z

        if ($LASTEXITCODE -ne 0) {
            throw 'git status failed.'
        }

        # Any output at all means the working tree is not clean.
        $isClean = [string]::IsNullOrEmpty($porcelain)
        if (-not $isClean) {
            throw 'Working tree not clean. Commit/stash changes or use -Force.'
        }
    }
}
||||||
|
|
||||||
|
function Get-GitFilesUnderPwd {
    <#
    .SYNOPSIS
    Returns full paths to tracked and untracked-but-not-ignored files under
    the current directory.

    .DESCRIPTION
    Runs 'git ls-files' against the repository root (once for tracked files,
    once for untracked files honouring the standard excludes), joins each
    path to the repository root, and emits those that lie beneath the current
    directory and exist as files.

    .OUTPUTS
    System.String. One full path per matching file; nothing when none match.

    .NOTES
    Throws if not inside a Git work tree or if 'git ls-files' fails.
    #>
    Assert-InGitWorkTree

    $repoRoot = (& git rev-parse --show-toplevel).Trim()

    # Subtree prefix. It always ends with a directory separator so that a
    # sibling directory such as 'src2' does not match when PWD is 'src'.
    $separator = [System.IO.Path]::DirectorySeparatorChar
    $pwdPrefix = (Get-Location).ProviderPath.TrimEnd([char[]]'\/') + $separator

    $tracked = & git -C $repoRoot ls-files -z
    if ($LASTEXITCODE -ne 0) { throw 'git ls-files failed.' }

    $others = & git -C $repoRoot ls-files --others --exclude-standard -z
    if ($LASTEXITCODE -ne 0) { throw 'git ls-files failed.' }

    # PowerShell splits native output on newlines and yields $null when there
    # is no output; re-joining each listing with "`n" restores one
    # NUL-separated string per call (string interpolation would insert spaces).
    $nulSeparated = (@($tracked) -join "`n") + (@($others) -join "`n")

    $allRel = $nulSeparated.Split(
        [char]0, [System.StringSplitOptions]::RemoveEmptyEntries)

    foreach ($relPath in $allRel) {
        $fullPath = Join-Path $repoRoot $relPath

        # NOTE(review): the comparison ignores case, as before; on a
        # case-sensitive file system this could over-match - confirm intent.
        $isUnderPwd = $fullPath.StartsWith(
            $pwdPrefix, [System.StringComparison]::OrdinalIgnoreCase)

        if ($isUnderPwd -and (Test-Path -LiteralPath $fullPath -PathType Leaf)) {
            $fullPath
        }
    }
}
||||||
|
|
||||||
|
# --- Probes ----------------------------------------------------------------- |
||||||
|
function Test-HasUtf8Bom {
    <#
    .SYNOPSIS
    $true when the file starts with the UTF-8 BOM bytes EF BB BF.

    .PARAMETER Path
    File to inspect.

    .OUTPUTS
    System.Boolean. $false for files shorter than three bytes and also when
    the file cannot be opened or read (any error is swallowed).
    #>
    param([Parameter(Mandatory)][string]$Path)

    try {
        $fs = [System.IO.File]::Open(
            $Path,
            [System.IO.FileMode]::Open,
            [System.IO.FileAccess]::Read,
            [System.IO.FileShare]::ReadWrite)
        try {
            if ($fs.Length -ge 3) {
                $head = [byte[]]::new(3)
                [void]$fs.Read($head, 0, 3)

                $isBom = $head[0] -eq 0xEF -and
                    $head[1] -eq 0xBB -and
                    $head[2] -eq 0xBF
                return $isBom
            }

            return $false
        }
        finally {
            $fs.Dispose()
        }
    }
    catch {
        return $false
    }
}
||||||
|
|
||||||
|
function Test-ProbablyBinary {
    <#
    .SYNOPSIS
    Binary if the first 8 KiB contains any NUL byte.

    .PARAMETER Path
    File to probe.

    .OUTPUTS
    System.Boolean. $false for an empty file and also when the file cannot be
    opened or read (any error is swallowed).
    #>
    param([Parameter(Mandatory)][string]$Path)

    $probeSize = 8192

    try {
        $fs = [System.IO.File]::Open(
            $Path,
            [System.IO.FileMode]::Open,
            [System.IO.FileAccess]::Read,
            [System.IO.FileShare]::ReadWrite)
        try {
            $count = [int][Math]::Min($probeSize, $fs.Length)
            if ($count -gt 0) {
                $chunk = [byte[]]::new($count)
                [void]$fs.Read($chunk, 0, $count)
                return (0 -in $chunk)
            }

            return $false
        }
        finally {
            $fs.Dispose()
        }
    }
    catch {
        return $false
    }
}
||||||
|
|
||||||
|
# --- Mutation --------------------------------------------------------------- |
||||||
|
function Remove-Utf8BomInPlace {
    <#
    .SYNOPSIS
    Rewrites the file without its leading UTF-8 BOM (EF BB BF).

    .DESCRIPTION
    Reads the whole file, and if it starts with the BOM, re-creates the file
    and writes the same buffer from offset 3 (no second full-size buffer is
    allocated).

    .PARAMETER Path
    File to rewrite.

    .OUTPUTS
    System.Boolean. $true when a BOM was removed; $false when the file is
    shorter than three bytes or does not start with the BOM (the file is then
    not opened for writing).
    #>
    param([Parameter(Mandatory)][string]$Path)

    $content = [System.IO.File]::ReadAllBytes($Path)

    $hasBom = $content.Length -ge 3 -and
        $content[0] -eq 0xEF -and
        $content[1] -eq 0xBB -and
        $content[2] -eq 0xBF
    if (-not $hasBom) { return $false }

    $payloadLength = $content.Length - 3

    $out = [System.IO.File]::Open(
        $Path,
        [System.IO.FileMode]::Create,
        [System.IO.FileAccess]::Write,
        [System.IO.FileShare]::ReadWrite)
    try {
        $out.Write($content, 3, $payloadLength)
        $out.SetLength($payloadLength)
    }
    finally {
        $out.Dispose()
    }

    return $true
}
||||||
|
|
||||||
|
# --- Main ------------------------------------------------------------------- |
||||||
|
Assert-InGitWorkTree
Assert-CleanWorkingTree

$allFiles = Get-GitFilesUnderPwd

# Select candidates: listed dotfiles, allowed extensions, and (optionally)
# names without any dot; then keep only those that start with a BOM and do not
# look binary.
# Pipes are placed at the END of each line: a line that begins with '|' is only
# accepted as a continuation by PowerShell 7+, and is a parse error in
# Windows PowerShell 5.1.
$targets = $allFiles |
    Where-Object {
        $fileName = [IO.Path]::GetFileName($_)
        $ext = [IO.Path]::GetExtension($fileName)

        $isDot = $Dotfiles -contains $fileName
        $isNoExt = -not $fileName.Contains('.')

        $isDot -or ($AllowedExts -contains $ext) -or
            ($IncludeNoExt -and $isNoExt -and -not $isDot)
    } |
    Where-Object { Test-HasUtf8Bom $_ } |
    Where-Object { -not (Test-ProbablyBinary $_) }

$changed = 0
$byExtension = @{}
$dotfileChanges = 0

foreach ($file in $targets) {
    $relative = Resolve-Path -LiteralPath $file -Relative

    # Honors -WhatIf / -Confirm.
    if (-not $PSCmdlet.ShouldProcess($relative, 'Strip UTF-8 BOM')) { continue }

    if (Remove-Utf8BomInPlace -Path $file) {
        $changed++

        $fileName = [IO.Path]::GetFileName($file)
        if ($Dotfiles -contains $fileName) { $dotfileChanges++ }

        $ext = [IO.Path]::GetExtension($fileName)
        if (-not $byExtension.ContainsKey($ext)) { $byExtension[$ext] = 0 }
        $byExtension[$ext]++

        "stripped BOM: $relative"
    }
}

"Done. Stripped BOM from $changed file(s)."

if ($byExtension.Keys.Count -gt 0) {
    ""
    "By extension:"
    $byExtension.GetEnumerator() | Sort-Object Name | ForEach-Object {
        # An empty key means the file name had no extension.
        $key = if ([string]::IsNullOrEmpty($_.Name)) { '[noext]' } else { $_.Name }
        " {0}: {1}" -f $key, $_.Value
    }
}

if ($dotfileChanges -gt 0) {
    " [dotfiles]: $dotfileChanges"
}
||||||
Loading…
Reference in new issue