SharePoint - Export Library & Fix Path Length Issues
Problem
Windows has a MAX_PATH limit of 260 characters. When downloading files from SharePoint to a local drive, files with long paths will fail to copy. SharePoint allows much longer paths than Windows does.
Audit Process
1. Register an Entra App
In the client’s Azure portal (portal.azure.com):
- Entra ID → App registrations → New registration
- Name it (e.g. SPAudit), single tenant, no redirect URI
- Note the Client ID and Tenant ID
- API Permissions → Add → Microsoft Graph → Application permissions → Sites.Read.All + Files.Read.All → Grant admin consent
- Certificates & Secrets → New client secret → copy the Value immediately (not the ID)
2. Run the Audit Script
See script below. Crawls the entire library/folder via Graph API and outputs a CSV of every item whose local path would exceed 260 chars.
Key config values:
- FOLDER_PATH — path relative to drive root, e.g. 1 - Archives (do NOT include Shared Documents/)
- DEST_ROOT — actual destination on the hard drive; keep it short (e.g. E:\B) to maximise headroom
- OUTPUT_CSV — where to save results
Prerequisites:
pip install requests msal
# Also requires Python 3.x and PowerShell 7 if using PnP (not needed for this script)
Notes:
- Token auto-refreshes so runs longer than 1 hour won’t fail with 401
- Iterative BFS traversal avoids Python recursion limits on deep folder structures
- Retry logic with backoff handles dropped connections
- 200k item library took ~2 hours to crawl
3. Interpret Results
- CSV columns: Type, PathLength, Over (chars over limit), LocalPath, SharePointPath
- Check folders first — a long parent folder path causes all children to fail; renaming one folder fixes many
- If it’s only files (no folders), the issue is long filenames rather than deep nesting
4. Fix Options
- Manual rename in SharePoint — practical if under ~100 files
- Auto-truncate script — see truncation script below; renames files in SharePoint via Graph API, keeps extension, trims filename to fit
Audit Script
import msal
import requests
import csv
import os
import time
from collections import deque
# --- CONFIG ---
TENANT_ID = "your-tenant-id"            # Entra (directory) tenant ID from the app registration
CLIENT_ID = "your-client-id"            # Application (client) ID of the Entra app
CLIENT_SECRET = "your-client-secret"    # client secret VALUE (not the secret ID)
FOLDER_PATH = "1 - Archives" # relative to drive root (do NOT include "Shared Documents/")
DEST_ROOT = "E:\\B"                     # planned local destination; keep short to maximise headroom
MAX_PATH = 260                          # Windows MAX_PATH limit
OUTPUT_CSV = "C:\\Temp\\PathAudit.csv"  # where the audit results are written
MAX_RETRIES = 5                         # attempts per Graph request before giving up
# --------------
def build_app():
    """Create the MSAL confidential-client app and verify credentials work.

    Requests a token once up front so bad credentials fail fast, before the
    crawl starts. Returns the app object (callers re-acquire tokens from it).
    """
    client = msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=f"https://login.microsoftonline.com/{TENANT_ID}",
        client_credential=CLIENT_SECRET,
    )
    outcome = client.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    if "access_token" not in outcome:
        raise Exception(f"Auth failed: {outcome.get('error_description')}")
    print("Authentication successful.")
    return client
def get_with_retry(url, app):
    """GET *url* from Graph, retrying on dropped connections and 429 throttling.

    The token is re-acquired on every attempt — MSAL serves it from cache and
    refreshes it when near expiry, so crawls longer than an hour don't 401.
    """
    attempt = 0
    while attempt < MAX_RETRIES:
        try:
            bearer = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])["access_token"]
            response = requests.get(url, headers={"Authorization": f"Bearer {bearer}"}, timeout=60)
            response.raise_for_status()
            return response
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # Exponential backoff on transient network failures.
            wait = 2 ** attempt
            print(f" Connection error (attempt {attempt+1}/{MAX_RETRIES}), retrying in {wait}s...")
            time.sleep(wait)
        except requests.exceptions.HTTPError as err:
            if err.response.status_code != 429:
                raise
            # Graph throttling: honour the Retry-After header, default 10s.
            wait = int(err.response.headers.get("Retry-After", 10))
            print(f" Throttled, waiting {wait}s...")
            time.sleep(wait)
        attempt += 1
    raise Exception(f"Failed after {MAX_RETRIES} attempts: {url}")
def get_site_id(app, hostname):
    """Resolve a SharePoint hostname (e.g. tenant.sharepoint.com) to its Graph site ID."""
    site = get_with_retry(f"https://graph.microsoft.com/v1.0/sites/{hostname}:/", app).json()
    return site["id"]
def get_drive_id(app, site_id):
    """Return the ID of the site's default 'Documents' library drive."""
    payload = get_with_retry(f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives", app).json()
    drives = payload["value"]
    match = next((d["id"] for d in drives if d["name"] == "Documents"), None)
    if match is None:
        raise Exception(f"Could not find Documents drive. Available: {[d['name'] for d in drives]}")
    return match
def get_all_items(app, drive_id, folder_path):
    """Breadth-first crawl of *folder_path* within the given drive.

    Returns a list of dicts {"name", "path", "is_folder"} for every item below
    the seed folder (the seed folder itself is not included). Iterative BFS
    avoids Python recursion limits on deeply nested folder structures.
    """
    items = []
    seed_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{folder_path}:"
    root = get_with_retry(seed_url, app).json()
    queue = deque([(root["id"], folder_path)])
    total_folders = 0
    total_files = 0
    last_reported = 0  # folder count at the last progress message
    while queue:
        folder_id, current_path = queue.popleft()
        url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{folder_id}/children"
        while url:  # follow @odata.nextLink pagination until exhausted
            data = get_with_retry(url, app).json()
            for item in data.get("value", []):
                item_path = f"{current_path}/{item['name']}"
                is_folder = "folder" in item
                items.append({"name": item["name"], "path": item_path, "is_folder": is_folder})
                if is_folder:
                    total_folders += 1
                    queue.append((item["id"], item_path))
                else:
                    total_files += 1
            url = data.get("@odata.nextLink")
        # BUGFIX: the original printed whenever total_folders % 100 == 0 —
        # including at 0, and again for every folder processed while the count
        # stayed on a multiple of 100. Report once per new century reached.
        if total_folders % 100 == 0 and total_folders > last_reported:
            last_reported = total_folders
            print(f" Progress: {total_folders} folders, {total_files} files scanned, {len(queue)} folders queued...")
    return items
def main():
    """Authenticate, crawl the library, and export over-length paths to CSV."""
    print("Authenticating...")
    app = build_app()
    hostname = "yourtenant.sharepoint.com"
    print("Getting site ID...")
    site_id = get_site_id(app, hostname)
    print("Getting drive ID...")
    drive_id = get_drive_id(app, site_id)
    # BUGFIX: the companion truncate script's DRIVE_ID is documented as "copy
    # from audit script output", but the original never printed it. Print it.
    print(f"Drive ID: {drive_id}")
    print(f"Fetching all items under '{FOLDER_PATH}'...")
    items = get_all_items(app, drive_id, FOLDER_PATH)
    print(f"\nRetrieved {len(items)} items. Analysing path lengths...")
    problems = []
    for item in items:
        # Simulate the path each item would occupy on the destination drive.
        # (item["path"] uses "/" separators; the character count is the same.)
        local_path = os.path.join(DEST_ROOT, item["path"])
        path_len = len(local_path)
        if path_len >= MAX_PATH:
            problems.append({
                "Type": "Folder" if item["is_folder"] else "File",
                "PathLength": path_len,
                "Over": path_len - MAX_PATH,
                "LocalPath": local_path,
                "SharePointPath": item["path"],
            })
    # Worst offenders first.
    problems.sort(key=lambda x: x["PathLength"], reverse=True)
    print(f"Done. {len(problems)} items exceed {MAX_PATH} characters.")
    if problems:
        os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["Type", "PathLength", "Over", "LocalPath", "SharePointPath"])
            writer.writeheader()
            writer.writerows(problems)
        print(f"Results exported to: {OUTPUT_CSV}")
        files = sum(1 for p in problems if p["Type"] == "File")
        folders = sum(1 for p in problems if p["Type"] == "Folder")
        print(f"\nBreakdown:\n Files: {files}\n Folders: {folders}")
        print(f" Worst: {problems[0]['LocalPath']} ({problems[0]['PathLength']} chars)")
    else:
        print(f"No path length issues found for destination: {DEST_ROOT}")
if __name__ == "__main__":
    main()

Auto-Truncate Script
Renames files in SharePoint that exceed MAX_PATH. Keeps the file extension, trims the filename stem to fit.
Requires: Files.ReadWrite.All permission on the app registration (in addition to Sites.Read.All and Files.Read.All).
Run the audit script first and point this at the resulting CSV.
import msal
import requests
import csv
import os
import time
# --- CONFIG ---
TENANT_ID = "your-tenant-id"            # Entra (directory) tenant ID from the app registration
CLIENT_ID = "your-client-id"            # Application (client) ID of the Entra app
CLIENT_SECRET = "your-client-secret"    # client secret VALUE (not the secret ID)
DRIVE_ID = "your-drive-id" # copy from audit script output or Graph Explorer
INPUT_CSV = "C:\\Temp\\PathAudit.csv"   # output CSV from the audit script
DEST_ROOT = "E:\\B"                     # must match the audit script's DEST_ROOT
MAX_PATH = 260                          # Windows MAX_PATH limit
DRY_RUN = True # set to False to actually rename
MAX_RETRIES = 5                         # attempts per Graph request before giving up
# --------------
def build_app():
    """Build the MSAL confidential-client app, raising if credentials are bad.

    Acquires a token once up front purely as a credential check; callers use
    get_token() for per-request tokens.
    """
    application = msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=f"https://login.microsoftonline.com/{TENANT_ID}",
        client_credential=CLIENT_SECRET,
    )
    auth = application.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    if "access_token" not in auth:
        raise Exception(f"Auth failed: {auth.get('error_description')}")
    return application
def get_token(app):
    """Return a current Graph bearer token (MSAL serves it from cache when still valid)."""
    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    return result["access_token"]
def patch_with_retry(url, app, payload):
    """PATCH *payload* (JSON) to *url*, retrying on connection drops and 429 throttling."""
    attempt = 0
    while attempt < MAX_RETRIES:
        try:
            bearer = get_token(app)
            response = requests.patch(
                url,
                headers={"Authorization": f"Bearer {bearer}", "Content-Type": "application/json"},
                json=payload,
                timeout=60,
            )
            response.raise_for_status()
            return response
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # Exponential backoff on transient network failures.
            wait = 2 ** attempt
            print(f" Retrying in {wait}s...")
            time.sleep(wait)
        except requests.exceptions.HTTPError as err:
            if err.response.status_code != 429:
                raise
            # Graph throttling: honour the Retry-After header, default 10s.
            wait = int(err.response.headers.get("Retry-After", 10))
            print(f" Throttled, waiting {wait}s...")
            time.sleep(wait)
        attempt += 1
    raise Exception(f"Failed after {MAX_RETRIES} attempts: {url}")
def get_item_id_by_path(app, drive_id, sp_path):
    """Look up the Graph item ID for a given SharePoint-relative path."""
    headers = {"Authorization": f"Bearer {get_token(app)}"}
    url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{sp_path}"
    response = requests.get(url, headers=headers, timeout=60)
    response.raise_for_status()
    # NOTE(review): unlike patch_with_retry, this request is not retried — a
    # transient failure raises, and the caller catches and logs it per file.
    return response.json()["id"]
def truncate_filename(name, max_len):
    """Trim the filename stem so the whole name fits within *max_len* characters.

    Keeps the extension intact. BUGFIX: a plain slice can leave the stem ending
    in a space or period, which Windows (and SharePoint) reject in filenames —
    strip those from the cut edge and re-validate the result.

    Raises ValueError if no valid name of that length can be produced.
    """
    stem, ext = os.path.splitext(name)
    allowed_stem = max_len - len(ext)
    if allowed_stem < 1:
        raise ValueError(f"Extension alone exceeds max length: {name}")
    trimmed = stem[:allowed_stem].rstrip(" .")
    if not trimmed:
        raise ValueError(f"Cannot produce a valid name within {max_len} chars: {name}")
    return trimmed + ext
def main():
    """Read the audit CSV and shorten over-length filenames in SharePoint via Graph."""
    app = build_app()
    with open(INPUT_CSV, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))
    # Files only — folders should be handled manually as renaming affects all children
    file_rows = [r for r in rows if r["Type"] == "File"]
    print(f"Processing {len(file_rows)} files. DRY_RUN={DRY_RUN}")
    for row in file_rows:
        sp_path = row["SharePointPath"]  # e.g. 1 - Archives/L/Folder/Longname.pdf
        local_path = row["LocalPath"]    # e.g. E:\B\1 - Archives\L\Folder\Longname.pdf
        over_by = int(row["Over"])
        old_name = os.path.basename(local_path)
        # How many chars the filename needs to lose (-1 for safety margin).
        max_name_len = len(old_name) - over_by - 1
        # BUGFIX: truncate_filename raises ValueError when the name cannot be
        # shortened enough; the original let that abort the entire run (even in
        # DRY_RUN). Report the row and continue with the remaining files.
        try:
            new_name = truncate_filename(old_name, max_name_len)
        except ValueError as e:
            print(f"\n SKIPPED {old_name}: {e}")
            continue
        print(f"\n OLD: {old_name}")
        print(f" NEW: {new_name}")
        if not DRY_RUN:
            try:
                item_id = get_item_id_by_path(app, DRIVE_ID, sp_path)
                patch_with_retry(
                    f"https://graph.microsoft.com/v1.0/drives/{DRIVE_ID}/items/{item_id}",
                    app,
                    {"name": new_name}
                )
                print(f" Renamed OK")
            except Exception as e:
                print(f" FAILED: {e}")
    print("\nDone.")
if __name__ == "__main__":
    main()

Important: Always run with DRY_RUN = True first to review proposed renames before setting it to False.
Be Collective — Audit Results (April 2026)
- Library: Shared Documents → 1 - Archives
- Total items crawled: 214,695
- Items exceeding 260 chars: 95 (all files, no folders)
- Worst path: 321 chars (61 over)
- Resolution: Client chose to rename the 95 files manually in SharePoint