Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Adding support for egg and zip files #1477

Merged
merged 19 commits into from
Sep 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions src/Analysis/Ast/Impl/Modules/Resolution/ModuleResolutionBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public IPythonModule GetOrLoadModule(string name) {
moduleRef = Modules.GetOrAdd(name, new ModuleRef());
return moduleRef.GetOrCreate(name, this);
}

public ModulePath FindModule(string filePath) {
var bestLibraryPath = string.Empty;

Expand All @@ -102,11 +102,24 @@ public ModulePath FindModule(string filePath) {
}

protected void ReloadModulePaths(in IEnumerable<string> rootPaths) {
foreach (var moduleFile in rootPaths.Where(Directory.Exists).SelectMany(p => PathUtils.EnumerateFiles(FileSystem, p))) {
PathResolver.TryAddModulePath(moduleFile.FullName, moduleFile.Length, false, out _);
foreach (var root in rootPaths) {
foreach (var moduleFile in PathUtils.EnumerateFiles(FileSystem, root)) {
PathResolver.TryAddModulePath(moduleFile.FullName, moduleFile.Length, false, out _);
}

if (PathUtils.TryGetZipFilePath(root, out var zipFilePath, out var _) && File.Exists(zipFilePath)) {
foreach (var moduleFile in PathUtils.EnumerateZip(zipFilePath)) {
if (!PathUtils.PathStartsWith(moduleFile.FullName, "EGG-INFO")) {
PathResolver.TryAddModulePath(
Path.Combine(zipFilePath,
PathUtils.NormalizePath(moduleFile.FullName)),
moduleFile.Length, false, out _
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what would be the fullModuleName returned by the TryAddModulePath?

Copy link
Contributor Author

@CTrando CTrando Sep 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fullModuleName returns the empty string, not sure if that's what is supposed to happen

Edit: sorry, I was actually looking at `__init__.py`; for modules like `test.zip/test/a.py` it returns `a` as the full module name.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that an expected full module name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I should have clarified that the root was test.zip/test. I ran the test on the simplejson library: for files like simplejson/decoder.py it returns simplejson.decoder, and for simplejson/tests/test_bitsize_int it gives simplejson.tests.test_bitsize_int, etc., so I believe this is correct.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. Then let's add an import test to ensure that import of the module from egg is resolved correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I did that in:
https://github.com/microsoft/python-language-server/pull/1477/files#diff-fc769198933ce6a497889cd518874a9dR897-R928

It loads up the simplejson library and checks to see if some variables have members

);
}
}
}
}
}

protected class ModuleRef {
private readonly object _syncObj = new object();
private IPythonModule _module;
Expand Down
21 changes: 13 additions & 8 deletions src/Analysis/Ast/Impl/get_search_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,18 @@ def clean(path):
BEFORE_SITE.discard(None)
AFTER_SITE.discard(None)

import zipfile

for p in sys.path:
p = clean(p)
if os.path.isdir(p):
if p in BEFORE_SITE:
print("%s|stdlib|" % p)
elif p in AFTER_SITE:
if p in SITE_PKGS:
print("%s|site|" % p)
else:
print("%s|pth|" % p)

if not os.path.isdir(p) and not (os.path.isfile(p) and zipfile.is_zipfile(p)):
continue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this so interpreter paths would include .egg and .zip

if p in BEFORE_SITE:
print("%s|stdlib|" % p)
elif p in AFTER_SITE:
if p in SITE_PKGS:
print("%s|site|" % p)
else:
print("%s|pth|" % p)
2 changes: 0 additions & 2 deletions src/Analysis/Ast/Test/ImportTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
// See the Apache Version 2.0 License for specific language governing
// permissions and limitations under the License.

using System.IO;
using System.Linq;
using System.Threading.Tasks;
using FluentAssertions;
Expand All @@ -23,7 +22,6 @@
using Microsoft.Python.Analysis.Types;
using Microsoft.Python.Core;
using Microsoft.Python.Parsing.Tests;
using Microsoft.Python.Tests.Utilities.FluentAssertions;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using TestUtilities;

Expand Down
8 changes: 7 additions & 1 deletion src/Core/Impl/IO/FileSystem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ public long FileSize(string path) {
return fileInfo.Length;
}

public string ReadAllText(string path) => File.ReadAllText(path);
public string ReadAllText(string filePath) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should remain `File.ReadAllText(filePath);`. I would have added `IFileInfo.IsZipFile` and allowed reading zip content from it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's hard to do this because sometimes we may get paths like
project/test.zip/a as the module import, and checking if it's a zip file will involve much of the same work done in this method.

I'll think about this some more, instead of putting the logic here I suppose I could put it where ReadAllText is called; however I may end up with duplicate code.

if (PathUtils.TryGetZipFilePath(filePath, out var zipPath, out var relativeZipPath)) {
return PathUtils.GetZipContent(zipPath, relativeZipPath);
}
return File.ReadAllText(filePath);
}

public void WriteAllText(string path, string content) => File.WriteAllText(path, content);
public IEnumerable<string> FileReadAllLines(string path) => File.ReadLines(path);
public void FileWriteAllLines(string path, IEnumerable<string> contents) => File.WriteAllLines(path, contents);
Expand Down
90 changes: 88 additions & 2 deletions src/Core/Impl/IO/PathUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
Expand Down Expand Up @@ -47,7 +48,6 @@ public static bool IsValidFileNameCharacter(char character)
public static bool HasEndSeparator(string path)
=> !string.IsNullOrEmpty(path) && IsDirectorySeparator(path[path.Length - 1]);


public static bool IsDirectorySeparator(char c) => Array.IndexOf(DirectorySeparators, c) != -1;

public static bool PathStartsWith(string s, string prefix)
Expand Down Expand Up @@ -117,7 +117,7 @@ public static string FindFile(IFileSystem fileSystem,
int depthLimit = 2,
IEnumerable<string> firstCheck = null
) {
if (!Directory.Exists(root)) {
if (!fileSystem.DirectoryExists(root)) {
return null;
}

Expand Down Expand Up @@ -185,12 +185,17 @@ public static IEnumerable<string> EnumerateDirectories(IFileSystem fileSystem, s
var path = queue.Dequeue();
path = EnsureEndSeparator(path);

if (!fileSystem.DirectoryExists(path)) {
continue;
}

IEnumerable<string> dirs = null;
try {
dirs = fileSystem.GetDirectories(path);
} catch (UnauthorizedAccessException) {
} catch (IOException) {
}

if (dirs == null) {
continue;
}
Expand Down Expand Up @@ -308,6 +313,87 @@ public static IEnumerable<IFileInfo> EnumerateFiles(IFileSystem fileSystem, stri
}
}

/// <summary>
/// Splits a path that points at, or into, a zip/egg archive into the archive
/// path and the path of the entry inside that archive.
/// </summary>
/// <param name="filePath">Path to examine, e.g. <c>..\test\test.zip\test\a.py</c>.</param>
/// <param name="zipPath">
/// Receives the archive path as reported by <see cref="IsZipFile"/>,
/// or an empty string when no archive segment is found.
/// </param>
/// <param name="relativeZipPath">
/// Receives the remainder of <paramref name="filePath"/> after the archive segment,
/// with every directory separator replaced by '/'; empty when no archive is found.
/// </param>
/// <returns>
/// True when <paramref name="filePath"/> itself or one of its ancestors is
/// recognized by <see cref="IsZipFile"/>; otherwise false.
/// </returns>
public static bool TryGetZipFilePath(string filePath, out string zipPath, out string relativeZipPath) {
    zipPath = string.Empty;
    relativeZipPath = string.Empty;

    // Cheap pre-check: without a ".zip" or ".egg" substring there is nothing to walk.
    var mayContainArchive = !string.IsNullOrEmpty(filePath)
        && (filePath.Contains(".zip") || filePath.Contains(".egg"));
    if (!mayContainArchive) {
        return false;
    }

    // Walk from the full path up through its parents until a segment is an archive.
    // e.g. \\test\\test.zip => \\test\\
    for (var candidate = filePath; !string.IsNullOrEmpty(candidate); candidate = GetParent(candidate)) {
        if (!IsZipFile(candidate, out zipPath)) {
            continue;
        }

        // File path is '..\\test\\test.zip\\test\\a.py'
        // Candidate is '..\\test\\test.zip'
        // Relative path in zip file becomes 'test/a.py'
        var entryPath = filePath.Substring(candidate.Length);

        // According to https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT,
        // paths inside zip files must use forward slashes.
        foreach (var separator in DirectorySeparators) {
            entryPath = entryPath.Replace(separator, '/');
        }

        relativeZipPath = entryPath;
        return true;
    }

    // The path mentioned .zip or .egg but no segment is an archive,
    // e.g. /tmp/tmp.zip.txt
    return false;
}

/// <summary>
/// Tells whether the given path names a zip (or egg) file, judged by its extension.
/// The path can be of the form ..\\test.zip or ..\\test.zip\\
/// </summary>
/// <param name="rawZipPath">Path to test; it is normalized and trimmed before the check.</param>
/// <param name="zipPath">
/// Receives the normalized, trimmed path when the extension is ".zip" or ".egg";
/// otherwise an empty string.
/// </param>
/// <returns>True when the extension is exactly ".zip" or ".egg".</returns>
public static bool IsZipFile(string rawZipPath, out string zipPath) {
    var trimmed = NormalizePathAndTrim(rawZipPath);
    var extension = Path.GetExtension(trimmed);

    // Ordinal, case-sensitive comparison against the two supported extensions.
    var isArchive = extension == ".zip" || extension == ".egg";

    zipPath = isArchive ? trimmed : string.Empty;
    return isArchive;
}

/// <summary>
/// Reads one entry of a zip archive as text.
/// e.g. for an archive laid out as
///     test.zip
///         a.py
///         b.py
/// the contents of a.py are obtained by passing "test.zip" and "a.py".
/// </summary>
/// <param name="zipPath">Path to the zip (or egg) file on disk.</param>
/// <param name="relativeZipPath">Name of the entry inside the archive.</param>
/// <returns>The full text of the entry, or null when the archive has no such entry.</returns>
public static string GetZipContent(string zipPath, string relativeZipPath) {
    using (var archive = ZipFile.OpenRead(zipPath)) {
        var entry = archive.GetEntry(relativeZipPath);
        if (entry != null) {
            using (var entryStream = entry.Open())
            using (var reader = new StreamReader(entryStream)) {
                return reader.ReadToEnd();
            }
        }

        // The archive opened fine but does not contain the requested entry.
        return null;
    }
}

/// <summary>
/// Lists the entries of the zip (or egg) archive at <paramref name="root"/>.
/// </summary>
/// <param name="root">Path to the archive file on disk.</param>
/// <returns>A snapshot list of the archive's entries.</returns>
/// <remarks>
/// The archive is closed before this method returns, so the returned entries
/// outlive it. NOTE(review): presumably only metadata such as the entry name
/// and length remains usable afterwards; confirm before opening an entry's stream.
/// </remarks>
public static IEnumerable<ZipArchiveEntry> EnumerateZip(string root) {
    using (var archive = ZipFile.OpenRead(root)) {
        // Copy eagerly so enumeration never touches the archive after it is closed.
        var snapshot = new List<ZipArchiveEntry>(archive.Entries);
        return snapshot;
    }
}

/// <summary>
/// Deletes a file, making multiple attempts and suppressing any
/// IO-related errors.
Expand Down
53 changes: 53 additions & 0 deletions src/Core/Test/PathUtilsTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright(c) Microsoft Corporation
// All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the License); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
//
// See the Apache Version 2.0 License for specific language governing
// permissions and limitations under the License.

using FluentAssertions;
using Microsoft.Python.Core.IO;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Python.Core.Tests {
    /// <summary>
    /// Tests for <c>PathUtils.TryGetZipFilePath</c>: each case checks the boolean
    /// result, the archive path and the '/'-separated entry path inside the archive.
    /// </summary>
    [TestClass]
    public class PathUtilsTests {
        /// <summary>
        /// Archive paths on a UNC share, pointing both at the archive and into it.
        /// </summary>
        [TestMethod, Priority(0)]
        public void ZipFileUNCPath() {
            // Path to the archive itself: nothing is left over as an entry path.
            PathUtils.TryGetZipFilePath(@"\\server\home\share\test.zip", out var zipPath, out var relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be(@"\\server\home\share\test.zip");
            relativeZipPath.Should().BeEmpty();

            // Path into the archive: the remainder becomes the entry path.
            PathUtils.TryGetZipFilePath(@"\\server\home\share\test.zip\test\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be(@"\\server\home\share\test.zip");
            relativeZipPath.Should().Be("test/a.py");

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/a.py");
        }

        /// <summary>
        /// Rooted (non-UNC) archive paths with entries at increasing depth.
        /// </summary>
        [TestMethod, Priority(0)]
        public void ZipFilePath() {
            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip", out var zipPath, out var relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().BeEmpty();

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/a.py");

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\foo\\baz.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/foo/baz.py");
        }
    }
}
98 changes: 97 additions & 1 deletion src/LanguageServer/Test/ImportsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
using System;
using System.IO;
using System.Threading.Tasks;
using Microsoft.Python.Analysis;
using Microsoft.Python.Analysis.Analyzer;
using Microsoft.Python.Analysis.Documents;
using Microsoft.Python.Analysis.Tests.FluentAssertions;
using Microsoft.Python.Analysis.Types;
using Microsoft.Python.Core.Text;
using Microsoft.Python.LanguageServer.Completion;
using Microsoft.Python.LanguageServer.Sources;
Expand Down Expand Up @@ -830,5 +831,100 @@ import module2
comps = cs.GetCompletions(analysis, new SourceLocation(4, 9));
comps.Should().HaveLabels("Y");
}

// Puts an archive (egg or zip) directly on the search path, analyzes
// BasicEggZip.py from the EggZip test data folder and checks that `i` is an int.
[DataRow("Basic.egg")]
[DataRow("Basic.zip")]
[DataTestMethod, Priority(0)]
public async Task BasicEggZip(string eggZipFilePath) {
    var root = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var archivePath = Path.Combine(root, eggZipFilePath);
    await CreateServicesAsync(root, PythonVersions.LatestAvailable3X, searchPaths: new[] { root, archivePath });

    var rdt = Services.GetService<IRunningDocumentTable>();
    var analyzer = Services.GetService<IPythonAnalyzer>();

    // Open the on-disk test script as a document under a test-specific URI.
    var scriptPath = Path.Combine(root, "BasicEggZip.py");
    var code = await File.ReadAllTextAsync(scriptPath);
    var module = rdt.OpenDocument(TestData.GetTestSpecificUri(scriptPath), code);

    await analyzer.WaitForCompleteAnalysisAsync();
    var analysis = await module.GetAnalysisAsync(-1);

    analysis.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
}

// Uses a folder *inside* an archive ("<archive>/test") as a search path,
// analyzes EggZipImports.py and checks the types of the variables it defines.
[DataRow("ZipImports.zip")]
[DataRow("EggImports.egg")]
[DataTestMethod, Priority(0)]
public async Task EggZipImports(string eggZipFilePath) {
    var root = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var rootInsideArchive = Path.Combine(root, eggZipFilePath, "test");
    await CreateServicesAsync(root, PythonVersions.LatestAvailable3X, searchPaths: new[] { root, rootInsideArchive });

    var rdt = Services.GetService<IRunningDocumentTable>();
    var analyzer = Services.GetService<IPythonAnalyzer>();

    // Open the on-disk test script as a document under a test-specific URI.
    var scriptPath = Path.Combine(root, "EggZipImports.py");
    var code = await File.ReadAllTextAsync(scriptPath);
    var module = rdt.OpenDocument(TestData.GetTestSpecificUri(scriptPath), code);

    await analyzer.WaitForCompleteAnalysisAsync();
    var analysis = await module.GetAnalysisAsync(-1);

    analysis.Should().HaveVariable("h").OfType("X");
    analysis.Should().HaveVariable("y").OfType(BuiltinTypeId.Int);
    analysis.Should().HaveVariable("b").OfType("A");
    analysis.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
}

// Uses a folder *inside* an archive ("<archive>/test") as a search path,
// analyzes EggZipRelativeImports.py and checks the types of the variables it defines.
[DataRow("ZipRelativeImports.zip")]
[DataRow("EggRelativeImports.egg")]
[DataTestMethod, Priority(0)]
public async Task EggZipRelativeImports(string eggZipFilePath) {
    var root = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var rootInsideArchive = Path.Combine(root, eggZipFilePath, "test");
    await CreateServicesAsync(root, PythonVersions.LatestAvailable3X, searchPaths: new[] { root, rootInsideArchive });

    var rdt = Services.GetService<IRunningDocumentTable>();
    var analyzer = Services.GetService<IPythonAnalyzer>();

    // Open the on-disk test script as a document under a test-specific URI.
    var scriptPath = Path.Combine(root, "EggZipRelativeImports.py");
    var code = await File.ReadAllTextAsync(scriptPath);
    var module = rdt.OpenDocument(TestData.GetTestSpecificUri(scriptPath), code);

    await analyzer.WaitForCompleteAnalysisAsync();
    var analysis = await module.GetAnalysisAsync(-1);

    analysis.Should().HaveVariable("h").OfType(BuiltinTypeId.Float);
    analysis.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
    analysis.Should().HaveVariable("s").OfType(BuiltinTypeId.Str);
}

// Puts a packaged simplejson archive (egg or zip) on the search path, analyzes
// an in-memory "import simplejson" document and checks the module's members.
[DataRow("simplejson.egg")]
[DataRow("simplejson.zip")]
[DataTestMethod, Priority(0)]
public async Task SimpleJsonEggZip(string eggZipFilePath) {
    var root = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var archivePath = Path.Combine(root, eggZipFilePath);
    await CreateServicesAsync(root, PythonVersions.LatestAvailable3X, searchPaths: new[] { root, archivePath });

    var rdt = Services.GetService<IRunningDocumentTable>();
    var analyzer = Services.GetService<IPythonAnalyzer>();

    // The document text is supplied inline; test.py only provides the path for the URI.
    const string code = "import simplejson";
    var scriptPath = Path.Combine(root, "test.py");
    var module = rdt.OpenDocument(TestData.GetTestSpecificUri(scriptPath), code);

    await analyzer.WaitForCompleteAnalysisAsync();
    var analysis = await module.GetAnalysisAsync(-1);

    analysis.Should().HaveVariable("simplejson").Which.Should().HaveMembers(
        "Decimal",
        "JSONDecodeError",
        "JSONDecoder",
        "JSONEncoder",
        "JSONEncoderForHTML",
        "OrderedDict",
        "RawJSON",
        "dump",
        "dumps",
        "load",
        "loads",
        "simple_first"
    );
}

}
}
Binary file not shown.
Binary file not shown.
5 changes: 5 additions & 0 deletions src/UnitTests/TestData/AstAnalysis/EggZip/BasicEggZip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Test data script: loaded by the BasicEggZip language-server test, which
# asserts that `i` is inferred as an int.
import sys
# NOTE(review): `test.a` is presumably provided by Basic.egg / Basic.zip on the
# test's search path -- the archives are binary, so confirm their contents.
import test.a

a = test.a.A()
i = a.test()
Binary file not shown.
Binary file not shown.
Loading